In [1]:
# Dependencies
import pandas as pd
from pymongo import MongoClient

In [2]:
# Locations of the three input CSV files, given relative to the
# notebook's working directory.
dictionary_data_to_load, summer_data_to_load, winter_data_to_load = (
    "Data/dictionary.csv",
    "Data/summer.csv",
    "Data/winter.csv",
)

In [3]:
# Load the dictionary, summer and winter CSV files into DataFrames
# (read in that order), then preview the first winter rows.
dictionary_data, summer_data, winter_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        dictionary_data_to_load,
        summer_data_to_load,
        winter_data_to_load,
    )
)
winter_data.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1924,Chamonix,Biathlon,Biathlon,"BERTHET, G.",FRA,Men,Military Patrol,Bronze
1,1924,Chamonix,Biathlon,Biathlon,"MANDRILLON, C.",FRA,Men,Military Patrol,Bronze
2,1924,Chamonix,Biathlon,Biathlon,"MANDRILLON, Maurice",FRA,Men,Military Patrol,Bronze
3,1924,Chamonix,Biathlon,Biathlon,"VANDELLE, André",FRA,Men,Military Patrol,Bronze
4,1924,Chamonix,Biathlon,Biathlon,"AUFDENBLATTEN, Adolf",SUI,Men,Military Patrol,Gold


In [4]:
# Stack the summer and winter frames into one dataset.
# `keys` adds an outer index level labelling every row with the frame it
# came from ('Summer' or 'Winter'); each frame's own row index is kept as
# the inner level.
concat_frames = [summer_data, winter_data]
season_labels = ['Summer', 'Winter']
summer_winter_index = pd.concat(objs=concat_frames, keys=season_labels)
summer_winter_index

Unnamed: 0,Unnamed: 1,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
Summer,0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
Summer,1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
Summer,2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
Summer,3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
Summer,4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver
...,...,...,...,...,...,...,...,...,...,...
Winter,5765,2014,Sochi,Skiing,Snowboard,"JONES, Jenny",GBR,Women,Slopestyle,Bronze
Winter,5766,2014,Sochi,Skiing,Snowboard,"ANDERSON, Jamie",USA,Women,Slopestyle,Gold
Winter,5767,2014,Sochi,Skiing,Snowboard,"MALTAIS, Dominique",CAN,Women,Snowboard Cross,Silver
Winter,5768,2014,Sochi,Skiing,Snowboard,"SAMKOVA, Eva",CZE,Women,Snowboard Cross,Gold


In [5]:
# Name the outer index level "Season" (the inner level gets an empty name),
# then move only that outer level out of the index into a regular column.
# The inner level stays behind as the frame's index.
summer_winter_data = (
    summer_winter_index
    .rename_axis(['Season', ''])
    .reset_index(level='Season')
)
summer_winter_data

Unnamed: 0,Season,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
,,,,,,,,,,
0,Summer,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,Summer,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,Summer,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,Summer,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,Summer,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver
...,...,...,...,...,...,...,...,...,...,...
5765,Winter,2014,Sochi,Skiing,Snowboard,"JONES, Jenny",GBR,Women,Slopestyle,Bronze
5766,Winter,2014,Sochi,Skiing,Snowboard,"ANDERSON, Jamie",USA,Women,Slopestyle,Gold
5767,Winter,2014,Sochi,Skiing,Snowboard,"MALTAIS, Dominique",CAN,Women,Snowboard Cross,Silver


In [6]:
# Attach the full country name from the dictionary to every medal row.
# The medal table's 'Country' column holds a code, which is matched against
# the dictionary's 'Code' column. Because both frames have a 'Country'
# column, pandas suffixes them: 'Country_x' (the code from the medal table)
# and 'Country_y' (the name from the dictionary).
#
# how='left' keeps every medal row; a code with no dictionary entry ends up
# with NaN in 'Country_y' and 'Code'.
# validate='many_to_one' makes pandas raise a MergeError if a code appears
# more than once in the dictionary, instead of silently duplicating medal
# rows (which would inflate any later medal counts).
country_lookup = dictionary_data[['Country', 'Code']]
merged_summer_winter_dictionary_data = pd.merge(
    summer_winter_data,
    country_lookup,
    how='left',
    left_on='Country',
    right_on='Code',
    validate='many_to_one',
)
merged_summer_winter_dictionary_data

Unnamed: 0,Season,Year,City,Sport,Discipline,Athlete,Country_x,Gender,Event,Medal,Country_y,Code
0,Summer,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold,Hungary,HUN
1,Summer,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver,Austria,AUT
2,Summer,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze,Greece,GRE
3,Summer,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold,Greece,GRE
4,Summer,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver,Greece,GRE
...,...,...,...,...,...,...,...,...,...,...,...,...
36930,Winter,2014,Sochi,Skiing,Snowboard,"JONES, Jenny",GBR,Women,Slopestyle,Bronze,United Kingdom,GBR
36931,Winter,2014,Sochi,Skiing,Snowboard,"ANDERSON, Jamie",USA,Women,Slopestyle,Gold,United States,USA
36932,Winter,2014,Sochi,Skiing,Snowboard,"MALTAIS, Dominique",CAN,Women,Snowboard Cross,Silver,Canada,CAN
36933,Winter,2014,Sochi,Skiing,Snowboard,"SAMKOVA, Eva",CZE,Women,Snowboard Cross,Gold,Czech Republic,CZE


In [7]:
# Drop the columns that are not carried into the final dataset:
# the city, discipline and athlete fields, plus the two country-code
# columns ('Country_x' and 'Code'), leaving 'Country_y' from the merge.
non_essential_columns = ['City', 'Discipline', 'Athlete', 'Country_x', 'Code']
column_modification = merged_summer_winter_dictionary_data.drop(
    columns=non_essential_columns
)
column_modification

Unnamed: 0,Season,Year,Sport,Gender,Event,Medal,Country_y
0,Summer,1896,Aquatics,Men,100M Freestyle,Gold,Hungary
1,Summer,1896,Aquatics,Men,100M Freestyle,Silver,Austria
2,Summer,1896,Aquatics,Men,100M Freestyle For Sailors,Bronze,Greece
3,Summer,1896,Aquatics,Men,100M Freestyle For Sailors,Gold,Greece
4,Summer,1896,Aquatics,Men,100M Freestyle For Sailors,Silver,Greece
...,...,...,...,...,...,...,...
36930,Winter,2014,Skiing,Women,Slopestyle,Bronze,United Kingdom
36931,Winter,2014,Skiing,Women,Slopestyle,Gold,United States
36932,Winter,2014,Skiing,Women,Snowboard Cross,Silver,Canada
36933,Winter,2014,Skiing,Women,Snowboard Cross,Gold,Czech Republic


In [8]:
# Give the merged country column its final name: "Country_y" -> "Country"
olympic_data = column_modification.rename(
    {'Country_y': 'Country'},
    axis='columns',
)
olympic_data

Unnamed: 0,Season,Year,Sport,Gender,Event,Medal,Country
0,Summer,1896,Aquatics,Men,100M Freestyle,Gold,Hungary
1,Summer,1896,Aquatics,Men,100M Freestyle,Silver,Austria
2,Summer,1896,Aquatics,Men,100M Freestyle For Sailors,Bronze,Greece
3,Summer,1896,Aquatics,Men,100M Freestyle For Sailors,Gold,Greece
4,Summer,1896,Aquatics,Men,100M Freestyle For Sailors,Silver,Greece
...,...,...,...,...,...,...,...
36930,Winter,2014,Skiing,Women,Slopestyle,Bronze,United Kingdom
36931,Winter,2014,Skiing,Women,Slopestyle,Gold,United States
36932,Winter,2014,Skiing,Women,Snowboard Cross,Silver,Canada
36933,Winter,2014,Skiing,Women,Snowboard Cross,Gold,Czech Republic


In [9]:
# Open a MongoDB client against the local server.
# 'localhost' and 27017 are the host/port pymongo uses when none are given.
client = MongoClient('localhost', 27017)

In [10]:
# Get handles to the target database and collection
# (MongoDB creates both lazily, on the first write).
db = client.get_database('Olympics')
collection = db.get_collection('SportsEventsAndMedals')

In [11]:
# Turn the DataFrame into a list of dicts, one per row, keyed by column name
olympic_data_dict = olympic_data.to_dict(orient="records")

In [12]:
# Insert all records into the collection in one bulk call.
# NOTE(review): nothing here clears the collection first, so re-running the
# notebook presumably appends the same records again — confirm whether the
# collection should be emptied beforehand.
collection.insert_many(documents=olympic_data_dict)

<pymongo.results.InsertManyResult at 0x1eb4b34ae88>