In [1]:
# Import dependencies:

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
import gmaps
from config import gkey 
import geopandas as gpd

# Load Resources/athlete_events.csv into the main dataframe:

athlete_events_path = "Resources/athlete_events.csv"
complete_df = pd.read_csv(athlete_events_path)

In [2]:
# Load the NOC code -> region lookup table (NOC, region, notes) and display it:

noc_regions_path = "Resources/noc_regions.csv"
coordinates_df = pd.read_csv(noc_regions_path)
coordinates_df

Unnamed: 0,NOC,region,notes
0,AFG,Afghanistan,
1,AHO,Curacao,Netherlands Antilles
2,ALB,Albania,
3,ALG,Algeria,
4,AND,Andorra,
...,...,...,...
225,YEM,Yemen,
226,YMD,Yemen,South Yemen
227,YUG,Serbia,Yugoslavia
228,ZAM,Zambia,


In [3]:
# Load the low-resolution Natural Earth country polygons that ship with GeoPandas.
# NOTE(review): gpd.datasets was deprecated in GeoPandas 0.14 and removed in 1.0 --
# confirm which GeoPandas version this notebook is pinned to.
natural_earth_path = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(natural_earth_path)

# Keep the countries whose Natural Earth name appears in the region lookup table,
# then append the row named "United States of America" explicitly
# (presumably the lookup table spells that region differently -- TODO confirm).
matches_region = world['name'].isin(coordinates_df["region"].values)
is_united_states = world['name'] == "United States of America"
select_US = world[is_united_states]
select_country = pd.concat([world[matches_region], select_US])

# Pair every selected country name with the centroid of its geometry.
# NOTE(review): centroids are taken in the dataset's own CRS; GeoPandas warns
# when that CRS is geographic -- confirm the precision is acceptable here.
centroids = select_country.centroid
centroid_list = pd.concat([select_country["name"], centroids], axis=1)

# Export to csv (left disabled).
# NOTE(review): the following cell reads centroid_list.csv and merges on a
# 'region' column, which this frame does not have as built here -- confirm how
# that file was produced before re-enabling the export.
# centroid_list.to_csv(r'centroid_list.csv', index = False)

In [4]:
# Load the saved country centroids. The merges below need a 'region' column in
# this file; the Longitude/Latitude columns used later in the notebook also
# come from it.
coordinates = pd.read_csv("centroid_list.csv")

# This cell rebinds `coordinates_df` and `complete_df`, so it has to survive
# being executed more than once. Each merge therefore starts from a frame with
# the previously merged columns stripped; otherwise a re-run yields suffixed
# duplicates (region_x, Latitude_y, ...) and the following cells fail.
# On the first run nothing is stripped and the result is unchanged.
centroid_only_cols = coordinates.columns.drop("region")
noc_lookup = coordinates_df.drop(columns=centroid_only_cols, errors="ignore")

# Attach centroid coordinates to every NOC whose region has a centroid:
coordinates_df = pd.merge(coordinates, noc_lookup, on='region', how='inner')

# Keep only rows that have a medal, and drop any columns a previous run of
# this cell already merged in:
previously_merged_cols = coordinates_df.columns.drop("NOC")
complete_df = (
    complete_df
    .dropna(subset=["Medal"])
    .drop(columns=previously_merged_cols, errors="ignore")
)

# Export the medal-only records (written before the coordinates are attached):
complete_df.to_csv(r'complete.csv', index = False)

# Attach region and coordinates to each medal record via its NOC code;
# the inner join discards records whose NOC has no centroid entry.
complete_df = pd.merge(complete_df, coordinates_df, on='NOC' , how='inner')
complete_df

Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal,region,Longitude,Latitude,notes
0,4,Edgar Lindenau Aabye,M,34.0,,,Denmark/Sweden,DEN,1900 Summer,1900,Summer,Paris,Tug-Of-War,Tug-Of-War Men's Tug-Of-War,Gold,Denmark,9.876373,56.063934,
1,2347,Georg Albert Christian Albertsen,M,30.0,,,Denmark,DEN,1920 Summer,1920,Summer,Antwerpen,Gymnastics,"Gymnastics Men's Team All-Around, Free System",Gold,Denmark,9.876373,56.063934,
2,2882,Ejler Arild Emil Allert,M,30.0,,,Nykjbings paa Falster,DEN,1912 Summer,1912,Summer,Stockholm,Rowing,"Rowing Men's Coxed Fours, Inriggers",Gold,Denmark,9.876373,56.063934,
3,3684,Aage Jrgen Christian Andersen,M,22.0,,,Denmark,DEN,1906 Summer,1906,Summer,Athina,Football,Football Men's Football,Gold,Denmark,9.876373,56.063934,
4,3687,Anders Peter Andersen,M,26.0,,70.0,Denmark,DEN,1908 Summer,1908,Summer,London,Wrestling,"Wrestling Men's Middleweight, Greco-Roman",Bronze,Denmark,9.876373,56.063934,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31347,95868,Claudia Mara Poll Ahrens,F,27.0,191.0,76.0,Costa Rica,CRC,2000 Summer,2000,Summer,Sydney,Swimming,Swimming Women's 200 metres Freestyle,Bronze,Costa Rica,-84.175423,9.965671,
31348,95868,Claudia Mara Poll Ahrens,F,27.0,191.0,76.0,Costa Rica,CRC,2000 Summer,2000,Summer,Sydney,Swimming,Swimming Women's 400 metres Freestyle,Bronze,Costa Rica,-84.175423,9.965671,
31349,95871,Silvia Poll Ahrens,F,17.0,192.0,75.0,Costa Rica,CRC,1988 Summer,1988,Summer,Seoul,Swimming,Swimming Women's 200 metres Freestyle,Silver,Costa Rica,-84.175423,9.965671,
31350,104592,Ahmed Salah Houssein,M,31.0,180.0,64.0,Djibouti,DJI,1988 Summer,1988,Summer,Seoul,Athletics,Athletics Men's Marathon,Bronze,Djibouti,42.498020,11.773044,


In [5]:
# Split the medal records by season, then count medals per region
# (region, season and coordinates together form the grouping key):

group_keys = ['region', 'Season', 'Latitude', 'Longitude']
medal_df = complete_df[["Medal", "region", "Season", "Latitude", "Longitude"]]

summer_rows = medal_df[medal_df["Season"] == 'Summer']
winter_rows = medal_df[medal_df["Season"] == 'Winter']

# as_index=False keeps the grouping keys as ordinary columns; 'Medal' becomes the count.
medal_summer = summer_rows.groupby(group_keys, as_index=False)['Medal'].count()
medal_winter = winter_rows.groupby(group_keys, as_index=False)['Medal'].count()

In [9]:
# Plot heatmap for winter Olympic medals:

# Register the Google Maps API key before building the figure. `gkey` is
# imported at the top of the notebook, but no visible cell passes it to gmaps,
# so on a fresh kernel the map would be created unauthenticated.
# Calling configure again is harmless if it has already run.
gmaps.configure(api_key=gkey)

# CSS-style layout for the map widget:
figure_layout = {
    'width': '1400px',
    'height': '1000px',
    'border': '1px solid black',
    'padding': '0.5px',
    'margin': '0 auto 0 auto'
}

fig = gmaps.figure(layout=figure_layout, map_type="HYBRID")

# One heatmap point per row of medal_winter, weighted by its medal count:
locations = medal_winter[["Latitude", "Longitude"]]
medal_count = medal_winter["Medal"].astype(float)

# NOTE(review): per the gmaps docs, weights above max_intensity all map to the
# hottest colour -- confirm that a cap of 5 is intended for medal counts.
heat_layer = gmaps.heatmap_layer(locations, weights=medal_count,
                                 dissipating=False, max_intensity=5,
                                 point_radius=1)
fig.add_layer(heat_layer)
fig

Figure(layout=FigureLayout(border='1px solid black', height='1000px', margin='0 auto 0 auto', padding='0.5px',…

In [13]:
# Plot heatmap for summer Olympic medals:

# Register the Google Maps API key before building the figure. `gkey` is
# imported at the top of the notebook, but no visible cell passes it to gmaps,
# so on a fresh kernel the map would be created unauthenticated.
# Calling configure again is harmless if it has already run.
gmaps.configure(api_key=gkey)

# CSS-style layout for the map widget:
figure_layout = {
    'width': '1400px',
    'height': '1000px',
    'border': '1px solid black',
    'padding': '0.5px',
    'margin': '0 auto 0 auto'
}

fig = gmaps.figure(layout=figure_layout, map_type="HYBRID")

# One heatmap point per row of medal_summer, weighted by its medal count:
locations = medal_summer[["Latitude", "Longitude"]]
medal_count = medal_summer["Medal"].astype(float)

# NOTE(review): per the gmaps docs, weights above max_intensity all map to the
# hottest colour -- confirm that a cap of 5 is intended for medal counts.
heat_layer = gmaps.heatmap_layer(locations, weights=medal_count,
                                 dissipating=False, max_intensity=5,
                                 point_radius=1)
fig.add_layer(heat_layer)
fig

Figure(layout=FigureLayout(border='1px solid black', height='1000px', margin='0 auto 0 auto', padding='0.5px',…

In [8]:
# Save the per-region winter medal counts to disk, omitting the index column:
medal_winter.to_csv("medal_winter.csv", index=False)