# Loading Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib.ticker import MaxNLocator, FuncFormatter, PercentFormatter


# Read the athlete/event CSV; the path is relative to the notebook's working directory.
olymp = pd.read_csv("data/athlete_events.csv")

# Report (rows, columns), then preview the first 100 rows as the cell's rich output.
print(olymp.shape)
olymp.iloc[:100]


(271116, 15)


Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal
0,1,A Dijiang,M,24.0,180.0,80.0,China,CHN,1992 Summer,1992,Summer,Barcelona,Basketball,Basketball Men's Basketball,
1,2,A Lamusi,M,23.0,170.0,60.0,China,CHN,2012 Summer,2012,Summer,London,Judo,Judo Men's Extra-Lightweight,
2,3,Gunnar Nielsen Aaby,M,24.0,,,Denmark,DEN,1920 Summer,1920,Summer,Antwerpen,Football,Football Men's Football,
3,4,Edgar Lindenau Aabye,M,34.0,,,Denmark/Sweden,DEN,1900 Summer,1900,Summer,Paris,Tug-Of-War,Tug-Of-War Men's Tug-Of-War,Gold
4,5,Christine Jacoba Aaftink,F,21.0,185.0,82.0,Netherlands,NED,1988 Winter,1988,Winter,Calgary,Speed Skating,Speed Skating Women's 500 metres,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,32,Olav Augunson Aarnes,M,23.0,,,Norway,NOR,1912 Summer,1912,Summer,Stockholm,Athletics,Athletics Men's High Jump,
96,33,Mika Lauri Aarnikka,M,24.0,187.0,76.0,Finland,FIN,1992 Summer,1992,Summer,Barcelona,Sailing,Sailing Men's Two Person Dinghy,
97,33,Mika Lauri Aarnikka,M,28.0,187.0,76.0,Finland,FIN,1996 Summer,1996,Summer,Atlanta,Sailing,Sailing Men's Two Person Dinghy,
98,34,Jamale (Djamel-) Aarrass (Ahrass-),M,30.0,187.0,76.0,France,FRA,2012 Summer,2012,Summer,London,Athletics,"Athletics Men's 1,500 metres",


In [None]:
# Lookup from a three-letter NOC (National Olympic Committee) code to one of five
# continent labels: 'Americas', 'Europe', 'Asia', 'Africa', 'Oceania'.
#
# Every code appears exactly once. A dict literal silently keeps only the LAST value
# for a repeated key, so a code listed under two continents would hide which one is
# actually in effect.
# Some countries appear to be listed under two spellings (e.g. 'SIN'/'SGP',
# 'SLO'/'SVN', 'NEP'/'NPL') so that either form found in the data resolves.
noc_to_continent = {
    # Americas
    'USA':'Americas','CAN':'Americas','MEX':'Americas','BRA':'Americas','ARG':'Americas','COL':'Americas',
    'CHI':'Americas','URU':'Americas','PER':'Americas','VEN':'Americas','CUB':'Americas','JAM':'Americas',
    'BAH':'Americas','BAR':'Americas','BIZ':'Americas','BER':'Americas','BOL':'Americas','CRC':'Americas',
    'DMA':'Americas','DOM':'Americas','ECU':'Americas','GRN':'Americas','GUA':'Americas','GUY':'Americas',
    'HAI':'Americas','HON':'Americas','IVB':'Americas','LCA':'Americas','SKN':'Americas','SUR':'Americas',
    'TTO':'Americas','ANT':'Americas','PAR':'Americas','PAN':'Americas','PUR':'Americas','TRI':'Americas',
    'ESA':'Americas',

    # Europe
    'GBR':'Europe','FRA':'Europe','GER':'Europe','ITA':'Europe','ESP':'Europe','POR':'Europe','SUI':'Europe',
    'AUT':'Europe','BEL':'Europe','NED':'Europe','SWE':'Europe','NOR':'Europe','DEN':'Europe','FIN':'Europe',
    'IRL':'Europe','ISL':'Europe','GRE':'Europe','CRO':'Europe','SRB':'Europe','SVK':'Europe','SLO':'Europe',
    'SVN':'Europe','ALB':'Europe','AND':'Europe','ARM':'Europe','AZE':'Europe','BIH':'Europe','BUL':'Europe',
    'CYP':'Europe','CZE':'Europe','EST':'Europe','GEO':'Europe','HUN':'Europe','KOS':'Europe','LAT':'Europe',
    'LTU':'Europe','LUX':'Europe','MKD':'Europe','MDA':'Europe','MON':'Europe','MNE':'Europe','POL':'Europe',
    'ROU':'Europe','RUS':'Europe','SMR':'Europe','TUR':'Europe',
    'UKR':'Europe','URS':'Europe','YUG':'Europe','FRG':'Europe','GDR':'Europe','BOH':'Europe',

    # Asia
    'CHN':'Asia','JPN':'Asia','KOR':'Asia','PRK':'Asia','KAZ':'Asia','UZB':'Asia','TKM':'Asia','KGZ':'Asia',
    'TPE':'Asia','HKG':'Asia','MGL':'Asia','THA':'Asia','VIE':'Asia','MAS':'Asia','SIN':'Asia','PHI':'Asia',
    'BRN':'Asia','QAT':'Asia','UAE':'Asia','KUW':'Asia','KSA':'Asia','YEM':'Asia','IRQ':'Asia','IRI':'Asia',
    'IND':'Asia','PAK':'Asia','BAN':'Asia','SRI':'Asia','NPL':'Asia','AFG':'Asia','BHU':'Asia','MDV':'Asia',
    'LAO':'Asia','CAM':'Asia','INA':'Asia','JOR':'Asia','PLE':'Asia',
    'BRU':'Asia',
    'ISR':'Asia',      # Israel (competes in Europe for sports, but geographically Asia)
    'MAL':'Asia','MYA':'Asia','NEP':'Asia','OMA':'Asia','SGP':'Asia','SYR':'Asia','TJK':'Asia',
    'TLS':'Asia','VNM':'Asia',

    # Africa
    'KEN':'Africa','ETH':'Africa','RSA':'Africa','ALG':'Africa','EGY':'Africa','NGR':'Africa','CMR':'Africa',
    'CIV':'Africa','GHA':'Africa','SEN':'Africa','TUN':'Africa','MAR':'Africa','ANG':'Africa','BOT':'Africa',
    'ZAM':'Africa','ZIM':'Africa','UGA':'Africa','TAN':'Africa','RWA':'Africa','BDI':'Africa','SOM':'Africa',
    'DJI':'Africa','ERI':'Africa','CPV':'Africa','GAB':'Africa','COD':'Africa','CGO':'Africa','GUI':'Africa',
    'SLE':'Africa','MLI':'Africa','NIG':'Africa','CHA':'Africa','CAF':'Africa','BEN':'Africa','TOG':'Africa',
    'LBY':'Africa','MAD':'Africa','MRT':'Africa','SEY':'Africa','SUD':'Africa','SSD':'Africa',
    'COM':'Africa','LBR':'Africa','LES':'Africa','SWZ':'Africa','MAW':'Africa','GAM':'Africa','SYC':'Africa',
    'MAU':'Africa',

    # Oceania
    # 'TGA' is listed here only; it must not also appear under another continent.
    'AUS':'Oceania','NZL':'Oceania','FIJ':'Oceania','SAM':'Oceania','TGA':'Oceania','VAN':'Oceania',
    'PNG':'Oceania','NRU':'Oceania','KIR':'Oceania','PLW':'Oceania','MHL':'Oceania','GUM':'Oceania',

    # Misc / historical codes that map logically
    'EUN':'Europe',  # Unified Team 1992
    'YAR':'Asia',
    'UAR':'Africa',  # United Arab Republic historically Egypt+Syria
}

In [None]:
# One fixed colour per continent so each line keeps the same colour in the chart.
continent_colors = {
    "Europe":   "#0085C7",  # Blue
    "Asia":     "#F4C300",  # Yellow
    "Africa":   "#000000",  # Black
    "Oceania":  "#009F3D",  # Green
    "Americas": "#DF0024",  # Red
}

# The loaded frame is displayed with capitalised headers ("ID", "NOC", "Year",
# "Season"), while the lookups below use lower-case names. Work on a lower-cased
# copy so this cell runs on a fresh kernel whichever form `olymp` has; `olymp`
# itself is left untouched.
df = olymp.rename(columns=str.lower)
df = df[(df['season'] == 'Summer') & (df['year'] >= 1912)]

# One row per athlete per NOC per year (an athlete entering several events counts once).
df_unique = df.drop_duplicates(subset=['id', 'noc', 'year'])

# Number of distinct athletes each NOC sent in each year.
athletes_by_year = (
    df_unique.groupby(['noc', 'year'])['id']
             .nunique()
             .reset_index(name="num_athletes")
)

# Attach the continent label; NOCs absent from the lookup get NaN.
ay = athletes_by_year.copy()
ay['Continent'] = ay['noc'].map(noc_to_continent)

# NOC codes present in the data but not in the lookup. Their athletes are excluded
# from the shares below, so report them instead of dropping them silently.
missing = sorted(set(ay['noc']) - set(noc_to_continent.keys()))
if missing:
    unmapped_total = int(ay.loc[ay['Continent'].isna(), 'num_athletes'].sum())
    print(
        f"{len(missing)} NOC code(s) have no continent mapping and are excluded "
        f"({unmapped_total} athlete entries): {missing}"
    )

# Athletes per continent per year.
cont_year = (
    ay.dropna(subset=['Continent'])
      .groupby(['Continent', 'year'], as_index=False)['num_athletes']
      .sum()
)
# Each continent's share of that year's (mapped) athletes; shares sum to 1 within a year.
cont_year['pct'] = (
    cont_year['num_athletes']
    / cont_year.groupby('year')['num_athletes'].transform('sum')
)

# Apply the theme before creating the figure so the style takes effect on it.
sns.set_theme(style="whitegrid", context="talk")
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(
    data=cont_year, x='year', y='pct', hue='Continent',
    marker='o', linewidth=2, palette=continent_colors, ax=ax,
)
ax.set_title("Share of Global Olympic Athletes by Continent\nSummer Olympics (1912+)", pad=12)
ax.set_xlabel("Year")
ax.set_ylabel("Share of Athletes")
ax.yaxis.set_major_formatter(PercentFormatter(1.0))  # 'pct' is a 0-1 fraction
ax.xaxis.set_major_locator(MaxNLocator(integer=True))  # whole-year ticks only
ax.legend(title='Continent', bbox_to_anchor=(1.02, 1), loc='upper left')  # outside the axes
sns.despine()
plt.tight_layout()
plt.show()