In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the medal records and the population table from the local data/ folder.
# The previews further down show medal rows with Year/Country/Medal columns and
# a population table with 'ctry', 'code' and year-named columns (1960 ... 2020).
winterdata = pd.read_csv('data/winter.csv')
popdata = pd.read_csv('data/population.csv')

In [3]:
# Coverage per the original note "1924-1994-2014": first year 1924, last year 2014 (TODO: state what 1994 marks)
winterdata.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1924,Chamonix,Biathlon,Biathlon,"BERTHET, G.",FRA,Men,Military Patrol,Bronze
1,1924,Chamonix,Biathlon,Biathlon,"MANDRILLON, C.",FRA,Men,Military Patrol,Bronze
2,1924,Chamonix,Biathlon,Biathlon,"MANDRILLON, Maurice",FRA,Men,Military Patrol,Bronze
3,1924,Chamonix,Biathlon,Biathlon,"VANDELLE, André",FRA,Men,Military Patrol,Bronze
4,1924,Chamonix,Biathlon,Biathlon,"AUFDENBLATTEN, Adolf",SUI,Men,Military Patrol,Gold


In [4]:
# GDP table in long format: the preview below shows Entity, Code, Year and gdp columns.
gdpdata = pd.read_csv('data/gdp.csv')

In [5]:
# GDP data covers the years 1990-2017
gdpdata.head()

Unnamed: 0,Entity,Code,Year,gdp
0,Afghanistan,AFG,2002,1063.635574
1,Afghanistan,AFG,2003,1099.194507
2,Afghanistan,AFG,2004,1062.24936
3,Afghanistan,AFG,2005,1136.123214
4,Afghanistan,AFG,2006,1161.124889


In [6]:
popdata.head()

Unnamed: 0,ctry,code,1960,1961,1962,1963,1964,1965,1966,1967,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Aruba,ABW,54211.0,55438.0,56225.0,56695.0,57032.0,57360.0,57715.0,58055.0,...,102046.0,102560.0,103159.0,103774.0,104341.0,104872.0,105366.0,105845.0,106314.0,
1,Afghanistan,AFG,8996973.0,9169410.0,9351441.0,9543205.0,9744781.0,9956320.0,10174836.0,10399926.0,...,30117413.0,31161376.0,32269589.0,33370794.0,34413603.0,35383128.0,36296400.0,37172386.0,38041754.0,
2,Angola,AGO,5454933.0,5531472.0,5608539.0,5679458.0,5735044.0,5770570.0,5781214.0,5774243.0,...,24220661.0,25107931.0,26015780.0,26941779.0,27884381.0,28842484.0,29816748.0,30809762.0,31825295.0,
3,Albania,ALB,1608800.0,1659800.0,1711319.0,1762621.0,1814135.0,1864791.0,1914573.0,1965598.0,...,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,
4,Andorra,AND,13411.0,14375.0,15370.0,16412.0,17469.0,18549.0,19647.0,20758.0,...,83747.0,82427.0,80774.0,79213.0,78011.0,77297.0,77001.0,77006.0,77142.0,


In [7]:
def aggbycountry(x):
    """Tally the Gold, Silver and Bronze rows of one group of medal records.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows to tally; must contain a 'Medal' column.

    Returns
    -------
    pandas.Series
        Three counts labelled 'Gold', 'Silver', 'Bronze', in that order.
    """
    medal_col = x['Medal']
    tally = {}
    for medal in ('Gold', 'Silver', 'Bronze'):
        # Non-null count of the entries equal to this medal name.
        tally[medal] = medal_col[medal_col == medal].count()
    return pd.Series(tally)

def get_data_by_year(y):
    """Build the per-country medal / population / GDP table for one year.

    Reads the module-level frames ``winterdata``, ``gdpdata`` and ``popdata``.

    Parameters
    ----------
    y : int
        Year to select. It is compared against the 'Year' columns of
        ``winterdata`` and ``gdpdata``; ``str(y)`` is used as the population
        column name in ``popdata``.

    Returns
    -------
    pandas.DataFrame
        Columns 'country', 'abbr', 'Gold', 'Silver', 'Bronze', 'population',
        'gdp'. Rows with any missing value are dropped, so a country without
        a population or GDP match does not appear in the result.

    Raises
    ------
    KeyError
        If ``popdata`` has no column named ``str(y)``.
    """
    # NOTE(review): the 'Country' codes in winterdata (e.g. 'SUI') may not use
    # the same coding scheme as popdata['code'] / gdpdata['Code']; any country
    # whose code differs would be silently removed by the final dropna. Confirm.
    year_col = str(y)

    medal_rows = winterdata[winterdata['Year'] == y]
    gdp_for_year = gdpdata.loc[gdpdata['Year'] == y, ['Code', 'gdp']]

    medal_table = medal_rows.groupby('Country').apply(aggbycountry)
    # Expose the group key as a regular column so it can serve as the join key.
    medal_table['ctry'] = medal_table.index

    population = popdata[['ctry', 'code', year_col]]

    # Both sides carry a 'ctry' column, so this merge yields 'ctry_x' (the
    # medal-table code) and 'ctry_y' (the name column from popdata).
    with_population = medal_table.merge(
        population, how='left', left_on='ctry', right_on='code'
    )
    with_gdp = with_population.merge(
        gdp_for_year, how='left', left_on='ctry_x', right_on='Code'
    )

    tidy = with_gdp.rename(columns={
        'ctry_x': 'abbr',
        'ctry_y': 'country',
        year_col: 'population',
        'Code': 'c',
    })
    wanted = ['country', 'abbr', 'Gold', 'Silver', 'Bronze', 'population', 'gdp']
    return tidy[wanted].dropna()

In [71]:
# Build the merged table for each year of interest (calls run in the order
# 2014, 1992, 2002).
data_2014, data_1992, data_2002 = (
    get_data_by_year(year) for year in (2014, 1992, 2002)
)

In [9]:
def get_score(data, silver_weight=0.6, bronze_weight=0.3):
    """Return a copy of ``data`` with log2 columns and a weighted medal score.

    The score is ``silver_weight * Silver + bronze_weight * Bronze + Gold`` and
    is written into the 'Gold' column of the returned frame, which is where the
    plotting cells read it from. Because 'Gold' is replaced by the score, do
    not pass an already-scored frame back in: the silver/bronze terms would be
    added a second time.

    The input frame is not modified; all changes are made on a copy.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'Gold', 'Silver', 'Bronze', 'population' and 'gdp'.
    silver_weight : float, optional
        Weight applied to 'Silver' (default 0.6).
    bronze_weight : float, optional
        Weight applied to 'Bronze' (default 0.3).

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with added 'pop_log' and 'gdp_log' columns (base-2
        logarithms of 'population' and 'gdp') and 'Gold' replaced by the score.
    """
    # Work on a copy so the caller's frame keeps its raw medal counts.
    scored = data.copy()
    scored['pop_log'] = np.log2(scored['population'])
    scored['gdp_log'] = np.log2(scored['gdp'])
    scored['Gold'] = (
        silver_weight * scored['Silver']
        + bronze_weight * scored['Bronze']
        + scored['Gold']
    )
    return scored

In [72]:
# Score each year's table (applied in the order 2014, 1992, 2002) and rebind
# the names to the scored frames.
data_2014, data_1992, data_2002 = map(
    get_score, (data_2014, data_1992, data_2002)
)

In [11]:
import plotly.graph_objects as go
import plotly.express as px

In [74]:
# create color map
# Order matters for colour assignment: the 2014 countries come first, then the
# four hand-picked names, so countries already mapped keep the same colour.
# Any country that only appears in the 2002 / 1992 tables is appended after
# them, so the per-year colour lookups cannot fail on a missing key.
countries = list(data_2014['country'])
countries.extend(['Spain','Luxembourg','New Zealand','Estonia'])
for extra_frame in (data_2002, data_1992):
    countries.extend(extra_frame['country'])
# Drop repeats while keeping first-seen order.
countries = list(dict.fromkeys(countries))

colors =  list(px.colors.qualitative.Set3) + list(px.colors.qualitative.Set2)+ list(px.colors.qualitative.Set1)
# Wrap around the palette instead of raising IndexError when there are more
# countries than colours (colours then repeat).
map_dict = {
    country: colors[i % len(colors)]
    for i, country in enumerate(countries)
}


In [75]:
map_dict

{'Australia': 'rgb(141,211,199)',
 'Austria': 'rgb(255,255,179)',
 'Belarus': 'rgb(190,186,218)',
 'Canada': 'rgb(251,128,114)',
 'China': 'rgb(128,177,211)',
 'Czech Republic': 'rgb(253,180,98)',
 'Finland': 'rgb(179,222,105)',
 'France': 'rgb(252,205,229)',
 'United Kingdom': 'rgb(217,217,217)',
 'Italy': 'rgb(188,128,189)',
 'Japan': 'rgb(204,235,197)',
 'Kazakhstan': 'rgb(255,237,111)',
 'Korea, Rep.': 'rgb(102,194,165)',
 'Norway': 'rgb(252,141,98)',
 'Poland': 'rgb(141,160,203)',
 'Russian Federation': 'rgb(231,138,195)',
 'Slovak Republic': 'rgb(166,216,84)',
 'Sweden': 'rgb(255,217,47)',
 'Ukraine': 'rgb(229,196,148)',
 'United States': 'rgb(179,179,179)',
 'Spain': 'rgb(228,26,28)',
 'Luxembourg': 'rgb(55,126,184)',
 'New Zealand': 'rgb(77,175,74)',
 'Estonia': 'rgb(152,78,163)'}

In [76]:
def _lookup_colors(frame):
    """Return the map_dict colour of each row's country, in row order, as a NumPy array."""
    return np.array([map_dict[name] for name in frame['country']])


data_colors_2014 = _lookup_colors(data_2014)
data_colors_1992 = _lookup_colors(data_1992)
data_colors_2002 = _lookup_colors(data_2002)

In [80]:
# Bubble chart for 2014: x is the 'gdp' column, y is the weighted medal score
# (get_score stores it in the 'Gold' column), bubble area follows population.
fig = go.Figure(
    data=[go.Scatter(
        x=data_2014['gdp'],
        y=data_2014['Gold'],
        mode='markers',
        text=data_2014['country'],
        marker=dict(
            # Population is scaled down before being used as the marker size.
            size=data_2014['population']/1200000,
            sizemode='area',
            # NOTE(review): looks like plotly's bubble-scaling recipe
            # 2 * max_size / max_pixel_diameter**2 with 100 and 40 plugged in;
            # confirm the intended values.
            sizeref=2.*100/(40.**2),
            sizemin=4,
            color=data_colors_2014,
            opacity=0.5,
        ),
    )]
)

# Axis ranges are fixed so the per-year charts share the same scale.
fig.update_layout(
    width=800,
    height=560,
    template='plotly_white',
    title=dict(text='2014: weighted medal score vs GDP (bubble area ~ population)'),
    xaxis=dict(title=dict(text='GDP'), range=[-10*1000, 65*1000]),
    yaxis=dict(title=dict(text='Weighted medal score'), range=[-20, 90]),
)
fig.show()

In [81]:
# Bubble chart for 2002: x is the 'gdp' column, y is the weighted medal score
# (get_score stores it in the 'Gold' column), bubble area follows population.
fig = go.Figure(
    data=[go.Scatter(
        x=data_2002['gdp'],
        y=data_2002['Gold'],
        mode='markers',
        text=data_2002['country'],
        marker=dict(
            # Population is scaled down before being used as the marker size.
            size=data_2002['population']/1200000,
            sizemode='area',
            # NOTE(review): looks like plotly's bubble-scaling recipe
            # 2 * max_size / max_pixel_diameter**2 with 100 and 40 plugged in;
            # confirm the intended values.
            sizeref=2.*100/(40.**2),
            sizemin=4,
            color=data_colors_2002,
            opacity=0.5,
        ),
    )]
)

# Axis ranges are fixed so the per-year charts share the same scale.
fig.update_layout(
    width=800,
    height=560,
    template='plotly_white',
    title=dict(text='2002: weighted medal score vs GDP (bubble area ~ population)'),
    xaxis=dict(title=dict(text='GDP'), range=[-10*1000, 65*1000]),
    yaxis=dict(title=dict(text='Weighted medal score'), range=[-20, 90]),
)
fig.show()

In [82]:
# Bubble chart for 1992: x is the 'gdp' column, y is the weighted medal score
# (get_score stores it in the 'Gold' column), bubble area follows population.
fig = go.Figure(
    data=[go.Scatter(
        x=data_1992['gdp'],
        y=data_1992['Gold'],
        mode='markers',
        text=data_1992['country'],
        marker=dict(
            # Population is scaled down before being used as the marker size.
            size=data_1992['population']/1200000,
            sizemode='area',
            # NOTE(review): looks like plotly's bubble-scaling recipe
            # 2 * max_size / max_pixel_diameter**2 with 100 and 40 plugged in;
            # confirm the intended values.
            sizeref=2.*100/(40.**2),
            sizemin=4,
            color=data_colors_1992,
            opacity=0.5,
        ),
    )]
)

# Axis ranges are fixed so the per-year charts share the same scale.
fig.update_layout(
    width=800,
    height=560,
    template='plotly_white',
    title=dict(text='1992: weighted medal score vs GDP (bubble area ~ population)'),
    xaxis=dict(title=dict(text='GDP'), range=[-10*1000, 65*1000]),
    yaxis=dict(title=dict(text='Weighted medal score'), range=[-20, 90]),
)
fig.show()

In [61]:
# Ranked bar chart of the 2014 weighted medal score (get_score stores the
# score in the 'Gold' column).
data_sorted = data_2014.sort_values(['Gold'],ascending=False)
# Row-ordered colours; no longer needed by the chart below but kept in case a
# later cell reads this name.
data_colors_sorted = np.array([map_dict[name] for name in data_sorted['country']])
fig = px.bar(
    data_sorted,
    x="Gold",
    y="country",
    color="country",
    # Pin each country to its colour by name. A positional colour sequence is
    # only right when every row is a different country.
    color_discrete_map=map_dict,
    opacity=0.7,
    labels={"Gold": "Weighted medal score", "country": "Country"},
    title="2014: weighted medal score by country",
)
fig.update_layout(width=600, height=560,template = 'plotly_white')
fig.show()