In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [4]:
# Read the raw medal table; the path is relative to the notebook's working
# directory.
orgdata = pd.read_csv("data/winter.csv")

In [5]:
# Preview the first five rows to check the columns that were loaded.
orgdata.head(n=5)

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1924,Chamonix,Biathlon,Biathlon,"BERTHET, G.",FRA,Men,Military Patrol,Bronze
1,1924,Chamonix,Biathlon,Biathlon,"MANDRILLON, C.",FRA,Men,Military Patrol,Bronze
2,1924,Chamonix,Biathlon,Biathlon,"MANDRILLON, Maurice",FRA,Men,Military Patrol,Bronze
3,1924,Chamonix,Biathlon,Biathlon,"VANDELLE, André",FRA,Men,Military Patrol,Bronze
4,1924,Chamonix,Biathlon,Biathlon,"AUFDENBLATTEN, Adolf",SUI,Men,Military Patrol,Gold


In [16]:
def aggbyname(x):
    """Summarise one group of medal rows as a single labelled record.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows belonging to one group. Must provide the 'Medal',
        'Discipline' and 'Year' columns.

    Returns
    -------
    pandas.Series
        Entries, in this order:
        'Gold', 'Silver', 'Bronze' -- number of rows whose 'Medal' equals
        that label;
        'Total' -- number of non-null 'Medal' values;
        '#domains' -- number of distinct 'Discipline' values;
        'domains' -- list of the distinct 'Discipline' values;
        'Year' -- median of 'Year', truncated to an int.
    """
    medals = x['Medal']
    disciplines = x['Discipline']

    # Per-tier counts first so the resulting Series keeps the
    # Gold / Silver / Bronze / Total / ... ordering.
    summary = {
        tier: medals[medals == tier].count()
        for tier in ('Gold', 'Silver', 'Bronze')
    }
    summary['Total'] = medals.count()
    summary['#domains'] = disciplines.nunique()
    summary['domains'] = list(disciplines.unique())
    summary['Year'] = int(x['Year'].median())
    return pd.Series(summary)

In [17]:
# Build one summary row per (Athlete, Country) pair using aggbyname.
grouped = (
    orgdata
    .groupby(by=['Athlete', 'Country'])
    .apply(aggbyname)
)

In [18]:
# Medal weights relative to a gold medal (gold counts as 1.0).
fsl = 0.6  # weight of a silver medal
fbr = 0.3  # weight of a bronze medal

# Weighted medal score, stored as a new 'Score' column on `grouped`.
grouped['Score'] = (
    fsl * grouped['Silver']
    + fbr * grouped['Bronze']
    + grouped['Gold']
)

In [19]:
# Rank rows best-first: highest Score, ties broken by Total, then by Gold.
sorted_data = grouped.sort_values(
    by=['Score', 'Total', 'Gold'],
    ascending=False,
)

In [20]:
# Move the index back into ordinary columns and renumber the rows from 0.
athlete_data = sorted_data.reset_index(drop=False)

In [21]:
# Keep only the 60 top-ranked rows.
athlete_data2 = athlete_data.head(n=60)

In [22]:
# Save the shortlist to disk; index=False leaves the row numbers out of the
# file.
athlete_data2.to_csv("athlete.csv", index=False)

In [23]:
# athlete_data2['Country'].unique()

In [24]:
# athlete_data2

In [25]:
# Reload the saved shortlist from athlete.csv and preview its first five rows.
athdata = pd.read_csv("athlete.csv")
athdata.head(n=5)

Unnamed: 0,Athlete,Country,Gold,Silver,Bronze,Total,#domains,domains,Year,Score
0,"BJOERNDALEN, Ole Einar",NOR,8,4,1,13,1,['Biathlon'],2006,10.7
1,"DAEHLIE, Björn",NOR,8,4,0,12,1,['Cross Country Skiing'],1994,10.4
2,"BJOERGEN, Marit",NOR,6,2,1,9,1,['Cross Country Skiing'],2010,7.5
3,"PECHSTEIN, Claudia",GER,5,2,2,9,1,['Speed skating'],1998,6.8
4,"JERNBERG, Sixten",SWE,4,3,2,9,1,['Cross Country Skiing'],1960,6.4


In [10]:
# Give every country that appears in `athdata` a fixed colour so the same
# country is drawn with the same colour wherever `map_dict` is used.
countries = athdata['Country'].unique()

# Palette assembled from plotly's qualitative colour sets. Set1 and Set2 are
# appended a second time to lengthen the list, so colours repeat once the
# first Set3 + Set2 + Set1 entries are used up.
colors = (
    list(px.colors.qualitative.Set3)
    + list(px.colors.qualitative.Set2)
    + list(px.colors.qualitative.Set1)
    + list(px.colors.qualitative.Set1)
    + list(px.colors.qualitative.Set2)
)

# Wrap around the palette with a modulo so that having more distinct
# countries than palette entries reuses colours instead of raising IndexError.
map_dict = {
    country: colors[position % len(colors)]
    for position, country in enumerate(countries)
}

In [12]:
# One colour per row of `athdata`, looked up from its country.
athdata_colors = np.array(
    [map_dict[country] for country in athdata['Country']]
)

In [47]:
# Bubble chart: one marker per row of `athdata`, placed at (Year, Score) and
# labelled with the athlete's name.
bubble = go.Scatter(
    x=athdata['Year'],
    y=athdata['Score'],
    text=athdata['Athlete'],
    mode='markers',
    marker={
        # Marker area grows with the cube of the score, which exaggerates
        # the differences between athletes; sizemin sets a lower bound.
        'size': athdata['Score'] ** 3 * 1.2,
        'sizemode': 'area',
        'sizemin': 4,
        'color': athdata_colors,
    },
)

# Standalone figure for the bubble trace; displaying it is left disabled.
fig = go.Figure(data=bubble)
# fig.show()
# fig.show()

In [48]:
# Country-level totals over the rows of `athdata`.
#
# numeric_only=True restricts the aggregation to the numeric columns so the
# result does not depend on the installed pandas version: older releases
# dropped text columns from a bare .sum() by default, whereas pandas 2.x
# would concatenate the 'Athlete' and 'domains' strings within each country.
# Note that 'Year' and '#domains' are summed as well; those sums are not
# meaningful and are not used by the cells below.
#
# NOTE(review): this rebinds `grouped`, which earlier in the notebook holds
# the per-athlete table.
grouped = (
    athdata
    .groupby('Country')
    .sum(numeric_only=True)
    .reset_index()
)

In [49]:
# Order rows from lowest to highest Score (ties broken by Total, then Gold).
# Presumably ascending so the best-scoring country ends up as the top bar of
# the horizontal bar chart -- confirm against the rendered figure.
sorted_data = grouped.sort_values(
    by=['Score', 'Total', 'Gold'],
    ascending=True,
)

In [50]:
# One colour per row of `sorted_data`, looked up from its country.
data_colors = np.array(
    [map_dict[country] for country in sorted_data['Country']]
)

In [51]:
# Horizontal bar chart: one bar per row of `sorted_data`, with the bar length
# given by Score and the colour taken from `data_colors`.
bar = go.Bar(
    x=sorted_data['Score'],
    y=sorted_data['Country'],
    orientation='h',
    marker={'color': data_colors},
    opacity=0.8,
)

# Render the bar chart on its own.
fig = go.Figure(data=bar)
fig.show()

In [53]:
# NOTE(review): by convention this import belongs with the other imports in
# the first cell of the notebook.
from plotly.subplots import make_subplots

# Combined figure: the bubble trace in a wide left panel (70% of the width)
# and the bar trace in a narrow right panel (30%).
fig_all = make_subplots(rows=1, cols=2, column_widths=[0.7, 0.3])
fig_all.add_trace(bubble, row=1, col=1)
fig_all.add_trace(bar, row=1, col=2)

# Kept as the final expression so the notebook displays the value it returns.
fig_all.update_layout(template='plotly_white', showlegend=False)