In [3]:
import pandas as pd
import numpy as np

In [4]:
orgdata = pd.read_csv('data/winter.csv')

In [5]:
orgdata['Sport'].unique()

array(['Biathlon', 'Bobsleigh', 'Curling', 'Ice Hockey', 'Skating',
       'Skiing', 'Luge'], dtype=object)

In [30]:
def aggbyname(x):
    names = {
        'Gold': x[x['Medal']=='Gold']['Medal'].count(),
        'Silver': x[x['Medal']=='Silver']['Medal'].count(),
        'Bronze': x[x['Medal']=='Bronze']['Medal'].count(),
        'Total':x['Medal'].count(),
        'domains': x['Discipline'].nunique(),
        }
    return pd.Series(names)

In [31]:
grouped = orgdata.groupby(['Athlete','Country']).apply(aggbyname)

In [32]:
fsl = 0.6
fbr = 0.3
grouped['Score'] = fsl * grouped['Silver'] + fbr * grouped['Bronze'] + grouped['Gold']
sorted_data = grouped.sort_values(['Score','Total','Gold'],ascending=False)

In [33]:
sorted_data_domain = grouped.sort_values(['domains'],ascending=False)
sorted_data_domain.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,Gold,Silver,Bronze,Total,domains,Score
Athlete,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"STRÖMSTAD, Thoralf",NOR,0,2,0,2,2,1.2
"FLAIM, Eric",USA,0,2,0,2,2,1.2
"BRODAHL, Sverre",NOR,0,1,1,2,2,0.9
"KÄLIN, Alois",SUI,0,1,1,2,2,0.9
"HOFFSBAKKEN, Olaf",NOR,0,2,0,2,2,1.2
"ERDMANN, Susi-Lisa",GER,0,1,2,3,2,1.2
"HEATON, John",USA,0,2,1,3,2,1.5
"HEATON, Jennison",USA,1,1,0,2,2,1.6
"WEISSENSTEINER, Gerda",ITA,1,0,1,2,2,1.3
"GROTTUMSBRAATEN, Johan",NOR,3,1,2,6,2,4.2


In [34]:
idx = list(sorted_data.index)

In [35]:
athnames = [each[0] for each in idx]
ctynames = [each[1] for each in idx]

In [36]:
data = sorted_data.reset_index()
data

Unnamed: 0,Athlete,Country,Gold,Silver,Bronze,Total,domains,Score
0,"BJOERNDALEN, Ole Einar",NOR,8,4,1,13,1,10.7
1,"DAEHLIE, Björn",NOR,8,4,0,12,1,10.4
2,"BJOERGEN, Marit",NOR,6,2,1,9,1,7.5
3,"PECHSTEIN, Claudia",GER,5,2,2,9,1,6.8
4,"JERNBERG, Sixten",SWE,4,3,2,9,1,6.4
...,...,...,...,...,...,...,...,...
3815,"ZURBRIGGEN, Silvan",SUI,0,0,1,1,1,0.3
3816,"van RIESSEN, Laurine",NED,0,0,1,1,1,0.3
3817,"ÖBERG, Hans Andreas",SWE,0,0,1,1,1,0.3
3818,"ÖDEGAARD, Reidar",NOR,0,0,1,1,1,0.3


In [37]:
# change country to code
ctys = data['Country'].unique()
cdict = {}
for i in range(len(ctys)):
    cdict[ctys[i]] = i

In [38]:
data['ccode'] = data['Country'].map(cdict)
data_top20 = data.head(35)
data_top20

Unnamed: 0,Athlete,Country,Gold,Silver,Bronze,Total,domains,Score,ccode
0,"BJOERNDALEN, Ole Einar",NOR,8,4,1,13,1,10.7,0
1,"DAEHLIE, Björn",NOR,8,4,0,12,1,10.4,0
2,"BJOERGEN, Marit",NOR,6,2,1,9,1,7.5,0
3,"PECHSTEIN, Claudia",GER,5,2,2,9,1,6.8,1
4,"JERNBERG, Sixten",SWE,4,3,2,9,1,6.4,2
5,"SMETANINA, Raisa",URS,3,5,1,9,1,6.3,3
6,"GROSS, Ricco",GER,4,3,1,8,1,6.1,1
7,"SKOBLIKOVA, Lidiya",URS,6,0,0,6,1,6.0,3
8,"THUNBERG, Clas",FIN,5,1,1,7,1,5.9,4
9,"AAMODT, Kjetil Andre",NOR,4,2,2,8,1,5.8,0


In [39]:
import plotly.graph_objects as go
import plotly.express as px

In [40]:
fig = go.Figure(
    data=[go.Scatter(
            x=data_top20['Total'] + data_top20['Score'],
            y=data_top20['Gold'] + data_top20['Score'],
            mode='markers',
            text=data_top20['Athlete'],
            marker=dict(
                size=data_top20['Score']**2*10,
                sizemode='area',
                sizemin=4,
                color = data_top20['ccode']
            )
        )]
)
fig.update_layout(width=600, height=600,template = 'plotly_white')
# fig.show()

In [41]:
data_top20['Total2'] = data_top20['Total'] + data_top20['Score']
data_top20['Gold2'] = data_top20['Gold'] + data_top20['Score']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [42]:
fig = px.scatter(data_top20,x='Total2',y='Gold2',size=data_top20['Score']**2*30,color='Country',color_discrete_sequence= px.colors.qualitative.Set1,opacity=0.6)
fig.update_layout(width=500, height=450,template = 'plotly_white')
fig.update_xaxes(nticks=10,showline=True, linewidth=1, linecolor='black')
fig.update_yaxes(nticks=10,showline=True, linewidth=1, linecolor='black')
fig.show()

In [24]:
cdata = data_top20.groupby(['Country']).sum().reset_index()

In [57]:
cdata = cdata.sort_values(['Score'])
cdata

Unnamed: 0,Country,Gold,Silver,Bronze,Total,domains,Score,ccode,Total2,Gold2
0,CAN,4,1,0,5,1,4.6,12,23.0,18.4
7,NED,3,2,2,7,1,4.8,11,33.6,14.4
1,CHN,4,1,1,6,1,4.9,10,29.4,19.6
9,RUS,4,1,1,6,1,4.9,9,29.4,19.6
2,CRO,4,2,0,6,1,5.2,8,31.2,20.8
6,ITA,2,3,5,10,1,5.3,6,53.0,10.6
4,GDR,3,4,1,8,1,5.7,5,45.6,17.1
12,USA,10,0,1,11,2,10.3,14,56.8,51.5
10,SWE,8,4,3,15,2,11.3,4,87.0,45.2
3,FIN,15,7,4,26,4,20.4,16,133.6,77.6


In [58]:
fig = px.bar(cdata,x='Score',y='Country',color_discrete_sequence= px.colors.qualitative.Set1,color='Country',opacity=0.6)
fig.update_layout(width=500, height=450,template = 'plotly_white')
fig.update_xaxes(nticks=10)
fig.show()

In [1]:
countries = orgdata['Country'].unique()
len(countries)

NameError: name 'orgdata' is not defined

In [60]:
all_c_data = data.groupby(['Country']).sum().reset_index()
all_c_data2 = all_c_data.sort_values(['Score'],ascending=False)
countries = all_c_data2['Country'].unique()

In [61]:
colors =  list(px.colors.qualitative.Set3) + list(px.colors.qualitative.Set2)+ list(px.colors.qualitative.Set1)+ list(px.colors.qualitative.Set1) +  list(px.colors.qualitative.Set2)
map_dict = {}
for i in range(len(countries)):
    map_dict[countries[i]] = colors[i]

In [62]:
data_colors = map(lambda x:map_dict[x],cdata['Country'])
data_colors = np.array(list(data_colors))

In [64]:
bar = go.Bar(
            x=cdata['Score'],
            y=cdata['Country'],
            orientation='h',
            marker = dict(
                 color = data_colors,
            ),
            opacity = 0.8
        )

fig = go.Figure(
    data = bar
)

fig.show()