# Uppgift 1: Theodors område

### Import libraries and load the data

In [187]:
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

# Search every folder below the parent of the working directory for the cleaned
# dataset, so the notebook does not depend on which sub-folder it is run from.
print("Reading file...")
olympics_csv = next(Path.cwd().parent.rglob("olympics_clean.csv"), None)

if olympics_csv is None:
    # Fail fast: merely printing here would leave `olympics` undefined (or, on a
    # warm kernel, silently stale) and the cells below would break or mislead.
    raise FileNotFoundError(
        f"olympics_clean.csv not found anywhere under {Path.cwd().parent}"
    )

# 'Name_hash' is dropped right after loading; it is not used in this analysis.
olympics = pd.read_csv(olympics_csv).drop(columns='Name_hash')
print("file found.")

olympics.head(1)

Reading file...
file found.


Unnamed: 0,ID,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal,region,notes
0,1,M,24.0,180.0,80.0,China,CHN,1992 Summer,1992,Summer,Barcelona,Basketball,Basketball Men's Basketball,,China,


Create functions:

In [188]:
def summer(df):
    """Return the rows of ``df`` that belong to Summer Games.

    If ``df`` has a 'Season' column, that column is used directly, which is
    exact. Otherwise the function falls back to keeping rows whose 'Year'
    occurs among the Summer rows of the global ``olympics`` frame; ``df`` must
    then have a 'Year' column.

    NOTE: the year-based fallback cannot separate the seasons for any year in
    which ``olympics`` holds both Summer and Winter rows (historically both
    Games were held in the same year through 1992), so rows from the other
    season are kept for those years. Pass a frame that still has 'Season'
    whenever a strict split is needed.
    """
    if 'Season' in df.columns:
        return df[df['Season'] == 'Summer']

    # De-duplicate the years once instead of matching against one entry per athlete row.
    summer_years = olympics.loc[olympics['Season'] == 'Summer', 'Year'].unique()
    return df[df['Year'].isin(summer_years)]

def winter(df):
    """Return the rows of ``df`` that belong to Winter Games.

    If ``df`` has a 'Season' column, that column is used directly, which is
    exact. Otherwise the function falls back to keeping rows whose 'Year'
    occurs among the Winter rows of the global ``olympics`` frame; ``df`` must
    then have a 'Year' column.

    NOTE: the year-based fallback cannot separate the seasons for any year in
    which ``olympics`` holds both Summer and Winter rows (historically both
    Games were held in the same year through 1992), so rows from the other
    season are kept for those years. Pass a frame that still has 'Season'
    whenever a strict split is needed.
    """
    if 'Season' in df.columns:
        return df[df['Season'] == 'Winter']

    # De-duplicate the years once instead of matching against one entry per athlete row.
    winter_years = olympics.loc[olympics['Season'] == 'Winter', 'Year'].unique()
    return df[df['Year'].isin(winter_years)]

def get_total_medals(df):
    """Sum the 'Medal' column per 'NOC' and rank the result, largest total first.

    Returns a new DataFrame with the columns 'NOC' and 'Medal' and a fresh
    0-based index; the input frame is not modified.
    """
    medals_per_noc = df.groupby('NOC')['Medal'].sum()
    ranked = medals_per_noc.sort_values(ascending=False)
    # reset_index turns the 'NOC' index back into an ordinary column.
    return ranked.reset_index()

def get_top_5(df):
    """Return the NOC codes of the five countries with the most total medals.

    The ranking comes from ``get_total_medals(df)``, so ``df`` needs the 'NOC'
    and 'Medal' columns. 'NOR' is appended when Norway is not already among
    the five, which means the list holds five or six codes for inputs with at
    least five countries.
    """
    top_5 = get_total_medals(df).head(5)['NOC'].tolist()

    # Norway is the country under study, so it is always part of the comparison.
    if 'NOR' not in top_5:
        top_5.append('NOR')

    return top_5

Create useful DataFrames:

In [189]:
# Medal rows per country and year, both seasons combined. `count()` counts the
# non-null 'Medal' entries, so countries without medals in a year appear with 0.
countries_medals = (
    olympics.groupby(['Year', 'NOC'])['Medal']
    .count()
    .reset_index()
    .sort_values(by=['Year', 'Medal'], ascending=[True, False])
)

# Season-specific counts are taken from the raw rows via the 'Season' column.
# `countries_medals` no longer has that column, and filtering it by year cannot
# separate the seasons for years in which both Games were held, so medals from
# the other season would leak into the totals below.
countries_summer_medals = (
    olympics[olympics['Season'] == 'Summer']
    .groupby(['Year', 'NOC'])['Medal']
    .count()
    .reset_index()
)
countries_winter_medals = (
    olympics[olympics['Season'] == 'Winter']
    .groupby(['Year', 'NOC'])['Medal']
    .count()
    .reset_index()
)

countries_total_medals = get_total_medals(countries_medals)
countries_total_summer_medals = get_total_medals(countries_summer_medals)
countries_total_winter_medals = get_total_medals(countries_winter_medals)

top_5_countries = get_top_5(countries_total_medals)
top_5_summer_countries = get_top_5(countries_total_summer_medals)
top_5_winter_countries = get_top_5(countries_total_winter_medals)

countries_total_medals

Unnamed: 0,NOC,Medal
0,USA,5637
1,URS,2503
2,GER,2165
3,GBR,2068
4,FRA,1777
...,...,...
225,VAN,0
226,YEM,0
227,YAR,0
228,VNM,0


## Norges medaljer över tid

In [190]:
# Norway's medal count per Olympic year, taken from the combined per-year counts.
# Title and label are set so the figure is readable on its own, like the figures below.
px.line(
    data_frame=countries_medals.loc[countries_medals['NOC'] == 'NOR'],
    x='Year',
    y='Medal',
    title="Norway's medals over time",
    labels={'Medal': 'Medals'},
)

In [191]:
# One line per country: the overall top-5 medal winners plus Norway, with the
# legend ordered by overall rank.
px.line(
    countries_medals.loc[countries_medals['NOC'].isin(top_5_countries)],
    x='Year',
    y='Medal',
    color='NOC',
    title='Top 5 countries with the most total medals and Norway',
    labels={'Medal': 'Medals', 'NOC': 'Country'},
    category_orders={'NOC': top_5_countries},
)

In [192]:
# Summer medals per year for the top-5 summer countries plus Norway.
# The counts are taken from the raw rows using 'Season': filtering the combined
# per-year counts by year would also keep Winter medals for years in which both
# Games were held.
px.line(
    data_frame=(
        olympics[
            (olympics['Season'] == 'Summer')
            & olympics['NOC'].isin(top_5_summer_countries)
        ]
        .groupby(['Year', 'NOC'])['Medal']
        .count()
        .reset_index()
    ),
    x='Year',
    y='Medal',
    color='NOC',
    # Order the legend by the summer ranking, not the overall one.
    category_orders={'NOC': top_5_summer_countries},
    title='Top 5 countries with the most total medals and Norway (Summer)',
    labels={'Medal': 'Medals', 'NOC': 'Country'}
)

In [193]:
# Winter medals per year for the top-5 winter countries plus Norway.
# The counts are taken from the raw rows using 'Season': filtering the combined
# per-year counts by year would also keep Summer medals for years in which both
# Games were held.
px.line(
    data_frame=(
        olympics[
            (olympics['Season'] == 'Winter')
            & olympics['NOC'].isin(top_5_winter_countries)
        ]
        .groupby(['Year', 'NOC'])['Medal']
        .count()
        .reset_index()
    ),
    x='Year',
    y='Medal',
    color='NOC',
    # Order the legend by the winter ranking, not the overall one.
    category_orders={'NOC': top_5_winter_countries},
    title='Top 5 countries with the most total medals and Norway (Winter)',
    labels={'Medal': 'Medals', 'NOC': 'Country'}
)