In [1]:
import pandas as pd
import plotly_express as px
import numpy as np
import matplotlib.pyplot as plt
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

athlete_events = pd.read_csv("../Projekt-Databehandling/Data/athlete_events.csv")
noc_regions = pd.read_csv("../Projekt-Databehandling/Data/noc_regions.csv")

pipenv install gunicorn
pipenv install dash plotly_express requests dash_bootstrap_components pandas

# Graf 1 - Ski Jumping Top 3 Countries

In [2]:
# sorted in descending order, the countries with most medals, took out the top 3

ski_jumping = athlete_events[(athlete_events["Sport"] == "Ski Jumping")]

skiing_olympics = (
    ski_jumping.groupby(["Team"])["Medal"]
    .count()
    .reset_index(name="Count") # new name for medal column
    .sort_values(["Count"], ascending=False)
)

skiing_olympics.head(3)

Unnamed: 0,Team,Count
0,Austria,43
21,Norway,39
9,Finland,34


- https://www.geeksforgeeks.org/olympics-data-analysis-using-python/
- https://www.kaggle.com/code/gpreda/plotly-tutorial-120-years-of-olympic-games/notebook - choropleth

In [3]:
top_countries_medals = ski_jumping[(ski_jumping["Team"] == "Austria") | (ski_jumping["Team"] == "Norway") | (ski_jumping["Team"] == "Finland")]

bronze_ski_jumping = top_countries_medals[(top_countries_medals["Medal"] == "Bronze")]
silver_ski_jumping = top_countries_medals[(top_countries_medals["Medal"] == "Silver")]
gold_ski_jumping = top_countries_medals[(top_countries_medals["Medal"] == "Gold")]

medals_per_country = ski_jumping.groupby("Team")["Medal"].value_counts(dropna=True)
bronze_ski_jumping = bronze_ski_jumping.groupby("Team")["Medal"].value_counts(
    dropna=False
)
silver_ski_jumping = silver_ski_jumping.groupby("Team")["Medal"].value_counts(
    dropna=False
)
gold_ski_jumping = gold_ski_jumping.groupby("Team")["Medal"].value_counts(dropna=False)


def plotly_bar_plot_with_labels_sublabels(
    x, y, title, labels, sublabels,
):

    fig = px.bar(
        x=x,
        y=y,
        barmode="group",  # groups the bars next to eachother instead of stacking on eachother
        labels=labels,
        title=title,
    )
    newnames = sublabels
    # To be able to change the sub titles for 'Antal doser' without changing the data source,
    # you can switch the legendgroups name with a dict and map it onto existing subtitle names.
    # I had to do this since I couldn't change it through 'labels=' like the other titles
    # source: https://stackoverflow.com/questions/64371174/plotly-how-to-change-variable-label-names-for-the-legend-in-a-plotly-express-li
    fig.for_each_trace(lambda t: t.update(name=newnames[t.name]))

    fig.show()


labels_skiing = {
    "value": "Medals won",
    "variable": "Medals",
    "x": "Teams",
}

sublabels_skiing = {
    "wide_variable_0": "Bronze",
    "wide_variable_1": "Silver",
    "wide_variable_2": "Gold",
}

plotly_bar_plot_with_labels_sublabels(
    top_countries_medals.Team.unique(),
    [bronze_ski_jumping, silver_ski_jumping, gold_ski_jumping],
    'Top 3 Countries With Most Medals won in Ski Jumping',
    labels_skiing,
    sublabels_skiing

)


# Graf 2 - Snowboarding Medals per country

In [4]:
def most_medals_per_country_sports(sport, df):
    # pick out every row that has snowboarding in it
    medals_olympics = df[(df["Sport"] == sport)]

    # counts number of medals for each country in total, then is sorted in descending order
    medals_olympics = (
        medals_olympics.groupby(["Team"])["Medal"]
        .count()
        .reset_index(name="Count") # new name for medal column
        .sort_values(["Count"], ascending=False)
    )

    fig = px.bar(
        medals_olympics.head(10),
        x='Team',
        y='Count',
        title='Antal Medaljer per land inom '+sport
        
    )
    fig.update_xaxes(tickangle=40)
    return fig.show()

most_medals_per_country_sports("Ski Jumping", athlete_events)

# Graf 3 - Amount of Football Players For Each Olympics

In [5]:
def amount_of_athlets(sport, df):
    df_sport = df[(df['Sport'] == sport)]

    tmp = df_sport.groupby(['Year', 'City'])['Season'].value_counts()
    df_sport = pd.DataFrame(data={'Athlets': tmp.values}, index=tmp.index).reset_index()

    fig = px.scatter(df_sport,
            x='Year',
            y='Athlets',
            title='Amount of athlets for '+sport+' Each Olympics',
            
    )

    return fig.show()

amount_of_athlets("Football", athlete_events)

In [6]:
football_df = athlete_events[(athlete_events['Sport'] == "Football")]

tmp = football_df.groupby(['Year', 'City'])['Season'].value_counts()
football_df = pd.DataFrame(data={'Athlets': tmp.values}, index=tmp.index).reset_index()

fig = px.scatter(football_df,
        x='Year',
        y='Athlets',
        size='Year',
        title='Amount of Football Players For Each Olympics',
        
)

fig.show()

# Graf 4 - Amount of Athlets in Snowboarding, Ski Jumping & Bobsleigh Per Olympics

In [None]:
df_snowboarding = athlete_events[athlete_events['Sport']=='Snowboarding']
df_ski_jumping = athlete_events[athlete_events['Sport']=='Ski Jumping']
df_bobsleigh = athlete_events[athlete_events['Sport']== 'Bobsleigh']

tmp = df_snowboarding.groupby(['Year', 'City'])['Season'].value_counts()
snowboarding_df = pd.DataFrame(data={'Athlets': tmp.values}, index=tmp.index).reset_index()

tmp1 = df_ski_jumping.groupby(['Year', 'City'])['Season'].value_counts()
ski_jumping_df = pd.DataFrame(data={'Athlets': tmp1.values}, index=tmp1.index).reset_index()

tmp2 = df_bobsleigh.groupby(['Year', 'City'])['Season'].value_counts()
bobsleigh_df = pd.DataFrame(data={'Athlets': tmp2.values}, index=tmp2.index).reset_index()

fig = px.line(snowboarding_df, x='Year', y='Athlets')
fig.add_scatter(ski_jumping_df, x='Year', y='Athlets')
fig.show()

In [10]:

ski_jumping = athlete_events[(athlete_events["Sport"] == "Ski Jumping")]

skiing_olympics = (
    ski_jumping.groupby(["Team"])["Medal"]
    .count()
    .reset_index(name="Count") # new name for medal column
    .sort_values(["Count"], ascending=False)
)

skiing_olympics.head(5)

# top_countries_medals = ski_jumping[(ski_jumping["Team"] == "Austria") | (ski_jumping["Team"] == "Norway") | (ski_jumping["Team"] == "Finland")]

bronze_ski_jumping = ski_jumping[(ski_jumping["Medal"] == "Bronze")]
silver_ski_jumping = ski_jumping[(ski_jumping["Medal"] == "Silver")]
gold_ski_jumping = ski_jumping[(ski_jumping["Medal"] == "Gold")]

medals_per_country = ski_jumping.groupby("Team")["Medal"].value_counts(dropna=True)
bronze_ski_jumping = bronze_ski_jumping.groupby("Team")["Medal"].value_counts(
    dropna=False
)
silver_ski_jumping = silver_ski_jumping.groupby("Team")["Medal"].value_counts(
    dropna=False
)
gold_ski_jumping = gold_ski_jumping.groupby("Team")["Medal"].value_counts(dropna=False)


def plotly_bar_plot_with_labels_sublabels(
    x, y, title, labels, sublabels,
):

    fig = px.bar(
        x=x,
        y=y,
        barmode="group",  # groups the bars next to eachother instead of stacking on eachother
        labels=labels,
        title=title,
    )
    newnames = sublabels
    # To be able to change the sub titles for 'Antal doser' without changing the data source,
    # you can switch the legendgroups name with a dict and map it onto existing subtitle names.
    # I had to do this since I couldn't change it through 'labels=' like the other titles
    # source: https://stackoverflow.com/questions/64371174/plotly-how-to-change-variable-label-names-for-the-legend-in-a-plotly-express-li
    fig.for_each_trace(lambda t: t.update(name=newnames[t.name]))

    fig.show()


labels_skiing = {
    "value": "Medals won",
    "variable": "Medals",
    "x": "Teams",
}

sublabels_skiing = {
    "wide_variable_0": "Bronze",
    "wide_variable_1": "Silver",
    "wide_variable_2": "Gold",
}

plotly_bar_plot_with_labels_sublabels(
    top_countries_medals.Team.unique(),
    [bronze_ski_jumping, silver_ski_jumping, gold_ski_jumping],
    'Top 3 Countries With Most Medals won in Ski Jumping',
    labels_skiing,
    sublabels_skiing
)

ValueError: All arguments should have the same length. The length of argument `wide_variable_0` is 13, whereas the length of  previously-processed arguments ['x'] is 3

In [20]:
bronze_ski_jumping = ski_jumping[(ski_jumping["Medal"] == "Bronze")]
silver_ski_jumping = ski_jumping[(ski_jumping["Medal"] == "Silver")]
gold_ski_jumping = ski_jumping[(ski_jumping["Medal"] == "Gold")]

bronze_ski_jumping = (
    bronze_ski_jumping.groupby(["Team"])["Medal"]
    .count()
    .reset_index(name="Count") # new name for medal column
    .sort_values(["Count"], ascending=False)
)

silver_ski_jumping = (
    silver_ski_jumping.groupby(["Team"])["Medal"]
    .count()
    .reset_index(name="Count") # new name for medal column
    .sort_values(["Count"], ascending=False)
)

gold_ski_jumping = (
    gold_ski_jumping.groupby(["Team"])["Medal"]
    .count()
    .reset_index(name="Count") # new name for medal column
    .sort_values(["Count"], ascending=False)
)


Unnamed: 0,Team,Count
7,Norway,21
0,Austria,16
1,Czechoslovakia,7
6,Japan,6
9,Slovenia,5
3,Finland,4
2,East Germany,2
5,Germany,2
4,France,1
8,Poland,1


In [None]:
import functools as ft

bronze_ski_jumping = ski_jumping[(ski_jumping["Medal"] == "Bronze")]
silver_ski_jumping = ski_jumping[(ski_jumping["Medal"] == "Silver")]
gold_ski_jumping = ski_jumping[(ski_jumping["Medal"] == "Gold")]

bronze_ski_jumping = (
    bronze_ski_jumping.groupby(["Team"])["Medal"]
    .count()
    .reset_index(name="Bronze") # new name for medal column
    .sort_values(["Bronze"], ascending=False)
)

silver_ski_jumping = (
    silver_ski_jumping.groupby(["Team"])["Medal"]
    .count()
    .reset_index(name="Silver") # new name for medal column
    .sort_values(["Silver"], ascending=False)
)

gold_ski_jumping = (
    gold_ski_jumping.groupby(["Team"])["Medal"]
    .count()
    .reset_index(name="Gold") # new name for medal column
    .sort_values(["Gold"], ascending=False)
)

medal_total = [bronze_ski_jumping, silver_ski_jumping, gold_ski_jumping]
df_final = ft.reduce(lambda left, right: pd.merge(left, right), medal_total)
df_final['Sum'] = df_final.sum(axis=1)

def plotly_bar_plot_with_labels_sublabels(
    x, y, title, labels, sublabels,
):

    fig = px.bar(
        x=x,
        y=y,
        barmode="group",  # groups the bars next to eachother instead of stacking on eachother
        labels=labels,
        title=title,
    )
    newnames = sublabels
    # To be able to change the sub titles for 'Antal doser' without changing the data source,
    # you can switch the legendgroups name with a dict and map it onto existing subtitle names.
    # I had to do this since I couldn't change it through 'labels=' like the other titles
    # source: https://stackoverflow.com/questions/64371174/plotly-how-to-change-variable-label-names-for-the-legend-in-a-plotly-express-li
    fig.for_each_trace(lambda t: t.update(name=newnames[t.name]))

    fig.show()


labels_skiing = {
    "value": "Medals won",
    "variable": "Medals",
    "x": "Teams",
}

sublabels_skiing = {
    "wide_variable_0": "Bronze",
    "wide_variable_1": "Silver",
    "wide_variable_2": "Gold",
}

plotly_bar_plot_with_labels_sublabels(
    [],
    [bronze_ski_jumping, silver_ski_jumping, gold_ski_jumping],
    'Top 3 Countries With Most Medals won in Ski Jumping',
    labels_skiing,
    sublabels_skiing
)

In [49]:
import functools as ft
medal_total = [bronze_ski_jumping, silver_ski_jumping, gold_ski_jumping]
df_final = ft.reduce(lambda left, right: pd.merge(left, right), medal_total)
# df_final[['Bronze', 'Silver', 'Bronze']].sort_values(by=('Bronze'))
df_final['Sum'] = df_final.sum(axis=1)
df_final['Sum'].sort_values(ascending=False, inplace=True)



Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



ValueError: This Series is a view of some other array, to sort in-place you must create a copy

Unnamed: 0,Team,Bronze,Silver,Gold,Sum
0,Norway,21,9,9,39
1,Austria,16,15,12,43
2,Czechoslovakia,7,2,1,10
3,Japan,6,8,6,20
4,Finland,4,14,16,34
5,East Germany,2,3,2,7
6,Germany,2,9,15,26
7,Poland,1,3,3,7
