# Sports Statistics

Below you'll find statistics for the following sports:
- Ice Hockey
- Athletics
- Shooting
- Cross Country Skiing

(These are the four sports in which Sweden has won the most medals.)

In [1]:
import pandas as pd
import plotly_express as px

In [2]:
# Load the Olympic athlete/event records from the CSV file in the data folder.
athlete_events = pd.read_csv(filepath_or_buffer="data/athlete_events.csv")

In [3]:
# Split athlete_events into one DataFrame per sport of interest, plus one combined
# DataFrame holding all four sports. Rows are selected on the 'Sport' column, and
# .copy() gives every subset its own data, independent of athlete_events.
# NOTE: the name df_cross_county_skiing ("county", sic) is kept as-is because
# other cells may refer to it.
selected_sports = ['Ice Hockey', 'Athletics', 'Shooting', 'Cross Country Skiing']
sport_column = athlete_events["Sport"]

df_icehockey = athlete_events.loc[sport_column == "Ice Hockey"].copy()
df_athletics = athlete_events.loc[sport_column == "Athletics"].copy()
df_shooting = athlete_events.loc[sport_column == "Shooting"].copy()
df_cross_county_skiing = athlete_events.loc[sport_column == "Cross Country Skiing"].copy()
df_all_selected_sports = athlete_events.loc[sport_column.isin(selected_sports)].copy()

# Show the number of rows in each subset, one display() call per DataFrame.
subset_sizes = {
    "df_icehockey": len(df_icehockey),
    "df_athletics": len(df_athletics),
    "df_shooting": len(df_shooting),
    "df_cross_county_skiing": len(df_cross_county_skiing),
    "df_all_selected_sports": len(df_all_selected_sports),
}
for frame_name, row_count in subset_sizes.items():
    display(f"len({frame_name})={row_count}")

'len(df_icehockey)=5516'

'len(df_athletics)=38624'

'len(df_shooting)=11448'

'len(df_cross_county_skiing)=9133'

'len(df_all_selected_sports)=64721'

In [4]:
# Preview the first five rows of the combined subset.
df_all_selected_sports.head(n=5)

Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal
10,6,Per Knut Aaland,M,31.0,188.0,75.0,United States,USA,1992 Winter,1992,Winter,Albertville,Cross Country Skiing,Cross Country Skiing Men's 10 kilometres,
11,6,Per Knut Aaland,M,31.0,188.0,75.0,United States,USA,1992 Winter,1992,Winter,Albertville,Cross Country Skiing,Cross Country Skiing Men's 50 kilometres,
12,6,Per Knut Aaland,M,31.0,188.0,75.0,United States,USA,1992 Winter,1992,Winter,Albertville,Cross Country Skiing,Cross Country Skiing Men's 10/15 kilometres Pu...,
13,6,Per Knut Aaland,M,31.0,188.0,75.0,United States,USA,1992 Winter,1992,Winter,Albertville,Cross Country Skiing,Cross Country Skiing Men's 4 x 10 kilometres R...,
14,6,Per Knut Aaland,M,33.0,188.0,75.0,United States,USA,1994 Winter,1994,Winter,Lillehammer,Cross Country Skiing,Cross Country Skiing Men's 10 kilometres,


#### Medal distribution between countries in the selected sports

In [5]:
# Count rows per country (NOC), sport and medal type.
# Rows without a medal (NaN in 'Medal') are left out because groupby skips
# NaN keys by default, so only medal-winning rows are counted.
# NOTE(review): this counts one medal per row, so a team event appears to add
# one medal per team member — confirm this is the intended definition.
medal_distribution = (
    df_all_selected_sports.groupby(['NOC', 'Sport', 'Medal'])
    .size()
    .reset_index(name="Number of Medals")
)

# Grouped bars of medal counts per country, one colour per sport.
# px.histogram sums "Number of Medals" over the medal types for each NOC/sport
# pair; the y-axis is logarithmic (log_y=True).
fig = px.histogram(
    medal_distribution,
    x="NOC",
    y="Number of Medals",
    log_y=True,
    color='Sport',
    title="Medal Distribution in All Olympic Games",
    barmode='group',
)

# Keep a small gap between the bar groups and replace the automatic
# "sum of Number of Medals" y-axis label with a plain one.
fig.update_layout(bargap=0.01, yaxis_title="Number of Medals")

fig.show()

In [15]:
# Sunburst of the medal counts: the inner ring is the sport, the outer ring the
# country (NOC). A title is set so the figure can be read on its own.
px.sunburst(
    medal_distribution,
    path=['Sport', 'NOC'],
    values='Number of Medals',
    title="Medals per Sport and Country (NOC)",
)

#### Age distribution in the selected sports

In [6]:
# Mean age over every row of the selected sports (missing ages are skipped by
# pandas' mean), rounded to one decimal. Used below to centre the x-axis and to
# draw the reference line.
mean_age_all = round(df_all_selected_sports["Age"].mean(), 1)

# Row count per sport and age. Rows without an age are left out because groupby
# skips NaN keys by default.
# NOTE(review): "Number of Athletes" counts rows, so an athlete entered in
# several events appears to be counted once per event — confirm this is intended.
age_distribution = (
    df_all_selected_sports.groupby(['Sport', 'Age'])
    .size()
    .reset_index(name="Number of Athletes")
)

# Grouped histogram of ages for all countries, one colour per sport, with the
# x-axis limited to 20 years either side of the overall mean age.
fig = px.histogram(
    age_distribution,
    x="Age",
    y="Number of Athletes",
    log_y=True,
    color='Sport',
    title="Age distribution in All Olympic Games",
    range_x=[mean_age_all-20, mean_age_all+20],
    barmode='group',
    nbins=80
)

# Mark the overall mean age with a dashed vertical line.
fig.add_vline(x=mean_age_all, line_width=3, line_dash="dash", line_color="green", annotation_text="Mean age all sports")

# Keep a small gap between the bar groups and replace the automatic
# "sum of Number of Athletes" y-axis label with a plain one.
fig.update_layout(bargap=0.01, yaxis_title="Number of Athletes")

fig.show()

In [13]:
# Sunburst of the age counts: the inner ring is the sport, the outer ring the
# age. A title is set so the figure can be read on its own.
px.sunburst(
    age_distribution,
    path=['Sport', 'Age'],
    values='Number of Athletes',
    title="Athletes per Sport and Age",
)

#### Create more plots to visualize different aspects of the sports.