In [None]:
from dash import dash,html, dcc, Output, Input
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import numpy as np


# Load the NOC lookup table and the athlete events table from the Data folder.
noc_region_df = pd.read_csv('../Data/noc_regions.csv')
athlete_events_df = pd.read_csv('../Data/athlete_events.csv')

# Inner join on the NOC code: event rows whose NOC has no match in the
# lookup table are dropped from merged_df.
merged_df = pd.merge(athlete_events_df, noc_region_df, how='inner', on='NOC')

# Take an explicit copy of the Ice Hockey rows. Without .copy() the column
# assignment below writes to a selection of merged_df, which triggers
# SettingWithCopyWarning on pandas < 3 and leaves it ambiguous whether
# merged_df is affected.
ice_hockey_df = merged_df[merged_df['Sport'] == 'Ice Hockey'].copy()

# Normalise any literal 'nan' strings in Medal to a real missing value so that
# dropna()/groupby treat them as missing.
ice_hockey_df['Medal'] = ice_hockey_df['Medal'].replace('nan', np.nan)

# Age distribution in the sport:
# 20-bin histogram of the Age column with a kernel density estimate overlaid.
ages = ice_hockey_df['Age']
age_ax = sns.histplot(ages, kde=True, bins=20)
age_ax.set(
    title='Age Distribution in Ice Hockey',
    xlabel='Age',
    ylabel='Count',
)
plt.show()

# Medal distribution between countries.
# Step 1: count rows for every (region, Medal) pair; groupby skips rows where
# either key is missing, so entries without a medal are not counted.
medal_counts = ice_hockey_df.groupby(['region', 'Medal']).size()
# Step 2: pivot the medal types into columns; pairs that never occur become 0.
medal_distribution = medal_counts.unstack(fill_value=0)

# One stacked bar per region, one segment per medal type.
# NOTE(review): the heights are row counts; if each row is one athlete, a team
# medal is counted once per player rather than once per team - confirm that
# this is what 'Number of Medals' is meant to show.
medal_ax = medal_distribution.plot.bar(
    stacked=True,
    title='Medal Distribution in Ice Hockey',
)
medal_ax.set_xlabel('Country')
medal_ax.set_ylabel('Number of Medals')
plt.show()

# Distribution of age by gender: one box per value of the Sex column.
gender_ax = sns.boxplot(data=ice_hockey_df, x='Sex', y='Age')
gender_ax.set(
    title='Age Distribution in Ice Hockey by Gender',
    xlabel='Gender',
    ylabel='Age',
)
plt.show()

# Number of events in hockey: one bar per Year, bar height = number of rows
# in ice_hockey_df with that year.
# NOTE(review): this tallies rows, which may be athlete entries rather than
# distinct events - confirm the title and y-label match the intent.
# NOTE(review): seaborn >= 0.13 deprecates `palette` without `hue`; if the
# project is on that version, consider hue='Year', legend=False.
year_ax = sns.countplot(data=ice_hockey_df, x='Year', palette='viridis')
year_ax.set_title('Number of Ice Hockey Events by Year')
year_ax.set_xlabel('Year')
year_ax.set_ylabel('Number of Events')
# Tilt the year labels so neighbouring ticks do not overlap.
plt.xticks(rotation=45)
plt.show()

# Keep only the ice hockey rows that actually carry a medal value.
has_medal = ice_hockey_df['Medal'].notna()
ice_hockey_non_nan_df = ice_hockey_df[has_medal]

# Sunburst chart: rings go Year -> Medal -> region from the centre outwards,
# with sectors coloured by medal type.
fig = px.sunburst(
    ice_hockey_non_nan_df,
    path=['Year', 'Medal', 'region'],
    color='Medal',
    title='Medal Distribution in Ice Hockey by Year and Country',
)
fig.show()