In [42]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display
import plotly.subplots as sp
import numpy as np

In [43]:
# Import data, change column header from Sex to Gender
df = pd.read_csv('../../Resources/Cleaning2/merged_events_bio.csv')
df = df.rename(columns={'Sex': 'Gender'})
df.head(5)

Unnamed: 0,Athlete_ID,Name,Age,Gender,Height(cm),Weight(kg),Sport,Country_Name,Country_Code,Year,Season,isTeamSport,Medal,Result_ID
0,16809,Károly Teppert,21.0,M,,,Cycling Road,Hungary,HUN,1912,Summer,False,na,153233
1,16809,Károly Teppert,21.0,M,,,Cycling Road,Hungary,HUN,1912,Summer,True,na,153234
2,43737,Andrzej Socharski,29.0,M,173.0,72.0,Shooting,Poland,POL,1976,Summer,False,na,51745
3,50147,Nathalie Wunderlich,21.0,F,170.0,50.0,Swimming,Switzerland,SUI,1992,Summer,False,na,7860
4,50147,Nathalie Wunderlich,21.0,F,170.0,50.0,Swimming,Switzerland,SUI,1992,Summer,False,na,7870


In [44]:
# Define the medal colors
color_scale = {'Gold': 'gold', 'Silver': 'silver', 'Bronze': 'brown'}

In [45]:
# Define the function to filter the data 
def filter_data(df, gender, sport=None, country=None):
    filtered_df = df.copy()
    if gender != 'Both':
        filtered_df = filtered_df[filtered_df['Gender'] == gender]
    if sport != 'ALL':
        filtered_df = filtered_df[filtered_df['Sport'] == sport]
    if country != 'ALL':
        filtered_df = filtered_df[filtered_df['Country_Name'] == country]

   
    # filter out rows where no medal was received
    #filtered_df = filtered_df[~(filtered_df['Medal'] == 'na')]
        
    return filtered_df

In [103]:
# Define a function to update the scatter plot based on the dropdown menus
def update_scatter_plot(gender, sport, country):
    # Filter the data 
    filtered_df = filter_data(df, gender, sport, country)
    
    # Create the scatter plot
    fig = px.scatter(filtered_df, x='Weight(kg)', y='Height(cm)', color='Medal', symbol='Gender', color_discrete_map=color_scale,
                     symbol_map={'M': 'square', 'F': 'circle'}, title=f'Olympic Medalists Height & Weight')
    
    # Set the axis labels
    fig.update_layout(xaxis_title='Weight (kg)', yaxis_title='Height (cm)')
    
    # Show the plot
    fig.show()

In [34]:
# Define the function to update the medal bar chart
def update_bar_chart(gender, sport, country):
    filtered_df = filter_data(df, gender, sport, country)
    filtered_df = filtered_df[filtered_df['Age'].notnull()] # Filter out rows where Age is null
    filtered_df = filtered_df[~(filtered_df['Medal'] == 'na')]  # Filter out rows where no medal was received
    filtered_df['Age'] = filtered_df['Age'].astype(int) # Convert Age column to integer
    medal_counts = pd.pivot_table(filtered_df, index=['Age'], columns=['Medal'], values='Name', aggfunc='count', fill_value=0)

    # Add a new column to medal_counts with the total count for each age
    medal_counts['Total'] = medal_counts['Gold'] + medal_counts['Silver'] + medal_counts['Bronze']

    age_order = sorted(filtered_df['Age'].unique())
    fig = px.bar(medal_counts, x=medal_counts.index, y=['Gold', 'Silver', 'Bronze'], barmode='stack',
                 color_discrete_map={'Gold': 'gold', 'Silver': 'silver', 'Bronze': 'brown'},
                 labels={'value': 'Medal Count', 'variable': 'Medal'},
                 category_orders={'x': age_order},
                 hover_data={'Total': True}) # Add the total count to the hover label

    fig.update_layout(title='Medal Count by Age', xaxis_title='Age', yaxis_title='Medal Count', legend_title='Medal')
    
    # Update x-axis ticks based on number of age groups
    num_age_groups = len(age_order)
    if num_age_groups <= 20:
        fig.update_xaxes(tickmode='array', tickvals=medal_counts.index, ticktext=medal_counts.index)
    elif num_age_groups > 20 and num_age_groups <= 50:
        fig.update_xaxes(tickmode='linear', dtick=5)
    else:
        fig.update_xaxes(tickmode='linear', dtick=10)      
    
    fig.show()

In [35]:
# Define dropdown menus, alphabetize sports and countries
gender_dropdown = widgets.Dropdown(
    options=['Both', 'M', 'F'],
    value='Both',
    description='Gender:')

sports = ["ALL"] + sorted(df['Sport'].unique().tolist())
sports.sort()
sport_dropdown = widgets.Dropdown(options=sports, value="ALL", description="Sport:")

countries = ["ALL"] + sorted(df['Country_Name'].unique().tolist())
countries.sort()
country_dropdown = widgets.Dropdown(options=countries, value="ALL", description='Country:')

In [36]:
# Define the callback functions
def on_gender_change(change):
    update_scatter_plot(change, sport_dropdown.value, country_dropdown.value)
    
def on_sport_change(change):
    update_scatter_plot(gender_dropdown.value, change, country_dropdown.value)
    
def on_country_change(change):
    update_scatter_plot(gender_dropdown.value, sport_dropdown.value, change)

In [37]:
# Register the callback functions
gender_dropdown.observe(on_gender_change, names='value')
sport_dropdown.observe(on_sport_change, names='value')
country_dropdown.observe(on_country_change, names='value')

In [38]:
# Define the function to reset the dropdown menus
#def reset_dropdowns():
#    gender_dropdown.value = 'Both'
#    sport_dropdown.value = 'ALL'
#    country_dropdown.value = 'ALL'

In [39]:
# Display the dropdown menus
display(gender_dropdown)
display(sport_dropdown)
display(country_dropdown)

Dropdown(description='Gender:', options=('Both', 'M', 'F'), value='Both')

Dropdown(description='Sport:', index=1, options=('3x3 Basketball', 'ALL', 'Aeronautics', 'Alpine Skiing', 'Alp…

Dropdown(description='Country:', index=697, options=(' Afghanistan', ' Albania', ' Albania  Australia', ' Alba…

In [97]:
filtered_df = df[(df['Gender'] == gender) & (df['Sport'] == sport) & (df['Country_Name'] == country)]

In [120]:
fig = go.Figure()
gender = 'both'
sport = 'All'
country = 'All'
def filter_data(df, gender, sport=None, country=None):
    filtered_df = df.copy()
    if gender != 'Both':
        filtered_df = filtered_df[filtered_df['Gender'] == gender]
    if sport != 'ALL':
        filtered_df = filtered_df[filtered_df['Sport'] == sport]
    if country != 'ALL':
        filtered_df = filtered_df[filtered_df['Country_Name'] == country]

    return filtered_df

filtered_df = df[(df['Gender'] == gender) & (df['Sport'] == sport) & (df['Country_Name'] == country)]

fig = go.Scatter(filtered_df, x='Weight(kg)', y='Height(cm)', color='Medal', symbol='Gender', color_discrete_map=color_scale)
    
# Set the axis labels
fig.update_layout(xaxis_title='Weight (kg)', yaxis_title='Height (cm)')
    
# Show the plot
fig.show()
dropdown_buttons = []

for option in options:
    filtered_df = filter_data(df, gender, sport=None, country=None)
    dropdown_buttons.append(dict(
    method = 'update',
    args=[{
        'x': [filtered_df['Weight(kg)']],
        'y': [filtered_df['Height(cm)']]
    }]))
        
fig.update_layout(
    updatemenus=[dict(
        buttons=dropdown_buttons,
        direction="down",
        showactive=True,
        x=0.1,
        xanchor="left",
        y=1.1,
        yanchor="top"
    )]
)
# Add annotation
fig.update_layout(
    annotations=[
        dict(text="Select Year:", showarrow=False,
        x=0, y=1.085, yref="paper", align="left")
    ]
)
fig.show()

ValueError: The first argument to the plotly.graph_objs.Scatter
constructor must be a dict or
an instance of :class:`plotly.graph_objs.Scatter`

In [104]:
filtered_df = filter_data(df, gender, sport, country)
    
# Create the scatter plot
fig = px.scatter(filtered_df, x='Weight(kg)', y='Height(cm)', color='Medal', symbol='Gender', color_discrete_map=color_scale,
                     symbol_map={'M': 'square', 'F': 'circle'}, title=f'Olympic Medalists Height & Weight')
    
# Set the axis labels
fig.update_layout(xaxis_title='Weight (kg)', yaxis_title='Height (cm)')
    
# Show the plot
fig.show()

In [102]:
filtered_df = df[(df['Gender'] == gender) & (df['Sport'] == sport) & (df['Country_Name'] == country)]

fig.add_trace(go.Scatter(
    x=filtered_df['Weight(kg)'],
    y=filtered_df['Height(cm)'],
    mode='markers',
    symbol=gender)

SyntaxError: unexpected EOF while parsing (1681805999.py, line 7)

In [92]:
import plotly.graph_objects as go
import plotly.subplots as sp

# Set default values for the filters
gender = 'Both'
sport = 'All'
country = 'All'

def filter_data(df, gender, sport, country):
    filtered_data = df[(df['Gender'] == gender) & (df['Sport'] == sport) & (df['Country_Name'] == country)]
    return filtered_data

fig = sp.make_subplots(rows=1, cols=2)

filtered_data = filter_data(df, gender, sport, country)

fig.add_trace(
    go.Scatter(
        x=filtered_data['Weight(kg)'],
        y=filtered_data['Height(cm)'],
        symbol=gender)
        ),
    row=1, col=1
)

def generate_dropdown_options(df):
    genders = df['Gender'].unique().tolist()
    sports = df['Sport'].unique().tolist()
    countries = df['Country_Name'].unique().tolist()
    options = [
        dict(
            label=f'Gender: {gender}, Sport: {sport}, Country: {country}',
            method='update',
            args=[{
                'x': [filter_data(df, gender, sport, country)['Weight(kg)']],
                'y': [filter_data(df, gender, sport, country)['Height(cm)']]
            }]
        )
        for gender in genders for sport in sports for country in countries
    ]
    options.insert(0, dict(
        label='All',
        method='update',
        args=[{
            'x': [df['Weight(kg)']],
            'y': [df['Height(cm)']]
        }]
    ))
    return options

dropdown_buttons = generate_dropdown_options(df)

fig.update_layout(
    updatemenus=[dict(
        buttons=dropdown_buttons,
        direction='down',
        showactive=True,
        x=0.1,
        xanchor='left',
        y=1.1,
        yanchor='top'
    )]
)

fig.update_xaxes(title_text='Weight (kg)', row=1, col=1)
fig.update_yaxes(title_text='Height (cm)', row=1, col=1)
fig.update_layout(title='Olympic Medalists Height & Weight')

fig.show()

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 40)

In [126]:
import plotly.express as px
import pandas as pd

# create sample dataframe
df = pd.DataFrame({
    'Athlete_ID': [16809, 16809, 43737, 50147, 50147],
    'Name': ['Károly Teppert', 'Károly Teppert', 'Andrzej Socharski', 'Nathalie Wunderlich', 'Nathalie Wunderlich'],
    'Age': [21.0, 21.0, 29.0, 21.0, 21.0],
    'Gender': ['M', 'M', 'M', 'F', 'F'],
    'Height(cm)': [0,0, 173.0, 170.0, 170.0],
    'Weight(kg)': [0, 0, 72.0, 50.0, 50.0],
    'Sport': ['Cycling Road', 'Cycling Road', 'Shooting', 'Swimming', 'Swimming'],
    'Country_Name': ['Hungary', 'Hungary', 'Poland', 'Switzerland', 'Switzerland'],
    'Country_Code': ['HUN', 'HUN', 'POL', 'SUI', 'SUI'],
    'Year': [1912, 1912, 1976, 1992, 1992],
    'Season': ['Summer', 'Summer', 'Summer', 'Summer', 'Summer'],
    'isTeamSport': [False, True, False, False, False],
    'Medal': ['na', 'na', 'na', 'na', 'na'],
    'Result_ID': [153233, 153234, 51745, 7860, 7870]
})

# create unique lists for dropdown options
gender_list = df['Gender'].unique().tolist()
country_list = df['Country_Name'].unique().tolist()
sport_list = df['Sport'].unique().tolist()

# create scatter plot with dropdown options
fig = px.scatter(df, x='Age', y='Height(cm)', color='Sport',
                 hover_data=['Name', 'Country_Name', 'Year'],
                 labels={'Age': 'Age (years)', 'Height(cm)': 'Height (cm)'})

# add dropdown menus for gender, country, and sport
fig.update_layout(
updatemenus=[
dict(
buttons=[
dict(
label='Gender',
method='update',
args=[{'visible': [True if gender == val else False for gender in df['Gender']],
'yaxis': {'title': 'Height (cm)'},
'xaxis': {'title': 'Age (years)'},
'legend_title': 'Sport',
'showlegend': True}]
),
dict(
label='Country',
method='update',
args=[{'visible': [True if country == val else False for country in df['Country_Name']],
'yaxis': {'title': 'Height (cm)'},
'xaxis': {'title': 'Age (years)'},
'legend_title': 'Sport',
'showlegend': True}]
),
dict(
label='Sport',
method='update',
args=[{'visible': [True if sport == val else False for sport in df['Sport']],
'yaxis': {'title': 'Height (cm)'},
'xaxis': {'title': 'Age (years)'},
'legend_title': 'Country',
'showlegend': True}]
),
],
direction='down',
showactive=True,
x=0.1,
y=1.2,
),
dict(
buttons=list([
dict(
args=["marker.color", df['Sport']],
label="Sport",
method="restyle"
),
dict(
args=["marker.color", df['Country_Name']],
label="Country",
method="restyle"
),
dict(
args=["marker.color", df['Gender']],
label="Gender",
method="restyle"
)
]),
direction="down",
pad={"r": 10, "t": 10},
showactive=True,
x=0.1,
xanchor="left",
y=1.1,
yanchor="top"
),
]
)
fig.show()

NameError: name 'val' is not defined