In [2]:
import pandas as pd
%pip install ace_tools

# Correct raw file link
file_path = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-08-31/bird_baths.csv'

olympics_df = pd.read_csv(file_path)

# Display the columns of the DataFrame to inspect the structure
print(olympics_df.columns)

# Check if 'medal' column exists
if 'medal' in olympics_df.columns:
    # Filter rows with medals and clean up necessary columns
    medals_data = olympics_df[olympics_df['medal'].notna()]
    medals_data = medals_data[['year', 'noc', 'medal', 'season']]
else:
    raise KeyError("The 'medal' column does not exist in the DataFrame.")

# Initialize an empty DataFrame to store the results
medals_by_year = (
    medals_data
    .groupby(['noc', 'year', 'season', 'medal'])
    .size()
    .reset_index(name='count')
)

# Pivot table to simplify analysis
medals_summary = medals_by_year.pivot_table(
    index=['noc', 'year', 'season'], 
    columns='medal', 
    values='count', 
    aggfunc='sum',
    fill_value=0
).reset_index()

# Calculate total medals for each year, season, and NOC
medals_summary['total_medals'] = (
    medals_summary.get('Gold', 0) +
    medals_summary.get('Silver', 0) +
    medals_summary.get('Bronze', 0)
)

# Add missing columns if they do not exist
for medal in ['Gold', 'Silver', 'Bronze']:
    if medal not in medals_summary:
        medals_summary[medal] = 0

# Save the results for the user to explore
medals_summary.rename(columns={'Gold': 'gold_medals', 'Silver': 'silver_medals', 'Bronze': 'bronze_medals'}, inplace=True)
import ace_tools as tools
tools.display_dataframe_to_user(name="Olympic Medals Summary by Year", dataframe=medals_summary)


Defaulting to user installation because normal site-packages is not writeable
Collecting ace_tools
  Downloading ace_tools-0.0-py3-none-any.whl (1.1 kB)
Installing collected packages: ace-tools
Successfully installed ace-tools-0.0
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
Index(['survey_year', 'urban_rural', 'bioregions', 'bird_type', 'bird_count'], dtype='object')


KeyError: "The 'medal' column does not exist in the DataFrame."