# Import data

In [1]:
import pandas as pd
from sqlalchemy import create_engine, text, inspect

In [2]:
# Connection URL for the SQLite database file that every query below runs against.
DATABASE_URL = 'sqlite:///olympics.db'
engine = create_engine(DATABASE_URL)

# Exercise 1: Detailed Medal Analysis

In [None]:
'''
Task 1: Identify competitors who have won at least one medal in events spanning 
both Summer and Winter Olympics. Create a temporary table to store these competitors
 and their medal counts for each season, and then display the contents of this table.
'''

# SQLite temporary tables are visible only to the connection that created them.
# The drop, the create and the read therefore all run on ONE connection rather
# than relying on the pool to hand the same underlying connection back each time.
with engine.connect() as conn:
    # Drop the table if it exists so the cell can be re-run safely
    conn.execute(text('DROP TABLE IF EXISTS medals_both_seasons'))

    # One row per person, with medal counts split by season; only people with
    # at least one medal in each season are kept.
    # NOTE(review): this treats a NULL medal_id as "no medal". If the schema
    # marks "no medal" with a sentinel medal row instead, these counts include
    # non-medal entries - verify against the medal table.
    conn.execute(text('''
        CREATE TEMPORARY TABLE medals_both_seasons AS
        SELECT 
            gc.person_id, 
            SUM(CASE WHEN g.season = 'Summer' THEN 1 ELSE 0 END) AS summer_medals,
            SUM(CASE WHEN g.season = 'Winter' THEN 1 ELSE 0 END) AS winter_medals
        FROM competitor_event ce
        JOIN games_competitor gc ON ce.competitor_id = gc.id
        JOIN games g ON gc.games_id = g.id
        WHERE ce.medal_id IS NOT NULL
        GROUP BY gc.person_id
        HAVING summer_medals > 0 AND winter_medals > 0;
    '''))

    # Retrieve the data from the temporary table (same connection, so it exists)
    df_medals_both_seasons = pd.read_sql(
        text('SELECT * FROM medals_both_seasons'), conn
    )

df_medals_both_seasons

Unnamed: 0,person_id,summer_medals,winter_medals
0,770,3,1
1,4660,3,1
2,5429,1,1
3,5605,2,1
4,7978,1,2
...,...,...,...
153,129491,2,4
154,130148,1,5
155,130626,5,1
156,131183,1,9


In [7]:
'''
Task 2: Create a temporary table to store competitors who have won medals in exactly
 two different sports, and then use a subquery to identify the top 3 competitors with 
 the highest total number of medals across all sports. Display the contents of this table.
'''

# SQLite temporary tables are visible only to the connection that created them.
# The drop, the create and the read therefore all run on ONE connection rather
# than relying on the pool to hand the same underlying connection back each time.
with engine.connect() as conn:
    # Drop the table if it exists so the cell can be re-run safely
    conn.execute(text('DROP TABLE IF EXISTS medals_two_sports'))

    # One row per person whose medal-bearing entries cover exactly two sports,
    # together with the number of such entries.
    # NOTE(review): this treats a NULL medal_id as "no medal". If the schema
    # marks "no medal" with a sentinel medal row instead, total_medals counts
    # every entry rather than real medals - verify against the medal table.
    conn.execute(text("""
    CREATE TEMPORARY TABLE medals_two_sports AS
    SELECT 
        gc.person_id,
        COUNT(DISTINCT s.id) AS different_sports,
        COUNT(ce.medal_id) AS total_medals
    FROM competitor_event ce
    JOIN games_competitor gc ON ce.competitor_id = gc.id
    JOIN event e ON ce.event_id = e.id
    JOIN sport s ON e.sport_id = s.id
    WHERE ce.medal_id IS NOT NULL
    GROUP BY gc.person_id
    HAVING different_sports = 2;
"""))

    # Retrieve the three highest totals from the temporary table
    # (same connection, so the table is guaranteed to exist)
    df_top_3_competitors = pd.read_sql(text('''
        SELECT person_id, total_medals
            FROM medals_two_sports
            ORDER BY total_medals DESC
            LIMIT 3;
'''), conn)

df_top_3_competitors

Unnamed: 0,person_id,total_medals
0,44875,32
1,74532,28
2,11951,27


# Exercise 2: Region and Competitor Performance

In [8]:
'''
Task 1: Retrieve the regions that have competitors who have won the highest number of 
medals in a single Olympic event. Use a subquery to determine the event 
with the highest number of medals for each competitor, and then display the top 5 regions with the highest total medals.
'''

# The query works in three steps:
#   1. count medal-bearing entries per (person, event),
#   2. keep each person's largest per-event count,
#   3. sum those maxima per region and keep the five largest totals.
top_regions_query = '''
    WITH competitor_max_event_medals AS (
        SELECT 
            gc.person_id,
            e.id AS event_id,
            COUNT(ce.medal_id) AS event_medals
        FROM competitor_event ce
        JOIN games_competitor gc ON ce.competitor_id = gc.id
        JOIN event e ON ce.event_id = e.id
        WHERE ce.medal_id IS NOT NULL
        GROUP BY gc.person_id, e.id
    ),
    top_event_medals_per_competitor AS (
        SELECT person_id, MAX(event_medals) AS max_medals
        FROM competitor_max_event_medals
        GROUP BY person_id
    )
    SELECT nr.region_name, SUM(tempc.max_medals) AS total_medals
    FROM top_event_medals_per_competitor tempc
    JOIN person_region pr ON tempc.person_id = pr.person_id
    JOIN noc_region nr ON pr.region_id = nr.id
    GROUP BY nr.region_name
    ORDER BY total_medals DESC
    LIMIT 5;
'''

# Run the query and load the result straight into a DataFrame
df_top_5_regions = pd.read_sql(sql=top_regions_query, con=engine)

df_top_5_regions

Unnamed: 0,region_name,total_medals
0,USA,12166
1,UK,7669
2,France,7126
3,Germany,6925
4,Italy,6700


In [11]:
'''
Task 2: Create a temporary table to store competitors who have participated 
in more than three Olympic Games but have not won any medals. 
Retrieve and display the contents of this table, including their full names and the number of games they participated in.
'''

# SQLite temporary tables are visible only to the connection that created them.
# The drop, the create and the read therefore all run on ONE connection rather
# than relying on the pool to hand the same underlying connection back each time.
with engine.connect() as conn:
    # Drop the table if it exists so the cell can be re-run safely
    conn.execute(text('DROP TABLE IF EXISTS no_medals_three_games'))

    # The LEFT JOIN attaches only medal-bearing entries, so COUNT(ce.competitor_id)
    # is the person's medal-entry count across ALL of their Games. Testing it in
    # HAVING checks "never won a medal" per person. A row-level
    # "WHERE ce.medal_id IS NULL" would merely discard the Games in which a medal
    # was won and still list medalists who had more than three medal-less Games.
    # NOTE(review): this treats a NULL medal_id as "no medal". An empty result
    # may mean medal_id is never NULL in this database (e.g. a sentinel
    # "no medal" row) - verify against the medal table.
    conn.execute(text("""
    CREATE TEMPORARY TABLE no_medals_three_games AS
    SELECT 
        gc.person_id, 
        COUNT(DISTINCT gc.games_id) AS games_participated
    FROM games_competitor gc
    LEFT JOIN competitor_event ce ON gc.id = ce.competitor_id AND ce.medal_id IS NOT NULL
    GROUP BY gc.person_id
    HAVING COUNT(ce.competitor_id) = 0
       AND COUNT(DISTINCT gc.games_id) > 3;
"""))

    # Retrieve the data from the temporary table together with each person's
    # full name (same connection, so the table is guaranteed to exist)
    df_no_medals_three_games = pd.read_sql(text("""
    SELECT 
        nmtg.person_id, 
        p.full_name, 
        nmtg.games_participated
    FROM no_medals_three_games nmtg
    JOIN person p ON nmtg.person_id = p.id;
"""), conn)

df_no_medals_three_games

Unnamed: 0,person_id,full_name,games_participated
