## Question 1: **Answer**

In [1]:
import pandas as pd
import sqlite3

In [None]:
def add_data_to_database(input_data):
    try:
        df = pd.read_csv(input_data)
        engine = sqlite3.connect("runners_stats_data.db")
        df.to_sql('runners',con=engine, index = False)
    except Exception as e:
            print(f"Error: {e}")

data= "/content/long_distance_runners.csv"
add_data_to_database(data)

In [3]:
# Loading SQL extension
%load_ext sql

In [4]:
# Connecting to the database
%sql sqlite:///runners_stats_data.db

In [5]:
# Default to duplicated style
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

#### 1. View the first 5 rows

In [6]:
%%sql
SELECT * FROM runners LIMIT 5;

 * sqlite:///runners_stats_data.db
Done.


name,nationality,distance,best_time,year,championships,world_records,olympic_gold_medals,olympic_silver_medals,olympic_bronze_medals
Eliud Kipchoge,Kenya,Marathon,02:01:39,2018,6,4,3,0,1
Brigid Kosgei,Kenya,Marathon,02:14:04,2019,2,1,1,0,0
Paula Radcliffe,UK,Marathon,02:15:25,2003,3,3,0,0,0
Haile Gebrselassie,Ethiopia,Marathon,02:03:59,2008,10,4,3,1,1
Kenenisa Bekele,Ethiopia,Marathon,02:01:41,2016,4,3,2,1,0


## Question 2: **Answer**

In [7]:
%%sql
-- Ranking runners based on medals achievement
WITH ranked_runners AS (
    SELECT
        name,
        nationality,
        world_records,
        (olympic_gold_medals + world_records) AS total_medals,
        DENSE_RANK() OVER (ORDER BY (world_records +
                                     olympic_gold_medals) DESC) AS rank
    FROM runners
)
-- Main query identifying top three ranked runners
SELECT
    name,
    nationality,
    total_medals,
    rank
FROM ranked_runners
WHERE rank <= 3;

 * sqlite:///runners_stats_data.db
Done.


name,nationality,total_medals,rank
Mary Keitany,Kenya,8,1
Vivian Cheruiyot,Kenya,8,1
Tirunesh Dibaba,Ethiopia,8,1
Eliud Kipchoge,Kenya,7,2
Haile Gebrselassie,Ethiopia,7,2
Haile Gebrselassie,Ethiopia,7,2
Joshua Cheptegei,Uganda,6,3
Kenenisa Bekele,Ethiopia,6,3


## Question 3: **Answer**

In [8]:
%%sql
-- Filter marathon runners
WITH successful_runners AS (
    SELECT
        nationality,
        (championships + olympic_gold_medals + olympic_silver_medals)
        AS total_medals
    FROM runners
    WHERE championships > 0 AND olympic_gold_medals > 0 AND olympic_silver_medals > 0
    AND distance = 'Marathon'
),
--Aggregate and group by nationality
totals_per_nation AS (
    SELECT
        nationality,
        SUM(total_medals) AS total_medals,
        COUNT(*) AS count_of_winners
    FROM successful_runners
    GROUP BY nationality
),
-- Identify maximum count of eligible runners
max_value AS (
    SELECT
        MAX(count_of_winners) AS max_winners_count
    FROM totals_per_nation
)
-- Main query to retrive nation with max count
SELECT
    nationality,
    count_of_winners,
    total_medals
FROM totals_per_nation
WHERE count_of_winners = (SELECT max_winners_count FROM max_value)
ORDER BY total_medals DESC;

 * sqlite:///runners_stats_data.db
Done.


nationality,count_of_winners,total_medals
Kenya,5,31


## Question 4: **Answer**

In [9]:
%%sql
-- Using INNER JOINs to find the fastest runners in 5000m and Marathon
SELECT
    r.distance,
    r.name,
    r.best_time
FROM runners r
JOIN (
    SELECT distance, MIN(best_time) AS best_time
    FROM runners
    WHERE distance IN ('5000m', 'Marathon')
    GROUP BY distance
) fastest_times
ON r.distance = fastest_times.distance AND r.best_time = fastest_times.best_time;

 * sqlite:///runners_stats_data.db
Done.


distance,name,best_time
Marathon,Eliud Kipchoge,02:01:39
5000m,Sifan Hassan,00:10:10


## Question 5: **Answer**

In [10]:
%%sql
-- Ranking of marathon runners for each year using ROW_NUMBER
CREATE VIEW IF NOT EXISTS ranked_marathon_runners AS
SELECT
    year,
    name AS runner_name,
    best_time AS finish_time,
    ROW_NUMBER() OVER (PARTITION BY year ORDER BY best_time) AS rank
FROM runners
WHERE distance = 'Marathon';

-- Returning best runner in each year
SELECT
    year,
    runner_name,
    finish_time
FROM ranked_marathon_runners
WHERE rank = 1
ORDER BY finish_time;

 * sqlite:///runners_stats_data.db
Done.
Done.


year,runner_name,finish_time
2018,Eliud Kipchoge,02:01:39
2016,Kenenisa Bekele,02:01:41
2014,Dennis Kipruto Kimetto,02:02:57
2011,Geoffrey Mutai,02:03:02
2013,Wilson Kiprop,02:03:03
2008,Haile Gebrselassie,02:03:59
2012,Abel Kirui,02:05:04
2019,Brigid Kosgei,02:14:04
2003,Paula Radcliffe,02:15:25
2017,Mary Keitany,02:17:01


## Question 6 **Answer**

In [11]:
%%sql
SELECT
    STRFTIME('%H:%M:%S', AVG(
        CASE
            WHEN year <= 2013
            THEN
                CAST(SUBSTR(finish_time, 1, 2) AS INTEGER) * 3600 +
                CAST(SUBSTR(finish_time, 4, 2) AS INTEGER) * 60 +
                CAST(SUBSTR(finish_time, 7, 2) AS INTEGER)
            ELSE NULL
        END), 'unixepoch') AS best_average_time_to_2013,
    STRFTIME('%H:%M:%S', AVG(
        CASE
            WHEN year >= 2014
            THEN
                CAST(SUBSTR(finish_time, 1, 2) AS INTEGER) * 3600 +
                CAST(SUBSTR(finish_time, 4, 2) AS INTEGER) * 60 +
                CAST(SUBSTR(finish_time, 7, 2) AS INTEGER)
            ELSE NULL
        END), 'unixepoch') AS best_average_time_from_2014
FROM ranked_marathon_runners
WHERE rank = 1;

 * sqlite:///runners_stats_data.db
Done.


best_average_time_to_2013,best_average_time_from_2014
02:06:06,02:07:28
