## Question 1: **Answer**

In [1]:
import pandas as pd
import sqlite3

In [None]:
def add_data_to_database(input_data, db_path="soccer_data.db",
                         table_name="soccer", if_exists="replace"):
    """Load a CSV file into a table of a SQLite database.

    Parameters
    ----------
    input_data : str or path-like
        Location of the CSV file; passed straight to ``pandas.read_csv``.
    db_path : str, default "soccer_data.db"
        SQLite database file to write to (created if it does not exist).
    table_name : str, default "soccer"
        Name of the destination table.
    if_exists : {"replace", "fail", "append"}, default "replace"
        Forwarded to ``DataFrame.to_sql``. The default replaces an existing
        table so that re-running the cell reloads the data instead of
        stopping with "Table ... already exists".

    Returns
    -------
    None
        The outcome is reported by printing a message. Errors are caught and
        printed rather than raised.
    """
    connection = None
    try:
        df = pd.read_csv(input_data)
        connection = sqlite3.connect(db_path)
        df.to_sql(table_name, con=connection, index=False, if_exists=if_exists)
        # Make sure the rows are persisted before the handle is released.
        connection.commit()
        print("Table added successfully")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        # Always release the SQLite handle, even when the load fails.
        if connection is not None:
            connection.close()

# Load the soccer statistics CSV into the SQLite database.
# NOTE(review): "/content/..." looks like a Colab-specific absolute path — confirm before running elsewhere.
data = "/content/soccer_stats.csv"
add_data_to_database(data)

Table added successfully


In [3]:
# Load the `sql` IPython extension, which provides the %sql / %%sql magics used in this notebook
%load_ext sql

In [4]:
# Connect the SQL magic to the SQLite file soccer_data.db (the same file add_data_to_database writes the table to)
%sql sqlite:///soccer_data.db

In [5]:
# Use the deprecated default table style for SQL magic output.
# NOTE(review): presumably a workaround for a table-style incompatibility in the installed SQL magic — confirm.
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

In [6]:
%%sql
-- Preview five rows of the loaded table
SELECT *
FROM soccer
LIMIT 5;

 * sqlite:///soccer_data.db
Done.


player_name,position,nationality,club,league,goals_scored,assists,shots_on_target,minutes_played,clean_sheets,yellow_cards,red_cards
Lionel Messi,Forward,Argentina,Paris Saint-Germain,Ligue 1,40,24,46,3420,0,10,0
Robert Lewandowski,Forward,Poland,Bayern Munich,Bundesliga,50,10,66,3240,0,7,0
Kylian Mbappé,Forward,France,Paris Saint-Germain,Ligue 1,28,17,41,2790,0,9,0
Kevin De Bruyne,Midfielder,Belgium,Manchester City,Premier League,15,14,30,2880,0,4,0
Mohamed Salah,Forward,Egypt,Liverpool,Premier League,32,14,42,3060,0,3,0


In [7]:
%%sql
-- How many different nationalities appear in the table
SELECT COUNT(DISTINCT nationality) AS number_of_nationalities
FROM soccer;

 * sqlite:///soccer_data.db
Done.


number_of_nationalities
15


## Question 2: **Answer**

In [8]:
%%sql
-- Top scorer(s) of the Bundesliga, Ligue 1 and the Premier League,
-- highest tally first.
SELECT
    ranked.league,
    ranked.player_name,
    ranked.goals_scored
FROM (
    -- Position of each player inside their own league; ties share a position.
    SELECT
        player_name,
        league,
        goals_scored,
        RANK() OVER (
            PARTITION BY league
            ORDER BY goals_scored DESC
        ) AS league_position
    FROM soccer
    WHERE league IN ('Bundesliga', 'Ligue 1', 'Premier League')
) AS ranked
WHERE ranked.league_position = 1
ORDER BY ranked.goals_scored DESC;

 * sqlite:///soccer_data.db
Done.


league,player_name,goals_scored
Bundesliga,Robert Lewandowski,50
Ligue 1,Lionel Messi,40
Premier League,Mohamed Salah,32


## Question 3: **Answer**

In [9]:
%%sql
-- Step 1: position of every player by goals scored (ties share a position).
WITH goal_ranking AS (
    SELECT
        league,
        goals_scored,
        RANK() OVER (ORDER BY goals_scored DESC) AS goal_rank
    FROM soccer
),
-- Step 2: keep only the players placed fifth or better.
top_five AS (
    SELECT
        league,
        goals_scored
    FROM goal_ranking
    WHERE goal_rank <= 5
),
-- Step 3: per league, goals and head count among those players.
per_league AS (
    SELECT
        league,
        SUM(goals_scored) AS total_goals_per_league,
        COUNT(*) AS players_in_top_five
    FROM top_five
    GROUP BY league
)
-- Result: the league(s) with the most players in the top five, together with
-- their share of all goals scored by the top-five players.
SELECT
    league,
    total_goals_per_league,
    ROUND(
        (total_goals_per_league /
            (SELECT CAST(SUM(goals_scored) AS REAL) FROM top_five)) * 100,
        2
    ) AS percentage_of_total_goals
FROM per_league
WHERE players_in_top_five = (SELECT MAX(players_in_top_five) FROM per_league);

 * sqlite:///soccer_data.db
Done.


league,total_goals_per_league,percentage_of_total_goals
Bundesliga,83,43.92
Ligue 1,74,39.15


## Question 4: **Answer**

In [10]:
%%sql
-- Defender(s) with the most clean sheets, flagged by whether they play in
-- the Premier League.
SELECT
    player_name,
    position,
    clean_sheets,
    CASE
        -- Test this row's own league. A table-wide
        -- `'Premier League' IN (SELECT league FROM soccer)` check is true for
        -- every row as soon as any Premier League player exists.
        WHEN league = 'Premier League' THEN 'Defender in the Premiership'
        ELSE 'Defender not in the Premiership'
    END AS defender_league
FROM soccer
WHERE position = 'Defender'
      -- Compare against the best defender, not the best player of any
      -- position, so the result is not empty when a non-defender has more
      -- clean sheets than every defender.
      AND clean_sheets = (
          SELECT MAX(clean_sheets)
          FROM soccer
          WHERE position = 'Defender'
      );

 * sqlite:///soccer_data.db
Done.


player_name,position,clean_sheets,defender_league
Virgil van Dijk,Defender,10,Defender in the Premiership
João Cancelo,Defender,10,Defender in the Premiership


## Question 5: **Answer**

In [11]:
%%sql
-- Goal conversion rate (goals per shot on target, as a percentage).
-- Players without a shot on target are skipped because their rate is undefined.
WITH conversion AS (
    SELECT
        player_name,
        CAST(goals_scored AS REAL) / shots_on_target * 100
            AS goal_conversion_percentage
    FROM soccer
    WHERE shots_on_target > 0
),
-- Rank once from the top and once from the bottom; RANK() keeps ties, so
-- several players can share first place in either direction.
ranked AS (
    SELECT
        player_name,
        goal_conversion_percentage,
        RANK() OVER (ORDER BY goal_conversion_percentage DESC) AS highest_rank,
        RANK() OVER (ORDER BY goal_conversion_percentage ASC) AS lowest_rank
    FROM conversion
)
-- Return the most and least efficient player(s). The explicit ORDER BY makes
-- the row order deterministic (most efficient first).
SELECT
    player_name,
    -- '%20s' right-aligns the rounded value as text in a 20-character field.
    printf('%20s', ROUND(goal_conversion_percentage, 2)) AS conversion_percentage
FROM ranked
WHERE highest_rank = 1 OR lowest_rank = 1
ORDER BY goal_conversion_percentage DESC;

 * sqlite:///soccer_data.db
Done.


player_name,conversion_percentage
Lionel Messi,86.96
Thiago Alcântara,0.0


## Question 6: **Answer**

In [12]:
%%sql
-- Number every player's goal tally within La Liga and the Premier League,
-- highest first.
WITH league_goal_order AS (
    SELECT
        league,
        goals_scored,
        ROW_NUMBER() OVER (
            PARTITION BY league
            ORDER BY goals_scored DESC
        ) AS position_in_league
    FROM soccer
    WHERE league IN ('La Liga', 'Premier League')
),
-- Add up the two highest tallies of each league.
top_two_totals AS (
    SELECT
        SUM(CASE WHEN league = 'La Liga' THEN goals_scored END)
            AS la_liga_goals,
        SUM(CASE WHEN league = 'Premier League' THEN goals_scored END)
            AS premier_league_goals
    FROM league_goal_order
    WHERE position_in_league <= 2
)
-- One summary row: both totals side by side and their difference.
SELECT
    'La Liga' AS spanish_league,
    la_liga_goals AS total_goals_la_liga,
    'Premier League' AS english_league,
    premier_league_goals AS total_goals_premier_league,
    la_liga_goals - premier_league_goals AS goal_difference
FROM top_two_totals;

 * sqlite:///soccer_data.db
Done.


spanish_league,total_goals_la_liga,english_league,total_goals_premier_league,goal_difference
La Liga,51,Premier League,60,-9


In [13]:
# %%sql
# DROP TABLE soccer;