In [1]:
import pandas as pd

TBL_UEFA_2020 = pd.read_csv('uefa_2020.csv', sep=',', encoding='latin1')
TBL_UEFA_2021 = pd.read_csv('uefa_2021.csv', sep=',', encoding='latin1')
TBL_UEFA_2022 = pd.read_csv('uefa_2022.csv', sep=',', encoding='latin1')

In [4]:
%%sql
SELECT 
    CASE 
        WHEN TEAM_HOME_SCORE > TEAM_AWAY_SCORE THEN 'Home Team Win'
        WHEN TEAM_HOME_SCORE < TEAM_AWAY_SCORE THEN 'Away Team Win'
        ELSE 'Draw'
    END AS Match_Outcome,
    COUNT(*) AS Frequency_2020
FROM 
    TBL_UEFA_2020
GROUP BY 
    Match_Outcome;

Unnamed: 0,Match_Outcome,Frequency_2020
0,Away Team Win,50
1,Home Team Win,51
2,Draw,24


In [5]:
%%sql
SELECT 
    CASE 
        WHEN TEAM_HOME_SCORE > TEAM_AWAY_SCORE THEN 'Home Team Win'
        WHEN TEAM_HOME_SCORE < TEAM_AWAY_SCORE THEN 'Away Team Win'
        ELSE 'Draw'
    END AS Match_Outcome,
    COUNT(*) AS Frequency_2021
FROM 
    TBL_UEFA_2021
GROUP BY 
    Match_Outcome;

Unnamed: 0,Match_Outcome,Frequency_2021
0,Home Team Win,56
1,Draw,26
2,Away Team Win,43


In [6]:
%%sql
SELECT 
    CASE 
        WHEN TEAM_HOME_SCORE > TEAM_AWAY_SCORE THEN 'Home Team Win'
        WHEN TEAM_HOME_SCORE < TEAM_AWAY_SCORE THEN 'Away Team Win'
        ELSE 'Draw'
    END AS Match_Outcome,
    COUNT(*) AS Frequency_2022
FROM 
    TBL_UEFA_2022
GROUP BY 
    Match_Outcome;

Unnamed: 0,Match_Outcome,Frequency_2022
0,Away Team Win,38
1,Home Team Win,61
2,Draw,26


In [8]:
%%sql
-- Average ball possession for home and away teams across three years
SELECT 
    'Home' AS Team_Location,
    AVG(CAST(REPLACE(POSSESSION_HOME, '%', '') AS FLOAT)) AS Avg_Possession
FROM 
    TBL_UEFA_2020
UNION ALL
SELECT 
    'Away' AS Team_Location,
    AVG(CAST(REPLACE(POSSESSION_AWAY, '%', '') AS FLOAT)) AS Avg_Possession
FROM 
    TBL_UEFA_2020
UNION ALL
SELECT 
    'Home' AS Team_Location,
    AVG(CAST(REPLACE(POSSESSION_HOME, '%', '') AS FLOAT)) AS Avg_Possession
FROM 
    TBL_UEFA_2021
UNION ALL
SELECT 
    'Away' AS Team_Location,
    AVG(CAST(REPLACE(POSSESSION_AWAY, '%', '') AS FLOAT)) AS Avg_Possession
FROM 
    TBL_UEFA_2021
UNION ALL
SELECT 
    'Home' AS Team_Location,
    AVG(CAST(REPLACE(POSSESSION_HOME, '%', '') AS FLOAT)) AS Avg_Possession
FROM 
    TBL_UEFA_2022
UNION ALL
SELECT 
    'Away' AS Team_Location,
    AVG(CAST(REPLACE(POSSESSION_AWAY, '%', '') AS FLOAT)) AS Avg_Possession
FROM 
    TBL_UEFA_2022;

Unnamed: 0,Team_Location,Avg_Possession
0,Home,49.808
1,Away,50.192
2,Home,46.88
3,Away,53.12
4,Home,50.952
5,Away,48.968


###### For TBL_UEFA_2020:
 Home Team: Average possession is approximately 49.808% 
Away Team: Average possession is approximately 50.192%.

###### For TBL_UEFA_2021:
Home Team: Average possession is approximately 46.880%
Away Team: Average possession is approximately 53.120%

###### For TBL_UEFA_2022:
Home Team: Average possession is approximately 50.952%
Away Team: Average possession is approximately 48.968%

In [11]:
%%sql
-- Correlation for home teams
SELECT 
    'Home' AS Team_Location,
    CORR(CAST(REPLACE(POSSESSION_HOME, '%', '') AS FLOAT), TOTAL_SHOTS_HOME) AS Correlation
FROM 
    TBL_UEFA_2020
UNION ALL
SELECT 
    'Home' AS Team_Location,
    CORR(CAST(REPLACE(POSSESSION_HOME, '%', '') AS FLOAT), TOTAL_SHOTS_HOME) AS Correlation
FROM 
    TBL_UEFA_2021
UNION ALL
SELECT 
    'Home' AS Team_Location,
    CORR(CAST(REPLACE(POSSESSION_HOME, '%', '') AS FLOAT), TOTAL_SHOTS_HOME) AS Correlation
FROM 
    TBL_UEFA_2022
UNION ALL
-- Correlation for away teams
SELECT 
    'Away' AS Team_Location,
    CORR(CAST(REPLACE(POSSESSION_AWAY, '%', '') AS FLOAT), TOTAL_SHOTS_AWAY) AS Correlation
FROM 
    TBL_UEFA_2020
UNION ALL
SELECT 
    'Away' AS Team_Location,
    CORR(CAST(REPLACE(POSSESSION_AWAY, '%', '') AS FLOAT), TOTAL_SHOTS_AWAY) AS Correlation
FROM 
    TBL_UEFA_2021
UNION ALL
SELECT 
    'Away' AS Team_Location,
    CORR(CAST(REPLACE(POSSESSION_AWAY, '%', '') AS FLOAT), TOTAL_SHOTS_AWAY) AS Correlation
FROM 
    TBL_UEFA_2022;

Unnamed: 0,Team_Location,Correlation
0,Home,0.557674
1,Home,-0.001806
2,Home,0.530543
3,Away,0.515041
4,Away,0.517382
5,Away,0.471057


For Home Teams:

In TBL_UEFA_2020: The correlation coefficient between possession and the number of shots taken by home teams is approximately 0.5577, indicating a moderately positive correlation.
In TBL_UEFA_2021: The correlation coefficient is approximately -0.0018, indicating a very weak or negligible correlation.
In TBL_UEFA_2022: The correlation coefficient is approximately 0.5305, indicating a moderately positive correlation.
For Away Teams:

In TBL_UEFA_2020: The correlation coefficient between possession and the number of shots taken by away teams is approximately 0.5150, indicating a moderately positive correlation.
In TBL_UEFA_2021: The correlation coefficient is approximately 0.5174, indicating a moderately positive correlation.
In TBL_UEFA_2022: The correlation coefficient is approximately 0.4711, indicating a moderately positive correlation.

##### Highest average possession and shots on target for UEFA_2020:

In [17]:
%%sql
-- Highest average possession and shots on target for home teams
SELECT 
    'Home' AS Team_Location,
    TEAM_NAME_HOME AS Team_Name,
    AVG(CAST(REPLACE(POSSESSION_HOME, '%', '') AS FLOAT)) AS Avg_Possession,
    AVG(SHOTS_ON_TARGET_HOME) AS Avg_Shots_On_Target
FROM 
    TBL_UEFA_2020
GROUP BY 
    Team_Name
ORDER BY 
    Avg_Possession DESC,
    Avg_Shots_On_Target DESC
LIMIT 5;

Unnamed: 0,Team_Location,Team_Name,Avg_Possession,Avg_Shots_On_Target
0,Home,Sevilla,62.75,7.0
1,Home,Bayern Munich,62.4,6.6
2,Home,Barcelona,59.75,9.0
3,Home,Manchester City,59.428571,5.428571
4,Home,Juventus,57.5,6.75


In [18]:
%%sql
-- Highest average possession and shots on target for away teams
SELECT 
    'Away' AS Team_Location,
    TEAM_NAME_AWAY AS Team_Name,
    AVG(CAST(REPLACE(POSSESSION_AWAY, '%', '') AS FLOAT)) AS Avg_Possession,
    AVG(SHOTS_ON_TARGET_AWAY) AS Avg_Shots_On_Target
FROM 
    TBL_UEFA_2020
GROUP BY 
    Team_Name
ORDER BY 
    Avg_Possession DESC,
    Avg_Shots_On_Target DESC
LIMIT 5;

Unnamed: 0,Team_Location,Team_Name,Avg_Possession,Avg_Shots_On_Target
0,Away,Barcelona,66.25,6.5
1,Away,Manchester City,62.666667,6.0
2,Away,Real Madrid,61.333333,5.0
3,Away,Sevilla,59.0,5.75
4,Away,Bayern Munich,58.6,5.8


##### Highest average possession and shots on target for UEFA_2021:

In [19]:
%%sql
-- Highest average possession and shots on target for home teams
SELECT 
    'Home' AS Team_Location,
    TEAM_NAME_HOME AS Team_Name,
    AVG(CAST(REPLACE(POSSESSION_HOME, '%', '') AS FLOAT)) AS Avg_Possession,
    AVG(SHOTS_ON_TARGET_HOME) AS Avg_Shots_On_Target
FROM 
    TBL_UEFA_2021
GROUP BY 
    Team_Name
ORDER BY 
    Avg_Possession DESC,
    Avg_Shots_On_Target DESC
LIMIT 5;

Unnamed: 0,Team_Location,Team_Name,Avg_Possession,Avg_Shots_On_Target
0,Home,Atalanta\n,65.0,5.0
1,Home,Bayern,60.75,6.25
2,Home,Liverpool,57.0,7.0
3,Home,Man City,55.833333,5.166667
4,Home,Ajax,55.25,4.0


In [20]:
%%sql
-- Highest average possession and shots on target for away teams
SELECT 
    'Away' AS Team_Location,
    TEAM_NAME_AWAY AS Team_Name,
    AVG(CAST(REPLACE(POSSESSION_AWAY, '%', '') AS FLOAT)) AS Avg_Possession,
    AVG(SHOTS_ON_TARGET_AWAY) AS Avg_Shots_On_Target
FROM 
    TBL_UEFA_2021
GROUP BY 
    Team_Name
ORDER BY 
    Avg_Possession DESC,
    Avg_Shots_On_Target DESC
LIMIT 5;

Unnamed: 0,Team_Location,Team_Name,Avg_Possession,Avg_Shots_On_Target
0,Away,Man City,63.0,5.5
1,Away,Dynamo Hyiv,63.0,3.0
2,Away,Sevilla,62.0,3.333333
3,Away,Zenit,61.0,6.0
4,Away,Ajax,60.5,6.25


##### Highest average possession and shots on target for UEFA_2022:

In [21]:
%%sql
-- Highest average possession and shots on target for home teams
SELECT 
    'Home' AS Team_Location,
    TEAM_NAME_HOME AS Team_Name,
    AVG(CAST(REPLACE(POSSESSION_HOME, '%', '') AS FLOAT)) AS Avg_Possession,
    AVG(SHOTS_ON_TARGET_HOME) AS Avg_Shots_On_Target
FROM 
    TBL_UEFA_2022
GROUP BY 
    Team_Name
ORDER BY 
    Avg_Possession DESC,
    Avg_Shots_On_Target DESC
LIMIT 5;

Unnamed: 0,Team_Location,Team_Name,Avg_Possession,Avg_Shots_On_Target
0,Home,FC Porto,68.0,7.0
1,Home,Barcelona,63.666667,7.333333
2,Home,Manchester City,61.714286,8.714286
3,Home,Atletico Madrid,61.333333,7.333333
4,Home,Marseille,60.333333,5.0


In [22]:
%%sql
-- Highest average possession and shots on target for away teams
SELECT 
    'Away' AS Team_Location,
    TEAM_NAME_AWAY AS Team_Name,
    AVG(CAST(REPLACE(POSSESSION_AWAY, '%', '') AS FLOAT)) AS Avg_Possession,
    AVG(SHOTS_ON_TARGET_AWAY) AS Avg_Shots_On_Target
FROM 
    TBL_UEFA_2022
GROUP BY 
    Team_Name
ORDER BY 
    Avg_Possession DESC,
    Avg_Shots_On_Target DESC
LIMIT 5;

Unnamed: 0,Team_Location,Team_Name,Avg_Possession,Avg_Shots_On_Target
0,Away,Barcelona,67.666667,4.333333
1,Away,Dortmund,61.0,4.0
2,Away,Chelsea,60.6,5.8
3,Away,Manchester City,59.0,4.666667
4,Away,Paris Saint-Germain,57.5,5.5


##### Most common match venues:

In [31]:
%%sql
-- Most common match venues
SELECT 
    LOCATION AS Match_Venue,
    COUNT(*) AS Frequency
FROM 
    TBL_UEFA_2020
GROUP BY 
    Match_Venue
ORDER BY 
    Frequency DESC
LIMIT 5;

Unnamed: 0,Match_Venue,Frequency
0,Parc des Princes,6
1,Kiev Olympic Stadium,6
2,Alfredo Di StÃ©fano,6
3,RamÃ³n SÃ¡nchez-PizjuÃ¡n,6
4,PuskÃ¡s ArÃ©na,6


In [29]:
%%sql
-- Most common match venues
SELECT 
    LOCATION AS Match_Venue,
    COUNT(*) AS Frequency
FROM 
    TBL_UEFA_2021
GROUP BY 
    Match_Venue
ORDER BY 
    Frequency DESC
LIMIT 5;

Unnamed: 0,Match_Venue,Frequency
0,Red Bull Arena Salzburg,15
1,Signal Iduna Park,12
2,Stamford Bridge,9
3,Santiago Bernabeu,8
4,Estádio do Sport Lisboa e Benfica,8


In [30]:
%%sql
-- Most common match venues
SELECT 
    LOCATION AS Match_Venue,
    COUNT(*) AS Frequency
FROM 
    TBL_UEFA_2022
GROUP BY 
    Match_Venue
ORDER BY 
    Frequency DESC
LIMIT 5;

Unnamed: 0,Match_Venue,Frequency
0,San Siro,12
1,Red Bull Arena,7
2,Santiago Bernabeu,6
3,Etihad,6
4,Stadion da Luz,5


In [None]:
%%sql


In [None]:
%%sql
