# Importing Data & Libraries

In [16]:
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns
from config import *

# Read the competitions table; folder_path is expected to be provided by the config import above.
competitions_csv = "{}/competitions.csv".format(folder_path)
competitions_df = pd.read_csv(competitions_csv)

# Data Preparation

In [17]:
# Quick size check: (number of rows, number of columns) of the loaded table.
competitions_df.shape

(43, 10)

In [18]:
# List the column labels to see which fields are available.
competitions_df.columns

Index(['competition_id', 'competition_code', 'name', 'sub_type', 'type',
       'country_id', 'country_name', 'domestic_league_code', 'confederation',
       'url'],
      dtype='object')

In [19]:
# Preview the first rows of the table.
competitions_df.head()

Unnamed: 0,competition_id,competition_code,name,sub_type,type,country_id,country_name,domestic_league_code,confederation,url
0,CIT,italy-cup,italy-cup,domestic_cup,domestic_cup,75,Italy,IT1,europa,https://www.transfermarkt.co.uk/italy-cup/star...
1,NLSC,johan-cruijff-schaal,johan-cruijff-schaal,domestic_super_cup,other,122,Netherlands,NL1,europa,https://www.transfermarkt.co.uk/johan-cruijff-...
2,GRP,kypello-elladas,kypello-elladas,domestic_cup,domestic_cup,56,Greece,GR1,europa,https://www.transfermarkt.co.uk/kypello-ellada...
3,POSU,supertaca-candido-de-oliveira,supertaca-candido-de-oliveira,domestic_super_cup,other,136,Portugal,PO1,europa,https://www.transfermarkt.co.uk/supertaca-cand...
4,RUSS,russian-super-cup,russian-super-cup,domestic_super_cup,other,141,Russia,RU1,europa,https://www.transfermarkt.co.uk/russian-super-...


In [20]:
# Count the missing values in each column.
competitions_df.isna().sum()

competition_id          0
competition_code        0
name                    0
sub_type                0
type                    0
country_id              0
country_name            7
domestic_league_code    7
confederation           0
url                     0
dtype: int64

In [21]:
# Data type of each column, as inferred when the CSV was read.
competitions_df.dtypes

competition_id          object
competition_code        object
name                    object
sub_type                object
type                    object
country_id               int64
country_name            object
domestic_league_code    object
confederation           object
url                     object
dtype: object

In [22]:
# Inspect the raw country_name values to see where the missing entries are.
competitions_df['country_name']

0           Italy
1     Netherlands
2          Greece
3        Portugal
4          Russia
5           Spain
6             NaN
7         Denmark
8             NaN
9           Spain
10         France
11          Italy
12    Netherlands
13         Russia
14       Portugal
15        Belgium
16        England
17            NaN
18        England
19        Denmark
20            NaN
21        England
22    Netherlands
23        Ukraine
24        Ukraine
25          Spain
26            NaN
27         Greece
28         Turkey
29       Portugal
30        England
31            NaN
32        Germany
33         Russia
34       Scotland
35          Italy
36        Belgium
37            NaN
38        Germany
39       Scotland
40        Ukraine
41        Germany
42         France
Name: country_name, dtype: object

In [23]:
# The NaN country names are kept rather than dropped: they belong to
# non-national competitions (for example the European ones).
missing_country = competitions_df['country_name'].isna()
competitions_df[missing_country]

Unnamed: 0,competition_id,competition_code,name,sub_type,type,country_id,country_name,domestic_league_code,confederation,url
6,USC,uefa-super-cup,uefa-super-cup,uefa_super_cup,other,-1,,,europa,https://www.transfermarkt.co.uk/uefa-super-cup...
8,EL,europa-league,europa-league,europa_league,international_cup,-1,,,europa,https://www.transfermarkt.co.uk/europa-league/...
17,ELQ,europa-league-qualifikation,europa-league-qualifikation,europa_league_qualifying,international_cup,-1,,,europa,https://www.transfermarkt.co.uk/europa-league-...
20,ECLQ,uefa-europa-conference-league-qualifikation,uefa-europa-conference-league-qualifikation,uefa_europa_conference_league_qualifiers,international_cup,-1,,,europa,https://www.transfermarkt.co.uk/uefa-europa-co...
26,CL,uefa-champions-league,uefa-champions-league,uefa_champions_league,international_cup,-1,,,europa,https://www.transfermarkt.co.uk/uefa-champions...
31,KLUB,fifa-klub-wm,fifa-klub-wm,fifa_club_world_cup,other,-1,,,europa,https://www.transfermarkt.co.uk/fifa-klub-wm/s...
37,CLQ,uefa-champions-league-qualifikation,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,international_cup,-1,,,europa,https://www.transfermarkt.co.uk/uefa-champions...


In [24]:
# Replace the missing country names with the label 'International'.
competitions_df['country_name'] = competitions_df['country_name'].fillna('International')

competitions_df

Unnamed: 0,competition_id,competition_code,name,sub_type,type,country_id,country_name,domestic_league_code,confederation,url
0,CIT,italy-cup,italy-cup,domestic_cup,domestic_cup,75,Italy,IT1,europa,https://www.transfermarkt.co.uk/italy-cup/star...
1,NLSC,johan-cruijff-schaal,johan-cruijff-schaal,domestic_super_cup,other,122,Netherlands,NL1,europa,https://www.transfermarkt.co.uk/johan-cruijff-...
2,GRP,kypello-elladas,kypello-elladas,domestic_cup,domestic_cup,56,Greece,GR1,europa,https://www.transfermarkt.co.uk/kypello-ellada...
3,POSU,supertaca-candido-de-oliveira,supertaca-candido-de-oliveira,domestic_super_cup,other,136,Portugal,PO1,europa,https://www.transfermarkt.co.uk/supertaca-cand...
4,RUSS,russian-super-cup,russian-super-cup,domestic_super_cup,other,141,Russia,RU1,europa,https://www.transfermarkt.co.uk/russian-super-...
5,SUC,supercopa,supercopa,domestic_super_cup,other,157,Spain,ES1,europa,https://www.transfermarkt.co.uk/supercopa/star...
6,USC,uefa-super-cup,uefa-super-cup,uefa_super_cup,other,-1,International,,europa,https://www.transfermarkt.co.uk/uefa-super-cup...
7,DK1,superligaen,superligaen,first_tier,domestic_league,39,Denmark,DK1,europa,https://www.transfermarkt.co.uk/superligaen/st...
8,EL,europa-league,europa-league,europa_league,international_cup,-1,International,,europa,https://www.transfermarkt.co.uk/europa-league/...
9,ES1,laliga,laliga,first_tier,domestic_league,157,Spain,ES1,europa,https://www.transfermarkt.co.uk/laliga/startse...


In [25]:
# Rows without a domestic league code get the placeholder 'INT'.
competitions_df['domestic_league_code'] = competitions_df['domestic_league_code'].fillna('INT')

competitions_df

Unnamed: 0,competition_id,competition_code,name,sub_type,type,country_id,country_name,domestic_league_code,confederation,url
0,CIT,italy-cup,italy-cup,domestic_cup,domestic_cup,75,Italy,IT1,europa,https://www.transfermarkt.co.uk/italy-cup/star...
1,NLSC,johan-cruijff-schaal,johan-cruijff-schaal,domestic_super_cup,other,122,Netherlands,NL1,europa,https://www.transfermarkt.co.uk/johan-cruijff-...
2,GRP,kypello-elladas,kypello-elladas,domestic_cup,domestic_cup,56,Greece,GR1,europa,https://www.transfermarkt.co.uk/kypello-ellada...
3,POSU,supertaca-candido-de-oliveira,supertaca-candido-de-oliveira,domestic_super_cup,other,136,Portugal,PO1,europa,https://www.transfermarkt.co.uk/supertaca-cand...
4,RUSS,russian-super-cup,russian-super-cup,domestic_super_cup,other,141,Russia,RU1,europa,https://www.transfermarkt.co.uk/russian-super-...
5,SUC,supercopa,supercopa,domestic_super_cup,other,157,Spain,ES1,europa,https://www.transfermarkt.co.uk/supercopa/star...
6,USC,uefa-super-cup,uefa-super-cup,uefa_super_cup,other,-1,International,INT,europa,https://www.transfermarkt.co.uk/uefa-super-cup...
7,DK1,superligaen,superligaen,first_tier,domestic_league,39,Denmark,DK1,europa,https://www.transfermarkt.co.uk/superligaen/st...
8,EL,europa-league,europa-league,europa_league,international_cup,-1,International,INT,europa,https://www.transfermarkt.co.uk/europa-league/...
9,ES1,laliga,laliga,first_tier,domestic_league,157,Spain,ES1,europa,https://www.transfermarkt.co.uk/laliga/startse...


In [26]:
# Drop the 'name' column (it holds the same values as competition_code).
competitions_df = competitions_df.drop('name', axis='columns')
competitions_df

Unnamed: 0,competition_id,competition_code,sub_type,type,country_id,country_name,domestic_league_code,confederation,url
0,CIT,italy-cup,domestic_cup,domestic_cup,75,Italy,IT1,europa,https://www.transfermarkt.co.uk/italy-cup/star...
1,NLSC,johan-cruijff-schaal,domestic_super_cup,other,122,Netherlands,NL1,europa,https://www.transfermarkt.co.uk/johan-cruijff-...
2,GRP,kypello-elladas,domestic_cup,domestic_cup,56,Greece,GR1,europa,https://www.transfermarkt.co.uk/kypello-ellada...
3,POSU,supertaca-candido-de-oliveira,domestic_super_cup,other,136,Portugal,PO1,europa,https://www.transfermarkt.co.uk/supertaca-cand...
4,RUSS,russian-super-cup,domestic_super_cup,other,141,Russia,RU1,europa,https://www.transfermarkt.co.uk/russian-super-...
5,SUC,supercopa,domestic_super_cup,other,157,Spain,ES1,europa,https://www.transfermarkt.co.uk/supercopa/star...
6,USC,uefa-super-cup,uefa_super_cup,other,-1,International,INT,europa,https://www.transfermarkt.co.uk/uefa-super-cup...
7,DK1,superligaen,first_tier,domestic_league,39,Denmark,DK1,europa,https://www.transfermarkt.co.uk/superligaen/st...
8,EL,europa-league,europa_league,international_cup,-1,International,INT,europa,https://www.transfermarkt.co.uk/europa-league/...
9,ES1,laliga,first_tier,domestic_league,157,Spain,ES1,europa,https://www.transfermarkt.co.uk/laliga/startse...


In [27]:
# Keep only the columns used from here on, in a fixed order; 'url' is left out.
selected_columns = [
    'competition_id',
    'competition_code',
    'sub_type',
    'type',
    'country_id',
    'country_name',
    'domestic_league_code',
    'confederation',
]
# .copy() turns the column subset into an independent frame, so the later
# .loc assignments modify it directly instead of raising
# SettingWithCopyWarning on a slice of the previous frame.
competitions_df = competitions_df[selected_columns].copy()

# Display the table ordered by competition_id; the sorted result is not stored.
competitions_df.sort_values("competition_id", ascending=True)

Unnamed: 0,competition_id,competition_code,sub_type,type,country_id,country_name,domestic_league_code,confederation
36,BE1,jupiler-pro-league,first_tier,domestic_league,19,Belgium,BE1,europa
15,BESC,belgian-supercup,domestic_super_cup,other,19,Belgium,BE1,europa
25,CDR,copa-del-rey,domestic_cup,domestic_cup,157,Spain,ES1,europa
18,CGB,efl-cup,league_cup,other,189,England,GB1,europa
0,CIT,italy-cup,domestic_cup,domestic_cup,75,Italy,IT1,europa
26,CL,uefa-champions-league,uefa_champions_league,international_cup,-1,International,INT,europa
37,CLQ,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,international_cup,-1,International,INT,europa
41,DFB,dfb-pokal,domestic_cup,domestic_cup,40,Germany,L1,europa
38,DFL,dfl-supercup,domestic_super_cup,other,40,Germany,L1,europa
7,DK1,superligaen,first_tier,domestic_league,39,Denmark,DK1,europa


In [31]:
# Frequency of each competition_code; a count above 1 means the same code is used by more than one competition.
competitions_df['competition_code'].value_counts()

competition_code
premier-liga                                   2
italy-cup                                      1
fifa-klub-wm                                   1
ukrainian-super-cup                            1
copa-del-rey                                   1
uefa-champions-league                          1
super-league-1                                 1
super-lig                                      1
allianz-cup                                    1
community-shield                               1
bundesliga                                     1
johan-cruijff-schaal                           1
scottish-premiership                           1
supercoppa-italiana                            1
jupiler-pro-league                             1
uefa-champions-league-qualifikation            1
dfl-supercup                                   1
sfa-cup                                        1
ukrainian-cup                                  1
dfb-pokal                                      1
tot

In [33]:
# Give the Ukrainian league its own competition_code (it was the same as the Russian one).
is_ukraine_league = competitions_df['competition_id'] == 'UKR1'
competitions_df.loc[is_ukraine_league, 'competition_code'] = 'ukraine-premier-liga'

Unnamed: 0,competition_id,competition_code,sub_type,type,country_id,country_name,domestic_league_code,confederation
0,CIT,italy-cup,domestic_cup,domestic_cup,75,Italy,IT1,europa
1,NLSC,johan-cruijff-schaal,domestic_super_cup,other,122,Netherlands,NL1,europa
2,GRP,kypello-elladas,domestic_cup,domestic_cup,56,Greece,GR1,europa
3,POSU,supertaca-candido-de-oliveira,domestic_super_cup,other,136,Portugal,PO1,europa
4,RUSS,russian-super-cup,domestic_super_cup,other,141,Russia,RU1,europa
5,SUC,supercopa,domestic_super_cup,other,157,Spain,ES1,europa
6,USC,uefa-super-cup,uefa_super_cup,other,-1,International,INT,europa
7,DK1,superligaen,first_tier,domestic_league,39,Denmark,DK1,europa
8,EL,europa-league,europa_league,international_cup,-1,International,INT,europa
9,ES1,laliga,first_tier,domestic_league,157,Spain,ES1,europa


In [35]:
# Show only the rows whose sub_type is 'first_tier'.
competitions_df[competitions_df['sub_type'] == 'first_tier']

Unnamed: 0,competition_id,competition_code,sub_type,type,country_id,country_name,domestic_league_code,confederation
7,DK1,superligaen,first_tier,domestic_league,39,Denmark,DK1,europa
9,ES1,laliga,first_tier,domestic_league,157,Spain,ES1,europa
10,FR1,ligue-1,first_tier,domestic_league,50,France,FR1,europa
11,IT1,serie-a,first_tier,domestic_league,75,Italy,IT1,europa
12,NL1,eredivisie,first_tier,domestic_league,122,Netherlands,NL1,europa
14,PO1,liga-portugal-bwin,first_tier,domestic_league,136,Portugal,PO1,europa
16,GB1,premier-league,first_tier,domestic_league,189,England,GB1,europa
24,UKR1,ukraine-premier-liga,first_tier,domestic_league,177,Ukraine,UKR1,europa
27,GR1,super-league-1,first_tier,domestic_league,56,Greece,GR1,europa
28,TR1,super-lig,first_tier,domestic_league,174,Turkey,TR1,europa


In [36]:
# Readable display names for the national leagues, keyed by competition_id.
league_names = {
    'BE1': 'Belgian Jupiler Pro League',
    'DK1': 'Danish Superliga',
    'ES1': 'LaLiga',
    'FR1': 'Ligue 1',
    'GB1': 'Premier League',
    'GR1': 'Greece Super League 1',
    'IT1': 'Serie A',
    'L1': 'Bundesliga',
    'NL1': 'Eredivisie',
    'PO1': 'Liga Portugal',
    'RU1': 'Russian Premier Liga',
    'SC1': 'Scottish Premiership',
    'TR1': 'Turkish Super League',
    'UKR1': 'Ukrainian Premier Liga'
}

# Look up a display name for every row, then write it into the new 'name'
# column for first-tier rows only; the assignment aligns on the index and
# every other row is left as NaN.
mapped_names = competitions_df['competition_id'].map(league_names)
is_first_tier = competitions_df['sub_type'] == 'first_tier'
competitions_df.loc[is_first_tier, 'name'] = mapped_names

competitions_df

Unnamed: 0,competition_id,competition_code,sub_type,type,country_id,country_name,domestic_league_code,confederation,name
0,CIT,italy-cup,domestic_cup,domestic_cup,75,Italy,IT1,europa,
1,NLSC,johan-cruijff-schaal,domestic_super_cup,other,122,Netherlands,NL1,europa,
2,GRP,kypello-elladas,domestic_cup,domestic_cup,56,Greece,GR1,europa,
3,POSU,supertaca-candido-de-oliveira,domestic_super_cup,other,136,Portugal,PO1,europa,
4,RUSS,russian-super-cup,domestic_super_cup,other,141,Russia,RU1,europa,
5,SUC,supercopa,domestic_super_cup,other,157,Spain,ES1,europa,
6,USC,uefa-super-cup,uefa_super_cup,other,-1,International,INT,europa,
7,DK1,superligaen,first_tier,domestic_league,39,Denmark,DK1,europa,Danish Superliga
8,EL,europa-league,europa_league,international_cup,-1,International,INT,europa,
9,ES1,laliga,first_tier,domestic_league,157,Spain,ES1,europa,LaLiga
