In [1]:
# FBref squad page for Real Madrid, season 2022-2023; passed to Dataset.main in a later cell.
urL = "https://fbref.com/en/squads/53a2f082/2022-2023/Real-Madrid-Stats"

import requests
from bs4 import BeautifulSoup
import re
import time
import pandas as pd


class Dataset:
  """Scraper that turns the stat tables of an FBref squad page into plain dicts.

  Every table is returned column-oriented (``{column name: [cell text, ...]}``),
  which is the shape the notebook hands to ``pd.DataFrame``. Cell values are
  kept as the raw strings found in the HTML; no numeric conversion happens here.
  """

  # Browser-like User-Agent sent with every request.
  USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'

  # Seconds to wait for the server; requests has no timeout by default.
  REQUEST_TIMEOUT = 30

  # "class" attribute values used to locate the tables in the page.
  WRAPPER_CLASS = "table_wrapper tabbed"
  CONTAINER_CLASS = "table_container tabbed current"

  # One entry per table wrapper, in page order: (result key, grouped header?).
  # Tables with a grouped (two-row) header go through table_reorder, the
  # others through table_order.
  TABLE_LAYOUT = (
      ("Standard_Stats", True),
      ("Scores & Fixtures", False),
      ("Goalkeeping", True),
      ("Advanced Goalkeeping", True),
      ("Shooting", True),
      ("Passing", True),
      ("Pass Types", True),
      ("Goal and Shot Creation", True),
      ("Defensive Actions", True),
      ("Possession", True),
      ("Playing Time", True),
      ("Miscellaneous Stats", True),
      ("Regular season", False),
  )

  def __init__(self) -> None:
    pass

  def get_soup(self, url):
    """
        Download a page and parse it as HTML.

        params1: url (address of the page to fetch)
        return: soup (BeautifulSoup tree of the response body), or None when
                the request fails or the status code is not 200; the reason
                is printed in that case
    """
    headers = {'user-agent': self.USER_AGENT}
    try:
      # The headers were previously built but never sent; pass them along,
      # together with a timeout so a stalled connection cannot hang the cell.
      data = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
      if data.status_code != 200:
        raise Exception("Failed to fetch data")
    except Exception as ex:
      print(f"Exception occurred as {ex}")
      return None
    return BeautifulSoup(data.text, "html.parser")

  def main(self, urL):
    """
        Scrape every table listed in TABLE_LAYOUT from the page at urL.

        params1: urL (address of the squad page)
        return: dict mapping each table name to its column dict
        raises: RuntimeError when the page could not be downloaded,
                IndexError when the page holds fewer tables than expected
    """
    soup_data = self.get_soup(urL)
    if soup_data is None:
      # get_soup already printed the cause; fail here with a clear message
      # instead of an AttributeError on None further down.
      raise RuntimeError(f"Could not download {urL}; nothing to parse")

    tables_soup = soup_data.find_all("div", attrs={"class": self.WRAPPER_CLASS})
    if len(tables_soup) < len(self.TABLE_LAYOUT):
      raise IndexError(
          f"Expected at least {len(self.TABLE_LAYOUT)} tables, found {len(tables_soup)}")

    final_mapped_data = {}
    for wrapper, (table_name, grouped_header) in zip(tables_soup, self.TABLE_LAYOUT):
      parse = self.table_reorder if grouped_header else self.table_order
      final_mapped_data[table_name] = parse(wrapper)
    return final_mapped_data

  def _current_rows(self, tables_soup):
    """Return all <tr> elements inside the wrapper's CONTAINER_CLASS div."""
    container = tables_soup.find("div", attrs={"class": self.CONTAINER_CLASS})
    return container.find_all("tr")

  def table_order(self, tables_soup):
    """
        Parse a table whose first row is its only header row.

        params1: tables_soup (one table wrapper element)
        return: dict mapping each non-empty header text to the list of cell
                texts in that column; the first column is read from the row's
                <th>, the remaining ones from its <td> cells
    """
    rows_data = self._current_rows(tables_soup)
    sub_head_list = [cell.text for cell in rows_data[0].find_all("th") if cell.text]

    final_data = {}
    for row in rows_data[1:]:
      label_cells = row.find_all("th")
      value_cells = row.find_all("td")
      for j, column in enumerate(sub_head_list):
        cell = label_cells[0] if j == 0 else value_cells[j - 1]
        final_data.setdefault(column, []).append(cell.text)
    return final_data

  def table_reorder(self, tables_soup):
    """
        Parse a table with a two-row header (group row, then column row).

        Columns are named "<group>_<column>" when the column header carries a
        non-empty "data-over-header" attribute, otherwise just "<column>". The
        first column always keeps its plain header text.

        params1: tables_soup (one table wrapper element)
        return: dict mapping each column name to the list of cell texts; rows
                without a <th>, or whose first <th> is empty, are skipped
    """
    rows_data = self._current_rows(tables_soup)
    sub_head_list = rows_data[1].find_all("th")  # second header row: column names

    # Resolve the column names once instead of once per data row.
    column_names = []
    for j, head_cell in enumerate(sub_head_list):
      group = head_cell.get("data-over-header") if j else None
      column_names.append(group + "_" + head_cell.text if group else head_cell.text)

    table1_data = {}
    for row in rows_data[2:]:
      label_cells = row.find_all("th")
      if not label_cells or not label_cells[0].text:
        continue
      value_cells = row.find_all("td")
      for j, column in enumerate(column_names):
        cell = label_cells[0] if j == 0 else value_cells[j - 1]
        # setdefault creates the column on first use, so a skipped first body
        # row no longer leads to a KeyError on the following rows.
        table1_data.setdefault(column, []).append(cell.text)
    return table1_data

In [2]:
# Scrape the page once; data_final maps each table name to its column dict.
data_obj = Dataset()
data_final = data_obj.main(urL)
data_final

{'Standard_Stats': {'Player': ['Vinicius Júnior',
   'Thibaut Courtois',
   'Éder Militão',
   'Federico Valverde',
   'Antonio Rüdiger',
   'Rodrygo',
   'Toni Kroos',
   'Aurélien Tchouaméni',
   'Karim Benzema',
   'Eduardo Camavinga',
   'David Alaba',
   'Dani Carvajal',
   'Luka Modrić',
   'Dani Ceballos',
   'Nacho',
   'Ferland Mendy',
   'Marco Asensio',
   'Lucas Vázquez',
   'Andriy Lunin',
   'Eden Hazard',
   'Mariano',
   'Álvaro Muñoz',
   'Álvaro Odriozola',
   'Jesús Vallejo',
   'Sergio Arribas',
   'Casemiro',
   'Lucas Cañizares',
   'Carlos Dotor',
   'Fran González',
   'Luis López',
   'Mario de Luis',
   'Mario Martín',
   'Nicolás Paz',
   'Diego Piñeiro',
   'Squad Total',
   'Opponent Total'],
  'Nation': ['br BRA',
   'be BEL',
   'br BRA',
   'uy URU',
   'de GER',
   'br BRA',
   'de GER',
   'fr FRA',
   'fr FRA',
   'fr FRA',
   'at AUT',
   'es ESP',
   'hr CRO',
   'es ESP',
   'es ESP',
   'fr FRA',
   'es ESP',
   'es ESP',
   'ua UKR',
   'be BEL',

In [5]:
# Turn the "Standard_Stats" column dict into a DataFrame (values are still scraped text).
df_Standard_Stats = pd.DataFrame(data_final["Standard_Stats"])
df_Standard_Stats

Unnamed: 0,Player,Nation,Pos,Age,MP,Playing Time_Starts,Playing Time_Min,Playing Time_90s,Performance_Gls,Performance_Ast,...,Per 90 Minutes_Ast,Per 90 Minutes_G+A,Per 90 Minutes_G-PK,Per 90 Minutes_G+A-PK,Per 90 Minutes_xG,Per 90 Minutes_xAG,Per 90 Minutes_xG+xAG,Per 90 Minutes_npxG,Per 90 Minutes_npxG+xAG,Matches
0,Vinicius Júnior,br BRA,FW,22.0,33,32,2823.0,31.4,10.0,9.0,...,0.29,0.61,0.32,0.61,0.34,0.3,0.63,0.34,0.63,Matches
1,Thibaut Courtois,be BEL,GK,30.0,31,31,2790.0,31.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,Matches
2,Éder Militão,br BRA,DF,24.0,33,30,2703.0,30.0,5.0,0.0,...,0.0,0.17,0.17,0.17,0.08,0.02,0.1,0.08,0.1,Matches
3,Federico Valverde,uy URU,"MF,FW",24.0,34,29,2502.0,27.8,7.0,4.0,...,0.14,0.4,0.25,0.4,0.15,0.23,0.38,0.15,0.38,Matches
4,Antonio Rüdiger,de GER,DF,29.0,33,26,2415.0,26.8,1.0,0.0,...,0.0,0.04,0.04,0.04,0.07,0.0,0.07,0.07,0.07,Matches
5,Rodrygo,br BRA,"FW,MF",21.0,34,25,2379.0,26.4,9.0,8.0,...,0.3,0.64,0.3,0.61,0.52,0.26,0.78,0.49,0.75,Matches
6,Toni Kroos,de GER,MF,32.0,30,25,2155.0,23.9,2.0,4.0,...,0.17,0.25,0.08,0.25,0.06,0.19,0.24,0.06,0.24,Matches
7,Aurélien Tchouaméni,fr FRA,MF,22.0,33,24,2119.0,23.5,0.0,4.0,...,0.17,0.17,0.0,0.17,0.08,0.09,0.17,0.08,0.17,Matches
8,Karim Benzema,fr FRA,FW,34.0,24,24,2038.0,22.6,19.0,3.0,...,0.13,0.97,0.53,0.66,0.95,0.26,1.2,0.66,0.91,Matches
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,37,21,2010.0,22.3,0.0,1.0,...,0.04,0.04,0.0,0.04,0.04,0.07,0.1,0.04,0.1,Matches


In [6]:
# Column labels; grouped stats are named "<group>_<stat>", e.g. 'Performance_Gls'.
df_Standard_Stats.columns

Index(['Player', 'Nation', 'Pos', 'Age', 'MP', 'Playing Time_Starts',
       'Playing Time_Min', 'Playing Time_90s', 'Performance_Gls',
       'Performance_Ast', 'Performance_G+A', 'Performance_G-PK',
       'Performance_PK', 'Performance_PKatt', 'Performance_CrdY',
       'Performance_CrdR', 'Expected_xG', 'Expected_npxG', 'Expected_xAG',
       'Expected_npxG+xAG', 'Progression_PrgC', 'Progression_PrgP',
       'Progression_PrgR', 'Per 90 Minutes_Gls', 'Per 90 Minutes_Ast',
       'Per 90 Minutes_G+A', 'Per 90 Minutes_G-PK', 'Per 90 Minutes_G+A-PK',
       'Per 90 Minutes_xG', 'Per 90 Minutes_xAG', 'Per 90 Minutes_xG+xAG',
       'Per 90 Minutes_npxG', 'Per 90 Minutes_npxG+xAG', 'Matches'],
      dtype='object')

In [7]:
# Inspect the column dtypes before any numeric conversion is applied.
df_Standard_Stats.dtypes

Player                     object
Nation                     object
Pos                        object
Age                        object
MP                         object
Playing Time_Starts        object
Playing Time_Min           object
Playing Time_90s           object
Performance_Gls            object
Performance_Ast            object
Performance_G+A            object
Performance_G-PK           object
Performance_PK             object
Performance_PKatt          object
Performance_CrdY           object
Performance_CrdR           object
Expected_xG                object
Expected_npxG              object
Expected_xAG               object
Expected_npxG+xAG          object
Progression_PrgC           object
Progression_PrgP           object
Progression_PrgR           object
Per 90 Minutes_Gls         object
Per 90 Minutes_Ast         object
Per 90 Minutes_G+A         object
Per 90 Minutes_G-PK        object
Per 90 Minutes_G+A-PK      object
Per 90 Minutes_xG          object
Per 90 Minutes

In [8]:
# Rebuild the frame from the scraped data so this cell can be re-run safely:
# filtering the already-filtered frame again would fail on the missing
# 'Matches' column dropped below.
df_Standard_Stats = pd.DataFrame(data_final["Standard_Stats"])

df_Standard_Stats['MP'] = pd.to_numeric(df_Standard_Stats['MP'], errors='coerce') # only 'MP' is converted; non-numeric text becomes NaN

df_Standard_Stats = df_Standard_Stats[df_Standard_Stats['MP'] >= 5].drop(columns=['Matches']) # keep rows with at least 5 matches played

df_Standard_Stats.head(35)

Unnamed: 0,Player,Nation,Pos,Age,MP,Playing Time_Starts,Playing Time_Min,Playing Time_90s,Performance_Gls,Performance_Ast,...,Per 90 Minutes_Gls,Per 90 Minutes_Ast,Per 90 Minutes_G+A,Per 90 Minutes_G-PK,Per 90 Minutes_G+A-PK,Per 90 Minutes_xG,Per 90 Minutes_xAG,Per 90 Minutes_xG+xAG,Per 90 Minutes_npxG,Per 90 Minutes_npxG+xAG
0,Vinicius Júnior,br BRA,FW,22.0,33,32,2823,31.4,10,9,...,0.32,0.29,0.61,0.32,0.61,0.34,0.3,0.63,0.34,0.63
1,Thibaut Courtois,be BEL,GK,30.0,31,31,2790,31.0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01
2,Éder Militão,br BRA,DF,24.0,33,30,2703,30.0,5,0,...,0.17,0.0,0.17,0.17,0.17,0.08,0.02,0.1,0.08,0.1
3,Federico Valverde,uy URU,"MF,FW",24.0,34,29,2502,27.8,7,4,...,0.25,0.14,0.4,0.25,0.4,0.15,0.23,0.38,0.15,0.38
4,Antonio Rüdiger,de GER,DF,29.0,33,26,2415,26.8,1,0,...,0.04,0.0,0.04,0.04,0.04,0.07,0.0,0.07,0.07,0.07
5,Rodrygo,br BRA,"FW,MF",21.0,34,25,2379,26.4,9,8,...,0.34,0.3,0.64,0.3,0.61,0.52,0.26,0.78,0.49,0.75
6,Toni Kroos,de GER,MF,32.0,30,25,2155,23.9,2,4,...,0.08,0.17,0.25,0.08,0.25,0.06,0.19,0.24,0.06,0.24
7,Aurélien Tchouaméni,fr FRA,MF,22.0,33,24,2119,23.5,0,4,...,0.0,0.17,0.17,0.0,0.17,0.08,0.09,0.17,0.08,0.17
8,Karim Benzema,fr FRA,FW,34.0,24,24,2038,22.6,19,3,...,0.84,0.13,0.97,0.53,0.66,0.95,0.26,1.2,0.66,0.91
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,37,21,2010,22.3,0,1,...,0.0,0.04,0.04,0.0,0.04,0.04,0.07,0.1,0.04,0.1


In [9]:
# Columns left after the filter cell above ('Matches' has been dropped).
print(df_Standard_Stats.columns)

Index(['Player', 'Nation', 'Pos', 'Age', 'MP', 'Playing Time_Starts',
       'Playing Time_Min', 'Playing Time_90s', 'Performance_Gls',
       'Performance_Ast', 'Performance_G+A', 'Performance_G-PK',
       'Performance_PK', 'Performance_PKatt', 'Performance_CrdY',
       'Performance_CrdR', 'Expected_xG', 'Expected_npxG', 'Expected_xAG',
       'Expected_npxG+xAG', 'Progression_PrgC', 'Progression_PrgP',
       'Progression_PrgR', 'Per 90 Minutes_Gls', 'Per 90 Minutes_Ast',
       'Per 90 Minutes_G+A', 'Per 90 Minutes_G-PK', 'Per 90 Minutes_G+A-PK',
       'Per 90 Minutes_xG', 'Per 90 Minutes_xAG', 'Per 90 Minutes_xG+xAG',
       'Per 90 Minutes_npxG', 'Per 90 Minutes_npxG+xAG'],
      dtype='object')


In [10]:
# Scores & Fixtures table without its 'Match Report' and 'Notes' columns.
df_Scores_Fixtures = pd.DataFrame(data_final["Scores & Fixtures"])
df_Scores_Fixtures = df_Scores_Fixtures.drop(['Match Report', 'Notes'], axis="columns")
df_Scores_Fixtures.head()

Unnamed: 0,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,Opponent,xG,xGA,Poss,Attendance,Captain,Formation,Referee
0,2022-08-10,22:00,Super Cup,UEFA Super Cup,Wed,Home,W,2,0,de Eint Frankfurt,,,58,31042,Karim Benzema,4-3-3,Michael Oliver
1,2022-08-14,22:00,La Liga,Matchweek 1,Sun,Away,W,2,1,Almería,2.1,0.7,67,14386,Karim Benzema,4-3-3,Juan Martínez
2,2022-08-20,22:00,La Liga,Matchweek 2,Sat,Away,W,4,1,Celta Vigo,3.7,1.4,49,15681,Karim Benzema,4-3-3,Jesús Gil
3,2022-08-28,22:00,La Liga,Matchweek 3,Sun,Away,W,3,1,Espanyol,2.1,1.3,64,25778,Karim Benzema,4-3-3,Mario Melero
4,2022-09-03,16:15,La Liga,Matchweek 4,Sat,Home,W,2,1,Betis,2.9,0.5,49,58579,Karim Benzema,4-3-3,José Sánchez


In [11]:
# Columns left once 'Match Report' and 'Notes' were dropped.
print(df_Scores_Fixtures.columns)

Index(['Date', 'Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA',
       'Opponent', 'xG', 'xGA', 'Poss', 'Attendance', 'Captain', 'Formation',
       'Referee'],
      dtype='object')


In [12]:
# Goalkeeping table without its 'Matches' column.
df_Goalkeeping = pd.DataFrame(data_final["Goalkeeping"])
df_Goalkeeping = df_Goalkeeping.drop(['Matches'], axis="columns")

df_Goalkeeping.head()

Unnamed: 0,Player,Nation,Pos,Age,Playing Time_MP,Playing Time_Starts,Playing Time_Min,Playing Time_90s,Performance_GA,Performance_GA90,...,Performance_W,Performance_D,Performance_L,Performance_CS,Performance_CS%,Penalty Kicks_PKatt,Penalty Kicks_PKA,Penalty Kicks_PKsv,Penalty Kicks_PKm,Penalty Kicks_Save%
0,Thibaut Courtois,be BEL,GK,30.0,31,31,2790,31.0,29,0.94,...,20,5,6,10,32.3,5,4,1,0,20.0
1,Andriy Lunin,ua UKR,GK,23.0,7,7,630,7.0,7,1.0,...,4,1,2,3,42.9,0,0,0,0,
2,Squad Total,,,27.4,38,38,3420,38.0,36,0.95,...,24,6,8,13,34.2,5,4,1,0,20.0
3,Opponent Total,,,27.3,38,38,3419,38.0,75,1.97,...,8,6,24,5,13.2,12,9,2,1,18.2


In [13]:
# Columns of the goalkeeping frame after 'Matches' was dropped.
print(df_Goalkeeping.columns)

Index(['Player', 'Nation', 'Pos', 'Age', 'Playing Time_MP',
       'Playing Time_Starts', 'Playing Time_Min', 'Playing Time_90s',
       'Performance_GA', 'Performance_GA90', 'Performance_SoTA',
       'Performance_Saves', 'Performance_Save%', 'Performance_W',
       'Performance_D', 'Performance_L', 'Performance_CS', 'Performance_CS%',
       'Penalty Kicks_PKatt', 'Penalty Kicks_PKA', 'Penalty Kicks_PKsv',
       'Penalty Kicks_PKm', 'Penalty Kicks_Save%'],
      dtype='object')


In [14]:
# Advanced Goalkeeping table without its 'Matches' column.
df_Advanced_Goalkeeping = pd.DataFrame(data_final["Advanced Goalkeeping"])
df_Advanced_Goalkeeping = df_Advanced_Goalkeeping.drop(['Matches'], axis="columns")

df_Advanced_Goalkeeping.head()

Unnamed: 0,Player,Nation,Pos,Age,90s,Goals_GA,Goals_PKA,Goals_FK,Goals_CK,Goals_OG,...,Passes_AvgLen,Goal Kicks_Att,Goal Kicks_Launch%,Goal Kicks_AvgLen,Crosses_Opp,Crosses_Stp,Crosses_Stp%,Sweeper_#OPA,Sweeper_#OPA/90,Sweeper_AvgDist
0,Thibaut Courtois,be BEL,GK,30.0,31.0,29,4,0,1,0,...,27.7,152,21.7,29.5,366,32,8.7,40,1.29,15.4
1,Andriy Lunin,ua UKR,GK,23.0,7.0,7,0,0,1,1,...,23.3,37,32.4,35.6,54,1,1.9,11,1.57,21.7
2,Squad Total,,,27.4,38.0,36,4,0,2,1,...,,253,17.8,25.0,420,33,7.9,51,1.34,
3,Opponent Total,,,27.3,38.0,75,9,3,9,2,...,38.1,351,63.8,48.2,448,27,6.0,21,0.55,12.2


In [15]:
# Columns of the advanced goalkeeping frame after 'Matches' was dropped.
print(df_Advanced_Goalkeeping.columns)

Index(['Player', 'Nation', 'Pos', 'Age', '90s', 'Goals_GA', 'Goals_PKA',
       'Goals_FK', 'Goals_CK', 'Goals_OG', 'Expected_PSxG',
       'Expected_PSxG/SoT', 'Expected_PSxG+/-', 'Expected_/90', 'Launched_Cmp',
       'Launched_Att', 'Launched_Cmp%', 'Passes_Att (GK)', 'Passes_Thr',
       'Passes_Launch%', 'Passes_AvgLen', 'Goal Kicks_Att',
       'Goal Kicks_Launch%', 'Goal Kicks_AvgLen', 'Crosses_Opp', 'Crosses_Stp',
       'Crosses_Stp%', 'Sweeper_#OPA', 'Sweeper_#OPA/90', 'Sweeper_AvgDist'],
      dtype='object')


In [16]:
# Parse '90s' as numbers (non-numeric text becomes NaN), keep the rows with at
# least five 90s, then drop the 'Matches' column.
df_Shooting = pd.DataFrame(data_final["Shooting"])
df_Shooting = df_Shooting.assign(**{'90s': pd.to_numeric(df_Shooting['90s'], errors='coerce')})
df_Shooting = df_Shooting.loc[df_Shooting['90s'].ge(5.0)].drop(columns=['Matches'])

df_Shooting.head(35)

Unnamed: 0,Player,Nation,Pos,Age,90s,Standard_Gls,Standard_Sh,Standard_SoT,Standard_SoT%,Standard_Sh/90,...,Standard_G/SoT,Standard_Dist,Standard_FK,Standard_PK,Standard_PKatt,Expected_xG,Expected_npxG,Expected_npxG/Sh,Expected_G-xG,Expected_np:G-xG
0,Vinicius Júnior,br BRA,FW,22.0,31.4,10,77,36,46.8,2.45,...,0.28,16.2,0,0,0,10.5,10.5,0.14,-0.5,-0.5
1,Thibaut Courtois,be BEL,GK,30.0,31.0,0,0,0,,0.0,...,,,0,0,0,0.0,0.0,,0.0,0.0
2,Éder Militão,br BRA,DF,24.0,30.0,5,22,8,36.4,0.73,...,0.63,13.8,0,0,0,2.4,2.4,0.11,2.6,2.6
3,Federico Valverde,uy URU,"MF,FW",24.0,27.8,7,63,21,33.3,2.27,...,0.33,22.7,2,0,0,4.3,4.3,0.07,2.7,2.7
4,Antonio Rüdiger,de GER,DF,29.0,26.8,1,22,4,18.2,0.82,...,0.25,14.8,0,0,0,1.8,1.8,0.08,-0.8,-0.8
5,Rodrygo,br BRA,"FW,MF",21.0,26.4,9,97,31,32.0,3.67,...,0.26,16.6,8,1,1,13.7,12.9,0.13,-4.7,-4.9
6,Toni Kroos,de GER,MF,32.0,23.9,2,31,11,35.5,1.29,...,0.18,25.4,5,0,0,1.4,1.4,0.05,0.6,0.6
7,Aurélien Tchouaméni,fr FRA,MF,22.0,23.5,0,38,6,15.8,1.61,...,0.0,22.1,0,0,0,1.8,1.8,0.05,-1.8,-1.8
8,Karim Benzema,fr FRA,FW,34.0,22.6,19,99,36,36.4,4.37,...,0.33,15.9,6,7,8,21.5,14.9,0.15,-2.5,-2.9
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,22.3,0,18,3,16.7,0.81,...,0.0,23.2,0,0,0,0.8,0.8,0.04,-0.8,-0.8


In [17]:
# Columns of the filtered shooting frame ('Matches' has been dropped).
print(df_Shooting.columns)

Index(['Player', 'Nation', 'Pos', 'Age', '90s', 'Standard_Gls', 'Standard_Sh',
       'Standard_SoT', 'Standard_SoT%', 'Standard_Sh/90', 'Standard_SoT/90',
       'Standard_G/Sh', 'Standard_G/SoT', 'Standard_Dist', 'Standard_FK',
       'Standard_PK', 'Standard_PKatt', 'Expected_xG', 'Expected_npxG',
       'Expected_npxG/Sh', 'Expected_G-xG', 'Expected_np:G-xG'],
      dtype='object')


In [18]:
# Parse '90s' as numbers (non-numeric text becomes NaN), keep the rows with at
# least five 90s, then drop the 'Matches' column.
df_Passing = pd.DataFrame(data_final["Passing"])
df_Passing = df_Passing.assign(**{'90s': pd.to_numeric(df_Passing['90s'], errors='coerce')})
df_Passing = df_Passing.loc[df_Passing['90s'].ge(5.0)].drop(columns=['Matches'])

df_Passing.head(35)

Unnamed: 0,Player,Nation,Pos,Age,90s,Total_Cmp,Total_Att,Total_Cmp%,Total_TotDist,Total_PrgDist,...,Long_Cmp%,Ast,xAG,Expected_xA,Expected_A-xAG,KP,1/3,PPA,CrsPA,PrgP
0,Vinicius Júnior,br BRA,FW,22.0,31.4,883,1153,76.6,11725,2422,...,59.5,9,9.4,8.8,-0.4,64,30,45,2,87
1,Thibaut Courtois,be BEL,GK,30.0,31.0,980,1136,86.3,23008,14737,...,57.5,0,0.4,0.0,-0.4,2,6,1,0,0
2,Éder Militão,br BRA,DF,24.0,30.0,1660,1881,88.3,31882,10563,...,68.9,0,0.6,0.8,-0.6,6,138,5,1,98
3,Federico Valverde,uy URU,"MF,FW",24.0,27.8,1427,1641,87.0,21092,4276,...,72.8,4,6.3,5.1,-2.3,43,118,45,6,151
4,Antonio Rüdiger,de GER,DF,29.0,26.8,1486,1617,91.9,26566,8924,...,71.2,0,0.1,0.9,-0.1,2,75,6,0,59
5,Rodrygo,br BRA,"FW,MF",21.0,26.4,918,1050,87.4,10744,2092,...,76.7,8,7.0,6.2,1.0,47,63,28,3,81
6,Toni Kroos,de GER,MF,32.0,23.9,2255,2482,90.9,42174,15247,...,74.9,4,4.4,5.5,-0.4,55,372,26,7,319
7,Aurélien Tchouaméni,fr FRA,MF,22.0,23.5,1578,1700,92.8,25016,7823,...,83.0,4,2.2,2.1,1.8,22,159,11,0,142
8,Karim Benzema,fr FRA,FW,34.0,22.6,924,1070,86.4,10990,2417,...,75.0,3,5.8,5.5,-2.8,48,71,51,0,138
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,22.3,1364,1504,90.7,21221,5654,...,79.5,1,1.5,2.2,-0.5,24,138,17,1,165


In [19]:
# Columns of the filtered passing frame ('Matches' has been dropped).
print(df_Passing.columns)

Index(['Player', 'Nation', 'Pos', 'Age', '90s', 'Total_Cmp', 'Total_Att',
       'Total_Cmp%', 'Total_TotDist', 'Total_PrgDist', 'Short_Cmp',
       'Short_Att', 'Short_Cmp%', 'Medium_Cmp', 'Medium_Att', 'Medium_Cmp%',
       'Long_Cmp', 'Long_Att', 'Long_Cmp%', 'Ast', 'xAG', 'Expected_xA',
       'Expected_A-xAG', 'KP', '1/3', 'PPA', 'CrsPA', 'PrgP'],
      dtype='object')


In [20]:
# Parse '90s' as numbers (non-numeric text becomes NaN), keep the rows with at
# least five 90s, then drop the 'Matches' column.
df_Pass_Types = pd.DataFrame(data_final["Pass Types"])
df_Pass_Types = df_Pass_Types.assign(**{'90s': pd.to_numeric(df_Pass_Types['90s'], errors='coerce')})
df_Pass_Types = df_Pass_Types.loc[df_Pass_Types['90s'].ge(5.0)].drop(columns=['Matches'])

df_Pass_Types.head(35)

Unnamed: 0,Player,Nation,Pos,Age,90s,Att,Pass Types_Live,Pass Types_Dead,Pass Types_FK,Pass Types_TB,Pass Types_Sw,Pass Types_Crs,Pass Types_TI,Pass Types_CK,Corner Kicks_In,Corner Kicks_Out,Corner Kicks_Str,Outcomes_Cmp,Outcomes_Off,Outcomes_Blocks
0,Vinicius Júnior,br BRA,FW,22.0,31.4,1153,1107,41,8,5,10,64,24,7,0,0,0,883,5,61
1,Thibaut Courtois,be BEL,GK,30.0,31.0,1136,942,192,40,0,3,0,0,0,0,0,0,980,2,1
2,Éder Militão,br BRA,DF,24.0,30.0,1881,1756,123,52,2,40,12,25,0,0,0,0,1660,2,20
3,Federico Valverde,uy URU,"MF,FW",24.0,27.8,1641,1611,21,6,3,22,27,14,0,0,0,0,1427,9,26
4,Antonio Rüdiger,de GER,DF,29.0,26.8,1617,1562,55,25,3,16,5,20,0,0,0,0,1486,0,14
5,Rodrygo,br BRA,"FW,MF",21.0,26.4,1050,991,51,6,2,6,22,9,18,3,2,0,918,8,24
6,Toni Kroos,de GER,MF,32.0,23.9,2482,2246,223,123,4,41,122,15,84,27,30,1,2255,13,12
7,Aurélien Tchouaméni,fr FRA,MF,22.0,23.5,1700,1657,41,40,7,7,12,1,0,0,0,0,1578,2,9
8,Karim Benzema,fr FRA,FW,34.0,22.6,1070,1012,52,4,9,7,3,4,9,0,0,0,924,6,25
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,22.3,1504,1389,107,34,4,6,6,73,0,0,0,0,1364,8,15


In [21]:
# Columns of the filtered pass-types frame ('Matches' has been dropped).
print(df_Pass_Types.columns)

Index(['Player', 'Nation', 'Pos', 'Age', '90s', 'Att', 'Pass Types_Live',
       'Pass Types_Dead', 'Pass Types_FK', 'Pass Types_TB', 'Pass Types_Sw',
       'Pass Types_Crs', 'Pass Types_TI', 'Pass Types_CK', 'Corner Kicks_In',
       'Corner Kicks_Out', 'Corner Kicks_Str', 'Outcomes_Cmp', 'Outcomes_Off',
       'Outcomes_Blocks'],
      dtype='object')


In [22]:
# Parse '90s' as numbers (non-numeric text becomes NaN), keep the rows with at
# least five 90s, then drop the 'Matches' column.
df_Goal_Shot_Creation = pd.DataFrame(data_final["Goal and Shot Creation"])
df_Goal_Shot_Creation = df_Goal_Shot_Creation.assign(
    **{'90s': pd.to_numeric(df_Goal_Shot_Creation['90s'], errors='coerce')})
df_Goal_Shot_Creation = df_Goal_Shot_Creation.loc[df_Goal_Shot_Creation['90s'].ge(5.0)].drop(columns=['Matches'])

df_Goal_Shot_Creation.head(35)

Unnamed: 0,Player,Nation,Pos,Age,90s,SCA_SCA,SCA_SCA90,SCA Types_PassLive,SCA Types_PassDead,SCA Types_TO,...,SCA Types_Fld,SCA Types_Def,GCA_GCA,GCA_GCA90,GCA Types_PassLive,GCA Types_PassDead,GCA Types_TO,GCA Types_Sh,GCA Types_Fld,GCA Types_Def
0,Vinicius Júnior,br BRA,FW,22.0,31.4,161,5.13,103,2,27,...,17,1,21,0.67,12,0,4,4,0,1
1,Thibaut Courtois,be BEL,GK,30.0,31.0,5,0.16,2,3,0,...,0,0,0,0.0,0,0,0,0,0,0
2,Éder Militão,br BRA,DF,24.0,30.0,22,0.73,19,2,0,...,1,0,4,0.13,3,0,0,0,1,0
3,Federico Valverde,uy URU,"MF,FW",24.0,27.8,81,2.91,77,0,2,...,1,0,14,0.5,13,0,1,0,0,0
4,Antonio Rüdiger,de GER,DF,29.0,26.8,18,0.67,16,0,0,...,1,0,1,0.04,0,0,0,1,0,0
5,Rodrygo,br BRA,"FW,MF",21.0,26.4,133,5.03,78,5,25,...,15,1,21,0.79,12,0,6,0,3,0
6,Toni Kroos,de GER,MF,32.0,23.9,103,4.3,64,33,0,...,2,1,6,0.25,5,1,0,0,0,0
7,Aurélien Tchouaméni,fr FRA,MF,22.0,23.5,63,2.67,53,1,2,...,3,1,12,0.51,11,0,0,0,1,0
8,Karim Benzema,fr FRA,FW,34.0,22.6,102,4.5,86,1,6,...,2,0,7,0.31,5,0,1,1,0,0
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,22.3,72,3.23,55,0,5,...,9,0,5,0.22,3,0,0,1,1,0


In [23]:
# Columns of the filtered goal/shot-creation frame ('Matches' has been dropped).
print(df_Goal_Shot_Creation.columns)

Index(['Player', 'Nation', 'Pos', 'Age', '90s', 'SCA_SCA', 'SCA_SCA90',
       'SCA Types_PassLive', 'SCA Types_PassDead', 'SCA Types_TO',
       'SCA Types_Sh', 'SCA Types_Fld', 'SCA Types_Def', 'GCA_GCA',
       'GCA_GCA90', 'GCA Types_PassLive', 'GCA Types_PassDead', 'GCA Types_TO',
       'GCA Types_Sh', 'GCA Types_Fld', 'GCA Types_Def'],
      dtype='object')


In [24]:
# Parse '90s' as numbers (non-numeric text becomes NaN), keep the rows with at
# least five 90s, then drop the 'Matches' column.
df_Defensive_Actions = pd.DataFrame(data_final["Defensive Actions"])
df_Defensive_Actions = df_Defensive_Actions.assign(
    **{'90s': pd.to_numeric(df_Defensive_Actions['90s'], errors='coerce')})
df_Defensive_Actions = df_Defensive_Actions.loc[df_Defensive_Actions['90s'].ge(5.0)].drop(columns=['Matches'])

df_Defensive_Actions.head(35)

Unnamed: 0,Player,Nation,Pos,Age,90s,Tackles_Tkl,Tackles_TklW,Tackles_Def 3rd,Tackles_Mid 3rd,Tackles_Att 3rd,...,Challenges_Att,Challenges_Tkl%,Challenges_Lost,Blocks_Blocks,Blocks_Sh,Blocks_Pass,Int,Tkl+Int,Clr,Err
0,Vinicius Júnior,br BRA,FW,22.0,31.4,30,21,10,12,8,...,39,35.9,25,24,1,23,6,36,1,0
1,Thibaut Courtois,be BEL,GK,30.0,31.0,1,0,1,0,0,...,1,100.0,0,0,0,0,3,4,7,1
2,Éder Militão,br BRA,DF,24.0,30.0,46,32,25,18,3,...,33,78.8,7,67,39,28,42,88,104,2
3,Federico Valverde,uy URU,"MF,FW",24.0,27.8,31,20,14,11,6,...,26,50.0,13,31,4,27,17,48,15,0
4,Antonio Rüdiger,de GER,DF,29.0,26.8,18,16,12,5,1,...,23,60.9,9,23,13,10,19,37,74,0
5,Rodrygo,br BRA,"FW,MF",21.0,26.4,17,10,3,7,7,...,23,17.4,19,23,0,23,8,25,5,0
6,Toni Kroos,de GER,MF,32.0,23.9,51,32,16,33,2,...,69,33.3,46,23,4,19,24,75,11,0
7,Aurélien Tchouaméni,fr FRA,MF,22.0,23.5,64,40,23,36,5,...,38,60.5,15,20,6,14,49,113,50,0
8,Karim Benzema,fr FRA,FW,34.0,22.6,9,5,2,4,3,...,11,18.2,9,11,0,11,2,11,5,0
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,22.3,68,48,32,21,15,...,64,37.5,40,24,5,19,17,85,25,0


In [25]:
# Columns of the filtered defensive-actions frame ('Matches' has been dropped).
print(df_Defensive_Actions.columns)

Index(['Player', 'Nation', 'Pos', 'Age', '90s', 'Tackles_Tkl', 'Tackles_TklW',
       'Tackles_Def 3rd', 'Tackles_Mid 3rd', 'Tackles_Att 3rd',
       'Challenges_Tkl', 'Challenges_Att', 'Challenges_Tkl%',
       'Challenges_Lost', 'Blocks_Blocks', 'Blocks_Sh', 'Blocks_Pass', 'Int',
       'Tkl+Int', 'Clr', 'Err'],
      dtype='object')


In [26]:
# Parse '90s' as numbers (non-numeric text becomes NaN), keep the rows with at
# least five 90s, then drop the 'Matches' column.
df_Possession = pd.DataFrame(data_final["Possession"])
df_Possession = df_Possession.assign(**{'90s': pd.to_numeric(df_Possession['90s'], errors='coerce')})
df_Possession = df_Possession.loc[df_Possession['90s'].ge(5.0)].drop(columns=['Matches'])

df_Possession.head(35)

Unnamed: 0,Player,Nation,Pos,Age,90s,Touches_Touches,Touches_Def Pen,Touches_Def 3rd,Touches_Mid 3rd,Touches_Att 3rd,...,Carries_Carries,Carries_TotDist,Carries_PrgDist,Carries_PrgC,Carries_1/3,Carries_CPA,Carries_Mis,Carries_Dis,Receiving_Rec,Receiving_PrgR
0,Vinicius Júnior,br BRA,FW,22.0,31.4,1707,3,94,444,1190,...,1357,10854,5658,221,99,140,103,61,1460,495
1,Thibaut Courtois,be BEL,GK,30.0,31.0,1209,1020,1205,5,0,...,824,4223,2619,0,0,0,2,0,704,0
2,Éder Militão,br BRA,DF,24.0,30.0,2216,314,1088,984,157,...,1378,7011,3877,28,20,1,16,6,1505,16
3,Federico Valverde,uy URU,"MF,FW",24.0,27.8,1870,35,229,857,800,...,1190,6633,3349,70,68,8,21,7,1485,135
4,Antonio Rüdiger,de GER,DF,29.0,26.8,1814,194,907,840,79,...,1216,5991,3731,21,11,0,8,2,1339,10
5,Rodrygo,br BRA,"FW,MF",21.0,26.4,1400,3,56,537,823,...,1035,6613,3658,146,81,79,64,30,1198,268
6,Toni Kroos,de GER,MF,32.0,23.9,2633,58,531,1618,506,...,1796,8015,4384,51,41,3,10,12,2106,38
7,Aurélien Tchouaméni,fr FRA,MF,22.0,23.5,1990,94,470,1234,298,...,1291,6841,3344,41,43,4,24,12,1533,23
8,Karim Benzema,fr FRA,FW,34.0,22.6,1284,7,56,457,776,...,796,3734,1900,72,43,31,32,28,1092,157
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,22.3,1770,51,372,942,485,...,1162,6589,3684,69,77,12,39,28,1326,99


In [27]:
# Columns of the filtered possession frame ('Matches' has been dropped).
print(df_Possession.columns)

Index(['Player', 'Nation', 'Pos', 'Age', '90s', 'Touches_Touches',
       'Touches_Def Pen', 'Touches_Def 3rd', 'Touches_Mid 3rd',
       'Touches_Att 3rd', 'Touches_Att Pen', 'Touches_Live', 'Take-Ons_Att',
       'Take-Ons_Succ', 'Take-Ons_Succ%', 'Take-Ons_Tkld', 'Take-Ons_Tkld%',
       'Carries_Carries', 'Carries_TotDist', 'Carries_PrgDist', 'Carries_PrgC',
       'Carries_1/3', 'Carries_CPA', 'Carries_Mis', 'Carries_Dis',
       'Receiving_Rec', 'Receiving_PrgR'],
      dtype='object')


In [29]:
# Unfiltered "Playing Time" table, shown in full before the next cell filters it.
df_Playing_Time = pd.DataFrame(data_final["Playing Time"])
df_Playing_Time

Unnamed: 0,Player,Nation,Pos,Age,MP,Playing Time_Min,Playing Time_Mn/MP,Playing Time_Min%,Playing Time_90s,Starts_Starts,...,Team Success_onGA,Team Success_+/-,Team Success_+/-90,Team Success_On-Off,Team Success (xG)_onxG,Team Success (xG)_onxGA,Team Success (xG)_xG+/-,Team Success (xG)_xG+/-90,Team Success (xG)_On-Off,Matches
0,Vinicius Júnior,br BRA,FW,22.0,33,2823.0,86.0,82.5,31.4,32,...,32.0,26.0,0.83,-1.13,58.9,32.2,26.8,0.85,-0.63,Matches
1,Thibaut Courtois,be BEL,GK,30.0,31,2790.0,90.0,81.6,31.0,31,...,29.0,32.0,1.03,0.03,61.5,32.3,29.2,0.94,-0.11,Matches
2,Éder Militão,br BRA,DF,24.0,33,2703.0,82.0,79.0,30.0,30,...,29.0,31.0,1.03,0.03,60.7,33.0,27.6,0.92,-0.2,Matches
3,Federico Valverde,uy URU,"MF,FW",24.0,34,2502.0,74.0,73.2,27.8,29,...,25.0,26.0,0.94,-0.34,52.7,26.7,26.0,0.93,-0.1,Matches
4,Antonio Rüdiger,de GER,DF,29.0,33,2415.0,73.0,70.6,26.8,26,...,25.0,19.0,0.71,-1.08,51.1,27.2,23.8,0.89,-0.25,Matches
5,Rodrygo,br BRA,"FW,MF",21.0,34,2379.0,70.0,69.6,26.4,25,...,27.0,27.0,1.02,-0.02,56.8,26.6,30.2,1.14,0.59,Matches
6,Toni Kroos,de GER,MF,32.0,30,2155.0,72.0,63.0,23.9,25,...,23.0,25.0,1.04,0.05,43.8,26.7,17.1,0.71,-0.67,Matches
7,Aurélien Tchouaméni,fr FRA,MF,22.0,33,2119.0,64.0,62.0,23.5,24,...,27.0,24.0,1.02,-0.02,46.3,25.3,21.0,0.89,-0.19,Matches
8,Karim Benzema,fr FRA,FW,34.0,24,2038.0,85.0,59.6,22.6,24,...,17.0,29.0,1.28,0.63,47.5,23.9,23.6,1.04,0.2,Matches
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,37,2010.0,54.0,58.8,22.3,21,...,16.0,35.0,1.57,1.31,47.6,20.8,26.8,1.2,0.58,Matches


In [30]:
# Parse 'MP' as numbers (non-numeric text becomes NaN), keep the rows with at
# least 5 matches played, then drop the 'Matches' column.
df_Playing_Time = pd.DataFrame(data_final["Playing Time"])
df_Playing_Time = df_Playing_Time.assign(MP=pd.to_numeric(df_Playing_Time['MP'], errors='coerce'))
df_Playing_Time = df_Playing_Time.loc[df_Playing_Time['MP'].ge(5)].drop(columns=['Matches'])

df_Playing_Time.head(35)

Unnamed: 0,Player,Nation,Pos,Age,MP,Playing Time_Min,Playing Time_Mn/MP,Playing Time_Min%,Playing Time_90s,Starts_Starts,...,Team Success_onG,Team Success_onGA,Team Success_+/-,Team Success_+/-90,Team Success_On-Off,Team Success (xG)_onxG,Team Success (xG)_onxGA,Team Success (xG)_xG+/-,Team Success (xG)_xG+/-90,Team Success (xG)_On-Off
0,Vinicius Júnior,br BRA,FW,22.0,33,2823,86,82.5,31.4,32,...,58,32,26,0.83,-1.13,58.9,32.2,26.8,0.85,-0.63
1,Thibaut Courtois,be BEL,GK,30.0,31,2790,90,81.6,31.0,31,...,61,29,32,1.03,0.03,61.5,32.3,29.2,0.94,-0.11
2,Éder Militão,br BRA,DF,24.0,33,2703,82,79.0,30.0,30,...,60,29,31,1.03,0.03,60.7,33.0,27.6,0.92,-0.2
3,Federico Valverde,uy URU,"MF,FW",24.0,34,2502,74,73.2,27.8,29,...,51,25,26,0.94,-0.34,52.7,26.7,26.0,0.93,-0.1
4,Antonio Rüdiger,de GER,DF,29.0,33,2415,73,70.6,26.8,26,...,44,25,19,0.71,-1.08,51.1,27.2,23.8,0.89,-0.25
5,Rodrygo,br BRA,"FW,MF",21.0,34,2379,70,69.6,26.4,25,...,54,27,27,1.02,-0.02,56.8,26.6,30.2,1.14,0.59
6,Toni Kroos,de GER,MF,32.0,30,2155,72,63.0,23.9,25,...,48,23,25,1.04,0.05,43.8,26.7,17.1,0.71,-0.67
7,Aurélien Tchouaméni,fr FRA,MF,22.0,33,2119,64,62.0,23.5,24,...,51,27,24,1.02,-0.02,46.3,25.3,21.0,0.89,-0.19
8,Karim Benzema,fr FRA,FW,34.0,24,2038,85,59.6,22.6,24,...,46,17,29,1.28,0.63,47.5,23.9,23.6,1.04,0.2
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,37,2010,54,58.8,22.3,21,...,51,16,35,1.57,1.31,47.6,20.8,26.8,1.2,0.58


In [31]:
# Columns of the filtered playing-time frame ('Matches' has been dropped).
print(df_Playing_Time.columns)

Index(['Player', 'Nation', 'Pos', 'Age', 'MP', 'Playing Time_Min',
       'Playing Time_Mn/MP', 'Playing Time_Min%', 'Playing Time_90s',
       'Starts_Starts', 'Starts_Mn/Start', 'Starts_Compl', 'Subs_Subs',
       'Subs_Mn/Sub', 'Subs_unSub', 'Team Success_PPM', 'Team Success_onG',
       'Team Success_onGA', 'Team Success_+/-', 'Team Success_+/-90',
       'Team Success_On-Off', 'Team Success (xG)_onxG',
       'Team Success (xG)_onxGA', 'Team Success (xG)_xG+/-',
       'Team Success (xG)_xG+/-90', 'Team Success (xG)_On-Off'],
      dtype='object')


In [32]:
# Parse '90s' as numbers (non-numeric text becomes NaN), keep the rows with at
# least five 90s, then drop the 'Matches' column.
df_Miscellaneous_Stats = pd.DataFrame(data_final["Miscellaneous Stats"])
df_Miscellaneous_Stats = df_Miscellaneous_Stats.assign(
    **{'90s': pd.to_numeric(df_Miscellaneous_Stats['90s'], errors='coerce')})
df_Miscellaneous_Stats = df_Miscellaneous_Stats.loc[df_Miscellaneous_Stats['90s'].ge(5.0)].drop(columns=['Matches'])

df_Miscellaneous_Stats.head(35)

Unnamed: 0,Player,Nation,Pos,Age,90s,Performance_CrdY,Performance_CrdR,Performance_2CrdY,Performance_Fls,Performance_Fld,...,Performance_Crs,Performance_Int,Performance_TklW,Performance_PKwon,Performance_PKcon,Performance_OG,Performance_Recov,Aerial Duels_Won,Aerial Duels_Lost,Aerial Duels_Won%
0,Vinicius Júnior,br BRA,FW,22.0,31.4,10,1,0,47,122,...,64,6,21,1,0,0,98,2,6,25.0
1,Thibaut Courtois,be BEL,GK,30.0,31.0,0,0,0,0,3,...,0,3,0,0,0,0,59,9,0,100.0
2,Éder Militão,br BRA,DF,24.0,30.0,4,0,0,35,38,...,12,42,32,1,1,0,159,64,34,65.3
3,Federico Valverde,uy URU,"MF,FW",24.0,27.8,2,0,0,11,15,...,27,17,20,0,0,0,132,31,11,73.8
4,Antonio Rüdiger,de GER,DF,29.0,26.8,1,0,0,13,7,...,5,19,16,0,0,0,158,37,22,62.7
5,Rodrygo,br BRA,"FW,MF",21.0,26.4,4,0,0,18,49,...,22,8,10,2,0,0,63,14,23,37.8
6,Toni Kroos,de GER,MF,32.0,23.9,2,1,1,28,36,...,122,24,32,0,1,0,154,7,4,63.6
7,Aurélien Tchouaméni,fr FRA,MF,22.0,23.5,2,0,0,35,35,...,12,49,40,0,0,0,123,48,24,66.7
8,Karim Benzema,fr FRA,FW,34.0,22.6,1,0,0,10,6,...,3,2,5,2,0,0,35,14,13,51.9
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,22.3,6,0,0,39,69,...,6,17,48,0,0,0,136,22,11,66.7


In [33]:
# Print the column labels of the miscellaneous-stats frame as a quick schema check.
print(df_Miscellaneous_Stats.columns)

Index(['Player', 'Nation', 'Pos', 'Age', '90s', 'Performance_CrdY',
       'Performance_CrdR', 'Performance_2CrdY', 'Performance_Fls',
       'Performance_Fld', 'Performance_Off', 'Performance_Crs',
       'Performance_Int', 'Performance_TklW', 'Performance_PKwon',
       'Performance_PKcon', 'Performance_OG', 'Performance_Recov',
       'Aerial Duels_Won', 'Aerial Duels_Lost', 'Aerial Duels_Won%'],
      dtype='object')


In [34]:
# Build the "Regular season" table from the scraped data and preview its first five rows.
df_Regular_Season = pd.DataFrame(data=data_final["Regular season"])
df_Regular_Season.head(n=5)

Unnamed: 0,Rk,Squad,MP,W,D,L,GF,GA,GD,Pts,Pts/MP,xG,xGA,xGD,xGD/90,Attendance,Top Team Scorer,Goalkeeper,Notes
0,1,Barcelona,38,28,4,6,70,20,50,88,2.32,75.5,33.2,42.3,1.11,83498,Robert Lewandowski - 23,Marc-André ter Stegen,→ Champions League via league finish
1,2,Real Madrid,38,24,6,8,75,36,39,78,2.05,75.5,38.9,36.6,0.96,56649,Karim Benzema - 19,Thibaut Courtois,→ Champions League via league finish
2,3,Atlético Madrid,38,23,8,7,70,33,37,77,2.03,61.9,41.1,20.8,0.55,55800,Antoine Griezmann - 15,Jan Oblak,→ Champions League via league finish
3,4,Real Sociedad,38,21,8,9,51,35,16,71,1.87,52.9,33.0,19.8,0.52,32189,Alexander Sørloth - 12,Álex Remiro,→ Champions League via league finish


In [35]:
# Print the column labels of the regular-season frame as a quick schema check.
print(df_Regular_Season.columns)

Index(['Rk', 'Squad', 'MP', 'W', 'D', 'L', 'GF', 'GA', 'GD', 'Pts', 'Pts/MP',
       'xG', 'xGA', 'xGD', 'xGD/90', 'Attendance', 'Top Team Scorer',
       'Goalkeeper', 'Notes'],
      dtype='object')


## FORWARDS

KEY PLAYERS

In [36]:
# Identity and playing-time columns for each squad member.
# The columns are selected by name instead of by position (previously iloc[:, :7])
# so the cell keeps returning the same fields if the scraped table gains or
# reorders columns. .copy() makes the subset independent of df_Standard_Stats,
# matching the other column-subset cells in this section.
key_player_columns = [
    'Player',
    'Nation',
    'Pos',
    'Age',
    'MP',
    'Playing Time_Starts',
    'Playing Time_Min',
]
df_key_players = df_Standard_Stats[key_player_columns].copy()
df_key_players

Unnamed: 0,Player,Nation,Pos,Age,MP,Playing Time_Starts,Playing Time_Min
0,Vinicius Júnior,br BRA,FW,22.0,33,32,2823
1,Thibaut Courtois,be BEL,GK,30.0,31,31,2790
2,Éder Militão,br BRA,DF,24.0,33,30,2703
3,Federico Valverde,uy URU,"MF,FW",24.0,34,29,2502
4,Antonio Rüdiger,de GER,DF,29.0,33,26,2415
5,Rodrygo,br BRA,"FW,MF",21.0,34,25,2379
6,Toni Kroos,de GER,MF,32.0,30,25,2155
7,Aurélien Tchouaméni,fr FRA,MF,22.0,33,24,2119
8,Karim Benzema,fr FRA,FW,34.0,24,24,2038
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,37,21,2010


GOAL INVOLVEMENT

In [38]:
# Per-player goal involvement: the Gls, Ast and G+A columns plus age and matches played.
goal_involvement_column = [
    'Player',
    'Age',
    'MP',
    'Performance_Gls',
    'Performance_Ast',
    'Performance_G+A',
]
df_goal_involvement = df_Standard_Stats.loc[:, goal_involvement_column].copy()
df_goal_involvement

Unnamed: 0,Player,Age,MP,Performance_Gls,Performance_Ast,Performance_G+A
0,Vinicius Júnior,22.0,33,10,9,19
1,Thibaut Courtois,30.0,31,0,0,0
2,Éder Militão,24.0,33,5,0,5
3,Federico Valverde,24.0,34,7,4,11
4,Antonio Rüdiger,29.0,33,1,0,1
5,Rodrygo,21.0,34,9,8,17
6,Toni Kroos,32.0,30,2,4,6
7,Aurélien Tchouaméni,22.0,33,0,4,4
8,Karim Benzema,34.0,24,19,3,22
9,Eduardo Camavinga,19.0,37,0,1,1


PENALTY CONVERSION

In [39]:
# Per-player penalty columns (PK and PKatt) alongside matches played.
penalty_conversion_columns = [
    'Player',
    'MP',
    'Performance_PK',
    'Performance_PKatt',
]
df_penalty_conversions = df_Standard_Stats.loc[:, penalty_conversion_columns].copy()
df_penalty_conversions

Unnamed: 0,Player,MP,Performance_PK,Performance_PKatt
0,Vinicius Júnior,33,0,0
1,Thibaut Courtois,31,0,0
2,Éder Militão,33,0,0
3,Federico Valverde,34,0,0
4,Antonio Rüdiger,33,0,0
5,Rodrygo,34,1,1
6,Toni Kroos,30,0,0
7,Aurélien Tchouaméni,33,0,0
8,Karim Benzema,24,7,8
9,Eduardo Camavinga,37,0,0


ATTACKING THREAT

In [40]:
# Per-player expected-output columns (xG and xAG) from the standard stats table.
attacking_threat_columns = [
    'Player',
    'Expected_xG',
    'Expected_xAG',
]
df_attacking_threat = df_Standard_Stats.loc[:, attacking_threat_columns].copy()
df_attacking_threat

Unnamed: 0,Player,Expected_xG,Expected_xAG
0,Vinicius Júnior,10.5,9.4
1,Thibaut Courtois,0.0,0.4
2,Éder Militão,2.4,0.6
3,Federico Valverde,4.3,6.3
4,Antonio Rüdiger,1.8,0.1
5,Rodrygo,13.7,7.0
6,Toni Kroos,1.4,4.4
7,Aurélien Tchouaméni,1.8,2.2
8,Karim Benzema,21.5,5.8
9,Eduardo Camavinga,0.8,1.5


PROGRESSIVE THREAT

In [41]:
# Per-player progression columns (PrgC, PrgP, PrgR) from the standard stats table.
progressive_threat_columns = [
    'Player',
    'Progression_PrgC',
    'Progression_PrgP',
    'Progression_PrgR',
]
df_progressive_threat = df_Standard_Stats.loc[:, progressive_threat_columns].copy()
df_progressive_threat

Unnamed: 0,Player,Progression_PrgC,Progression_PrgP,Progression_PrgR
0,Vinicius Júnior,221,87,495
1,Thibaut Courtois,0,0,0
2,Éder Militão,28,98,16
3,Federico Valverde,70,151,135
4,Antonio Rüdiger,21,59,10
5,Rodrygo,146,81,268
6,Toni Kroos,51,319,38
7,Aurélien Tchouaméni,41,142,23
8,Karim Benzema,72,138,157
9,Eduardo Camavinga,69,165,99


SHOT EFFICIENCY

In [42]:
# Per-player shooting columns (Gls, Sh, SoT) taken from the shooting table.
shot_efficiency_columns = [
    'Player',
    'Standard_Gls',
    'Standard_Sh',
    'Standard_SoT',
]
df_shot_efficiency = df_Shooting.loc[:, shot_efficiency_columns].copy()
df_shot_efficiency

Unnamed: 0,Player,Standard_Gls,Standard_Sh,Standard_SoT
0,Vinicius Júnior,10,77,36
1,Thibaut Courtois,0,0,0
2,Éder Militão,5,22,8
3,Federico Valverde,7,63,21
4,Antonio Rüdiger,1,22,4
5,Rodrygo,9,97,31
6,Toni Kroos,2,31,11
7,Aurélien Tchouaméni,0,38,6
8,Karim Benzema,19,99,36
9,Eduardo Camavinga,0,18,3


# Filling null values with "0"

In [43]:
# Display the playing-time frame for a visual check before counting missing values.
df_Playing_Time

Unnamed: 0,Player,Nation,Pos,Age,MP,Playing Time_Min,Playing Time_Mn/MP,Playing Time_Min%,Playing Time_90s,Starts_Starts,...,Team Success_onG,Team Success_onGA,Team Success_+/-,Team Success_+/-90,Team Success_On-Off,Team Success (xG)_onxG,Team Success (xG)_onxGA,Team Success (xG)_xG+/-,Team Success (xG)_xG+/-90,Team Success (xG)_On-Off
0,Vinicius Júnior,br BRA,FW,22.0,33,2823,86,82.5,31.4,32,...,58,32,26,0.83,-1.13,58.9,32.2,26.8,0.85,-0.63
1,Thibaut Courtois,be BEL,GK,30.0,31,2790,90,81.6,31.0,31,...,61,29,32,1.03,0.03,61.5,32.3,29.2,0.94,-0.11
2,Éder Militão,br BRA,DF,24.0,33,2703,82,79.0,30.0,30,...,60,29,31,1.03,0.03,60.7,33.0,27.6,0.92,-0.2
3,Federico Valverde,uy URU,"MF,FW",24.0,34,2502,74,73.2,27.8,29,...,51,25,26,0.94,-0.34,52.7,26.7,26.0,0.93,-0.1
4,Antonio Rüdiger,de GER,DF,29.0,33,2415,73,70.6,26.8,26,...,44,25,19,0.71,-1.08,51.1,27.2,23.8,0.89,-0.25
5,Rodrygo,br BRA,"FW,MF",21.0,34,2379,70,69.6,26.4,25,...,54,27,27,1.02,-0.02,56.8,26.6,30.2,1.14,0.59
6,Toni Kroos,de GER,MF,32.0,30,2155,72,63.0,23.9,25,...,48,23,25,1.04,0.05,43.8,26.7,17.1,0.71,-0.67
7,Aurélien Tchouaméni,fr FRA,MF,22.0,33,2119,64,62.0,23.5,24,...,51,27,24,1.02,-0.02,46.3,25.3,21.0,0.89,-0.19
8,Karim Benzema,fr FRA,FW,34.0,24,2038,85,59.6,22.6,24,...,46,17,29,1.28,0.63,47.5,23.9,23.6,1.04,0.2
9,Eduardo Camavinga,fr FRA,"MF,DF",19.0,37,2010,54,58.8,22.3,21,...,51,16,35,1.57,1.31,47.6,20.8,26.8,1.2,0.58


In [50]:
# Count missing values per column in the playing-time frame.
df_Playing_Time.isna().sum()

Player                       0
Nation                       0
Pos                          0
Age                          0
MP                           0
Playing Time_Min             0
Playing Time_Mn/MP           0
Playing Time_Min%            0
Playing Time_90s             0
Starts_Starts                0
Starts_Mn/Start              0
Starts_Compl                 0
Subs_Subs                    0
Subs_Mn/Sub                  0
Subs_unSub                   0
Team Success_PPM             0
Team Success_onG             0
Team Success_onGA            0
Team Success_+/-             0
Team Success_+/-90           0
Team Success_On-Off          0
Team Success (xG)_onxG       0
Team Success (xG)_onxGA      0
Team Success (xG)_xG+/-      0
Team Success (xG)_xG+/-90    0
Team Success (xG)_On-Off     0
dtype: int64

In [51]:
# Count missing values per column in the scores-and-fixtures frame.
df_Scores_Fixtures.isna().sum()

Date          0
Time          0
Comp          0
Round         0
Day           0
Venue         0
Result        0
GF            0
GA            0
Opponent      0
xG            0
xGA           0
Poss          0
Attendance    0
Captain       0
Formation     0
Referee       0
dtype: int64

In [52]:
# Count missing values per column in the goalkeeping frame.
df_Goalkeeping.isna().sum()

Player                 0
Nation                 0
Pos                    0
Age                    0
Playing Time_MP        0
Playing Time_Starts    0
Playing Time_Min       0
Playing Time_90s       0
Performance_GA         0
Performance_GA90       0
Performance_SoTA       0
Performance_Saves      0
Performance_Save%      0
Performance_W          0
Performance_D          0
Performance_L          0
Performance_CS         0
Performance_CS%        0
Penalty Kicks_PKatt    0
Penalty Kicks_PKA      0
Penalty Kicks_PKsv     0
Penalty Kicks_PKm      0
Penalty Kicks_Save%    0
dtype: int64

In [53]:
# Count missing values per column in the advanced-goalkeeping frame.
df_Advanced_Goalkeeping.isna().sum()

Player                0
Nation                0
Pos                   0
Age                   0
90s                   0
Goals_GA              0
Goals_PKA             0
Goals_FK              0
Goals_CK              0
Goals_OG              0
Expected_PSxG         0
Expected_PSxG/SoT     0
Expected_PSxG+/-      0
Expected_/90          0
Launched_Cmp          0
Launched_Att          0
Launched_Cmp%         0
Passes_Att (GK)       0
Passes_Thr            0
Passes_Launch%        0
Passes_AvgLen         0
Goal Kicks_Att        0
Goal Kicks_Launch%    0
Goal Kicks_AvgLen     0
Crosses_Opp           0
Crosses_Stp           0
Crosses_Stp%          0
Sweeper_#OPA          0
Sweeper_#OPA/90       0
Sweeper_AvgDist       0
dtype: int64

In [54]:
# Count missing values per column in the passing frame.
df_Passing.isna().sum()

Player            0
Nation            0
Pos               0
Age               0
90s               0
Total_Cmp         0
Total_Att         0
Total_Cmp%        0
Total_TotDist     0
Total_PrgDist     0
Short_Cmp         0
Short_Att         0
Short_Cmp%        0
Medium_Cmp        0
Medium_Att        0
Medium_Cmp%       0
Long_Cmp          0
Long_Att          0
Long_Cmp%         0
Ast               0
xAG               0
Expected_xA       0
Expected_A-xAG    0
KP                0
1/3               0
PPA               0
CrsPA             0
PrgP              0
dtype: int64

In [55]:
# Count missing values per column in the pass-types frame.
df_Pass_Types.isna().sum()

Player              0
Nation              0
Pos                 0
Age                 0
90s                 0
Att                 0
Pass Types_Live     0
Pass Types_Dead     0
Pass Types_FK       0
Pass Types_TB       0
Pass Types_Sw       0
Pass Types_Crs      0
Pass Types_TI       0
Pass Types_CK       0
Corner Kicks_In     0
Corner Kicks_Out    0
Corner Kicks_Str    0
Outcomes_Cmp        0
Outcomes_Off        0
Outcomes_Blocks     0
dtype: int64

In [56]:
# Count missing values per column in the goal-and-shot-creation frame.
df_Goal_Shot_Creation.isna().sum()

Player                0
Nation                0
Pos                   0
Age                   0
90s                   0
SCA_SCA               0
SCA_SCA90             0
SCA Types_PassLive    0
SCA Types_PassDead    0
SCA Types_TO          0
SCA Types_Sh          0
SCA Types_Fld         0
SCA Types_Def         0
GCA_GCA               0
GCA_GCA90             0
GCA Types_PassLive    0
GCA Types_PassDead    0
GCA Types_TO          0
GCA Types_Sh          0
GCA Types_Fld         0
GCA Types_Def         0
dtype: int64

In [57]:
# Count missing values per column in the defensive-actions frame.
df_Defensive_Actions.isna().sum()

Player             0
Nation             0
Pos                0
Age                0
90s                0
Tackles_Tkl        0
Tackles_TklW       0
Tackles_Def 3rd    0
Tackles_Mid 3rd    0
Tackles_Att 3rd    0
Challenges_Tkl     0
Challenges_Att     0
Challenges_Tkl%    0
Challenges_Lost    0
Blocks_Blocks      0
Blocks_Sh          0
Blocks_Pass        0
Int                0
Tkl+Int            0
Clr                0
Err                0
dtype: int64

In [58]:
# Count missing values per column in the possession frame.
df_Possession.isna().sum()

Player             0
Nation             0
Pos                0
Age                0
90s                0
Touches_Touches    0
Touches_Def Pen    0
Touches_Def 3rd    0
Touches_Mid 3rd    0
Touches_Att 3rd    0
Touches_Att Pen    0
Touches_Live       0
Take-Ons_Att       0
Take-Ons_Succ      0
Take-Ons_Succ%     0
Take-Ons_Tkld      0
Take-Ons_Tkld%     0
Carries_Carries    0
Carries_TotDist    0
Carries_PrgDist    0
Carries_PrgC       0
Carries_1/3        0
Carries_CPA        0
Carries_Mis        0
Carries_Dis        0
Receiving_Rec      0
Receiving_PrgR     0
dtype: int64

In [59]:
# Count missing values per column in the playing-time frame.
# NOTE(review): this repeats the df_Playing_Time null check run in an earlier
# cell of this section; one of the two can likely be removed.
df_Playing_Time.isna().sum()

Player                       0
Nation                       0
Pos                          0
Age                          0
MP                           0
Playing Time_Min             0
Playing Time_Mn/MP           0
Playing Time_Min%            0
Playing Time_90s             0
Starts_Starts                0
Starts_Mn/Start              0
Starts_Compl                 0
Subs_Subs                    0
Subs_Mn/Sub                  0
Subs_unSub                   0
Team Success_PPM             0
Team Success_onG             0
Team Success_onGA            0
Team Success_+/-             0
Team Success_+/-90           0
Team Success_On-Off          0
Team Success (xG)_onxG       0
Team Success (xG)_onxGA      0
Team Success (xG)_xG+/-      0
Team Success (xG)_xG+/-90    0
Team Success (xG)_On-Off     0
dtype: int64

In [60]:
# Count missing values per column in the miscellaneous-stats frame.
df_Miscellaneous_Stats.isna().sum()

Player               0
Nation               0
Pos                  0
Age                  0
90s                  0
Performance_CrdY     0
Performance_CrdR     0
Performance_2CrdY    0
Performance_Fls      0
Performance_Fld      0
Performance_Off      0
Performance_Crs      0
Performance_Int      0
Performance_TklW     0
Performance_PKwon    0
Performance_PKcon    0
Performance_OG       0
Performance_Recov    0
Aerial Duels_Won     0
Aerial Duels_Lost    0
Aerial Duels_Won%    0
dtype: int64

In [61]:
# Count missing values per column in the regular-season frame.
df_Regular_Season.isna().sum()

Rk                 0
Squad              0
MP                 0
W                  0
D                  0
L                  0
GF                 0
GA                 0
GD                 0
Pts                0
Pts/MP             0
xG                 0
xGA                0
xGD                0
xGD/90             0
Attendance         0
Top Team Scorer    0
Goalkeeper         0
Notes              0
dtype: int64