## DATA was SCRAPED from FBREF website
* standings_url = "https://fbref.com/en/comps/9/Premier-League-Stats"

In [3]:
import requests
import os
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
import warnings 
warnings.filterwarnings('ignore')

In [4]:
import time

In [5]:
def grab_data(pattern,data_main ,match):
    soup = BeautifulSoup(data_main.text)
    links = soup.find_all('a')
    links = [l.get("href") for l in links]
    links = [l for l in links if l and 'all_comps/'+pattern+'/' in l]
    data_in = requests.get(f"https://fbref.com{links[0]}")
    df = pd.read_html(data_in.text, match=match)[0]
    print(f"Got {match} data sucessfully")
    time.sleep(1)
    return df

In [6]:
def clean_data(df,renamed_col,df_name):
    df = df.copy()
    df.columns = df.columns.droplevel()
    df.columns = renamed_col
    df.drop(['Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA','Opponent','Match Report'],axis=1,inplace=True)
    print(f"Shape  {df_name} : " ,df.shape)
    return df

In [7]:
    Shooting_common_col = ['Date', 'Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA','Opponent']
    Shooting_Standard_col = ['Standard_'+col for col in   ['Gls', 'Sh', 'SoT', 'SoT%', 'G/Sh', 'G/SoT', 'Dist', 'FK','PK', 'PKatt']] 
    Shooting_Expected_col =  ['Expected_'+col for col in ['xG', 'npxG', 'npxG/Sh', 'G-xG', 'np:G-xG']] 
    Shooting_renamed_col = Shooting_common_col + Shooting_Standard_col + Shooting_Expected_col + ['Match Report']

In [8]:
    GSA_common_col = ['Date', 'Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA','Opponent']
    GSA_SCA_Types_col = ['SCA_Types_'+col for col in   ['SCA', 'PassLive', 'PassDead', 'Drib', 'Sh', 'Fld', 'Def']] 
    GSA_GCA_Types_col =  ['GCA_Types_'+col for col in ['GCA', 'PassLive', 'PassDead', 'Drib', 'Sh', 'Fld', 'Def']] 
    GSA_renamed_col= GSA_common_col + GSA_SCA_Types_col + GSA_GCA_Types_col + ['Match Report']

In [9]:
    Defensive_Actions_common_col = ['Date', 'Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA','Opponent']
    Defensive_Actions_Tackles_col = ['Tackles_'+col for col in   ['Tkl', 'TklW', 'Def_3rd', 'Mid_3rd', 'Att_3rd']] 
    Defensive_Actions_Vs_Dribbles_col =  ['Vs_Dribbles_'+col for col in [ 'Tkl','Att', 'Tkl%', 'Past',]]
    Defensive_Actions_Pressures_col =  ['Pressures_'+col for col in ['Press', 'Succ', '%', 'Def_3rd', 'Mid_3rd','Att_3rd']]
    Defensive_Actions_Blocks_col =  ['Blocks_'+col for col in ['Blocks', 'Sh', 'ShSv', 'Pass']]
    Defensive_Actions_Def_col =  ['Def_'+col for col in ['Int', 'Tkl+Int', 'Clr','Err']]
    Defensive_Actions_renamed_col= Defensive_Actions_common_col + Defensive_Actions_Tackles_col + Defensive_Actions_Vs_Dribbles_col + Defensive_Actions_Pressures_col + Defensive_Actions_Blocks_col + Defensive_Actions_Def_col + ['Match Report']

In [10]:
    Goalkeeping_cols=['Date', 'Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA','Opponent']
    gk_abrevations = ['GK_Perf_','GK_Penalty_','GK_Launch_','GK_Passes_','Gk_Goal_Kk_','GK_Crosses_','Gk_Sweeper_']
    GK_listings = [
        ['SoTA', 'GA', 'Saves', 'Save%', 'CS', 'PSxG', 'PSxG+/-'],#GK_Perf_
        ['PKatt', 'PKA', 'PKsv', 'PKm'],#GK_Penalty_
        ['Cmp', 'Att', 'Cmp%'],#GK_Launch_
        ['Att', 'Thr','Launch%', 'AvgLen'],#GK_Passes_
        ['Att', 'Launch%', 'AvgLen'],#Gk_Goal_Kk_
        ['Opp', 'Stp', 'Stp%'],#GK_Crosses_
        ['#OPA', 'AvgDist'],#Gk_Sweeper_
    ]

    for abv,listings in zip(gk_abrevations,GK_listings):
        new_cols = [abv+col for col in listings]
        Goalkeeping_cols.extend(new_cols)
    Goalkeeping_renamed_col = Goalkeeping_cols + ['Match Report']


In [11]:
    Passing_cols=['Date', 'Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA','Opponent']
    passing_abrevations = ['Passing_Total_','Passing_Short_','Passing_Medium_','Passing_Long_','Passing_']
    passing_listings = [
        ['Cmp', 'Att', 'Cmp%', 'TotDist', 'PrgDist'],#Passing_Total_
        ['Cmp', 'Att','Cmp%'],#Passing_Short_
        ['Cmp', 'Att', 'Cmp%'],#'Passing_Medium_'
        ['Cmp', 'Att', 'Cmp%'],#Passing_Long_
        ['Ast', 'xA', 'KP','1/3', 'PPA', 'CrsPA', 'Prog'],#Passing_
    ]

    for abv,listings in zip(passing_abrevations,passing_listings):
        new_cols = [abv+col for col in listings]
        Passing_cols.extend(new_cols)
    Passing_renamed_col = Passing_cols + ['Match Report']


In [12]:
    PassingType_cols=['Date', 'Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA','Opponent','Pass_Att']
    passingType_abrevations = ['PassType_','Corner_','Pass_Height_','PassWith_','Pass_Outome_']
    passingType_listings = [
        ['Live', 'Dead', 'FK', 'TB', 'Press', 'Sw', 'Crs','CK'],#PassType_
        ['In', 'Out', 'Str'],#Corner_
        [ 'Ground', 'Low', 'High'],#Pass_Height_
        ['Left', 'Right','Head', 'TI','Other'],#PassWith_
        ['Cmp', 'Off', 'Out', 'Int', 'Blocks'],#Pass_Outome_
    ]

    for abv,listings in zip(passingType_abrevations,passingType_listings):
        new_cols = [abv+col for col in listings]
        PassingType_cols.extend(new_cols)
    PassingType_renamed_col = PassingType_cols + ['Match Report']

In [13]:
    Possession_cols=['Date', 'Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA','Opponent', 'Poss']
    Possession_abrevations = ['Touches_','Dribbles_','Carries_','Receiving_',]
    Possession_listings = [
        ['Touches', 'Def_Pen', 'Def_3rd', 'Mid_3rd', 'Att_3rd', 'Att_Pen', 'Live'],#Touches_
        ['Succ', 'Att', 'Succ%', '#Pl', 'Megs'],#Dribbles_
        [ 'Carries', 'TotDist', 'PrgDist', 'Prog', '1/3', 'CPA', 'Mis', 'Dis'],#Carries_
        ['Targ', 'Rec', 'Rec%', 'Prog'],#Receiving_
    ]

    for abv,listings in zip(Possession_abrevations,Possession_listings):
        new_cols = [abv+col for col in listings]
        Possession_cols.extend(new_cols)
    Possession_renamed_col = Possession_cols + ['Match Report']

In [14]:
    Miscellaneous_Stats_cols=['Date', 'Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA','Opponent',]
    Miscellaneous_Stats_abrevations = ['Misc_Stats_','Misc_Stats_Aerial_Duels']
    Miscellaneous_Stats_listings = [
        ['CrdY', 'CrdR', '2CrdY', 'Fls', 'Fld', 'Off', 'Crs', 'Int','TklW', 'PKwon', 'PKcon', 'OG', 'Recov'],#Misc_Stats_
        ['Won', 'Lost', 'Won%'],#Misc_Stats_Aerial_Duels
    ]

    for abv,listings in zip(Miscellaneous_Stats_abrevations,Miscellaneous_Stats_listings):
        new_cols = [abv+col for col in listings]
        Miscellaneous_Stats_cols.extend(new_cols)
    Miscellaneous_Stats_renamed_col = Miscellaneous_Stats_cols + ['Match Report']

In [15]:
standings_url = "https://fbref.com/en/comps/9/Premier-League-Stats"

In [16]:
years = list(range(2022, 2014, -1))
all_matches = []

In [17]:
for i in years:
    print(i)

2022
2021
2020
2019
2018
2017
2016
2015


In [18]:
year_path_dict = dict()

In [19]:
    for year in years:    
        print('Year === : ',year)
        data = requests.get(standings_url)
        soup = BeautifulSoup(data.text)
        standings_table = soup.select('table.stats_table')[0]

        links = [l.get("href") for l in standings_table.find_all('a')]
        links = [l for l in links if '/squads/' in l]
        team_urls = [f"https://fbref.com{l}" for l in links]

        previous_season = soup.select("a.prev")[0].get("href")
        standings_url = f"https://fbref.com{previous_season}"
        team_paths = []
        for team_url in team_urls:
            team_name = team_url.split("/")[-1].replace("-Stats", "").replace("-", " ")
            data = requests.get(team_url)
            matches = pd.read_html(data.text, match="Scores & Fixtures")[0]
            soup = BeautifulSoup(data.text)
            print(f"Starting {year} {team_name} Process")
            a = matches.copy()

            shooting = grab_data(pattern='shooting',data_main=data ,match='Shooting')
            b = clean_data(df=shooting,renamed_col=Shooting_renamed_col,df_name='Shooting')

            GSA = grab_data(pattern='gca',data_main=data ,match="Goal and Shot")
            c = clean_data(df=GSA,renamed_col=GSA_renamed_col,df_name='GSA')

            DEF = grab_data(pattern='defense',data_main=data ,match="Defensive Actions")
            d = clean_data(df=DEF,renamed_col=Defensive_Actions_renamed_col,df_name='Defensive Actions')

            goalie = grab_data(pattern='keeper', data_main=data, match="Goalkeeping")
            e = clean_data(df=goalie,renamed_col=Goalkeeping_renamed_col,df_name='Goalkeeping')

            passing = grab_data(pattern='passing', data_main=data, match="Passing")
            f = clean_data(df=passing, renamed_col = Passing_renamed_col ,df_name='Passing')

            pass_types = grab_data(pattern='passing_types', data_main=data, match="Pass Types")
            g = clean_data(df=pass_types,renamed_col=PassingType_renamed_col,df_name='PassingType')

            possession = grab_data(pattern='possession', data_main=data, match="Possession")
            h = clean_data(df=possession,renamed_col=Possession_renamed_col,df_name='Possession')

            Misc_data = grab_data(pattern='misc', data_main=data, match="Miscellaneous Stats")
            i = clean_data(df=Misc_data,renamed_col=Miscellaneous_Stats_renamed_col ,df_name='Miscellaneous_Stats')

            #merge
            #a['Team'] = team_name 
            try:
                #team_data = matches.merge(shooting[shooting.columns.drop(drops)], on="Date")
                a['Team'] = team_name 
                #b['Team'] = team_name 
                #b.drop(['Date'],axis=1,inplace=True)
                ab = a.merge(b, on="Date")
                abc = ab.merge(c, on="Date")
                abcd = abc.merge(d, on="Date")
                abcde = abcd.merge(e, on="Date")
                #f['Team'] = team_name
                #f.drop(['Date'],axis=1,inplace=True)
                abcdef = abcde.merge(f, on="Date")
                abcdefg = abcdef.merge(g, on="Date")
                abcdefgh = abcdef.merge(h, on="Date")
                abcdefghi = abcdef.merge(i, on="Date")
                print(f"Merged df shape of {year} {team_name} : ",abcdefghi.shape)
                team_data = abcdefghi
            except ValueError:
                continue
            #team_data = team_data[team_data["Comp"] == "Premier League"]
            team_data["Season"] = year
            path = str(year)+team_name+".csv" 
            team_paths.append(path)
            team_data.to_csv(path)
            all_matches.append(team_data)
            time.sleep(1)
        year_path_dict[year]=team_paths

Year === :  2022
Starting 2022 Manchester City Process
Got Shooting data sucessfully
Shape  Shooting :  (59, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (59, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (59, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (59, 27)
Got Passing data sucessfully
Shape  Passing :  (59, 22)
Got Pass Types data sucessfully
Shape  PassingType :  (59, 26)
Got Possession data sucessfully
Shape  Possession :  (59, 26)
Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (59, 17)
Merged df shape of 2022 Manchester City :  (58, 135)
Starting 2022 Liverpool Process
Got Shooting data sucessfully
Shape  Shooting :  (63, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (63, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (63, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (63, 27)
Got Passing data sucessfully
Shape  Passing :  (63, 22)
Got Pass Types data sucessfully

Got Passing data sucessfully
Shape  Passing :  (42, 22)
Got Pass Types data sucessfully
Shape  PassingType :  (42, 26)
Got Possession data sucessfully
Shape  Possession :  (42, 26)
Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (42, 17)
Merged df shape of 2022 Aston Villa :  (41, 135)
Starting 2022 Southampton Process
Got Shooting data sucessfully
Shape  Shooting :  (46, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (46, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (46, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (46, 27)
Got Passing data sucessfully
Shape  Passing :  (46, 22)
Got Pass Types data sucessfully
Shape  PassingType :  (46, 26)
Got Possession data sucessfully
Shape  Possession :  (46, 26)
Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (46, 17)
Merged df shape of 2022 Southampton :  (45, 135)
Starting 2022 Everton Process
Got Shooting data sucessfully
Shape  Shooting :  (45, 16)
Go

Shape  Shooting :  (59, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (59, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (59, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (59, 27)
Got Passing data sucessfully
Shape  Passing :  (59, 22)
Got Pass Types data sucessfully
Shape  PassingType :  (59, 26)
Got Possession data sucessfully
Shape  Possession :  (59, 26)
Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (59, 17)
Merged df shape of 2021 Arsenal :  (58, 135)
Starting 2021 Leeds United Process
Got Shooting data sucessfully
Shape  Shooting :  (41, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (41, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (41, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (41, 27)
Got Passing data sucessfully
Shape  Passing :  (41, 22)
Got Pass Types data sucessfully
Shape  PassingType :  (41, 26)
Got Possession data sucessfully
Shape  Possession :  (41, 

Shape  PassingType :  (56, 26)
Got Possession data sucessfully
Shape  Possession :  (56, 26)
Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (56, 17)
Merged df shape of 2020 Liverpool :  (55, 135)
Starting 2020 Manchester City Process
Got Shooting data sucessfully
Shape  Shooting :  (60, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (60, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (60, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (60, 27)
Got Passing data sucessfully
Shape  Passing :  (60, 22)
Got Pass Types data sucessfully
Shape  PassingType :  (60, 26)
Got Possession data sucessfully
Shape  Possession :  (60, 26)
Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (60, 17)
Merged df shape of 2020 Manchester City :  (59, 135)
Starting 2020 Manchester United Process
Got Shooting data sucessfully
Shape  Shooting :  (62, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (62, 15)
Got Defensive Ac

Shape  GSA :  (42, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (42, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (42, 27)
Got Passing data sucessfully
Shape  Passing :  (42, 22)
Got Pass Types data sucessfully
Shape  PassingType :  (42, 26)
Got Possession data sucessfully
Shape  Possession :  (42, 26)
Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (42, 17)
Merged df shape of 2020 Brighton and Hove Albion :  (41, 135)
Starting 2020 West Ham United Process
Got Shooting data sucessfully
Shape  Shooting :  (43, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (43, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (43, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (43, 27)
Got Passing data sucessfully
Shape  Passing :  (43, 22)
Got Pass Types data sucessfully
Shape  PassingType :  (43, 26)
Got Possession data sucessfully
Shape  Possession :  (43, 26)
Got Miscellaneous Stats data sucessfull

Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (43, 17)
Merged df shape of 2019 Everton :  (42, 135)
Starting 2019 Leicester City Process
Got Shooting data sucessfully
Shape  Shooting :  (44, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (44, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (44, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (44, 27)
Got Passing data sucessfully
Shape  Passing :  (44, 22)
Got Pass Types data sucessfully
Shape  PassingType :  (44, 26)
Got Possession data sucessfully
Shape  Possession :  (44, 26)
Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (44, 17)
Merged df shape of 2019 Leicester City :  (43, 135)
Starting 2019 West Ham United Process
Got Shooting data sucessfully
Shape  Shooting :  (44, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (44, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (44, 24)
Got Goalkeeping data sucessfully
Shape 

ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

In [16]:
abcdefghi

Unnamed: 0,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,Opponent,xG,xGA,Poss,Attendance,Captain,Formation,Referee,Match Report,Notes,Team,Standard_Gls,Standard_Sh,Standard_SoT,Standard_SoT%,Standard_G/Sh,Standard_G/SoT,Standard_Dist,Standard_FK,Standard_PK,Standard_PKatt,Expected_xG,Expected_npxG,Expected_npxG/Sh,Expected_G-xG,Expected_np:G-xG,SCA_Types_SCA,SCA_Types_PassLive,SCA_Types_PassDead,SCA_Types_Drib,SCA_Types_Sh,SCA_Types_Fld,SCA_Types_Def,GCA_Types_GCA,GCA_Types_PassLive,GCA_Types_PassDead,GCA_Types_Drib,GCA_Types_Sh,GCA_Types_Fld,GCA_Types_Def,Tackles_Tkl,Tackles_TklW,Tackles_Def_3rd,Tackles_Mid_3rd,Tackles_Att_3rd,Vs_Dribbles_Tkl,Vs_Dribbles_Att,Vs_Dribbles_Tkl%,Vs_Dribbles_Past,Pressures_Press,Pressures_Succ,Pressures_%,Pressures_Def_3rd,Pressures_Mid_3rd,Pressures_Att_3rd,Blocks_Blocks,Blocks_Sh,Blocks_ShSv,Blocks_Pass,Def_Int,Def_Tkl+Int,Def_Clr,Def_Err,GK_Perf_SoTA,GK_Perf_GA,GK_Perf_Saves,GK_Perf_Save%,GK_Perf_CS,GK_Perf_PSxG,GK_Perf_PSxG+/-,GK_Penalty_PKatt,GK_Penalty_PKA,GK_Penalty_PKsv,GK_Penalty_PKm,GK_Launch_Cmp,GK_Launch_Att,GK_Launch_Cmp%,GK_Passes_Att,GK_Passes_Thr,GK_Passes_Launch%,GK_Passes_AvgLen,Gk_Goal_Kk_Att,Gk_Goal_Kk_Launch%,Gk_Goal_Kk_AvgLen,GK_Crosses_Opp,GK_Crosses_Stp,GK_Crosses_Stp%,Gk_Sweeper_#OPA,Gk_Sweeper_AvgDist,Passing_Total_Cmp,Passing_Total_Att,Passing_Total_Cmp%,Passing_Total_TotDist,Passing_Total_PrgDist,Passing_Short_Cmp,Passing_Short_Att,Passing_Short_Cmp%,Passing_Medium_Cmp,Passing_Medium_Att,Passing_Medium_Cmp%,Passing_Long_Cmp,Passing_Long_Att,Passing_Long_Cmp%,Passing_Ast,Passing_xA,Passing_KP,Passing_1/3,Passing_PPA,Passing_CrsPA,Passing_Prog,Misc_Stats_CrdY,Misc_Stats_CrdR,Misc_Stats_2CrdY,Misc_Stats_Fls,Misc_Stats_Fld,Misc_Stats_Off,Misc_Stats_Crs,Misc_Stats_Int,Misc_Stats_TklW,Misc_Stats_PKwon,Misc_Stats_PKcon,Misc_Stats_OG,Misc_Stats_Recov,Misc_Stats_Aerial_DuelsWon,Misc_Stats_Aerial_DuelsLost,Misc_Stats_Aerial_DuelsWon%
0,2021-08-14,17:30,Premier League,Matchweek 1,Sat,Home,L,0,3,Liverpool,1.4,2.0,49,27023,Grant Hanley,4-3-3,Andre Marriner,Match Report,,Norwich City,0,15,4,26.7,0.00,0.00,17.3,0.0,0,0,1.4,1.4,0.12,-1.4,-1.4,19.0,16.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,17,13.0,7.0,0.0,11.0,21.0,52.4,10.0,125.0,42.0,33.6,56.0,43.0,26.0,16.0,7.0,1.0,9.0,29,,31.0,0.0,6,3,3,50.0,0,1.6,-1.4,0,0,0,0,7.0,21.0,33.3,36.0,4.0,50.0,37.1,8.0,37.5,32.5,11.0,0.0,0.0,1.0,11.4,453.0,539.0,84.0,8884.0,2733.0,182.0,201.0,90.5,184.0,200.0,92.0,72.0,117.0,61.5,0,1.0,11.0,21.0,5.0,3.0,21.0,1,0,0,4,17,2,14,29,17,0.0,0.0,0,73.0,10.0,17.0,37.0
1,2021-08-14,17:30,Premier League,Matchweek 1,Sat,Home,L,0,3,Liverpool,1.4,2.0,49,27023,Grant Hanley,4-3-3,Andre Marriner,Match Report,,Norwich City,0,15,4,26.7,0.00,0.00,17.3,0.0,0,0,1.4,1.4,0.12,-1.4,-1.4,19.0,16.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,17,13.0,7.0,0.0,11.0,21.0,52.4,10.0,125.0,42.0,33.6,56.0,43.0,26.0,16.0,7.0,1.0,9.0,29,,31.0,0.0,6,3,3,50.0,0,1.6,-1.4,0,0,0,0,7.0,21.0,33.3,36.0,4.0,50.0,37.1,8.0,37.5,32.5,11.0,0.0,0.0,1.0,11.4,317.0,379.0,83.6,6233.0,1792.0,126.0,136.0,92.6,147.0,156.0,94.2,42.0,78.0,53.8,0,0.0,0.0,11.0,0.0,0.0,9.0,1,0,0,4,17,2,14,29,17,0.0,0.0,0,73.0,10.0,17.0,37.0
2,2021-08-14,17:30,Premier League,Matchweek 1,Sat,Home,L,0,3,Liverpool,1.4,2.0,49,27023,Grant Hanley,4-3-3,Andre Marriner,Match Report,,Norwich City,0,15,4,26.7,0.00,0.00,17.3,0.0,0,0,1.4,1.4,0.12,-1.4,-1.4,19.0,16.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,17,13.0,7.0,0.0,11.0,21.0,52.4,10.0,125.0,42.0,33.6,56.0,43.0,26.0,16.0,7.0,1.0,9.0,29,,31.0,0.0,6,3,3,50.0,0,1.6,-1.4,0,0,0,0,7.0,21.0,33.3,36.0,4.0,50.0,37.1,8.0,37.5,32.5,11.0,0.0,0.0,1.0,11.4,,,,,,,,,,,,,,,5,,,,,,,1,0,0,4,17,2,14,29,17,0.0,0.0,0,73.0,10.0,17.0,37.0
3,2021-08-14,17:30,Premier League,Matchweek 1,Sat,Home,L,0,3,Liverpool,1.4,2.0,49,27023,Grant Hanley,4-3-3,Andre Marriner,Match Report,,Norwich City,0,15,4,26.7,0.00,0.00,17.3,0.0,0,0,1.4,1.4,0.12,-1.4,-1.4,19.0,16.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,17,13.0,7.0,0.0,11.0,21.0,52.4,10.0,125.0,42.0,33.6,56.0,43.0,26.0,16.0,7.0,1.0,9.0,29,,31.0,0.0,6,3,3,50.0,0,1.6,-1.4,0,0,0,0,7.0,21.0,33.3,36.0,4.0,50.0,37.1,8.0,37.5,32.5,11.0,0.0,0.0,1.0,11.4,307.0,399.0,76.9,6253.0,2149.0,111.0,129.0,86.0,137.0,162.0,84.6,50.0,96.0,52.1,0,0.7,9.0,18.0,8.0,3.0,25.0,1,0,0,4,17,2,14,29,17,0.0,0.0,0,73.0,10.0,17.0,37.0
4,2021-08-14,17:30,Premier League,Matchweek 1,Sat,Home,L,0,3,Liverpool,1.4,2.0,49,27023,Grant Hanley,4-3-3,Andre Marriner,Match Report,,Norwich City,0,15,4,26.7,0.00,0.00,17.3,0.0,0,0,1.4,1.4,0.12,-1.4,-1.4,19.0,16.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,17,13.0,7.0,0.0,11.0,21.0,52.4,10.0,125.0,42.0,33.6,56.0,43.0,26.0,16.0,7.0,1.0,9.0,29,,31.0,0.0,6,3,3,50.0,0,1.6,-1.4,0,0,0,0,7.0,21.0,33.3,36.0,4.0,50.0,37.1,8.0,37.5,32.5,11.0,0.0,0.0,1.0,11.4,349.0,453.0,77.0,6591.0,2358.0,146.0,167.0,87.4,152.0,178.0,85.4,43.0,83.0,51.8,0,0.5,7.0,20.0,9.0,2.0,24.0,1,0,0,4,17,2,14,29,17,0.0,0.0,0,73.0,10.0,17.0,37.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83243,2022-05-22,16:00,Premier League,Matchweek 38,Sun,Home,L,0,5,Tottenham,0.3,3.7,40,27022,Grant Hanley,3-4-3,Chris Kavanagh,Match Report,,Norwich City,30,426,131,30.8,0.06,0.21,18.1,18.0,3,4,33.3,31.0,0.09,-3.3,-4.0,12.0,6.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,5,9.0,2.0,0.0,3.0,16.0,18.8,13.0,117.0,32.0,27.4,44.0,40.0,33.0,8.0,4.0,1.0,4.0,16,,7.0,1.0,12,5,7,58.3,0,4.6,-0.4,0,0,0,0,4.0,14.0,28.6,28.0,7.0,39.3,39.1,4.0,75.0,59.3,5.0,0.0,0.0,0.0,11.0,335.0,412.0,81.3,6189.0,2105.0,140.0,158.0,88.6,146.0,162.0,90.1,42.0,72.0,58.3,0,0.5,5.0,19.0,6.0,1.0,25.0,3,0,0,17,8,0,12,16,5,0.0,0.0,0,58.0,9.0,13.0,40.9
83244,2022-05-22,16:00,Premier League,Matchweek 38,Sun,Home,L,0,5,Tottenham,0.3,3.7,40,27022,Grant Hanley,3-4-3,Chris Kavanagh,Match Report,,Norwich City,30,426,131,30.8,0.06,0.21,18.1,18.0,3,4,33.3,31.0,0.09,-3.3,-4.0,12.0,6.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,5,9.0,2.0,0.0,3.0,16.0,18.8,13.0,117.0,32.0,27.4,44.0,40.0,33.0,8.0,4.0,1.0,4.0,16,,7.0,1.0,12,5,7,58.3,0,4.6,-0.4,0,0,0,0,4.0,14.0,28.6,28.0,7.0,39.3,39.1,4.0,75.0,59.3,5.0,0.0,0.0,0.0,11.0,295.0,367.0,80.4,5864.0,2054.0,132.0,148.0,89.2,119.0,133.0,89.5,41.0,74.0,55.4,0,0.8,5.0,16.0,7.0,1.0,22.0,3,0,0,17,8,0,12,16,5,0.0,0.0,0,58.0,9.0,13.0,40.9
83245,2022-05-22,16:00,Premier League,Matchweek 38,Sun,Home,L,0,5,Tottenham,0.3,3.7,40,27022,Grant Hanley,3-4-3,Chris Kavanagh,Match Report,,Norwich City,30,426,131,30.8,0.06,0.21,18.1,18.0,3,4,33.3,31.0,0.09,-3.3,-4.0,12.0,6.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,5,9.0,2.0,0.0,3.0,16.0,18.8,13.0,117.0,32.0,27.4,44.0,40.0,33.0,8.0,4.0,1.0,4.0,16,,7.0,1.0,12,5,7,58.3,0,4.6,-0.4,0,0,0,0,4.0,14.0,28.6,28.0,7.0,39.3,39.1,4.0,75.0,59.3,5.0,0.0,0.0,0.0,11.0,275.0,363.0,75.8,4915.0,1856.0,125.0,143.0,87.4,112.0,131.0,85.5,31.0,73.0,42.5,1,1.2,9.0,12.0,2.0,0.0,14.0,3,0,0,17,8,0,12,16,5,0.0,0.0,0,58.0,9.0,13.0,40.9
83246,2022-05-22,16:00,Premier League,Matchweek 38,Sun,Home,L,0,5,Tottenham,0.3,3.7,40,27022,Grant Hanley,3-4-3,Chris Kavanagh,Match Report,,Norwich City,30,426,131,30.8,0.06,0.21,18.1,18.0,3,4,33.3,31.0,0.09,-3.3,-4.0,12.0,6.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,5,9.0,2.0,0.0,3.0,16.0,18.8,13.0,117.0,32.0,27.4,44.0,40.0,33.0,8.0,4.0,1.0,4.0,16,,7.0,1.0,12,5,7,58.3,0,4.6,-0.4,0,0,0,0,4.0,14.0,28.6,28.0,7.0,39.3,39.1,4.0,75.0,59.3,5.0,0.0,0.0,0.0,11.0,335.0,422.0,79.4,7435.0,1726.0,99.0,114.0,86.8,163.0,184.0,88.6,71.0,116.0,61.2,0,0.2,5.0,18.0,4.0,1.0,21.0,3,0,0,17,8,0,12,16,5,0.0,0.0,0,58.0,9.0,13.0,40.9


In [24]:
standings_url = "https://fbref.com/en/comps/9/1889/2018-2019-Premier-League-Stats"

In [21]:
data_main = requests.get(standings_url)

In [22]:
soup = BeautifulSoup(data_main.text)
standings_table = soup.select('table.stats_table')[0]
links = standings_table.find_all('a')
links = [l.get("href") for l in links]
links = [l for l in links if '/squads/' in l]
links

['/en/squads/b8fd03ef/2018-2019/Manchester-City-Stats',
 '/en/squads/822bd0ba/2018-2019/Liverpool-Stats',
 '/en/squads/cff3d9bb/2018-2019/Chelsea-Stats',
 '/en/squads/361ca564/2018-2019/Tottenham-Hotspur-Stats',
 '/en/squads/18bb7c10/2018-2019/Arsenal-Stats',
 '/en/squads/19538871/2018-2019/Manchester-United-Stats',
 '/en/squads/8cec06e1/2018-2019/Wolverhampton-Wanderers-Stats',
 '/en/squads/d3fd31cc/2018-2019/Everton-Stats',
 '/en/squads/a2d435b3/2018-2019/Leicester-City-Stats',
 '/en/squads/7c21e445/2018-2019/West-Ham-United-Stats',
 '/en/squads/2abfe087/2018-2019/Watford-Stats',
 '/en/squads/47c64c55/2018-2019/Crystal-Palace-Stats',
 '/en/squads/b2b47a98/2018-2019/Newcastle-United-Stats',
 '/en/squads/4ba7cbea/2018-2019/Bournemouth-Stats',
 '/en/squads/943e8050/2018-2019/Burnley-Stats',
 '/en/squads/33c895d4/2018-2019/Southampton-Stats',
 '/en/squads/d07537b9/2018-2019/Brighton-and-Hove-Albion-Stats',
 '/en/squads/75fae011/2018-2019/Cardiff-City-Stats',
 '/en/squads/fd962109/2018-20

In [28]:
standings_url='https://fbref.com/en/comps/9/1889/2018-2019-Premier-League-Stats'

In [29]:
years = [2019]

In [30]:
for year in years:    
        print('Year === : ',year)
        data = requests.get(standings_url)
        soup = BeautifulSoup(data.text)
        standings_table = soup.select('table.stats_table')[0]

        links = [l.get("href") for l in standings_table.find_all('a')]
        links = [l for l in links if '/squads/' in l]
        team_urls = [f"https://fbref.com{l}" for l in links]

        previous_season = soup.select("a.prev")[0].get("href")
        standings_url = f"https://fbref.com{previous_season}"
        team_paths = []
        for team_url in team_urls:
            team_name = team_url.split("/")[-1].replace("-Stats", "").replace("-", " ")
            data = requests.get(team_url)
            matches = pd.read_html(data.text, match="Scores & Fixtures")[0]
            soup = BeautifulSoup(data.text)
            print(f"Starting {year} {team_name} Process")
            a = matches.copy()

            shooting = grab_data(pattern='shooting',data_main=data ,match='Shooting')
            b = clean_data(df=shooting,renamed_col=Shooting_renamed_col,df_name='Shooting')

            GSA = grab_data(pattern='gca',data_main=data ,match="Goal and Shot")
            c = clean_data(df=GSA,renamed_col=GSA_renamed_col,df_name='GSA')

            DEF = grab_data(pattern='defense',data_main=data ,match="Defensive Actions")
            d = clean_data(df=DEF,renamed_col=Defensive_Actions_renamed_col,df_name='Defensive Actions')

            goalie = grab_data(pattern='keeper', data_main=data, match="Goalkeeping")
            e = clean_data(df=goalie,renamed_col=Goalkeeping_renamed_col,df_name='Goalkeeping')

            passing = grab_data(pattern='passing', data_main=data, match="Passing")
            f = clean_data(df=passing, renamed_col = Passing_renamed_col ,df_name='Passing')

            pass_types = grab_data(pattern='passing_types', data_main=data, match="Pass Types")
            g = clean_data(df=pass_types,renamed_col=PassingType_renamed_col,df_name='PassingType')

            possession = grab_data(pattern='possession', data_main=data, match="Possession")
            h = clean_data(df=possession,renamed_col=Possession_renamed_col,df_name='Possession')

            Misc_data = grab_data(pattern='misc', data_main=data, match="Miscellaneous Stats")
            i = clean_data(df=Misc_data,renamed_col=Miscellaneous_Stats_renamed_col ,df_name='Miscellaneous_Stats')

            #merge
            #a['Team'] = team_name 
            try:
                #team_data = matches.merge(shooting[shooting.columns.drop(drops)], on="Date")
                a['Team'] = team_name 
                #b['Team'] = team_name 
                #b.drop(['Date'],axis=1,inplace=True)
                ab = a.merge(b, on="Date")
                abc = ab.merge(c, on="Date")
                abcd = abc.merge(d, on="Date")
                abcde = abcd.merge(e, on="Date")
                #f['Team'] = team_name
                #f.drop(['Date'],axis=1,inplace=True)
                abcdef = abcde.merge(f, on="Date")
                abcdefg = abcdef.merge(g, on="Date")
                abcdefgh = abcdef.merge(h, on="Date")
                abcdefghi = abcdef.merge(i, on="Date")
                print(f"Merged df shape of {year} {team_name} : ",abcdefghi.shape)
                team_data = abcdefghi
            except ValueError:
                continue
            #team_data = team_data[team_data["Comp"] == "Premier League"]
            team_data["Season"] = year
            path = str(year)+team_name+".csv" 
            team_paths.append(path)
            try:
                team_data.to_csv(path)
            except PermissionError:
                continue
            all_matches.append(team_data)
            time.sleep(1)
        year_path_dict[year]=team_paths

Year === :  2019
Starting 2019 Manchester City Process
Got Shooting data sucessfully
Shape  Shooting :  (62, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (62, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (62, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (62, 27)
Got Passing data sucessfully
Shape  Passing :  (62, 22)
Got Pass Types data sucessfully
Shape  PassingType :  (62, 26)
Got Possession data sucessfully
Shape  Possession :  (62, 26)
Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (62, 17)
Merged df shape of 2019 Manchester City :  (61, 135)
Starting 2019 Liverpool Process
Got Shooting data sucessfully
Shape  Shooting :  (54, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (54, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (54, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (54, 27)
Got Passing data sucessfully
Shape  Passing :  (54, 22)
Got Pass Types data sucessfully

Got Pass Types data sucessfully
Shape  PassingType :  (44, 26)
Got Possession data sucessfully
Shape  Possession :  (44, 26)
Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (44, 17)
Merged df shape of 2019 Bournemouth :  (43, 135)
Starting 2019 Burnley Process
Got Shooting data sucessfully
Shape  Shooting :  (48, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (48, 15)
Got Defensive Actions data sucessfully
Shape  Defensive Actions :  (48, 24)
Got Goalkeeping data sucessfully
Shape  Goalkeeping :  (48, 27)
Got Passing data sucessfully
Shape  Passing :  (48, 22)
Got Pass Types data sucessfully
Shape  PassingType :  (48, 26)
Got Possession data sucessfully
Shape  Possession :  (48, 26)
Got Miscellaneous Stats data sucessfully
Shape  Miscellaneous_Stats :  (48, 17)
Merged df shape of 2019 Burnley :  (47, 135)
Starting 2019 Southampton Process
Got Shooting data sucessfully
Shape  Shooting :  (44, 16)
Got Goal and Shot data sucessfully
Shape  GSA :  (44, 15)
Got 