In [1]:
# Steps 
# 1. Get the dataset (Web scraping)
# 2. Clean your dataset
# 3. Train-test split (12 features and 1 target(winning team(Home/Away/draw)))
# 4. Train 4 different classifiers - Logistic, SVM, XGBoost, Random Forest
# 5. Given an away and home team, Use the best classifier to predict who will win 

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

### Parsing the html content of the page using requests

In [3]:
EPL_url = "https://fbref.com/en/comps/9/Premier-League-Stats" 
EPL_response_html = requests.get(EPL_url)
EPL_response_html

<Response [200]>

### Parsing relevant HTML links using Beautiful soup

In [4]:
soup = BeautifulSoup(EPL_response_html.content, 'html.parser')     # Create a soup object
table = soup.find('table', {'class': 'stats_table'})

# Find all the <a> tags inside the table
a_tags = table.find_all('a')

# Scrape the team urls from the relevant <a> tags (which contains '/squads' in it)
team_urls = []
for a_tag in a_tags:
    href = a_tag.get("href")
    if '/squads/' in href:
        team_urls.append('https://fbref.com' + href)
        
team_urls

['https://fbref.com/en/squads/18bb7c10/Arsenal-Stats',
 'https://fbref.com/en/squads/b8fd03ef/Manchester-City-Stats',
 'https://fbref.com/en/squads/b2b47a98/Newcastle-United-Stats',
 'https://fbref.com/en/squads/19538871/Manchester-United-Stats',
 'https://fbref.com/en/squads/361ca564/Tottenham-Hotspur-Stats',
 'https://fbref.com/en/squads/8602292d/Aston-Villa-Stats',
 'https://fbref.com/en/squads/d07537b9/Brighton-and-Hove-Albion-Stats',
 'https://fbref.com/en/squads/822bd0ba/Liverpool-Stats',
 'https://fbref.com/en/squads/cd051869/Brentford-Stats',
 'https://fbref.com/en/squads/fd962109/Fulham-Stats',
 'https://fbref.com/en/squads/cff3d9bb/Chelsea-Stats',
 'https://fbref.com/en/squads/47c64c55/Crystal-Palace-Stats',
 'https://fbref.com/en/squads/8cec06e1/Wolverhampton-Wanderers-Stats',
 'https://fbref.com/en/squads/7c21e445/West-Ham-United-Stats',
 'https://fbref.com/en/squads/4ba7cbea/Bournemouth-Stats',
 'https://fbref.com/en/squads/5bfb9659/Leeds-United-Stats',
 'https://fbref.com

## Extracting the Match stats using pandas 

In [5]:
team_url = team_urls[0]
team_match_data = requests.get(team_url)     # response HTML - team_match_data.text

In [6]:
# Scan all the tables in the page and if the string matches "Scores & Fixtures", then gets the table 
matches = pd.read_html(team_match_data.text, match="Scores & Fixtures")
matches_df = matches[0]
matches_df.head()

Unnamed: 0,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,Opponent,xG,xGA,Poss,Attendance,Captain,Formation,Referee,Match Report,Notes
0,2022-08-05,20:00,Premier League,Matchweek 1,Fri,Away,W,2,0,Crystal Palace,1.0,1.2,44.0,25286.0,Martin Ødegaard,4-3-3,Anthony Taylor,Match Report,
1,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,2,Leicester City,2.7,0.5,50.0,60033.0,Martin Ødegaard,4-3-3,Darren England,Match Report,
2,2022-08-20,17:30,Premier League,Matchweek 3,Sat,Away,W,3,0,Bournemouth,1.3,0.3,57.0,10423.0,Martin Ødegaard,4-3-3,Craig Pawson,Match Report,
3,2022-08-27,17:30,Premier League,Matchweek 4,Sat,Home,W,2,1,Fulham,2.6,0.8,71.0,60164.0,Martin Ødegaard,4-3-3,Jarred Gillett,Match Report,
4,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,2,1,Aston Villa,2.4,0.4,59.0,60012.0,Martin Ødegaard,4-3-3,Robert Jones,Match Report,


## Get the match shooting stats to extract more match features

In [7]:
soup = BeautifulSoup(team_match_data.text)
all_links = soup.find_all('a')
all_links = [link.get("href") for link in all_links]
shooting_link = ['https://fbref.com' + link for link in all_links if link and 'all_comps/shooting/' in link]
shooting_link

['https://fbref.com/en/squads/18bb7c10/2022-2023/matchlogs/all_comps/shooting/Arsenal-Match-Logs-All-Competitions',
 'https://fbref.com/en/squads/18bb7c10/2022-2023/matchlogs/all_comps/shooting/Arsenal-Match-Logs-All-Competitions',
 'https://fbref.com/en/squads/18bb7c10/2022-2023/matchlogs/all_comps/shooting/Arsenal-Match-Logs-All-Competitions',
 'https://fbref.com/en/squads/18bb7c10/2022-2023/matchlogs/all_comps/shooting/Arsenal-Match-Logs-All-Competitions']

In [8]:
shooting_data = requests.get(shooting_link[0])        
shooting_df = pd.read_html(shooting_data.text)[0]
shooting_df.head()

Unnamed: 0_level_0,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,...,Standard,Standard,Standard,Standard,Expected,Expected,Expected,Expected,Expected,Unnamed: 25_level_0
Unnamed: 0_level_1,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,Opponent,...,Dist,FK,PK,PKatt,xG,npxG,npxG/Sh,G-xG,np:G-xG,Match Report
0,2022-08-05,20:00,Premier League,Matchweek 1,Fri,Away,W,2,0,Crystal Palace,...,14.6,1.0,0,0,1.0,1.0,0.1,0.0,0.0,Match Report
1,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,2,Leicester City,...,13.0,0.0,0,0,2.7,2.7,0.16,1.3,1.3,Match Report
2,2022-08-20,17:30,Premier League,Matchweek 3,Sat,Away,W,3,0,Bournemouth,...,14.8,0.0,0,0,1.3,1.3,0.1,1.7,1.7,Match Report
3,2022-08-27,17:30,Premier League,Matchweek 4,Sat,Home,W,2,1,Fulham,...,15.5,1.0,0,0,2.6,2.6,0.12,-0.6,-0.6,Match Report
4,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,2,1,Aston Villa,...,16.3,1.0,0,0,2.4,2.4,0.12,-0.4,-0.4,Match Report


## Data cleaning

In [9]:
# Drop the outer index
shooting_df.columns = shooting_df.columns.droplevel()
shooting_df.head()

Unnamed: 0,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,Opponent,...,Dist,FK,PK,PKatt,xG,npxG,npxG/Sh,G-xG,np:G-xG,Match Report
0,2022-08-05,20:00,Premier League,Matchweek 1,Fri,Away,W,2,0,Crystal Palace,...,14.6,1.0,0,0,1.0,1.0,0.1,0.0,0.0,Match Report
1,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,2,Leicester City,...,13.0,0.0,0,0,2.7,2.7,0.16,1.3,1.3,Match Report
2,2022-08-20,17:30,Premier League,Matchweek 3,Sat,Away,W,3,0,Bournemouth,...,14.8,0.0,0,0,1.3,1.3,0.1,1.7,1.7,Match Report
3,2022-08-27,17:30,Premier League,Matchweek 4,Sat,Home,W,2,1,Fulham,...,15.5,1.0,0,0,2.6,2.6,0.12,-0.6,-0.6,Match Report
4,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,2,1,Aston Villa,...,16.3,1.0,0,0,2.4,2.4,0.12,-0.4,-0.4,Match Report


In [10]:
# Merging the matches and shooting dataframe
print(shooting_df.shape)
print(matches_df.shape)
team_data_df = matches_df.merge(shooting_df[["Date", "Sh", "SoT", "SoT%", "G/Sh", "G/SoT", "Dist", "FK", "PK", "PKatt"]], on="Date")
print(team_data_df.shape)
team_data_df.head()

(42, 26)
(49, 19)
(41, 28)


Unnamed: 0,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,Opponent,...,Notes,Sh,SoT,SoT%,G/Sh,G/SoT,Dist,FK,PK,PKatt
0,2022-08-05,20:00,Premier League,Matchweek 1,Fri,Away,W,2,0,Crystal Palace,...,,10,2,20.0,0.1,0.5,14.6,1.0,0,0
1,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,2,Leicester City,...,,19,7,36.8,0.21,0.57,13.0,0.0,0,0
2,2022-08-20,17:30,Premier League,Matchweek 3,Sat,Away,W,3,0,Bournemouth,...,,14,6,42.9,0.21,0.5,14.8,0.0,0,0
3,2022-08-27,17:30,Premier League,Matchweek 4,Sat,Home,W,2,1,Fulham,...,,22,8,36.4,0.09,0.25,15.5,1.0,0,0
4,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,2,1,Aston Villa,...,,22,8,36.4,0.09,0.25,16.3,1.0,0,0


## Scraping data for for multiple season and teams 

In [11]:
years = [2022, 2021, 2020]
all_matches = []               # list of dfs, each item in the list is a df of a specific team in a specific season


EPL_url = "https://fbref.com/en/comps/9/Premier-League-Stats" 

for year in years:
    EPL_response_html = requests.get(EPL_url)
    soup = BeautifulSoup(EPL_response_html.content, 'html.parser')     # Create a soup object
    table = soup.find('table', {'class': 'stats_table'})

    # Find all the <a> tags inside the table
    a_tags = table.find_all('a')

    # Scrape the team urls FROM THE {CURRENT SEASON} from the relevant <a> tags (which contains '/squads' in it)
    team_urls = []
    for a_tag in a_tags:
        href = a_tag.get("href")
        if '/squads/' in href:
            team_urls.append('https://fbref.com' + href)
            
    
    
    for team_url in team_urls:
        team_name = team_url.split('/')[-1].replace('-Stats', '').replace('-', ' ')
        
        team_match_data = requests.get(team_url)         # response HTML
             
        matches = pd.read_html(team_match_data.text, match="Scores & Fixtures")
        matches_df = matches[0]
        
        soup = BeautifulSoup(team_match_data.text)
        all_links = soup.find_all('a')
        all_links = [link.get("href") for link in all_links]
        shooting_link = ['https://fbref.com' + link for link in all_links if link and 'all_comps/shooting/' in link]
        
        shooting_data = requests.get(shooting_link[0])
        print(shooting_data)
        shooting_df = pd.read_html(shooting_data.text, match="Shooting")[0]
        shooting_df.columns = shooting_df.columns.droplevel()

        # Skipping over those teams whose shooting datas aren't available
        try:
            team_data_df = matches_df.merge(shooting_df[["Date", "Sh", "SoT", "SoT%", "G/Sh", "G/SoT", "Dist", "FK", "PK", "PKatt"]], on="Date") 
        except ValueError:
            continue
            
        # Filter out non Premier league matches
        team_data_df = team_data_df[team_data_df["Comp"] == "Premier League"]
        team_data_df["Season"] = year
        team_data_df["Team Name"] = team_name
        
        all_matches.append(team_data_df)
        print(team_name)
        print(team_data_df.head())
        
        # Wait for 2 seconds before retrying
        time.sleep(4)   
 
    EPL_response_html = requests.get(EPL_url)
    soup = BeautifulSoup(EPL_response_html.content, 'html.parser')     # Create a soup object
    
    # Get the previous season's page URL to scrape data from :- 
    previous_season_href = soup.select("a.prev")[0].get("href")
    EPL_url = 'https://fbref.com' + previous_season_href
    

<Response [200]>
Arsenal
         Date   Time            Comp        Round  Day Venue Result GF GA  \
0  2022-08-05  20:00  Premier League  Matchweek 1  Fri  Away      W  2  0   
1  2022-08-13  15:00  Premier League  Matchweek 2  Sat  Home      W  4  2   
2  2022-08-20  17:30  Premier League  Matchweek 3  Sat  Away      W  3  0   
3  2022-08-27  17:30  Premier League  Matchweek 4  Sat  Home      W  2  1   
4  2022-08-31  19:30  Premier League  Matchweek 5  Wed  Home      W  2  1   

         Opponent  ...  SoT  SoT%  G/Sh  G/SoT  Dist   FK PK PKatt Season  \
0  Crystal Palace  ...    2  20.0  0.10   0.50  14.6  1.0  0     0   2022   
1  Leicester City  ...    7  36.8  0.21   0.57  13.0  0.0  0     0   2022   
2     Bournemouth  ...    6  42.9  0.21   0.50  14.8  0.0  0     0   2022   
3          Fulham  ...    8  36.4  0.09   0.25  15.5  1.0  0     0   2022   
4     Aston Villa  ...    8  36.4  0.09   0.25  16.3  1.0  0     0   2022   

   Team Name  
0    Arsenal  
1    Arsenal  
2   

<Response [200]>
Brentford
         Date   Time            Comp        Round  Day Venue Result GF GA  \
0  2022-08-07  14:00  Premier League  Matchweek 1  Sun  Away      D  2  2   
1  2022-08-13  17:30  Premier League  Matchweek 2  Sat  Home      W  4  0   
2  2022-08-20  15:00  Premier League  Matchweek 3  Sat  Away      L  2  3   
4  2022-08-27  15:00  Premier League  Matchweek 4  Sat  Home      D  1  1   
5  2022-08-30  19:30  Premier League  Matchweek 5  Tue  Away      D  1  1   

         Opponent  ...  SoT  SoT%  G/Sh  G/SoT  Dist   FK PK PKatt Season  \
0  Leicester City  ...    3  37.5  0.25   0.67  17.0  0.0  0     0   2022   
1  Manchester Utd  ...    7  53.8  0.31   0.57  18.6  1.0  0     0   2022   
2          Fulham  ...    5  38.5  0.15   0.40  14.1  0.0  0     0   2022   
4         Everton  ...    5  25.0  0.05   0.20  14.8  0.0  0     0   2022   
5  Crystal Palace  ...    3  33.3  0.11   0.33  10.7  0.0  0     0   2022   

   Team Name  
0  Brentford  
1  Brentford  
2 

<Response [200]>
Everton
         Date   Time            Comp        Round  Day Venue Result   GF   GA  \
0  2022-08-06  17:30  Premier League  Matchweek 1  Sat  Home      L  0.0  1.0   
1  2022-08-13  12:30  Premier League  Matchweek 2  Sat  Away      L  1.0  2.0   
2  2022-08-20  15:00  Premier League  Matchweek 3  Sat  Home      D  1.0  1.0   
4  2022-08-27  15:00  Premier League  Matchweek 4  Sat  Away      D  1.0  1.0   
5  2022-08-30  20:00  Premier League  Matchweek 5  Tue  Away      D  1.0  1.0   

          Opponent  ...  SoT  SoT%  G/Sh  G/SoT  Dist   FK PK PKatt  Season  \
0          Chelsea  ...    3  37.5  0.00   0.00  13.5  0.0  0     0    2022   
1      Aston Villa  ...    4  26.7  0.00   0.00  14.8  0.0  0     0    2022   
2  Nott'ham Forest  ...    8  42.1  0.05   0.13  20.8  1.0  0     0    2022   
4        Brentford  ...    7  50.0  0.07   0.14  16.7  0.0  0     0    2022   
5     Leeds United  ...    2  28.6  0.14   0.50  19.1  1.0  0     0    2022   

   Team Name 

<Response [200]>
Arsenal
         Date   Time            Comp        Round  Day Venue Result  GF  GA  \
0  2021-08-13  20:00  Premier League  Matchweek 1  Fri  Away      L   0   2   
1  2021-08-22  16:30  Premier League  Matchweek 2  Sun  Home      L   0   2   
3  2021-08-28  12:30  Premier League  Matchweek 3  Sat  Away      L   0   5   
4  2021-09-11  15:00  Premier League  Matchweek 4  Sat  Home      W   1   0   
5  2021-09-18  15:00  Premier League  Matchweek 5  Sat  Away      W   1   0   

          Opponent  ...  SoT  SoT%  G/Sh  G/SoT  Dist   FK PK PKatt Season  \
0        Brentford  ...    4  18.2  0.00   0.00  19.0  1.0  0     0   2021   
1          Chelsea  ...    3  50.0  0.00   0.00  18.7  0.0  0     0   2021   
3  Manchester City  ...    0   0.0  0.00    NaN   6.8  0.0  0     0   2021   
4     Norwich City  ...    6  20.0  0.03   0.17  16.9  1.0  0     0   2021   
5          Burnley  ...    3  23.1  0.08   0.33  20.7  1.0  0     0   2021   

   Team Name  
0    Arsenal  
1

<Response [200]>
Brentford
         Date   Time            Comp        Round  Day Venue Result  GF  GA  \
0  2021-08-13  20:00  Premier League  Matchweek 1  Fri  Home      W   2   0   
1  2021-08-21  15:00  Premier League  Matchweek 2  Sat  Away      D   0   0   
3  2021-08-28  15:00  Premier League  Matchweek 3  Sat  Away      D   1   1   
4  2021-09-11  15:00  Premier League  Matchweek 4  Sat  Home      L   0   1   
5  2021-09-18  12:30  Premier League  Matchweek 5  Sat  Away      W   2   0   

         Opponent  ...  SoT  SoT%  G/Sh  G/SoT  Dist   FK PK PKatt  Season  \
0         Arsenal  ...    3  37.5  0.25   0.67  12.1  0.0  0     0    2021   
1  Crystal Palace  ...    3  21.4  0.00   0.00  18.0  1.0  0     0    2021   
3     Aston Villa  ...    2  22.2  0.11   0.50  15.6  0.0  0     0    2021   
4        Brighton  ...    1  14.3  0.00   0.00  20.3  0.0  0     0    2021   
5          Wolves  ...    2  25.0  0.13   0.50  18.7  0.0  1     1    2021   

   Team Name  
0  Brentford  

<Response [200]>
Manchester City
         Date   Time            Comp        Round  Day Venue Result  GF  GA  \
0  2020-09-21  20:15  Premier League  Matchweek 2  Mon  Away      W   3   1   
2  2020-09-27  16:30  Premier League  Matchweek 3  Sun  Home      L   2   5   
4  2020-10-03  17:30  Premier League  Matchweek 4  Sat  Away      D   1   1   
5  2020-10-17  17:30  Premier League  Matchweek 5  Sat  Home      W   1   0   
7  2020-10-24  12:30  Premier League  Matchweek 6  Sat  Away      D   1   1   

         Opponent  ...  SoT  SoT%  G/Sh  G/SoT  Dist   FK PK PKatt Season  \
0          Wolves  ...    8  61.5  0.15   0.25  21.1  2.0  1     1   2020   
2  Leicester City  ...    5  31.3  0.13   0.40  19.8  1.0  0     0   2020   
4    Leeds United  ...    1   4.3  0.04   1.00  18.2  1.0  0     0   2020   
5         Arsenal  ...    5  38.5  0.08   0.20  17.7  0.0  0     0   2020   
7        West Ham  ...    7  50.0  0.07   0.14  20.9  1.0  0     0   2020   

         Team Name  
0  Manch

<Response [200]>
Leeds United
         Date   Time            Comp        Round  Day Venue Result GF GA  \
0  2020-09-12  17:30  Premier League  Matchweek 1  Sat  Away      L  3  4   
2  2020-09-19  15:00  Premier League  Matchweek 2  Sat  Home      W  4  3   
3  2020-09-27  12:00  Premier League  Matchweek 3  Sun  Away      W  1  0   
4  2020-10-03  17:30  Premier League  Matchweek 4  Sat  Home      D  1  1   
5  2020-10-19  20:00  Premier League  Matchweek 5  Mon  Home      L  0  1   

          Opponent  ...  SoT  SoT%  G/Sh  G/SoT  Dist   FK PK PKatt Season  \
0        Liverpool  ...    3  50.0  0.50   1.00  19.9  1.0  0     0   2020   
2           Fulham  ...    6  66.7  0.33   0.50  16.9  1.0  1     1   2020   
3    Sheffield Utd  ...    8  47.1  0.06   0.13  19.0  0.0  0     0   2020   
4  Manchester City  ...    7  58.3  0.08   0.14  11.5  0.0  0     0   2020   
5           Wolves  ...    2  15.4  0.00   0.00  12.8  0.0  0     0   2020   

      Team Name  
0  Leeds United  
2 

<Response [200]>
Burnley
         Date   Time            Comp        Round  Day Venue Result GF GA  \
1  2020-09-20  19:00  Premier League  Matchweek 2  Sun  Away      L  2  4   
3  2020-09-26  20:00  Premier League  Matchweek 3  Sat  Home      L  0  1   
5  2020-10-03  20:00  Premier League  Matchweek 4  Sat  Away      L  1  3   
6  2020-10-19  17:30  Premier League  Matchweek 5  Mon  Away      D  0  0   
7  2020-10-26  20:00  Premier League  Matchweek 6  Mon  Home      L  0  1   

         Opponent  ...  SoT  SoT%  G/Sh  G/SoT  Dist   FK PK PKatt Season  \
1  Leicester City  ...    5  31.3  0.13   0.40  16.4  1.0  0     0   2020   
3     Southampton  ...    2  20.0  0.00   0.00  18.3  1.0  0     0   2020   
5   Newcastle Utd  ...    3  42.9  0.14   0.33  13.4  0.0  0     0   2020   
6       West Brom  ...    4  44.4  0.00   0.00  16.1  1.0  0     0   2020   
7       Tottenham  ...    3  23.1  0.00   0.00  21.1  1.0  0     0   2020   

   Team Name  
1    Burnley  
3    Burnley  
5   

In [12]:
match_df = pd.concat(all_matches)
match_df

Unnamed: 0,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,Opponent,...,SoT,SoT%,G/Sh,G/SoT,Dist,FK,PK,PKatt,Season,Team Name
0,2022-08-05,20:00,Premier League,Matchweek 1,Fri,Away,W,2,0,Crystal Palace,...,2.0,20.0,0.10,0.50,14.6,1.0,0.0,0.0,2022,Arsenal
1,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,2,Leicester City,...,7.0,36.8,0.21,0.57,13.0,0.0,0.0,0.0,2022,Arsenal
2,2022-08-20,17:30,Premier League,Matchweek 3,Sat,Away,W,3,0,Bournemouth,...,6.0,42.9,0.21,0.50,14.8,0.0,0.0,0.0,2022,Arsenal
3,2022-08-27,17:30,Premier League,Matchweek 4,Sat,Home,W,2,1,Fulham,...,8.0,36.4,0.09,0.25,15.5,1.0,0.0,0.0,2022,Arsenal
4,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,2,1,Aston Villa,...,8.0,36.4,0.09,0.25,16.3,1.0,0.0,0.0,2022,Arsenal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38,2021-05-02,19:15,Premier League,Matchweek 34,Sun,Away,L,0,4,Tottenham,...,1.0,12.5,0.00,0.00,18.2,0.0,0.0,0.0,2020,Sheffield United
39,2021-05-08,15:00,Premier League,Matchweek 35,Sat,Home,L,0,2,Crystal Palace,...,0.0,0.0,0.00,,13.4,1.0,0.0,0.0,2020,Sheffield United
40,2021-05-16,19:00,Premier League,Matchweek 36,Sun,Away,W,1,0,Everton,...,3.0,30.0,0.10,0.33,18.5,0.0,0.0,0.0,2020,Sheffield United
41,2021-05-19,18:00,Premier League,Matchweek 37,Wed,Away,L,0,1,Newcastle Utd,...,1.0,9.1,0.00,0.00,18.3,1.0,0.0,0.0,2020,Sheffield United


In [13]:
match_df.columns = [col.lower() for col in match_df.columns]
match_df.to_csv("all_matches.csv")