# EPL Fixture Results Data Consolidation

The following data has been sourced from [Football-Data.Co.uk](https://www.football-data.co.uk/englandm.php)

In [1]:
import requests
import pandas as pd
import io

The purpose of this section is to consolidate all extracted EPL results data into a single csv file. The following script scans the Football Analysis/data folder and filters for all documents prefixed with epl_results. These documents are then consolidated into one dataframe.

The script assumes all documents to be imported are named using the structure 'epl_results_year', where year is the year in which the season ended (for example, 'epl_results_2006' holds the 2005/2006 season).

In [2]:
# Consolidate every 'epl_results_<year>' file in the GitHub data directory into one
# dataframe (`df`), tagging each row with the season the file covers.

# GitHub repository details
user = 'redbackoperations'
repo = 'T2_2023'
path = 'Project%205%20-%20Sports%20Performance%20Analysis/frontend/Football%20Analysis/data'

# GitHub API URL for contents of the directory
url = f'https://api.github.com/repos/{user}/{repo}/contents/{path}'

# Seconds to wait on each HTTP request before giving up, so the cell cannot hang forever
request_timeout = 30

# Send a request to the GitHub API and fail fast on an HTTP error instead of
# trying to iterate over an error payload further down
response = requests.get(url, timeout=request_timeout)
response.raise_for_status()
data = response.json()

# The loop below expects a list of directory entries; anything else (for example a
# single JSON object) would otherwise fail with an opaque TypeError at item['name']
if not isinstance(data, list):
    detail = data.get('message', data) if isinstance(data, dict) else data
    raise ValueError(f'Unexpected response from {url}: {detail!r}')

# Initialise a list to hold dataframes
dfs = []

# Loop through items in the directory, ordered by name so the row order of the
# consolidated dataframe does not depend on the order the API returned them in
for item in sorted(data, key=lambda entry: entry['name']):
    file_name = item['name']
    if not file_name.startswith('epl_results'):
        continue

    # Entries without a download link cannot be fetched, so skip them
    download_url = item.get('download_url')
    if not download_url:
        continue

    # Extract year from the file name (text after the last '_' and before the first '.')
    year = file_name.split('_')[-1].split('.')[0]
    if not year.isdigit():
        # The file does not follow the 'epl_results_<year>' convention; report and
        # skip it rather than crash on int() with an unhelpful message
        print(f'Skipping {file_name}: no season year found in file name')
        continue

    # Convert year to integer and calculate the season (year is the year the season ended)
    year = int(year)
    season = f'{year-1}/{year}'

    # Download the file, making sure an HTTP error body is never parsed as CSV
    file_response = requests.get(download_url, timeout=request_timeout)
    file_response.raise_for_status()
    file_content = file_response.content
    season_data = pd.read_csv(io.StringIO(file_content.decode('utf-8')))

    # Add the season column
    season_data['Season'] = season

    # Append DataFrame to the list
    dfs.append(season_data)

# pd.concat raises a generic ValueError on an empty list; give a clearer message instead
if not dfs:
    raise ValueError(f"No files prefixed with 'epl_results' were found at {url}")

# Concatenate all dataframes in the list
df = pd.concat(dfs, ignore_index=True)

# Display first rows of dataframe
df.head(10)


Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
0,E0,13/08/05,Aston Villa,Bolton,2.0,2.0,D,2.0,2.0,D,...,,,,,,,,,,
1,E0,13/08/05,Everton,Man United,0.0,2.0,A,0.0,1.0,A,...,,,,,,,,,,
2,E0,13/08/05,Fulham,Birmingham,0.0,0.0,D,0.0,0.0,D,...,,,,,,,,,,
3,E0,13/08/05,Man City,West Brom,0.0,0.0,D,0.0,0.0,D,...,,,,,,,,,,
4,E0,13/08/05,Middlesbrough,Liverpool,0.0,0.0,D,0.0,0.0,D,...,,,,,,,,,,
5,E0,13/08/05,Portsmouth,Tottenham,0.0,2.0,A,0.0,1.0,A,...,,,,,,,,,,
6,E0,13/08/05,Sunderland,Charlton,1.0,3.0,A,1.0,1.0,D,...,,,,,,,,,,
7,E0,13/08/05,West Ham,Blackburn,3.0,1.0,H,0.0,1.0,A,...,,,,,,,,,,
8,E0,14/08/05,Arsenal,Newcastle,2.0,0.0,H,0.0,0.0,D,...,,,,,,,,,,
9,E0,14/08/05,Wigan,Chelsea,0.0,1.0,A,0.0,0.0,D,...,,,,,,,,,,


In [3]:
# Write the consolidated dataframe to a csv file and confirm once the write has returned.
# NOTE(review): to_csv is called with its defaults, so the row index is written as well -
# confirm downstream readers expect that extra leading column.
output_file = 'data/consol_raw_epl_results.csv'
df.to_csv(output_file)
print('Consolidated data exported successfully')

Consolidated data exported successfully
