In [1]:
import pandas as pd
import requests
from io import StringIO

# 3 Oragnise all data in dictionary

In [2]:
# create a dictionary with original name of the league as key
dict_countries = {
    'Spanish La Liga': 'SP1',
    'Spanish Segunda Division': 'SP2',
    'German Bundesliga': 'D1',
    'English Premier League': 'E0',
    'English League 1': 'E1',
    'English League 2': 'E2',
}

In [3]:
# get a dictionary element
dict_countries['Spanish La Liga']  # returns 'SP1'

'SP1'

In [4]:
for league in dict_countries:
    print(dict_countries[league])

SP1
SP2
D1
E0
E1
E2


In [5]:
# getting all the leagues
dict_historical_data = {}

# loop through the dictionary
for league in dict_countries:
    frames = []
    for season in range(15, 25):
        url = f"https://www.football-data.co.uk/mmz4281/{str(season)}{str(season + 1)}/{dict_countries[league]}.csv"
        response = requests.get(url)
        df = pd.read_csv(StringIO(response.text))
        df.insert(1, 'Season', str(season))
        frames.append(df)
    df_concat = pd.concat(frames)
    dict_historical_data[league] = df_concat

In [6]:
# show dataframe inside dict_historical_data
dict_historical_data['English Premier League']

Unnamed: 0,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,1XBCH,1XBCD,1XBCA,BFECH,BFECD,BFECA,BFEC>2.5,BFEC<2.5,BFECAHH,BFECAHA
0,E0,15,08/08/2015,Bournemouth,Aston Villa,0,1,A,0,0,...,,,,,,,,,,
1,E0,15,08/08/2015,Chelsea,Swansea,2,2,D,2,1,...,,,,,,,,,,
2,E0,15,08/08/2015,Everton,Watford,2,2,D,0,1,...,,,,,,,,,,
3,E0,15,08/08/2015,Leicester,Sunderland,4,2,H,3,0,...,,,,,,,,,,
4,E0,15,08/08/2015,Man United,Tottenham,1,0,H,1,0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,,24,25/05/2025,Newcastle,Everton,0,1,A,0,0,...,1.30,6.11,10.20,1.27,7.0,13.00,1.50,2.94,1.93,2.05
376,,24,25/05/2025,Nott'm Forest,Chelsea,0,1,A,0,0,...,3.50,3.86,2.06,3.55,3.9,2.14,1.76,2.28,1.86,2.14
377,,24,25/05/2025,Southampton,Arsenal,1,2,A,0,1,...,12.20,6.11,1.28,13.00,6.8,1.28,1.45,3.15,2.06,1.89
378,,24,25/05/2025,Tottenham,Brighton,1,4,A,1,0,...,3.63,4.12,1.95,3.95,4.2,1.93,1.51,2.92,2.06,1.93


In [7]:
for league in dict_historical_data:
    df = dict_historical_data[league]
    print(f"League: {league}, Number of rows: {len(df)}")
    print(df.info())  # Display the first few rows of each league's data
    print("\n")  # Add a newline for better readability
    

League: Spanish La Liga, Number of rows: 3800
<class 'pandas.core.frame.DataFrame'>
Index: 3800 entries, 0 to 379
Columns: 154 entries, Div to BFECAHA
dtypes: float64(129), int64(16), object(9)
memory usage: 4.5+ MB
None


League: Spanish Segunda Division, Number of rows: 4620
<class 'pandas.core.frame.DataFrame'>
Index: 4620 entries, 0 to 461
Columns: 154 entries, Div to BFECAHA
dtypes: float64(143), int64(2), object(9)
memory usage: 5.5+ MB
None


League: German Bundesliga, Number of rows: 3060
<class 'pandas.core.frame.DataFrame'>
Index: 3060 entries, 0 to 305
Columns: 154 entries, Div to BFECAHA
dtypes: float64(143), int64(2), object(9)
memory usage: 3.6+ MB
None


League: English Premier League, Number of rows: 3800
<class 'pandas.core.frame.DataFrame'>
Index: 3800 entries, 0 to 379
Columns: 155 entries, Div to BFECAHA
dtypes: float64(129), int64(16), object(10)
memory usage: 4.5+ MB
None


League: English League 1, Number of rows: 5520
<class 'pandas.core.frame.DataFrame'>
Index:

In [8]:
# saving all dataframes to csv files
for league, df in dict_historical_data.items():
    filename = f"{league.replace(' ', '_').replace('/', '-')}.csv"
    df.to_csv(filename, index=False)
    print(f"Saved {filename} with {len(df)} rows.")

Saved Spanish_La_Liga.csv with 3800 rows.
Saved Spanish_Segunda_Division.csv with 4620 rows.
Saved German_Bundesliga.csv with 3060 rows.
Saved English_Premier_League.csv with 3800 rows.
Saved English_League_1.csv with 5520 rows.
Saved English_League_2.csv with 5369 rows.


In [9]:
# total number of rows
total_rows = sum(len(df) for df in dict_historical_data.values())
print(f"Total number of rows across all leagues: {total_rows}")

Total number of rows across all leagues: 26169


In [11]:
df_example = dict_historical_data['Spanish La Liga'].head()
