# 1. Read a .csv file from a URL with pandas


### Data source: https://www.football-data.co.uk/englandm.php

In [44]:
import pandas as pd

In [45]:
# Load a single CSV straight from the website: Premier League (division E0), season 2022/2023.
df_premier_league_season_2022_2023 = pd.read_csv(
    filepath_or_buffer='https://www.football-data.co.uk/mmz4281/2223/E0.csv',
)
df_premier_league_season_2022_2023

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
0,E0,05/08/2022,20:00,Crystal Palace,Arsenal,0,2,A,0,1,...,1.76,0.50,2.09,1.84,2.04,1.88,2.09,1.88,2.03,1.85
1,E0,06/08/2022,12:30,Fulham,Liverpool,2,2,D,1,0,...,2.73,1.75,1.90,2.03,1.91,2.02,2.01,2.06,1.89,1.99
2,E0,06/08/2022,15:00,Bournemouth,Aston Villa,2,0,H,1,0,...,1.76,0.50,1.93,2.00,1.93,2.00,1.94,2.04,1.88,2.00
3,E0,06/08/2022,15:00,Leeds,Wolves,2,1,H,1,1,...,1.87,-0.25,2.08,1.85,2.10,1.84,2.14,1.87,2.08,1.81
4,E0,06/08/2022,15:00,Newcastle,Nott'm Forest,2,0,H,0,0,...,1.89,-1.00,1.97,1.96,1.99,1.93,2.19,1.97,2.03,1.86
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
301,E0,15/04/2023,15:00,Wolves,Brentford,2,0,H,1,0,...,1.66,0.00,2.00,1.90,2.02,1.91,2.04,1.95,1.97,1.89
302,E0,15/04/2023,17:30,Man City,Leicester,3,1,H,3,0,...,3.19,-2.25,1.92,1.98,1.94,1.96,1.95,2.08,1.86,2.00
303,E0,16/04/2023,14:00,West Ham,Arsenal,2,2,D,1,2,...,2.12,1.00,1.89,2.04,1.88,2.05,1.97,2.11,1.85,2.03
304,E0,16/04/2023,16:30,Nott'm Forest,Man United,0,2,A,0,1,...,2.12,0.75,2.02,1.91,2.02,1.91,2.09,1.92,1.99,1.88


In [46]:
# Rename the goal columns for better readability. In this dataset 'FT' stands for
# "full time", so 'FTHG' / 'FTAG' are the full-time home / away goals.
# The renamed frame is assigned back to the same name instead of using
# `inplace=True`: this keeps the call chainable and avoids mutating the frame
# object in place.
df_premier_league_season_2022_2023 = df_premier_league_season_2022_2023.rename(
    columns={
        'FTHG': 'Home Goals',
        'FTAG': 'Away Goals',
    }
)
df_premier_league_season_2022_2023.tail()

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,Home Goals,Away Goals,FTR,HTHG,HTAG,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
301,E0,15/04/2023,15:00,Wolves,Brentford,2,0,H,1,0,...,1.66,0.0,2.0,1.9,2.02,1.91,2.04,1.95,1.97,1.89
302,E0,15/04/2023,17:30,Man City,Leicester,3,1,H,3,0,...,3.19,-2.25,1.92,1.98,1.94,1.96,1.95,2.08,1.86,2.0
303,E0,16/04/2023,14:00,West Ham,Arsenal,2,2,D,1,2,...,2.12,1.0,1.89,2.04,1.88,2.05,1.97,2.11,1.85,2.03
304,E0,16/04/2023,16:30,Nott'm Forest,Man United,0,2,A,0,1,...,2.12,0.75,2.02,1.91,2.02,1.91,2.09,1.92,1.99,1.88
305,E0,17/04/2023,20:00,Leeds,Liverpool,1,6,A,0,2,...,2.61,0.75,2.01,1.89,2.01,1.92,2.12,1.94,2.0,1.86


# 2. Read .csv files from multiple URLs
## url = root + season + league + '.csv'

In [47]:
# Anatomy of a download link on this site:
#   root   -> https://www.football-data.co.uk/mmz4281/
#   season -> '2223'
#   league -> '/E0'
# Joined together with the '.csv' extension, the pieces form the complete URL:
''.join(['https://www.football-data.co.uk/mmz4281/', '2223', '/E0', '.csv'])

'https://www.football-data.co.uk/mmz4281/2223/E0.csv'

## 2.1 Multiple leagues:
### https://www.football-data.co.uk/mmz4281/2223/E0.csv
### https://www.football-data.co.uk/mmz4281/2223/E1.csv
### https://www.football-data.co.uk/mmz4281/2223/E2.csv
### https://www.football-data.co.uk/mmz4281/2223/E3.csv
### https://www.football-data.co.uk/mmz4281/2223/EC.csv

In [48]:
# Base URL that every download link in this notebook starts with.
root = "https://www.football-data.co.uk/mmz4281/"

In [49]:
# Season code for 2022/2023, written the way it appears in the download URL.
season = "2223"

In [54]:
# Division codes to download; each code is the CSV file name without its extension.
leagues = ['E0', 'E1', 'E2', 'E3', 'EC']
frames = []

# Download one CSV per division and collect the resulting DataFrames in `frames`.
for every_league in leagues:
    df = pd.read_csv(f'{root}{season}/{every_league}.csv')
    frames.append(df)

In [55]:
# Show the collected list of per-league DataFrames.
frames


[    Div        Date   Time        HomeTeam       AwayTeam  FTHG  FTAG FTR   
 0    E0  05/08/2022  20:00  Crystal Palace        Arsenal     0     2   A  \
 1    E0  06/08/2022  12:30          Fulham      Liverpool     2     2   D   
 2    E0  06/08/2022  15:00     Bournemouth    Aston Villa     2     0   H   
 3    E0  06/08/2022  15:00           Leeds         Wolves     2     1   H   
 4    E0  06/08/2022  15:00       Newcastle  Nott'm Forest     2     0   H   
 ..   ..         ...    ...             ...            ...   ...   ...  ..   
 301  E0  15/04/2023  15:00          Wolves      Brentford     2     0   H   
 302  E0  15/04/2023  17:30        Man City      Leicester     3     1   H   
 303  E0  16/04/2023  14:00        West Ham        Arsenal     2     2   D   
 304  E0  16/04/2023  16:30   Nott'm Forest     Man United     0     2   A   
 305  E0  17/04/2023  20:00           Leeds      Liverpool     1     6   A   
 
      HTHG  HTAG  ... AvgC<2.5  AHCh  B365CAHH  B365CAHA  PCAH