# Data Encoding

This notebook was created to prep the data for our machine learning models. This takes any categorical/qualative value and assigns it a quantative value. Each encoding will have a legend above it to show the encoding used.

Another thing done in this notebook is that two datasets were created. One has every instance stored in it and has the time column dropped. The other dataset only includes instances where the time value is NOT `NaN`.

# Imports

In [47]:
# Data Processing
import pandas as pd

# Loads Data

In [48]:
df = pd.read_csv(r"League_Result_Data/Updated_PremierLeague_Stats_From_2014to2021.csv")

# Had this column get created upon loading, just dropped it
df = df.drop(columns=["Unnamed: 0"]) 
df

Unnamed: 0,Season,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,Fall,12/09/2020,12:30,Fulham,Arsenal,0.0,3.0,A,0.0,1.0,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,Fall,12/09/2020,15:00,Crystal Palace,Southampton,1.0,0.0,H,1.0,0.0,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,Fall,12/09/2020,17:30,Liverpool,Leeds,4.0,3.0,H,3.0,2.0,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,Fall,12/09/2020,20:00,West Ham,Newcastle,0.0,2.0,A,0.0,0.0,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,Fall,13/09/2020,14:00,West Brom,Leicester,0.0,3.0,A,0.0,0.0,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,Spring,24/05/15,,Everton,Tottenham,0.0,1.0,A,0.0,1.0,...,1.0,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0
2565,Spring,24/05/15,,Hull,Man United,0.0,0.0,D,0.0,0.0,...,6.0,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0
2566,Spring,24/05/15,,Leicester,QPR,5.0,1.0,H,2.0,0.0,...,7.0,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0
2567,Spring,24/05/15,,Man City,Southampton,2.0,0.0,H,1.0,0.0,...,6.0,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0


# Encoding

### Season Encoding

- Fall: 0

- Winter: 1

- Spring: 2

- Summer: 3

In [49]:
df["Season Encoding"] = -1

for index, value in df.iterrows():
    
    if value["Season"] == "Fall":
        df.at[index, "Season Encoding"] = 0
        
    elif value["Season"] == "Winter":
        df.at[index, "Season Encoding"] = 1
        
    elif value["Season"] == "Spring":
        df.at[index, "Season Encoding"] = 2
        
    else:
        df.at[index, "Season Encoding"] = 3

### Team Encoding

- Aresnal: 0

- Ashton Villa: 1

- Bournemouth: 2

- Brighton: 3

- Cardiff: 4

- Chelsea: 5

- Crystal Palace: 6

- Everton: 7

- Fulham: 8

- Huddersfield: 9

- Hull: 10

- Leeds: 11

- Leicester: 12

- Liverpool: 13

- Man City: 14

- Man United: 15

- Middlesbrough: 16

- Newcastle: 17
 
- Norwich: 18

- QPR: 19

- Sheffield United: 20

- Southampton: 21

- Stoke: 22

- Sunderland: 23 

- Swansea: 24

- Tottenham: 25

- Watford: 26

- West Brom: 27

- West Ham: 28

- Wolves: 29

In [50]:
df["HomeTeam Encoding"] = -1
df["AwayTeam Encoding"] = -1

### Year of Season Encoding

- 2020/21
- 2019/20
- 2018/19
- 2017/18
- 2016/17
- 2015/16
- 2014/15

In [51]:
df["YearOfSeason"] = '' #Creating Year of Season varible

# loop to input year of season based on date
for index, value in df['Date'].iteritems():
    if value in pd.date_range('08/01/2020','04/01/2021'):
        df.at[index, 'YearOfSeason'] = '2020/21'
        
    if value in pd.date_range('08/01/2019','07/31/2020'):
        df.at[index, 'YearOfSeason'] = '2019/20'
        
    if value in pd.date_range('08/01/2018','05/31/2019'):
        df.at[index, 'YearOfSeason'] = '2018/19'
        
    if value in pd.date_range('08/01/2017','05/31/2018'):
        df.at[index, 'YearOfSeason'] = '2017/18'
        
    if value in pd.date_range('08/01/2016','05/31/2017'):
        df.at[index, 'YearOfSeason'] = '2016/17'
        
    if value in pd.date_range('08/01/2015','05/31/2016'):
        df.at[index, 'YearOfSeason'] = '2015/16'
        
    if value in pd.date_range('08/01/2014','05/31/2015'):
        df.at[index, 'YearOfSeason'] = '2014/15'

#### Home Team Encoding

In [52]:
for index, value in df["HomeTeam"].iteritems():
    if value == "Arsenal":
        df.at[index, "HomeTeam Encoding"] = 0
        
    if value == "Ashton Villa":
        df.at[index, "HomeTeam Encoding"] = 1
        
    if value == "Bournemouth":
        df.at[index, "HomeTeam Encoding"] = 2
        
    if value == "Brighton":
        df.at[index, "HomeTeam Encoding"] = 3
        
    if value == "Cardiff":
        df.at[index, "HomeTeam Encoding"] = 4
        
    if value == "Chelsea":
        df.at[index, "HomeTeam Encoding"] = 5    
        
    if value == "Crystal Palace":
        df.at[index, "HomeTeam Encoding"] = 6
        
    if value == "Everton":
        df.at[index, "HomeTeam Encoding"] = 7
            
    if value == "Fulham":
        df.at[index, "HomeTeam Encoding"] = 8   
            
    if value == "Huddersfield":
        df.at[index, "HomeTeam Encoding"] = 9        
            
    if value == "Hull":
        df.at[index, "HomeTeam Encoding"] = 10        
            
    if value == "Leeds":
        df.at[index, "HomeTeam Encoding"] = 11
            
    if value == "Leicester":
        df.at[index, "HomeTeam Encoding"] = 12     
        
    if value == "Liverpool":
        df.at[index, "HomeTeam Encoding"] = 13
            
    if value == "Man City":
        df.at[index, "HomeTeam Encoding"] = 14
            
    if value == "Man United":
        df.at[index, "HomeTeam Encoding"] = 15     
        
    if value == "Middlesbrough":
        df.at[index, "HomeTeam Encoding"] = 16 
    
    if value == "Newcastle":
        df.at[index, "HomeTeam Encoding"] = 17    
            
    if value == "Norwich":
            df.at[index, "HomeTeam Encoding"] = 18       
            
    if value == "QPR":
        df.at[index, "HomeTeam Encoding"] = 19
        
    if value == "Sheffield United":
        df.at[index, "HomeTeam Encoding"] = 20
        
    if value == "Southampton":
        df.at[index, "HomeTeam Encoding"] = 21
        
    if value == "Stoke":
        df.at[index, "HomeTeam Encoding"] = 22
        
    if value == "Sunderland":
        df.at[index, "HomeTeam Encoding"] = 23
        
    if value == "Swansea":
        df.at[index, "HomeTeam Encoding"] = 24 
        
    if value == "Tottenham":
        df.at[index, "HomeTeam Encoding"] = 25
        
    if value == "Watford":
        df.at[index, "HomeTeam Encoding"] = 26
        
    if value == "West Brom":
        df.at[index, "HomeTeam Encoding"] = 27
        
    if value == "West Ham":
        df.at[index, "HomeTeam Encoding"] = 28
        
    if value == "Wolves":
        df.at[index, "HomeTeam Encoding"] = 29

#### Away Team Encoding

In [53]:
for index, value in df["AwayTeam"].iteritems():
    if value == "Arsenal":
        df.at[index, "AwayTeam Encoding"] = 0
        
    if value == "Ashton Villa":
        df.at[index, "AwayTeam Encoding"] = 1
        
    if value == "Bournemouth":
        df.at[index, "AwayTeam Encoding"] = 2
        
    if value == "Brighton":
        df.at[index, "AwayTeam Encoding"] = 3
        
    if value == "Cardiff":
        df.at[index, "AwayTeam Encoding"] = 4
        
    if value == "Chelsea":
        df.at[index, "AwayTeam Encoding"] = 5    
        
    if value == "Crystal Palace":
        df.at[index, "AwayTeam Encoding"] = 6
        
    if value == "Everton":
        df.at[index, "AwayTeam Encoding"] = 7
            
    if value == "Fulham":
        df.at[index, "AwayTeam Encoding"] = 8   
            
    if value == "Huddersfield":
        df.at[index, "AwayTeam Encoding"] = 9        
            
    if value == "Hull":
        df.at[index, "AwayTeam Encoding"] = 10        
            
    if value == "Leeds":
        df.at[index, "AwayTeam Encoding"] = 11
            
    if value == "Leicester":
        df.at[index, "AwayTeam Encoding"] = 12     
        
    if value == "Liverpool":
        df.at[index, "AwayTeam Encoding"] = 13
            
    if value == "Man City":
        df.at[index, "AwayTeam Encoding"] = 14
            
    if value == "Man United":
        df.at[index, "AwayTeam Encoding"] = 15     
        
    if value == "Middlesbrough":
        df.at[index, "AwayTeam Encoding"] = 16 
    
    if value == "Newcastle":
        df.at[index, "AwayTeam Encoding"] = 17    
            
    if value == "Norwich":
            df.at[index, "AwayTeam Encoding"] = 18       
            
    if value == "QPR":
        df.at[index, "AwayTeam Encoding"] = 19
        
    if value == "Sheffield United":
        df.at[index, "AwayTeam Encoding"] = 20
        
    if value == "Southampton":
        df.at[index, "AwayTeam Encoding"] = 21
        
    if value == "Stoke":
        df.at[index, "AwayTeam Encoding"] = 22
        
    if value == "Sunderland":
        df.at[index, "AwayTeam Encoding"] = 23
        
    if value == "Swansea":
        df.at[index, "AwayTeam Encoding"] = 24 
        
    if value == "Tottenham":
        df.at[index, "AwayTeam Encoding"] = 25
        
    if value == "Watford":
        df.at[index, "AwayTeam Encoding"] = 26
        
    if value == "West Brom":
        df.at[index, "AwayTeam Encoding"] = 27
        
    if value == "West Ham":
        df.at[index, "AwayTeam Encoding"] = 28
        
    if value == "Wolves":
        df.at[index, "AwayTeam Encoding"] = 29

### Full Time Result Encoding

- Home Team Won: 0

- Away Team Won: 1

- Draw: 2

In [54]:
df["FTR Encoding"] = -1

for index, value in df.iterrows():
    
    if value["FTR"] == "H":
        df.at[index, "FTR Encoding"] = 0
        
    elif value["FTR"] == "A":
        df.at[index, "FTR Encoding"] = 1
        
    elif value["FTR"] == "D":
        df.at[index, "FTR Encoding"] = 2
    
    else:
        pass

### Halftime Time Result Encoding

- Home Team Won: 0

- Away Team Won: 1

- Draw: 2

In [55]:
df["HTR Encoding"] = -1

for index, value in df.iterrows():
    
    if value["HTR"] == "H":
        df.at[index, "HTR Encoding"] = 0
        
    elif value["HTR"] == "A":
        df.at[index, "HTR Encoding"] = 1
        
    elif value["HTR"] == "D":
        df.at[index, "HTR Encoding"] = 2
    
    else:
        pass

### Referee Encoding

- C Kavanagh: 0

- R Madley: 1

- C Foy: 2

- P Bankes: 3

- L Mason: 4

- P Tierney: 5

- K Stroud: 6

- C Kavanagh: 7

- l Mason: 8

- D Coote: 9

- A Taylor: 10

- S Attwell: 11

- O Langford: 12

- P Dowd: 13

- A Marriner: 14

- L Probert: 15

- M Atkinson: 16

- R Jones: 17

- R Madley: 18

- K Friend: 19

- A Madley: 20 

- G Scott: 21

- D England: 22

- M Dean: 23

- M Jones: 24

- C Pawson: 25

- R East: 26

- J Moss: 27

- M Oliver: 28

- T Robinson: 29

- M Clattenburg: 30

- S Scott: 31

In [56]:
df["Referee Encoding"] = -1

for index, value in df.iterrows():
    
    if value["Referee"] == "S Hooper":
        df.at[index, "Referee Encoding"] = 0
        
    elif value["Referee"] == "N Swarbrick":
        df.at[index, "Referee Encoding"] = 1
        
    elif value["Referee"] == "C Foy":
        df.at[index, "Referee Encoding"] = 2
        
    elif value["Referee"] == "P Bankes":
        df.at[index, "Referee Encoding"] = 3
        
    elif value["Referee"] == "L Mason":
        df.at[index, "Referee Encoding"] = 4
        
    elif value["Referee"] == "P Tierney":
        df.at[index, "Referee Encoding"] = 5
        
    elif value["Referee"] == "K Stroud":
        df.at[index, "Referee Encoding"] = 6
        
    elif value["Referee"] == "C Kavanagh":
        df.at[index, "Referee Encoding"] = 7
        
    elif value["Referee"] == "l Mason":
        df.at[index, "Referee Encoding"] = 8
        
    elif value["Referee"] == "D Coote":
        df.at[index, "Referee Encoding"] = 9
        
    elif value["Referee"] == "A Taylor":
        df.at[index, "Referee Encoding"] = 10
        
    elif value["Referee"] == "S Attwell":
        df.at[index, "Referee Encoding"] = 11
        
    elif value["Referee"] == "O Langford":
        df.at[index, "Referee Encoding"] = 12
        
    elif value["Referee"] == "P Dowd":
        df.at[index, "Referee Encoding"] = 13
        
    elif value["Referee"] == "A Marriner":
        df.at[index, "Referee Encoding"] = 14
        
    elif value["Referee"] == "L Probert":
        df.at[index, "Referee Encoding"] = 15
        
    elif value["Referee"] == "M Atkinson":
        df.at[index, "Referee Encoding"] = 16
        
    elif value["Referee"] == "R Jones":
        df.at[index, "Referee Encoding"] = 17
        
    elif value["Referee"] == "R Madley":
        df.at[index, "Referee Encoding"] = 18
        
    elif value["Referee"] == "K Friend":
        df.at[index, "Referee Encoding"] = 19
        
    elif value["Referee"] == "A Madley":
        df.at[index, "Referee Encoding"] = 20
        
    elif value["Referee"] == "G Scott":
        df.at[index, "Referee Encoding"] = 21
        
    elif value["Referee"] == "D England":
        df.at[index, "Referee Encoding"] = 22
        
    elif value["Referee"] == "M Dean":
        df.at[index, "Referee Encoding"] = 23
        
    elif value["Referee"] == "M Jones":
        df.at[index, "Referee Encoding"] = 24
        
    elif value["Referee"] == "C Pawson":
        df.at[index, "Referee Encoding"] = 25
        
    elif value["Referee"] == "R East":
        df.at[index, "Referee Encoding"] = 26
        
    elif value["Referee"] == "J Moss":
        df.at[index, "Referee Encoding"] = 27
        
    elif value["Referee"] == "M Oliver":
        df.at[index, "Referee Encoding"] = 28
        
    elif value["Referee"] == "T Robinson":
        df.at[index, "Referee Encoding"] = 29
        
    elif value["Referee"] == "M Clattenburg":
        df.at[index, "Referee Encoding"] = 30
        
    else:
        df.at[index, "Referee Encoding"] = 31

In [57]:
df

Unnamed: 0,Season,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,AY,HR,AR,Season Encoding,HomeTeam Encoding,AwayTeam Encoding,YearOfSeason,FTR Encoding,HTR Encoding,Referee Encoding
0,Fall,12/09/2020,12:30,Fulham,Arsenal,0.0,3.0,A,0.0,1.0,...,2.0,0.0,0.0,0,8,0,2020/21,1,1,7
1,Fall,12/09/2020,15:00,Crystal Palace,Southampton,1.0,0.0,H,1.0,0.0,...,1.0,0.0,0.0,0,6,21,2020/21,0,0,27
2,Fall,12/09/2020,17:30,Liverpool,Leeds,4.0,3.0,H,3.0,2.0,...,0.0,0.0,0.0,0,13,11,2020/21,0,0,28
3,Fall,12/09/2020,20:00,West Ham,Newcastle,0.0,2.0,A,0.0,0.0,...,2.0,0.0,0.0,0,28,17,2020/21,1,2,11
4,Fall,13/09/2020,14:00,West Brom,Leicester,0.0,3.0,A,0.0,0.0,...,1.0,0.0,0.0,0,27,12,2020/21,1,2,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,Spring,24/05/15,,Everton,Tottenham,0.0,1.0,A,0.0,1.0,...,2.0,0.0,0.0,2,7,25,2014/15,1,1,27
2565,Spring,24/05/15,,Hull,Man United,0.0,0.0,D,0.0,0.0,...,2.0,0.0,1.0,2,10,15,2014/15,2,2,15
2566,Spring,24/05/15,,Leicester,QPR,5.0,1.0,H,2.0,0.0,...,0.0,0.0,0.0,2,12,19,2014/15,0,0,28
2567,Spring,24/05/15,,Man City,Southampton,2.0,0.0,H,1.0,0.0,...,1.0,0.0,0.0,2,14,21,2014/15,0,0,2


### Reorder Columns

In [58]:
cols = df.columns.tolist()
cols

['Season',
 'Date',
 'Time',
 'HomeTeam',
 'AwayTeam',
 'FTHG',
 'FTAG',
 'FTR',
 'HTHG',
 'HTAG',
 'HTR',
 'Referee',
 'Fouls Called Per Game',
 'HS',
 'AS',
 'HST',
 'AST',
 'HF',
 'AF',
 'HC',
 'AC',
 'HY',
 'AY',
 'HR',
 'AR',
 'Season Encoding',
 'HomeTeam Encoding',
 'AwayTeam Encoding',
 'YearOfSeason',
 'FTR Encoding',
 'HTR Encoding',
 'Referee Encoding']

In [59]:
new_order = ['Season',
 'Season Encoding',
 'Date',
 'YearOfSeason',
 'Time',
 'HomeTeam',
 'HomeTeam Encoding',
 'AwayTeam',
 'AwayTeam Encoding',
 'FTHG',
 'FTAG',
 'FTR',
 'FTR Encoding',
 'HTHG',
 'HTAG',
 'HTR',
 'HTR Encoding',
 'Referee',
 'Referee Encoding',
 'Fouls Called Per Game',
 'HS',
 'AS',
 'HST',
 'AST',
 'HF',
 'AF',
 'HC',
 'AC',
 'HY',
 'AY',
 'HR',
 'AR']

df = df[new_order]

df

Unnamed: 0,Season,Season Encoding,Date,YearOfSeason,Time,HomeTeam,HomeTeam Encoding,AwayTeam,AwayTeam Encoding,FTHG,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,Fall,0,12/09/2020,2020/21,12:30,Fulham,8,Arsenal,0,0.0,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,Fall,0,12/09/2020,2020/21,15:00,Crystal Palace,6,Southampton,21,1.0,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,Fall,0,12/09/2020,2020/21,17:30,Liverpool,13,Leeds,11,4.0,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,Fall,0,12/09/2020,2020/21,20:00,West Ham,28,Newcastle,17,0.0,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,Fall,0,13/09/2020,2020/21,14:00,West Brom,27,Leicester,12,0.0,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,Spring,2,24/05/15,2014/15,,Everton,7,Tottenham,25,0.0,...,1.0,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0
2565,Spring,2,24/05/15,2014/15,,Hull,10,Man United,15,0.0,...,6.0,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0
2566,Spring,2,24/05/15,2014/15,,Leicester,12,QPR,19,5.0,...,7.0,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0
2567,Spring,2,24/05/15,2014/15,,Man City,14,Southampton,21,2.0,...,6.0,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0


# Creates a Dataset For Time

### Time Encoding
**Afternoon** 12:00, 12:30

    Afternoon: 0

**Mid-Day**: 14:00, 14:05, 14:15, 15:00, 16:00, 16:15, 16:30 

    Mid-Day: 1

**Late-Day**: 15:00, 17:30, 17:45, 18:00, 19:00, 19:15, 19:30, 19:45, 20:00, 20:15

    Late-Day: 2

In [60]:
time_df = df.dropna()
time_df

Unnamed: 0,Season,Season Encoding,Date,YearOfSeason,Time,HomeTeam,HomeTeam Encoding,AwayTeam,AwayTeam Encoding,FTHG,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,Fall,0,12/09/2020,2020/21,12:30,Fulham,8,Arsenal,0,0.0,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,Fall,0,12/09/2020,2020/21,15:00,Crystal Palace,6,Southampton,21,1.0,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,Fall,0,12/09/2020,2020/21,17:30,Liverpool,13,Leeds,11,4.0,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,Fall,0,12/09/2020,2020/21,20:00,West Ham,28,Newcastle,17,0.0,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,Fall,0,13/09/2020,2020/21,14:00,West Brom,27,Leicester,12,0.0,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
665,Summer,3,26/07/2020,2019/20,16:00,Leicester,12,Man United,15,0.0,...,3.0,3.0,12.0,11.0,3.0,3.0,1.0,4.0,1.0,0.0
666,Summer,3,26/07/2020,2019/20,16:00,Man City,14,Norwich,18,5.0,...,10.0,4.0,7.0,4.0,9.0,0.0,1.0,1.0,0.0,0.0
667,Summer,3,26/07/2020,2019/20,16:00,Newcastle,17,Liverpool,13,1.0,...,2.0,6.0,11.0,5.0,2.0,4.0,1.0,0.0,0.0,0.0
668,Summer,3,26/07/2020,2019/20,16:00,Southampton,21,Sheffield United,20,3.0,...,4.0,3.0,9.0,16.0,9.0,1.0,0.0,1.0,0.0,0.0


In [61]:
time_df["Time"].value_counts()

15:00    153
20:00     90
17:30     67
18:00     62
12:30     57
16:30     55
14:00     54
20:15     41
12:00     22
19:15     18
19:30     18
16:00     11
19:00      7
14:15      7
19:45      5
16:15      1
14:05      1
17:45      1
Name: Time, dtype: int64

In [62]:
time_categories = {"Time": 
                   {
                       "12:00": "Afternoon",
                       "12:30": "Afternoon",
                       
                       "14:00": "Mid-Day",
                       "14:05": "Mid-Day",
                       "14:15": "Mid-Day",
                       "15:00": "Mid-Day",
                       "16:00": "Mid-Day",
                       "16:15": "Mid-Day",
                       "16:30": "Mid-Day",
                       
                       "15:00": "Late-Day",
                       "17:30": "Late-Day",
                       "17:45": "Late-Day",
                       "18:00": "Late-Day",
                       "19:00": "Late-Day",
                       "19:15": "Late-Day",
                       "19:30": "Late-Day",
                       "19:45": "Late-Day",
                       "20:00": "Late-Day",
                       "20:15": "Late-Day",
                   }
                  }

time_df = time_df.replace(time_categories)
time_df

Unnamed: 0,Season,Season Encoding,Date,YearOfSeason,Time,HomeTeam,HomeTeam Encoding,AwayTeam,AwayTeam Encoding,FTHG,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,Fall,0,12/09/2020,2020/21,Afternoon,Fulham,8,Arsenal,0,0.0,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,Fall,0,12/09/2020,2020/21,Late-Day,Crystal Palace,6,Southampton,21,1.0,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,Fall,0,12/09/2020,2020/21,Late-Day,Liverpool,13,Leeds,11,4.0,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,Fall,0,12/09/2020,2020/21,Late-Day,West Ham,28,Newcastle,17,0.0,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,Fall,0,13/09/2020,2020/21,Mid-Day,West Brom,27,Leicester,12,0.0,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
665,Summer,3,26/07/2020,2019/20,Mid-Day,Leicester,12,Man United,15,0.0,...,3.0,3.0,12.0,11.0,3.0,3.0,1.0,4.0,1.0,0.0
666,Summer,3,26/07/2020,2019/20,Mid-Day,Man City,14,Norwich,18,5.0,...,10.0,4.0,7.0,4.0,9.0,0.0,1.0,1.0,0.0,0.0
667,Summer,3,26/07/2020,2019/20,Mid-Day,Newcastle,17,Liverpool,13,1.0,...,2.0,6.0,11.0,5.0,2.0,4.0,1.0,0.0,0.0,0.0
668,Summer,3,26/07/2020,2019/20,Mid-Day,Southampton,21,Sheffield United,20,3.0,...,4.0,3.0,9.0,16.0,9.0,1.0,0.0,1.0,0.0,0.0


In [63]:
time_df["Time Encoding"] = 0

for index, value in time_df.iterrows():
    
    if value["Time"] == "Afternoon":
        time_df.at[index, "Time Encoding"] = 0
        
    elif value["Time"] == "Mid-Day":
        time_df.at[index, "Time Encoding"] = 1
        
    else:
        time_df.at[index, "Time Encoding"] = 2

In [64]:
time_df

Unnamed: 0,Season,Season Encoding,Date,YearOfSeason,Time,HomeTeam,HomeTeam Encoding,AwayTeam,AwayTeam Encoding,FTHG,...,AST,HF,AF,HC,AC,HY,AY,HR,AR,Time Encoding
0,Fall,0,12/09/2020,2020/21,Afternoon,Fulham,8,Arsenal,0,0.0,...,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0,0
1,Fall,0,12/09/2020,2020/21,Late-Day,Crystal Palace,6,Southampton,21,1.0,...,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0,2
2,Fall,0,12/09/2020,2020/21,Late-Day,Liverpool,13,Leeds,11,4.0,...,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0,2
3,Fall,0,12/09/2020,2020/21,Late-Day,West Ham,28,Newcastle,17,0.0,...,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0,2
4,Fall,0,13/09/2020,2020/21,Mid-Day,West Brom,27,Leicester,12,0.0,...,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
665,Summer,3,26/07/2020,2019/20,Mid-Day,Leicester,12,Man United,15,0.0,...,3.0,12.0,11.0,3.0,3.0,1.0,4.0,1.0,0.0,1
666,Summer,3,26/07/2020,2019/20,Mid-Day,Man City,14,Norwich,18,5.0,...,4.0,7.0,4.0,9.0,0.0,1.0,1.0,0.0,0.0,1
667,Summer,3,26/07/2020,2019/20,Mid-Day,Newcastle,17,Liverpool,13,1.0,...,6.0,11.0,5.0,2.0,4.0,1.0,0.0,0.0,0.0,1
668,Summer,3,26/07/2020,2019/20,Mid-Day,Southampton,21,Sheffield United,20,3.0,...,3.0,9.0,16.0,9.0,1.0,0.0,1.0,0.0,0.0,1


In [65]:
cols = time_df.columns.tolist()
cols

['Season',
 'Season Encoding',
 'Date',
 'YearOfSeason',
 'Time',
 'HomeTeam',
 'HomeTeam Encoding',
 'AwayTeam',
 'AwayTeam Encoding',
 'FTHG',
 'FTAG',
 'FTR',
 'FTR Encoding',
 'HTHG',
 'HTAG',
 'HTR',
 'HTR Encoding',
 'Referee',
 'Referee Encoding',
 'Fouls Called Per Game',
 'HS',
 'AS',
 'HST',
 'AST',
 'HF',
 'AF',
 'HC',
 'AC',
 'HY',
 'AY',
 'HR',
 'AR',
 'Time Encoding']

In [66]:
new_order = ['Season',
 'Season Encoding',
 'Date',
 'YearOfSeason',
 'Time',
 'Time Encoding',
 'HomeTeam',
 'HomeTeam Encoding',
 'AwayTeam',
 'AwayTeam Encoding',
 'FTHG',
 'FTAG',
 'FTR',
 'FTR Encoding',
 'HTHG',
 'HTAG',
 'HTR',
 'Referee',
 'Referee Encoding',
 'Fouls Called Per Game',
 'HS',
 'AS',
 'HST',
 'AST',
 'HF',
 'AF',
 'HC',
 'AC',
 'HY',
 'AY',
 'HR',
 'AR']

time_df = time_df[new_order]
time_df

Unnamed: 0,Season,Season Encoding,Date,YearOfSeason,Time,Time Encoding,HomeTeam,HomeTeam Encoding,AwayTeam,AwayTeam Encoding,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,Fall,0,12/09/2020,2020/21,Afternoon,0,Fulham,8,Arsenal,0,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,Fall,0,12/09/2020,2020/21,Late-Day,2,Crystal Palace,6,Southampton,21,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,Fall,0,12/09/2020,2020/21,Late-Day,2,Liverpool,13,Leeds,11,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,Fall,0,12/09/2020,2020/21,Late-Day,2,West Ham,28,Newcastle,17,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,Fall,0,13/09/2020,2020/21,Mid-Day,1,West Brom,27,Leicester,12,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
665,Summer,3,26/07/2020,2019/20,Mid-Day,1,Leicester,12,Man United,15,...,3.0,3.0,12.0,11.0,3.0,3.0,1.0,4.0,1.0,0.0
666,Summer,3,26/07/2020,2019/20,Mid-Day,1,Man City,14,Norwich,18,...,10.0,4.0,7.0,4.0,9.0,0.0,1.0,1.0,0.0,0.0
667,Summer,3,26/07/2020,2019/20,Mid-Day,1,Newcastle,17,Liverpool,13,...,2.0,6.0,11.0,5.0,2.0,4.0,1.0,0.0,0.0,0.0
668,Summer,3,26/07/2020,2019/20,Mid-Day,1,Southampton,21,Sheffield United,20,...,4.0,3.0,9.0,16.0,9.0,1.0,0.0,1.0,0.0,0.0


### Removes Time From Original Dataset

In [67]:
df = df.drop(columns=["Time"])
df

Unnamed: 0,Season,Season Encoding,Date,YearOfSeason,HomeTeam,HomeTeam Encoding,AwayTeam,AwayTeam Encoding,FTHG,FTAG,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,Fall,0,12/09/2020,2020/21,Fulham,8,Arsenal,0,0.0,3.0,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,Fall,0,12/09/2020,2020/21,Crystal Palace,6,Southampton,21,1.0,0.0,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,Fall,0,12/09/2020,2020/21,Liverpool,13,Leeds,11,4.0,3.0,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,Fall,0,12/09/2020,2020/21,West Ham,28,Newcastle,17,0.0,2.0,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,Fall,0,13/09/2020,2020/21,West Brom,27,Leicester,12,0.0,3.0,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,Spring,2,24/05/15,2014/15,Everton,7,Tottenham,25,0.0,1.0,...,1.0,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0
2565,Spring,2,24/05/15,2014/15,Hull,10,Man United,15,0.0,0.0,...,6.0,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0
2566,Spring,2,24/05/15,2014/15,Leicester,12,QPR,19,5.0,1.0,...,7.0,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0
2567,Spring,2,24/05/15,2014/15,Man City,14,Southampton,21,2.0,0.0,...,6.0,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0


# Saves Datasets Separately For Future Use

In [68]:
df.to_csv(r'League_Result_Data/Encoded_PremierLeague_Stats_From_2014to2021.csv')

time_df.to_csv(r'League_Result_Data/TimeEncoded_PremierLeague_Stats_From_2014to2021.csv')