## Create Running Season Averages Dataset

This dataset will contain the running averages for each team during the season. Each row will contain both teams' averages coming into the match for relevant statistical categories such as goals scored, fouls committed, cards received, etc.

In [10]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import chardet
import warnings

#### Load dataset

In [11]:
# Location of the combined Guardian/Kaggle match file
guardian_kaggle_combined_dataset_dir = "../../../source/processed/Guardian_Kaggle_Combined_Dataset.csv"

# Let chardet guess the character encoding from the file's raw bytes,
# then hand that guess to pandas so the CSV is decoded accordingly.
with open(guardian_kaggle_combined_dataset_dir, mode='rb') as f:
    detection = chardet.detect(f.read())
encoding = detection['encoding']

df = pd.read_csv(guardian_kaggle_combined_dataset_dir, encoding=encoding)

# Silence pandas PerformanceWarning for the rest of the session
warnings.filterwarnings(action="ignore", category=pd.errors.PerformanceWarning)

#### Create dataset

In [12]:
# Order matches by their DateTime value (ascending is the default) so that the
# running totals built afterwards only ever see earlier rows.
# NOTE(review): assumes the DateTime column sorts chronologically (datetime
# dtype or ISO-style strings) -- confirm against the source file.
df = df.sort_values('DateTime')

# Every statistic is tracked three ways: over all matches, over home matches
# only, and over away matches only.
_VENUE_SUFFIXES = ('', '_Home', '_Away')


def _with_venue_variants(base_names):
    """Expand each base name into its overall, ``_Home`` and ``_Away`` variants, in that order."""
    return [f"{base}{suffix}" for base in base_names for suffix in _VENUE_SUFFIXES]


# All metrics to be calculated
metrics = _with_venue_variants([
    'Total_Games_Played',
    'Full_Time_Goals_Scored',
    'Full_Time_Goals_Conceded',
    'Half_Time_Goals_Scored',
    'Half_Time_Goals_Conceded',
    'Shots',
    'Oponent_Shots',
    'Shots_On_Target',
    'Oponent_Shots_On_Target',
    'Accuracy',
    'Oponent_Accuracy',
    'Saves',
    'Save_Percent',
    'Corners',
    'Fouls',
    'Yellow_Cards',
    'Red_Cards',
    'Win_Percent',
    'Draw_Percent',
    'Loss_Percent',
])

# Raw season totals kept behind the scenes (stored under a "SeasonTotal_" prefix)
# from which the reported metrics are derived
helper_season_total_metrics = _with_venue_variants([
    'Full_Time_Goals_Scored',
    'Full_Time_Goals_Conceded',
    'Half_Time_Goals_Scored',
    'Half_Time_Goals_Conceded',
    'Shots',
    'Oponent_Shots',
    'Shots_On_Target',
    'Oponent_Shots_On_Target',
    'Saves',
    'Corners',
    'Fouls',
    'Yellow_Cards',
    'Red_Cards',
    'Wins',
    'Draws',
    'Losses',
])

# Give every match row a zero-filled column per metric for each side;
# a team's first match of a season keeps these zeros.
for metric in metrics:
    for side in ("HomeTeam", "AwayTeam"):
        df[f"{side}_{metric}"] = 0.0

# season -> {team -> running stats}; the per-team entries are filled in lazily
# the first time a team shows up in that season
metric_dict = {season_label: {} for season_label in df['Season'].unique()}

# Loop through every match in df
#
# For each match (rows are expected to already be in chronological order) the
# loop first copies each team's running metrics *before* the match into the
# HomeTeam_*/AwayTeam_* columns, then folds the match into both teams' season
# totals and recomputes their running metrics.

# Match-row column feeding each summed statistic, given as
# (column used when the team is the home side, column used when it is away).
_RAW_STAT_COLUMNS = {
    'Full_Time_Goals_Scored': ('FTHG', 'FTAG'),
    'Full_Time_Goals_Conceded': ('FTAG', 'FTHG'),
    'Half_Time_Goals_Scored': ('HTHG', 'HTAG'),
    'Half_Time_Goals_Conceded': ('HTAG', 'HTHG'),
    'Shots': ('HS', 'AS'),
    'Oponent_Shots': ('AS', 'HS'),
    'Shots_On_Target': ('HST', 'AST'),
    'Oponent_Shots_On_Target': ('AST', 'HST'),
    'Corners': ('HC', 'AC'),
    'Fouls': ('HF', 'AF'),
    'Yellow_Cards': ('HY', 'AY'),
    'Red_Cards': ('HR', 'AR'),
}

# Statistics reported as a per-game average (season total / games played)
_PER_GAME_STATS = (*_RAW_STAT_COLUMNS, 'Saves')

# (season-total counter, reported rate) pairs for match results
_RESULT_RATES = (
    ('Wins', 'Win_Percent'),
    ('Draws', 'Draw_Percent'),
    ('Losses', 'Loss_Percent'),
)

# Which tuple slot of _RAW_STAT_COLUMNS applies, and which FTR code is a win
_SIDE_INDEX = {'Home': 0, 'Away': 1}
_WINNING_RESULT = {'Home': 'H', 'Away': 'A'}


def _new_team_stats():
    """Return a fresh stats dict for one team-season with every entry at 0.0.

    Holds one key per reported metric plus one ``SeasonTotal_``-prefixed key
    per helper total. Both sides use float zeros (the original code mixed
    ``0.0`` for home-first teams and ``0`` for away-first teams).
    """
    stats = {metric: 0.0 for metric in metrics}
    stats.update({f"SeasonTotal_{metric}": 0.0 for metric in helper_season_total_metrics})
    return stats


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0 when the denominator equals 0."""
    if denominator == 0:
        return 0
    return numerator / denominator


def _match_observations(row, venue):
    """Extract one team's raw statistics for a single match.

    Args:
        row: The match row (as yielded by ``df.iterrows()``).
        venue: ``'Home'`` or ``'Away'`` -- the side the team played on.

    Returns:
        Dict mapping each name in ``_PER_GAME_STATS`` to its value for this match.
    """
    side = _SIDE_INDEX[venue]
    observed = {name: row[columns[side]] for name, columns in _RAW_STAT_COLUMNS.items()}
    # Saves are not in the data: derive them as opponent shots on target that
    # did not end up as goals, clamped at zero.
    observed['Saves'] = max(
        0, observed['Oponent_Shots_On_Target'] - observed['Full_Time_Goals_Conceded']
    )
    return observed


def _match_outcome(result, venue):
    """Map the row's ``FTR`` code to the counter it increments for this team.

    Any code other than the team's winning code or ``'D'`` counts as a loss.
    NOTE(review): a missing/unknown FTR is therefore a loss for both sides --
    confirm the source data never contains one.
    """
    if result == _WINNING_RESULT[venue]:
        return 'Wins'
    if result == 'D':
        return 'Draws'
    return 'Losses'


def _refresh_running_metrics(team_stats, scope):
    """Recompute the reported metrics for one scope from the season totals.

    Args:
        team_stats: The team's stats dict; updated in place.
        scope: Key suffix -- ``''`` for all games, ``'_Home'`` or ``'_Away'``.
    """
    def total(name):
        return team_stats[f"SeasonTotal_{name}{scope}"]

    # Always >= 1 here: the caller increments the counter before refreshing
    games = team_stats[f"Total_Games_Played{scope}"]

    for name in _PER_GAME_STATS:
        team_stats[f"{name}{scope}"] = total(name) / games

    # Ratios are taken over season totals, with 0 when there is nothing to divide by
    team_stats[f"Accuracy{scope}"] = _safe_ratio(total('Shots_On_Target'), total('Shots'))
    team_stats[f"Oponent_Accuracy{scope}"] = _safe_ratio(
        total('Oponent_Shots_On_Target'), total('Oponent_Shots')
    )
    # Fix: divide the season total of saves (not the per-game average) by the
    # season total of opponent shots on target, so the value is a true fraction
    # of shots saved and does not shrink as more games are played.
    team_stats[f"Save_Percent{scope}"] = _safe_ratio(
        total('Saves'), total('Oponent_Shots_On_Target')
    )

    for counter, rate in _RESULT_RATES:
        team_stats[f"{rate}{scope}"] = total(counter) / games


def _record_match(team_stats, row, venue):
    """Fold one finished match into a team's totals and refresh its metrics.

    Args:
        team_stats: The team's stats dict for the season; updated in place.
        row: The match row.
        venue: ``'Home'`` or ``'Away'`` -- the side the team played on.
    """
    observed = _match_observations(row, venue)
    outcome = _match_outcome(row['FTR'], venue)

    # Each match counts towards the overall figures and the venue-specific ones
    scopes = ('', f"_{venue}")
    for scope in scopes:
        for name, value in observed.items():
            team_stats[f"SeasonTotal_{name}{scope}"] += value
        team_stats[f"SeasonTotal_{outcome}{scope}"] += 1
        team_stats[f"Total_Games_Played{scope}"] += 1

    for scope in scopes:
        _refresh_running_metrics(team_stats, scope)


for idx, row in df.iterrows():

    # Get the season, home team, and away team
    season = row['Season']
    home_team = row['Home']
    away_team = row['Away']
    season_stats = metric_dict[season]

    for column_prefix, team in (('HomeTeam', home_team), ('AwayTeam', away_team)):
        if team not in season_stats:
            # First appearance this season: the row keeps its zero-filled columns
            season_stats[team] = _new_team_stats()
        else:
            # Report the metrics leading up to (not including) this match
            team_stats = season_stats[team]
            for metric in metrics:
                df.at[idx, f"{column_prefix}_{metric}"] = team_stats[metric]

    # Only now let the match itself influence the running figures
    _record_match(season_stats[home_team], row, 'Home')
    _record_match(season_stats[away_team], row, 'Away')

# Keep only the match identifier and the computed running-metric columns
# (all home-team metrics first, then all away-team metrics)
home_metric_columns = [f"HomeTeam_{metric}" for metric in metrics]
away_metric_columns = [f"AwayTeam_{metric}" for metric in metrics]
columns_to_keep = ['MatchID', *home_metric_columns, *away_metric_columns]
df = df[columns_to_keep]

#### Normalize dataset

In [13]:
# Columns to rescale: every running metric for both sides (MatchID is left untouched)
metric_columns = [f"{side}_{metric}" for side in ("HomeTeam", "AwayTeam") for metric in metrics]

# Work on a copy so the un-scaled frame stays available for saving
df_normalized = df.copy()

# The scaler is fitted on all rows of the dataset and applied to the same rows
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df_normalized[metric_columns])
df_normalized[metric_columns] = scaled_values

#### Save dataset

In [14]:
# Output locations: raw running averages and their min-max scaled counterpart
running_season_averages_dataset_dir = "../processed/Running_Season_Averages_Dataset.csv"
running_season_averages_normalized_dataset_dir = "../processed/Running_Season_Averages_Normalized_Dataset.csv"

# Write both frames as CSV without the DataFrame index
for frame, destination in (
    (df, running_season_averages_dataset_dir),
    (df_normalized, running_season_averages_normalized_dataset_dir),
):
    frame.to_csv(destination, index=False)