In [3]:
# Get inputs needed 
# 1/2. home/away win rates per team
# 3/4. average goals scored/conceded per match per team
# 5. current team league standings
import pandas as pd
import keras
import random as r
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.callbacks import EarlyStopping


def file_to_pd(filename):

    home_wins = {}
    away_wins = {}
    losses = {}
    draws = {}

    goals_scored = {}
    goals_conceded = {}

    # table with updated points of each team 
    # in regulation with the Premier League
    # https://www.premierleague.com/premier-league-explained
    # after each match:
        # add 3 points to the winning team
        # add 1 point to both teams for a tie 
        # and 0 points to the losing team
    table = {}

    # notice that the data is already sorted by date allowing us to 
    # keep our table updated after every match day 
    # (we don't care about the difference in times for match day)
    curr_row = 0
    for row in filename.iterrows(): 
        # get home and away team names
        home = filename.iloc[curr_row]['HomeTeam']
        away = filename.iloc[curr_row]['AwayTeam']

        # identify a home win
        if filename.iloc[curr_row]['FTR'] == 'H':
            home_wins[home] = home_wins.get(home, 0) + 1
            losses[away] = losses.get(away, 0) + 1

            # only sum 3 to the winning team
            # no need to add 0 to the losing team
            table[home] = table.get(home, 0) + 3

        # identify an away win
        if filename.iloc[curr_row]['FTR'] == 'A':
            losses[home] = losses.get(home, 0) + 1
            away_wins[away] = away_wins.get(away, 0) + 1 

            # only sum 3 to the winning team
            # no need to add 0 to the losing team
            table[away] = table.get(away, 0) + 3

        # identify draws
        if filename.iloc[curr_row]['FTR'] == 'D':        
            draws[home] = draws.get(home, 0) + 1
            draws[away] = draws.get(away, 0) + 1 

            # add 1 to both teams
            table[home] = table.get(home, 0) + 1
            table[away] = table.get(away, 0) + 1

        # sum to goals scored dictionary
        goals_scored[home] = goals_scored.get(home, 0) + filename.iloc[curr_row]['FTHG']
        goals_scored[away] = goals_scored.get(away, 0) + filename.iloc[curr_row]['FTAG']

        # sum to goals conceded dictionary
        goals_conceded[home] = goals_conceded.get(home, 0) + filename.iloc[curr_row]['FTAG']
        goals_conceded[away] = goals_conceded.get(away, 0) + filename.iloc[curr_row]['FTHG']

        # increment row
        curr_row += 1
    
    # 1/2. home/away win rates per team
    home_win_rates = {}
    away_win_rates = {}

    for key, value in home_wins.items():
        # half of the 38 matches played per team are played at home
        home_win_rates[key] = value / 19

    for key, value in away_wins.items():
        # half of the 38 matches played per team are played away
        away_win_rates[key] = value / 19

    # 3/4. average goals scored/conceded per match per team
    av_goals_scored = {}
    av_goals_conceded = {}

    for key, value in goals_scored.items():
        # total o 38 matches per team
        av_goals_scored[key] = value / 38

    for key, value in goals_conceded.items():
        # total o 38 matches per team
        av_goals_conceded[key] = value / 38

    # 5. current team league standings
    final_table = {}

    sorted_table = sorted(table.items(), reverse=True, key=lambda item: item[1])

    tied_teams = set()
    prev_pair = ""

    curr_position = 1
    for team_point_pair in sorted_table:
        # don't add to the final_table unless their are no ties in points
        # otherwise proceed with Premier League regulation by:
            # breaking a tie by using the goal difference
            # then the number of goals scored
            # "If the teams still cannot be separated, 
            # they will be awarded the same position in the table."
        if prev_pair and prev_pair[1] == team_point_pair[1]:
            tied_teams.add(prev_pair)
            tied_teams.add(team_point_pair)

        else:
            # break the ties first adding to the final table
            if len(tied_teams) > 0:
                goal_difference = {}
                # breaking a tie by using the goal difference
                for team in tied_teams:
                    goal_difference[team[0]] = goals_scored[team[0]] - goals_conceded[team[0]]
                goal_difference_lst = sorted(goal_difference.items(), reverse=True, key=lambda item: item[1])
                to_be_removed_from_copy_of_goal_diff_lst = []

                for i in range(len(goal_difference_lst)):
                    # check if teams are still tied by goal difference
                    if i + 1 < len(goal_difference_lst) and goal_difference_lst[i][1] == goal_difference_lst[i + 1][1]:
                        tied_team_goals_scored = {}
                        copy_goal_difference_lst = goal_difference_lst

                        for item in to_be_removed_from_copy_of_goal_diff_lst:
                            copy_goal_difference_lst.remove(item)

                        for team in copy_goal_difference_lst:
                            tied_team_goals_scored[team[0]] = goals_scored[team[0]]
                        # reorder teams by number of goals 
                        goals_scored_lst = sorted(tied_team_goals_scored.items(), reverse=True, key=lambda item: item[1]) 

                        num_tied_teams = 0
                        tied_goals_scored = -1
                        for j in range(len(goals_scored_lst)):
                            # check if teams are still tied by goals scored
                            if j + 1 < len(goals_scored_lst) and goals_scored_lst[j][1] == goals_scored_lst[j + 1][1]:
                                final_table[str(curr_position) + "(" + str(num_tied_teams) + ")"] = (goals_scored_lst[j][0], table[goals_scored_lst[j][0]])
                                num_tied_teams += 1
                                # remember the total number of goals scored of those that were tied
                                tied_goals_scored = goals_scored_lst[j][1]

                            # no teams are tied in goals scored
                            else: 
                                if goals_scored_lst[j][1] == tied_goals_scored:
                                    # format the last tied team appropriately
                                    final_table[str(curr_position) + "(" + str(num_tied_teams) + ")"] = (goals_scored_lst[j][0], table[goals_scored_lst[j][0]])
                                else:
                                    final_table[curr_position] = (goals_scored_lst[j][0], table[goals_scored_lst[j][0]])
                                curr_position += 1

                        # change current_position to reflect positions after tied teams have been added 
                        curr_position += num_tied_teams

                        # break out of outer for loop
                        break

                    # not tied and proceed to add to table
                    else:
                        final_table[curr_position] = (goal_difference_lst[i][0], table[goal_difference_lst[i][0]])
                        tied_teams.remove((goal_difference_lst[i][0], table[goal_difference_lst[i][0]]))
                        to_be_removed_from_copy_of_goal_diff_lst.append((goal_difference_lst[i][0], goal_difference[goal_difference_lst[i][0]]))
                        curr_position += 1
                    
                    
            # no tied teams and can proceed to add to final_table as usual
            else:
                if prev_pair and prev_pair not in tied_teams:
                    final_table[curr_position] = prev_pair
                    curr_position += 1
            # clear tied_teams list
            tied_teams = set()

        # prepair next iteration
        prev_pair = team_point_pair

    # add last team but first check if the last place teams are tied
    if len(tied_teams) > 0:
        for team in tied_teams:
            final_table[curr_position] = team
            curr_position += 1
    else:
        final_table[curr_position] = prev_pair
    
    positions_lst = []
    points_lst = []
    teams_lst = []
    wins_lst = []
    losses_lst = []
    draws_lst = []
    goals_for = []
    goals_against = []
    goals_difference = []
    home_wins_rate_lst = []
    away_win_rates_lst = []
    av_goals_scored_lst = []
    av_goals_conceded_lst = []

    # used to remember position of the previous year 
    positions_dict = {}
    
    ordered_away_wins = []
    # populate lsts for final table as a dataframe
    for key, value in final_table.items():
        positions_dict[value[0]] = key
        positions_lst.append(key)
        points_lst.append(value[1])
        teams_lst.append(value[0])
        try: 
            # check to see if there are any away wins for this team
            away_wins[value[0]]
        except:
            # assign 0 away wins to this team(value[0])
            away_wins[value[0]] = 0 
            # assign 0 percent away_win_rates
            away_win_rates[value[0]] = 0 
        try: 
            # check to see if there are any home wins for this team
            home_wins[value[0]]
        except:
            # assign 0 home wins to this team(value[0])
            home_wins[value[0]] = 0 
            home_win_rates[value[0]] = 0
            
        wins_lst.append(home_wins[value[0]] + away_wins[value[0]])
        
        # add to ordered_away_wins to point out that they are a key component in our predictions 
        ordered_away_wins.append(away_wins[value[0]])
        try: 
            # check to see if there are draws for this team
            draws[value[0]]
        except:
            # assign 0 draws to this team(value[0])
            draws[value[0]] = 0 
        draws_lst.append(draws[value[0]])
        
        try: 
            # check to see if there are any losses for this team
            losses[value[0]]
        except:
            # assign 0 losses to this team(value[0])
            losses[value[0]] = 0 
        losses_lst.append(losses[value[0]])
        goals_for.append(goals_scored[value[0]])
        goals_against.append(goals_conceded[value[0]])
        goals_difference.append(goals_scored[value[0]] - goals_conceded[value[0]])
        home_wins_rate_lst.append(home_win_rates[value[0]])
        away_win_rates_lst.append(away_win_rates[value[0]])
        av_goals_scored_lst.append(av_goals_scored[value[0]])
        av_goals_conceded_lst.append(av_goals_conceded[value[0]])

    d = {'position': positions_lst, 'team': teams_lst, 'points': points_lst, 'wins': wins_lst,
         'home win rates': home_wins_rate_lst, 'away win rates': away_win_rates_lst, 'draws': draws_lst, 
         'losses': losses_lst, 'goals scored': goals_for, 'goals conceded': goals_against, 'goal difference': goals_difference,
         'average goals scored': av_goals_scored_lst, 'average goals conceded': av_goals_conceded_lst}

    df_table = pd.DataFrame(data = d)
    df_table = df_table.set_index('position')
    return df_table, positions_dict, ordered_away_wins 

Using TensorFlow backend.


In [4]:
def finalize_input_data(data, curr_positions, league_years):
    this_seasons_finish = []
    for key, value in curr_positions.items():
        this_seasons_finish.append(value)

    # actual final league standings 
    final_league_standings = data.drop(['home win rates', 'away win rates', 'average goals scored', 'average goals conceded'], axis=1)
    print("Actual Final League Standings " + league_years + ":")
    display(final_league_standings)

    input_data = data.set_index('team')
    input_data = input_data[['home win rates', 'away win rates', 'average goals scored', 'average goals conceded']]
    input_data['current season position finish'] = this_seasons_finish
    return input_data

In [5]:
file_15_16 = '2015-2016.csv'
data_15_16 = pd.read_csv(file_15_16)
# keep match results
scores_15_16 = data_15_16[['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]

# store dataframe 
data_15_16, positions_15_16, ordered_away_wins = file_to_pd(data_15_16)

# actual final league standings 
final_table_15_16 = data_15_16.drop(['home win rates', 'away win rates', 'average goals scored', 'average goals conceded'], axis=1)
input_data_15_16 = finalize_input_data(data_15_16, positions_15_16, '2015-2016')
display(input_data_15_16)

Actual Final League Standings 2015-2016:


Unnamed: 0_level_0,team,points,wins,draws,losses,goals scored,goals conceded,goal difference
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Leicester,81,23,12,3,68,36,32
2,Arsenal,71,20,11,7,65,36,29
3,Tottenham,70,19,13,6,69,35,34
4,Man City,66,19,9,10,71,41,30
5,Man United,66,19,9,10,49,35,14
6,Southampton,63,18,9,11,59,41,18
7,West Ham,62,16,14,8,65,51,14
8,Liverpool,60,16,12,10,63,50,13
9,Stoke,51,14,9,15,41,55,-14
10,Chelsea,50,12,14,12,59,53,6


Unnamed: 0_level_0,home win rates,away win rates,average goals scored,average goals conceded,current season position finish
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Leicester,0.631579,0.578947,1.789474,0.947368,1
Arsenal,0.631579,0.421053,1.710526,0.947368,2
Tottenham,0.526316,0.473684,1.815789,0.921053,3
Man City,0.631579,0.368421,1.868421,1.078947,4
Man United,0.631579,0.368421,1.289474,0.921053,5
Southampton,0.578947,0.368421,1.552632,1.078947,6
West Ham,0.473684,0.368421,1.710526,1.342105,7
Liverpool,0.421053,0.421053,1.657895,1.315789,8
Stoke,0.421053,0.315789,1.078947,1.447368,9
Chelsea,0.263158,0.368421,1.552632,1.394737,10


In [6]:
data = pd.read_csv('2016-2017.csv')
# keep match results
match_results_16_17 = data[['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]

# store dataframe inputs using our file_to_pd function 
data, positions_16_17, ordered_away_wins = file_to_pd(data)

input_data_16_17 = finalize_input_data(data, positions_16_17, '2016-2017')
print("Neural Network Input Data 2016-2017:")
display(input_data_16_17)

Actual Final League Standings 2016-2017:


Unnamed: 0_level_0,team,points,wins,draws,losses,goals scored,goals conceded,goal difference
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Chelsea,93,30,3,5,85,33,52
2,Tottenham,86,26,8,4,86,26,60
3,Man City,78,23,9,6,80,39,41
4,Liverpool,76,22,10,6,78,42,36
5,Arsenal,75,23,6,9,77,44,33
6,Man United,69,18,15,5,54,29,25
7,Everton,61,17,10,11,62,44,18
8,Southampton,46,12,10,16,41,48,-7
9,Bournemouth,46,12,10,16,55,67,-12
10,West Brom,45,12,9,17,43,51,-8


Neural Network Input Data 2016-2017:


Unnamed: 0_level_0,home win rates,away win rates,average goals scored,average goals conceded,current season position finish
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chelsea,0.894737,0.684211,2.236842,0.868421,1
Tottenham,0.894737,0.473684,2.263158,0.684211,2
Man City,0.578947,0.631579,2.105263,1.026316,3
Liverpool,0.631579,0.526316,2.052632,1.105263,4
Arsenal,0.736842,0.473684,2.026316,1.157895,5
Man United,0.421053,0.526316,1.421053,0.763158,6
Everton,0.684211,0.210526,1.631579,1.157895,7
Southampton,0.315789,0.315789,1.078947,1.263158,8
Bournemouth,0.473684,0.157895,1.447368,1.763158,9
West Brom,0.473684,0.157895,1.131579,1.342105,10


In [7]:
data = pd.read_csv('2017-2018.csv')
# keep match results
match_results_17_18 = data[['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]

# store dataframe inputs using our file_to_pd function 
data, positions_17_18, ordered_away_wins = file_to_pd(data)

input_data_17_18 = finalize_input_data(data, positions_17_18, '2017-2018')
print("Neural Network Input Data 2017-2018:")
display(input_data_17_18)

Actual Final League Standings 2017-2018:


Unnamed: 0_level_0,team,points,wins,draws,losses,goals scored,goals conceded,goal difference
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Man City,100,32,4,2,106,27,79
2,Man United,81,25,6,7,68,28,40
3,Tottenham,77,23,8,7,74,36,38
4,Liverpool,75,21,12,5,84,38,46
5,Chelsea,70,21,7,10,62,38,24
6,Arsenal,63,19,6,13,74,51,23
7,Burnley,54,14,12,12,36,39,-3
8,Everton,49,13,10,15,44,58,-14
9,Leicester,47,12,11,15,56,60,-4
10,Newcastle,44,12,8,18,39,47,-8


Neural Network Input Data 2017-2018:


Unnamed: 0_level_0,home win rates,away win rates,average goals scored,average goals conceded,current season position finish
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Man City,0.842105,0.842105,2.789474,0.710526,1
Man United,0.789474,0.526316,1.789474,0.736842,2
Tottenham,0.684211,0.526316,1.947368,0.947368,3
Liverpool,0.631579,0.473684,2.210526,1.0,4
Chelsea,0.578947,0.526316,1.631579,1.0,5
Arsenal,0.789474,0.210526,1.947368,1.342105,6
Burnley,0.368421,0.368421,0.947368,1.026316,7
Everton,0.526316,0.157895,1.157895,1.526316,8
Leicester,0.368421,0.263158,1.473684,1.578947,9
Newcastle,0.421053,0.210526,1.026316,1.236842,10


In [8]:
data = pd.read_csv('2018-2019.csv')
# keep match results
match_results_18_19 = data[['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]

# store dataframe inputs using our file_to_pd function 
data, positions_18_19, ordered_away_wins = file_to_pd(data)

input_data_18_19 = finalize_input_data(data, positions_18_19, '2018-2019')
print("Neural Network Input Data 2018-2019:")
display(input_data_18_19)

Actual Final League Standings 2018-2019:


Unnamed: 0_level_0,team,points,wins,draws,losses,goals scored,goals conceded,goal difference
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Man City,98,32,2,4,95,23,72
2,Liverpool,97,30,7,1,89,22,67
3,Chelsea,72,21,9,8,63,39,24
4,Tottenham,71,23,2,13,67,39,28
5,Arsenal,70,21,7,10,73,51,22
6,Man United,66,19,9,10,65,54,11
7,Wolves,57,16,9,13,47,46,1
8,Everton,54,15,9,14,54,46,8
9,Leicester,52,15,7,16,51,48,3
10,West Ham,52,15,7,16,52,55,-3


Neural Network Input Data 2018-2019:


Unnamed: 0_level_0,home win rates,away win rates,average goals scored,average goals conceded,current season position finish
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Man City,0.947368,0.736842,2.5,0.605263,1
Liverpool,0.894737,0.684211,2.342105,0.578947,2
Chelsea,0.631579,0.473684,1.657895,1.026316,3
Tottenham,0.631579,0.578947,1.763158,1.026316,4
Arsenal,0.736842,0.368421,1.921053,1.342105,5
Man United,0.526316,0.473684,1.710526,1.421053,6
Wolves,0.526316,0.315789,1.236842,1.210526,7
Everton,0.526316,0.263158,1.421053,1.210526,8
Leicester,0.421053,0.368421,1.342105,1.263158,9
West Ham,0.473684,0.315789,1.368421,1.447368,10


In [24]:
# Get inputs needed for x_train:
    # hometeam home win rates 
    # awayteam away win rates
    # average goals scored
    # average goals conceded
    # previous team league standings

# use the match fixtures of the current year but the input_data should be that of the previous year
# since we are relying on previous information to predict this year's match results 
def get_x_train(match_fixtures, input_data, input_data_bottom_half):
    # we will assume that the newly promoted teams perform on par with the 
    # average performance of the previous bottom 10 teams times a random number between 0 and 1
    relegated_teams_av_home_win_rates = input_data_bottom_half['home win rates'].mean()
    relegated_teams_av_goals_scored = input_data_bottom_half['average goals scored'].mean()
    relegated_teams_av_goals_conceded = input_data_bottom_half['average goals conceded'].mean()
    
    # we don't need the homeTeam away win rates if they are playing from home
    homeTeam_home_win_rates = []
    homeTeam_av_goals_scored = []
    homeTeam_av_goals_conceded = []
    homeTeam_prev_position = []

    # we don't need the awayTeam home win rates if they are playing away
    awayTeam_away_win_rates = []
    awayTeam_av_goals_scored = []
    awayTeam_av_goals_conceded = []
    awayTeam_prev_position = []

    curr_row = 0
    for row in match_fixtures.iterrows():
        # use a random number between 0 and 1 
        random_num = r.random()
        
        homeTeam = match_fixtures.iloc[curr_row]['HomeTeam']        
        awayTeam = match_fixtures.iloc[curr_row]['AwayTeam']        
        
        try:
            homeTeam_home_win_rates.append(input_data.loc[homeTeam]['home win rates'])
            homeTeam_av_goals_scored.append(input_data.loc[homeTeam]['average goals scored'])
            homeTeam_av_goals_conceded.append(input_data.loc[homeTeam]['average goals conceded'])
            # this is the previous position of said team because we are looking at a previous season's data
            homeTeam_prev_position.append(input_data.loc[homeTeam]['current season position finish'])
        except:
            homeTeam_home_win_rates.append(relegated_teams_av_home_win_rates * random_num)
            homeTeam_av_goals_scored.append(relegated_teams_av_goals_scored * random_num)
            homeTeam_av_goals_conceded.append(relegated_teams_av_goals_conceded * random_num)
            # tried -1 for previous position but that would actually make newly promoted teams perform better
                # I'm assuming because its better to be in 1st than in 20th 
                # so the model assumes a smaller number is better
            # therefore, I opted for a random number between 20 and 30
            # to emphasize they newly promoted teams were in a lower division
            homeTeam_prev_position.append(r.randint(20, 30))
            
        try:
            # awayTeam data
            awayTeam_away_win_rates.append(input_data.loc[awayTeam]['away win rates'])
            awayTeam_av_goals_scored.append(input_data.loc[awayTeam]['average goals scored'])
            awayTeam_av_goals_conceded.append(input_data.loc[awayTeam]['average goals conceded'])
            awayTeam_prev_position.append(input_data.loc[awayTeam]['current season position finish'])
        except:
            awayTeam_away_win_rates.append(relegated_teams_av_home_win_rates * random_num)
            awayTeam_av_goals_scored.append(relegated_teams_av_goals_scored * random_num)
            awayTeam_av_goals_conceded.append(relegated_teams_av_goals_conceded * random_num)
            awayTeam_prev_position.append(r.randint(20, 30))
            
        curr_row += 1

    input_df = match_fixtures.copy(deep=True)
    # add above elements to input_df    
    input_df['homeTeam home win rates'] = homeTeam_home_win_rates
    input_df['homeTeam average goals scored'] = homeTeam_av_goals_scored
    input_df['homeTeam average goals conceded'] = homeTeam_av_goals_conceded
    input_df['homeTeam previous position'] = homeTeam_prev_position

    input_df['awayTeam away win rates'] = awayTeam_away_win_rates
    input_df['awayTeam average goals scored'] = awayTeam_av_goals_scored
    input_df['awayTeam average goals conceded'] = awayTeam_av_goals_conceded
    input_df['awayTeam previous position'] = awayTeam_prev_position

    x_train = input_df.drop(['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG'], axis = 1)
    return x_train

In [42]:
x_train = get_x_train(match_results_16_17, input_data_15_16, input_data_15_16.tail(10))
y_train = match_results_16_17.drop(['HomeTeam', 'AwayTeam'], axis =1)
display(x_train)

# get number of columns in training data
n_cols = x_train.shape[1]

model = Sequential([ 
    Dense(32, input_shape=(n_cols,)), 
    Dense(16),
    Dense(8),
    Dense(4),
    # we want team goals scored for the match
    Dense(2) 
])

# compile model using mean_squared_error as a measure of model performance
model.compile(optimizer='adam', loss='mean_squared_error')

# set early stopping monitor so the model stops training when it won't improve anymore
early_stopping_monitor = EarlyStopping(patience=3)

# train model with 2016-2017 results
model.fit(x_train, y_train, validation_split=0.2, epochs=30, callbacks=[early_stopping_monitor])

# also train model with 2017-2018 results
x_train = get_x_train(match_results_17_18, input_data_16_17, input_data_16_17.tail(10))
y_train = match_results_17_18.drop(['HomeTeam', 'AwayTeam'], axis =1)
model.fit(x_train, y_train, validation_split=0.2, epochs=30, callbacks=[early_stopping_monitor])

x_test = get_x_train(match_results_18_19, input_data_17_18, input_data_17_18.tail(10))
y_test_predictions = model.predict(x_test)

Unnamed: 0,homeTeam home win rates,homeTeam average goals scored,homeTeam average goals conceded,homeTeam previous position,awayTeam away win rates,awayTeam average goals scored,awayTeam average goals conceded,awayTeam previous position
0,0.282232,1.014575,1.442788,26.0,0.210526,1.105263,1.368421,12.0
1,0.315789,1.026316,1.342105,15.0,0.210526,0.894737,1.263158,14.0
2,0.315789,1.552632,1.447368,11.0,0.473684,1.815789,0.921053,3.0
3,0.135945,0.488698,0.694959,22.0,0.578947,1.789474,0.947368,1.0
4,0.631579,1.868421,1.078947,4.0,0.157895,1.263158,1.631579,17.0
...,...,...,...,...,...,...,...,...
375,0.421053,1.657895,1.315789,8.0,0.080284,0.288608,0.410418,22.0
376,0.631579,1.289474,0.921053,5.0,0.263158,1.026316,1.342105,15.0
377,0.578947,1.552632,1.078947,6.0,0.315789,1.078947,1.447368,9.0
378,0.421053,1.105263,1.368421,12.0,0.210526,0.894737,1.263158,14.0


Train on 304 samples, validate on 76 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Train on 304 samples, validate on 76 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


In [26]:
def make_table_from_predictions(predictions_arr, match_teams, league_season):
    fthg_lst = [] # home team goals
    ftag_lst = [] # away team goals
    ftr_lst = [] # full time result
    
    for score in predictions_arr: 
        fthg_lst.append(score[0])
        ftag_lst.append(score[1])
        
        # decide whether the match is a home win (H), away win(A) or draw(D)
        if score[0] > score[1]:
            ftr_lst.append('H')
        elif score[0] < score[1]:
            ftr_lst.append('A')
        else:
            ftr_lst.append('D')

    match_teams['FTHG'] = fthg_lst
    match_teams['FTAG'] = ftag_lst
    match_teams['FTR'] = ftr_lst
    filename = 'myPredictions_' + league_season + '.csv'
    try:
        match_teams.to_csv(filename)
        file_data = pd.read_csv(filename)
    except:
        # use original predictions if we already have it 
        file_data = pd.read_csv(filename)
    
    # store dataframe 
    data, positions, ordered_away_wins = file_to_pd(file_data)
    # actual final league standings 
    final_table = data.drop(['home win rates', 'away win rates', 'average goals scored', 'average goals conceded'], axis=1)
    final_table['away wins'] = ordered_away_wins
    standings_Predictions = 'standingsPredictions_' + league_season + '.csv'
    final_table['Total Matches'] = final_table[['wins', 'draws', 'losses']].sum(axis=1)
    try:
        # save most recent standings 
        final_table.to_csv(standings_Predictions)
    except:
        # read from the findings already stored
        print("Reading from standingsPredictions already stored \n")
        final_table = pd.read_csv(standings_Predictions)
    print("Predictions for the Final League Standings " + league_season)
    display(final_table)
    print(final_table.shape)

In [43]:
rounded_predictions = []
for score in y_test_predictions:
    rounded_predictions.append([round(num) for num in score])

arr = np.array(rounded_predictions)
make_table_from_predictions(arr, match_results_18_19[['HomeTeam', 'AwayTeam']].copy(), '2018-2019')

Predictions for the Final League Standings 2018-2019


Unnamed: 0_level_0,team,points,wins,draws,losses,goals scored,goals conceded,goal difference,away wins,Total Matches
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Man City,90,26,12,0,69.0,37.0,32.0,12,38
2,Liverpool,82,23,13,2,66.0,44.0,22.0,10,38
3,Tottenham,80,21,17,0,63.0,40.0,23.0,10,38
4,Arsenal,74,21,11,6,63.0,45.0,18.0,9,38
5,Man United,66,14,24,0,51.0,34.0,17.0,11,38
6,Chelsea,66,14,24,0,52.0,36.0,16.0,11,38
7,Leicester,65,16,17,5,63.0,51.0,12.0,9,38
8,Burnley,62,12,26,0,52.0,38.0,14.0,10,38
9,Everton,57,10,27,1,52.0,42.0,10.0,9,38
10,Newcastle,56,11,23,4,48.0,39.0,9.0,9,38


(20, 10)


In [44]:
# out of curiousity, modified the following for the 2019-2020 fixtures
# https://soccersoda.com/soccer-leagues-fixtures/#C4
# to predict the outcome of this seasons standings given the suspension of the league
    # modifications included team names such as 'Man Utd' to Man United
match_fixtures_2019_2020 = 'Modified Premier League Fixtures 2019 2020.xlsx'
match_fixtures = pd.read_excel(match_fixtures_2019_2020)

# train model with 2018-2019 results
x_train = get_x_train(match_results_18_19, input_data_17_18, input_data_17_18.tail(10))
y_train = match_results_18_19.drop(['HomeTeam', 'AwayTeam'], axis =1)

model.fit(x_train, y_train, validation_split=0.2, epochs=30, callbacks=[early_stopping_monitor])

# predict 2019-2020 season
x_test = get_x_train(match_fixtures, input_data_18_19, input_data_18_19.tail(10))
y_test_predictions = model.predict(x_test)

rounded_predictions = []
for score in y_test_predictions:
    rounded_predictions.append([round(num) for num in score])

arr = np.array(rounded_predictions)
make_table_from_predictions(arr, match_fixtures[['HomeTeam', 'AwayTeam']].copy(), '2019-2020')

Train on 304 samples, validate on 76 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Predictions for the Final League Standings 2019-2020


Unnamed: 0_level_0,team,points,wins,draws,losses,goals scored,goals conceded,goal difference,away wins,Total Matches
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Man City,98,30,8,0,68.0,32.0,36.0,11,38
2,Liverpool,91,27,10,1,64.0,34.0,30.0,9,38
3,Tottenham,88,27,7,4,65.0,36.0,29.0,11,38
4,Chelsea,88,27,7,4,65.0,37.0,28.0,11,38
5,Man United,83,25,8,5,63.0,38.0,25.0,8,38
6,Arsenal,83,25,8,5,64.0,42.0,22.0,8,38
7,Everton,64,17,13,8,57.0,45.0,12.0,6,38
8,Leicester,63,17,12,9,54.0,44.0,10.0,6,38
9,Wolves,62,15,17,6,55.0,42.0,13.0,7,38
10,West Ham,61,18,7,13,55.0,47.0,8.0,6,38


(20, 10)
