<a href="https://colab.research.google.com/github/AkothCarole/PremierLeaguePredictions/blob/master/Predicting_Premier_League_Standings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Import libraries
import pandas as pd
import numpy as np
from scipy.stats import poisson

In [0]:
#Get data from Football data.co.uk

dfList = []
for i in range(5, 19):
    df = pd.read_csv("http://www.football-data.co.uk/mmz4281/{0:02d}{1:02d}/E0.csv".format(i,i+1))
    df = df[['HomeTeam','AwayTeam','FTHG','FTAG']]
    df = df.rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals'})
    dfList.append(df)

    eplData = dfList[0].append(dfList[1:])

In [0]:
# Fetch current standings
eplPointsTable = pd.read_excel('PointsTable.xlsx')[['Team', 'Points']]
eplPointsTable.index = np.arange(1, len(eplPointsTable) + 1)

# Fetch upcoming match fixures
eplMatchesLeft = pd.read_excel('MatchesLeft.xlsx')

In [7]:
'PAST MATCH DATA: {} rows'.format(len(eplData))
eplData.head()
'CURRENT POINTS TABLE'
eplPointsTable
'UPCOMING MATCH FIXTURES: {} rows'.format(len(eplMatchesLeft))
eplMatchesLeft.head()

Unnamed: 0,HomeTeam,AwayTeam
0,Arsenal,Cardiff
1,Bournemouth,Chelsea
2,Fulham,Brighton
3,Huddersfield,Everton
4,Wolves,West Ham


In [9]:
]:
# Calculate Team and League stats

# For each team - Average {HomeScored, HomeConceded, AwayScored, AwayConceded} 
eplHomeTeam = eplData[['HomeTeam', 'HomeGoals', 'AwayGoals']].rename(
    columns={'HomeTeam':'Team', 'HomeGoals':'HomeScored', 'AwayGoals':'HomeConceded'}).groupby(
    ['Team'], as_index=False)[['HomeScored', 'HomeConceded']].mean()

eplAwayTeam = eplData[['AwayTeam', 'HomeGoals', 'AwayGoals']].rename(
    columns={'AwayTeam':'Team', 'HomeGoals':'AwayConceded', 'AwayGoals':'AwayScored'}).groupby(
    ['Team'], as_index=False)[['AwayScored', 'AwayConceded']].mean()

# Overall - Average {leagueHomeScored, leagueHomeConceded, leagueAwayScored, leagueAwayConceded}
leagueHomeScored, leagueHomeConceded = eplHomeTeam['HomeScored'].mean(), eplHomeTeam['HomeConceded'].mean()
leagueAwayScored, leagueAwayConceded = eplAwayTeam['AwayScored'].mean(), eplAwayTeam['AwayConceded'].mean()

eplTeamStrength = pd.merge(eplHomeTeam, eplAwayTeam, on='Team')

assert(leagueHomeScored != 0)
assert(leagueHomeConceded != 0)
assert(leagueAwayScored != 0)
assert(leagueAwayConceded != 0)

# Normalize the parameters 
# For each team - {HomeAttack, HomeDefence, AwayAttack, AwayDefense}
eplTeamStrength['HomeScored'] /= leagueHomeScored
eplTeamStrength['HomeConceded'] /= leagueHomeConceded
eplTeamStrength['AwayScored'] /= leagueAwayScored
eplTeamStrength['AwayConceded'] /= leagueAwayConceded

eplTeamStrength.columns=['Team','HomeAttack','HomeDefense','AwayAttack','AwayDefense']
eplTeamStrength.set_index('Team', inplace=True)

# Overall - {overallHomeScored, overallAwayScored}
overallHomeScored = (leagueHomeScored+leagueAwayConceded)/2
overallAwayScored = (leagueHomeConceded+leagueAwayScored)/2

SyntaxError: ignored

In [0]:
'TEAM STRENGTH: {} rows'.format(len(eplTeamStrength))
eplTeamStrength.head()
'Overall Home scored = {}'.format(overallHomeScored)
'Overall Away scored = {}'.format(overallAwayScored)


In [0]:
# Predict outcome of match and assign points to the teams

def predictMatchScore(home, away):
    if home in eplTeamStrength.index and away in eplTeamStrength.index:
        lambdH = eplTeamStrength.at[home,'HomeAttack'] * eplTeamStrength.at[away,'AwayDefense'] * overallHomeScored
        lambdA = eplTeamStrength.at[away,'AwayAttack'] * eplTeamStrength.at[home,'HomeDefense'] * overallAwayScored
        probH, probA, probT = 0, 0, 0  # Probability of Home win(H), Away win(A) or Tie(T) 
        for X in range(0,11):
            for Y in range(0, 11):
                p = poisson.pmf(X, lambdH) * poisson.pmf(Y, lambdA)
                if X == Y:
                    probT += p
                elif X > Y:
                    probH += p
                else:
                    probA += p
        scoreH = 3 * probH + probT
        scoreA = 3 * probA + probT
        return (scoreH, scoreA)
    else:
        return (0, 0)

In [0]:
#  Simulate the matches to predict final standings
for index, row in eplMatchesLeft.iterrows():
    home, away = row['HomeTeam'], row['AwayTeam']
    assert(home in eplPointsTable.Team.values and away in eplPointsTable.Team.values)
    sH, sA = predictMatchScore(home, away)
    eplPointsTable.loc[eplPointsTable.Team == home, 'Points'] += sH
    eplPointsTable.loc[eplPointsTable.Team == away, 'Points'] += sA

In [0]:
'PREDICTED FINAL STANDINGS'
eplPointsTable = eplPointsTable.sort_values('Points', ascending=False)
eplPointsTable.index = np.arange(1, len(eplPointsTable)+1) 
eplPointsTable.round(2)