In [2]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.stats import skellam
import seaborn as sns
%matplotlib inline

import matplotlib.pyplot as plt

In [3]:
# Load the season's results, keep only the columns used below and give them
# short names (home/away team, home/away goals, full-time result).
Pred_Data = (
    pd.read_csv("../Data/2009-2010.csv")
    .loc[:, ['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR']]
    .rename(columns={
        'HomeTeam': 'Home',
        'AwayTeam': 'Away',
        'FTHG': 'HG',
        'FTAG': 'AG',
        'FTR': 'Result',
    })
)
# Observed goal difference from the home side's point of view.
Pred_Data['diff_score'] = Pred_Data['HG'] - Pred_Data['AG']
# Distinct clubs, in value_counts order (most frequent home team first).
Team = Pred_Data['Home'].value_counts().index


In [4]:
# Fraction of matches whose recorded result is 'H'
# (presumably the "always predict a home win" baseline -- confirm intent).
int((Pred_Data['Result'] == 'H').sum()) / len(Pred_Data)

0.48947368421052634

# Useful functions for our predictions


In [5]:
def score_coef(att,dif,delta,Data):
    """Compute the two Skellam rate parameters for every match in ``Data``.

    For each row the home rate is ``exp(delta + att[Home] - dif[Away])`` and
    the away rate is ``exp(att[Away] - dif[Home])``.

    Parameters
    ----------
    att, dif : pandas.Series
        Per-team coefficients indexed by team name (presumably attack and
        defence strengths -- confirm against the fitting notebook).
    delta : float
        Additive term applied to the home side only (home advantage).
    Data : pandas.DataFrame
        Fixture list with ``Home`` and ``Away`` columns. Any index is
        accepted; rows are processed in their stored order.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``mu1`` (home rates) and ``mu2`` (away rates), one entry per row.

    Raises
    ------
    KeyError
        If a team in ``Data`` has no entry in ``att`` or ``dif``.
    """
    # Use the frame that was passed in -- not a notebook-level global -- so the
    # function works for any fixture list and any index.
    home = Data['Home'].to_numpy()
    away = Data['Away'].to_numpy()

    # Label-based lookups for all matches at once; .loc raises on unknown teams.
    att_home = np.asarray(att.loc[home], dtype=float)
    att_away = np.asarray(att.loc[away], dtype=float)
    dif_home = np.asarray(dif.loc[home], dtype=float)
    dif_away = np.asarray(dif.loc[away], dtype=float)

    mu1 = np.exp(delta + att_home - dif_away)
    mu2 = np.exp(att_away - dif_home)
    return mu1, mu2

In [6]:
#Function to calculate the mean for a given match
def calc_score(att_h,dif_h,att_a,dif_a,delta):
    """Return the (home, away) rate pair for a single fixture.

    The home rate is ``exp(delta + att_h - dif_a)`` and the away rate is
    ``exp(att_a - dif_h)``; ``delta`` only enters the home side.
    """
    log_home = delta + att_h - dif_a
    log_away = att_a - dif_h
    return np.exp(log_home), np.exp(log_away)


In [7]:
#Calculate the probability of winning against another team without home coef
def probawin(A,B,k=0):
    """Probability that team ``A`` beats team ``B`` by more than ``k`` goals.

    Parameters
    ----------
    A, B : sequence or pandas.Series
        Two coefficients per team, read by position: element 0 is passed to
        ``calc_score`` as the team's ``att`` value and element 1 as its
        ``dif`` value.
    k : int, default 0
        Goal-difference threshold; the result is ``P(goals_A - goals_B > k)``.

    Returns
    -------
    float
        ``1 - skellam.cdf(k, mu_A, mu_B)`` with ``delta=0`` (no home effect).
    """
    # A Series row carries string labels; integer keys on such a Series only
    # work through pandas' deprecated positional fallback. Convert to a plain
    # array first so element 0 / element 1 are unambiguously positional.
    a = A.to_numpy() if isinstance(A, pd.Series) else A
    b = B.to_numpy() if isinstance(B, pd.Series) else B
    mu_h, mu_a = calc_score(a[0], a[1], b[0], b[1], delta=0)
    return 1 - skellam.cdf(k, mu_h, mu_a)
    

# Prediction Using Metropolis-within-Gibbs results

In [8]:
# Per-team coefficients produced by the MCMC fit, indexed by team name and
# sorted alphabetically.
Coef_club=pd.read_csv('../Data/Coef_MCMC_Skellam.csv')
Coef_club=Coef_club.set_index('Team').sort_index()
Team=Pred_Data.Home.value_counts().index
# Take the first row's value explicitly by position: the index holds team
# names, so a bare [0] would rely on pandas' deprecated positional fallback.
# NOTE(review): assumes 'Home_adv' is the same on every row -- confirm.
delta=Coef_club['Home_adv'].iloc[0]
# Empty ranking table, one row per club; filled in by the ranking cell.
classement_mean=pd.DataFrame({'Team':Team})


In [9]:
# Keep only the two per-team coefficients that probawin reads by position
# (column 0 = 'Sum_Att', column 1 = 'Sum_Dif').
Sum_Coef_club = Coef_club.loc[:, ['Sum_Att', 'Sum_Dif']]
Sum_Coef_club


Unnamed: 0_level_0,Sum_Att,Sum_Dif
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Atalanta,0.022639,-0.156036
Bari,0.1889,-0.196121
Bologna,-0.075426,-0.217236
Cagliari,-0.151764,0.015559
Catania,-0.063157,-0.061736
Chievo,-0.335091,0.0931
Fiorentina,0.160275,0.022506
Genoa,0.011635,-0.082672
Inter,0.292167,0.683262
Juventus,0.196484,0.21234


In [10]:
#Generate the means of the Home and Away teams
mu1, mu2 = score_coef(
    att=Coef_club['Sum_Att'],
    dif=Coef_club['Sum_Dif'],
    delta=delta,
    Data=Pred_Data,
)


In [11]:
#Generate 1000 simulations of Skellam distribution
n_sim = 1000
sim_seed = 0  # fixed seed so Restart & Run All reproduces the same draws
totdif = ["iter" + str(i) for i in range(n_sim)]

# Draw every simulation in one call: row s of sim_draws is simulation s,
# column m is match m (mu1/mu2 broadcast along the first axis).
sim_draws = skellam.rvs(mu1, mu2, size=(n_sim, len(Pred_Data)),
                        random_state=sim_seed)
sim_frame = pd.DataFrame(sim_draws.T, index=Pred_Data.index, columns=totdif)

# Attach all "iterN" columns at once; inserting them one by one fragments the
# frame. Dropping first keeps the cell re-runnable without duplicate columns.
Pred_Data = pd.concat(
    [Pred_Data.drop(columns=totdif, errors='ignore'), sim_frame], axis=1)

# Mean simulated goal difference (home minus away) per match.
avg_dif = sim_frame.mean(axis=1)
Pred_Data['New_pred_diff_score'] = avg_dif

# Decision rule: home win above 0.25, away win below 0, otherwise a draw.
avg_values = avg_dif.to_numpy()
pred_result = np.where(avg_values > 0.25, 'H',
                       np.where(avg_values < 0, 'A', 'D'))
Pred_Data['New_pred_result'] = pred_result

In [12]:
#Count number of well predicted results
# Compare every match: a hard-coded range(379) leaves out the last row of a
# 380-match season, and the literal 380 breaks for any other season length.
mismatch = (Pred_Data['Result'] != Pred_Data['New_pred_result']).to_numpy()
diff_score = mismatch.astype(float)   # 1.0 where the prediction is wrong
wrong_res = int(mismatch.sum())

(len(Pred_Data) - wrong_res) / len(Pred_Data)

0.4921052631578947

In [13]:
#Create a Ranking with the new results obtained
# Points earned from a predicted result, seen from the home and the away side.
home_points = {'H': 3, 'D': 1}
away_points = {'A': 3, 'D': 1}

club_totals = []
for club in Team:
    as_home = Pred_Data.loc[Pred_Data.Home == club, 'New_pred_result']
    as_away = Pred_Data.loc[Pred_Data.Away == club, 'New_pred_result']
    total = sum(home_points.get(res, 0) for res in as_home)
    total += sum(away_points.get(res, 0) for res in as_away)
    club_totals.append(total)

Score = np.array(club_totals, dtype=float)
classement_mean['Classifica'] = Score
classement_mean.sort_values(by='Classifica', ascending=False)

Unnamed: 0,Team,Classifica
18,Inter,114.0
10,Roma,103.0
5,Juventus,100.0
3,Milan,98.0
16,Fiorentina,78.0
0,Palermo,60.0
9,Bari,59.0
6,Sampdoria,58.0
14,Udinese,58.0
19,Napoli,58.0


In [14]:
#Create Matrix with all the odds of beating any other team
# proba_tot[r][c] is probawin(team r, team c); the diagonal is set to 0.
n_clubs = len(Sum_Coef_club)
proba_tot = [
    [
        0 if col == row
        else probawin(Sum_Coef_club.iloc[row], Sum_Coef_club.iloc[col])
        for col in range(n_clubs)
    ]
    for row in range(n_clubs)
]


In [15]:
# Label the probability matrix: rows and columns are the clubs in alphabetical
# order, and the cell (row, col) holds proba_tot[row][col].
ordered = Team.sort_values()
prob = pd.DataFrame(
    np.asarray(proba_tot),
    index=pd.Index(ordered, name='Team'),
    columns=list(ordered),
)

In [16]:
# Colour the probability table with a diverging palette.
prob.style.background_gradient(
    cmap=sns.diverging_palette(h_neg=0, h_pos=243, s=75, l=40, as_cmap=True)
)


Unnamed: 0_level_0,Atalanta,Bari,Bologna,Cagliari,Catania,Chievo,Fiorentina,Genoa,Inter,Juventus,Lazio,Livorno,Milan,Napoli,Palermo,Parma,Roma,Sampdoria,Siena,Udinese
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Atalanta,0.0,0.331421,0.406598,0.347012,0.351415,0.360429,0.273654,0.34036,0.119759,0.218391,0.345224,0.467554,0.213054,0.323786,0.317237,0.414448,0.202286,0.328251,0.407443,0.319546
Bari,0.409018,0.0,0.457968,0.392136,0.397435,0.406329,0.311848,0.385635,0.136302,0.249243,0.390768,0.522304,0.243665,0.367204,0.360379,0.464973,0.231356,0.373365,0.457664,0.362357
Bologna,0.315219,0.285191,0.0,0.304878,0.30755,0.319647,0.234239,0.296321,0.0996141,0.185229,0.301415,0.419212,0.179323,0.281314,0.27422,0.370704,0.169919,0.282669,0.362883,0.277812
Cagliari,0.345536,0.320233,0.386098,0.0,0.334836,0.338652,0.265891,0.325762,0.121556,0.214545,0.329598,0.439111,0.210883,0.310349,0.30545,0.38775,0.20081,0.317013,0.382826,0.306058
Catania,0.356106,0.328638,0.39894,0.340472,0.0,0.351652,0.272322,0.33563,0.122317,0.218783,0.33993,0.455656,0.214374,0.31959,0.313956,0.403211,0.203894,0.325406,0.397371,0.31528
Chievo,0.307659,0.285299,0.344008,0.292433,0.297791,0.0,0.23686,0.289879,0.109378,0.191422,0.293192,0.391571,0.188434,0.276082,0.271913,0.344198,0.17955,0.282402,0.340158,0.272176
Fiorentina,0.453859,0.427168,0.500005,0.430553,0.439006,0.439133,0.0,0.42952,0.168494,0.292169,0.433294,0.556159,0.289283,0.410601,0.406199,0.496035,0.276056,0.422683,0.491659,0.404866
Genoa,0.375711,0.34727,0.420242,0.359159,0.364612,0.370793,0.287913,0.0,0.129148,0.231383,0.358702,0.478725,0.226825,0.337437,0.33163,0.424431,0.215724,0.34383,0.418385,0.33289
Inter,0.632439,0.621916,0.66984,0.590819,0.608357,0.583804,0.541929,0.605672,0.0,0.466097,0.605438,0.703564,0.473273,0.585429,0.588432,0.636972,0.457184,0.615222,0.640218,0.577026
Juventus,0.510502,0.488563,0.554576,0.480822,0.492354,0.484456,0.414465,0.484915,0.207063,0.0,0.487455,0.603922,0.344212,0.465232,0.463112,0.54076,0.329825,0.483256,0.538894,0.458547


In [17]:
# Distribution of the recorded results, for comparison with the predictions.
Pred_Data['Result'].value_counts()

H    186
D    102
A     92
Name: Result, dtype: int64

In [19]:
# Distribution of the results predicted from the 'New_pred_result' column.
Pred_Data['New_pred_result'].value_counts()

H    223
A    102
D     55
Name: New_pred_result, dtype: int64

# Prediction Using MALA results


In [21]:
# Per-team coefficients produced by the MALA fit, indexed by team name and
# sorted alphabetically.
Coef_club1=pd.read_csv('Coef_MALA.csv')
Coef_club1=Coef_club1.set_index('Team').sort_index()
Team=Pred_Data.Home.value_counts().index
# Take the first row's value explicitly by position: the index holds team
# names, so a bare [0] would rely on pandas' deprecated positional fallback.
# NOTE(review): assumes 'Home_Adv' is the same on every row -- confirm.
delta1=Coef_club1['Home_Adv'].iloc[0]
# Fresh ranking table, one row per club; filled in by the ranking cell.
classement_mean=pd.DataFrame({'Team':Team})
Sum_Coef_club1=Coef_club1[['Sum_Att','Sum_Dif']]


In [101]:
# Skellam rate parameters for every match under the MALA coefficients.
mu1 ,mu2=score_coef(Coef_club1['Sum_Att'],Coef_club1['Sum_Dif'],delta1,Pred_Data)

n_sim = 1000
sim_seed = 0  # fixed seed so Restart & Run All reproduces the same draws
totdif_MALA = ["iter" + str(i) for i in range(n_sim)]

# Draw every simulation in one call: row s of sim_draws is simulation s,
# column m is match m (mu1/mu2 broadcast along the first axis).
sim_draws = skellam.rvs(mu1, mu2, size=(n_sim, len(Pred_Data)),
                        random_state=sim_seed)
sim_frame = pd.DataFrame(sim_draws.T, index=Pred_Data.index,
                         columns=totdif_MALA)

# Replace any existing "iterN" columns and attach the new ones in one concat;
# inserting 1000 columns one by one fragments the frame.
Pred_Data = pd.concat(
    [Pred_Data.drop(columns=totdif_MALA, errors='ignore'), sim_frame], axis=1)

# Mean simulated goal difference (home minus away) per match.
avg_dif = sim_frame.mean(axis=1)
Pred_Data['New_pred_diff_score_MALA'] = avg_dif

# Decision rule: home win above 0.25, away win below 0, otherwise a draw.
avg_values = avg_dif.to_numpy()
pred_result1 = np.where(avg_values > 0.25, 'H',
                        np.where(avg_values < 0, 'A', 'D'))
Pred_Data['New_pred_result_MALA'] = pred_result1

# Score every match: a hard-coded range(379) leaves out the last row of a
# 380-match season, and the literal 380 breaks for any other season length.
mismatch = (Pred_Data['Result'] != Pred_Data['New_pred_result_MALA']).to_numpy()
diff_score = mismatch.astype(float)   # 1.0 where the prediction is wrong
wrong_res = int(mismatch.sum())

(len(Pred_Data) - wrong_res) / len(Pred_Data)

0.5052631578947369

In [102]:
# Build the league table implied by the MALA predictions: 3 points for a
# predicted win, 1 for a predicted draw, from each club's own perspective.
home_points = {'H': 3, 'D': 1}
away_points = {'A': 3, 'D': 1}

club_totals = []
for club in Team:
    as_home = Pred_Data.loc[Pred_Data.Home == club, 'New_pred_result_MALA']
    as_away = Pred_Data.loc[Pred_Data.Away == club, 'New_pred_result_MALA']
    total = sum(home_points.get(res, 0) for res in as_home)
    total += sum(away_points.get(res, 0) for res in as_away)
    club_totals.append(total)

Score = np.array(club_totals, dtype=float)
classement_mean['Classifica'] = Score
classement_mean.sort_values(by='Classifica', ascending=False)

Unnamed: 0,Team,Classifica
12,Inter,90.0
3,Roma,82.0
5,Milan,78.0
1,Palermo,62.0
0,Juventus,60.0
10,Fiorentina,59.0
6,Bari,57.0
4,Sampdoria,56.0
2,Udinese,54.0
19,Lazio,53.0


In [47]:
# Distribution of the recorded results (same expression as the earlier
# value_counts cell; repeated here next to the MALA prediction counts).
Pred_Data['Result'].value_counts()

H    186
D    102
A     92
Name: Result, dtype: int64

In [103]:
# Distribution of the results predicted from the MALA coefficients.
Pred_Data['New_pred_result_MALA'].value_counts()

H    292
D     71
A     17
Name: New_pred_result_MALA, dtype: int64

In [22]:
#Create Matrix containing all the probabilities
# proba_tot[r][c] is probawin(team r, team c); the diagonal is set to 0.
n_clubs = len(Sum_Coef_club1)
proba_tot = [
    [
        0 if col == row
        else probawin(Sum_Coef_club1.iloc[row], Sum_Coef_club1.iloc[col])
        for col in range(n_clubs)
    ]
    for row in range(n_clubs)
]

# Rows and columns are the clubs in alphabetical order.
XA = Team.sort_values()
prob = pd.DataFrame(
    np.asarray(proba_tot),
    index=pd.Index(XA, name='Team'),
    columns=list(XA),
)

# Export the colour-graded table to Excel.
prob.style.background_gradient(
    cmap=sns.diverging_palette(h_neg=0, h_pos=243, s=75, l=40, as_cmap=True)
).to_excel("proba_MALA_Skellam.xlsx")
