In [20]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.stats import skellam
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt

In [21]:
#Data of 2009-2010
Pred_Data=pd.read_csv("../Data/2009-2010.csv")
Pred_Data=Pred_Data[['HomeTeam','AwayTeam','FTHG','FTAG','FTR']]
Pred_Data.columns=['Home','Away','HG','AG','Result']
Team=Pred_Data.Home.value_counts().index
Pred_Data['diff_score']=Pred_Data['HG']-Pred_Data['AG']

# Useful function for our predictions
 

In [22]:
#Calculate the means
def score_coef(att,dif,delta,Data):
    mu1=np.array([])
    mu2=np.array([])
    for i in range(len(Pred_Data)):
        m1=np.exp(delta+att.loc[Data.loc[i].Home]-dif.loc[Data.loc[i].Away])
        m2=np.exp(att.loc[Data.loc[i].Away]-dif.loc[Data.loc[i].Home])
        mu1=np.append(mu1,m1)
        mu2=np.append(mu2,m2)
    return mu1, mu2


In [23]:
#Predict the result using the new prediction for the Draw
def prediction(p,lamb1,lamb2):
    a=np.random.rand()
    if a<p:
        return 0
    else : return int(np.mean(skellam.rvs(lamb1,lamb2,1)))

In [24]:
#Function to calculate the mean for a given match
def calc_score(att_h,dif_h,att_a,dif_a,delta):
    m1=np.exp(delta+att_h-dif_a)
    m2=np.exp(att_a-dif_h)
    return m1, m2


In [25]:
#Define the probability of winning against an other team without home coef.
def probawin(A,B,k=0):
    mu_h,mu_a=calc_score(A[0],A[1],B[0],B[1],delta=0)
    return 1-skellam.cdf(k,  mu_h,  mu_a)
    

# Prediction Using Metropolis within Gibbs results


In [26]:
Coef_club=pd.read_csv('../Data/Coef_MCMC_ZPD.csv')
Coef_club=Coef_club.set_index('Team').sort_index()
Team=Pred_Data.Home.value_counts().index
delta=Coef_club['Home_adv'][0]
p=Coef_club['prob'][0]
classement_mean=pd.DataFrame({'Team':Team})


In [52]:
Sum_Coef_club= Coef_club[['Sum_Att','Sum_Dif']]
Sum_Coef_club


Unnamed: 0_level_0,Sum_Att,Sum_Dif
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Atalanta,-0.027717,-0.13189
Bari,0.24704,-0.048888
Bologna,0.006414,-0.303488
Cagliari,-0.313895,0.120504
Catania,-0.209007,-0.019794
Chievo,-0.176559,-0.020045
Fiorentina,0.300688,-0.157895
Genoa,-0.083507,0.039322
Inter,0.130735,0.918435
Juventus,0.341535,-0.01578


In [30]:
#Generate the means of the Home and Away teams 
mu1 ,mu2=score_coef(Coef_club['Sum_Att'],Coef_club['Sum_Dif'],delta,Pred_Data)


In [61]:
totdif= list()
#Create 1000 different new result
for i in range(1):
    dif_pred = "iter"+str(i)
    pred=[]
    for j in range(len(Pred_Data)):
        pred=np.concatenate([pred,[prediction(p,mu1[j],mu2[j])]])  
    Pred_Data[dif_pred]=pred
    totdif.append(dif_pred)

avg_dif=0
for i in totdif:
    avg_dif+=Pred_Data[i]
avg_dif=avg_dif/len(totdif)
Pred_Data['New_pred_diff_score']=avg_dif

#Predict the result averaging the 1000 simulation
pred_result=np.array([])
for i in range (len(Pred_Data)):
    if Pred_Data.iloc[i]['New_pred_diff_score']>0.25:
        pred_result=np.concatenate([pred_result,['H']])
    elif Pred_Data.iloc[i]['New_pred_diff_score']<0:
        pred_result=np.concatenate([pred_result,['A']])
    else : pred_result=np.concatenate([pred_result,['D']])
Pred_Data['New_pred_result']=pred_result



In [63]:
#Count the number of match mispredicted
diff_score=np.array([])
wrong_res=0
for i in range(379):
    if Pred_Data['Result'][i]==Pred_Data['New_pred_result'][i]:
        diff_score=np.append(diff_score,0)
    else:
        diff_score=np.append(diff_score,1)
        wrong_res+=1

(380-wrong_res)/380

0.48947368421052634

In [64]:
Score=np.array([])
for i in Team :
    s=0
    dom=Pred_Data[Pred_Data.Home==i]
    ext=Pred_Data[Pred_Data.Away==i]
    for j in range(len(dom)):
        if dom.iloc[j]['New_pred_result']=='H':
                s+=3
        elif dom.iloc[j]['New_pred_result']=='D':
                s+=1
    for j in range(len(ext)):
        if ext.iloc[j]['New_pred_result']=='A':
                s+=3
        elif ext.iloc[j]['New_pred_result']=='D':
                s+=1   
    Score=np.concatenate([Score,[s]])
classement_mean['Classifica']=Score
#classement=classement.set_index('Team')
classement_mean.sort_values(by='Classifica',ascending=False)

Unnamed: 0,Team,Classifica
16,Inter,71.0
6,Roma,69.0
0,Milan,63.0
11,Bari,63.0
4,Udinese,62.0
17,Lazio,61.0
13,Napoli,60.0
5,Fiorentina,54.0
7,Juventus,54.0
8,Cagliari,54.0


In [65]:
#Create a table containing all the probabilities 
proba_tot=[]
for i in range(len(Sum_Coef_club)):
    A=Sum_Coef_club.iloc[i]
    proba_A=[]
    for j in range(len(Sum_Coef_club)):
        if j !=i:
            B=Sum_Coef_club.iloc[j]
            proba_A.append(probawin(A,B))
        if j==i:
            proba_A.append(0)
    proba_tot.append(proba_A)


In [66]:
prob=pd.DataFrame({'Team':Team})
prob=prob.set_index('Team').sort_index()
ordered=Team.sort_values()
j=0
for i in ordered:
    prob[i]=np.transpose(proba_tot)[j]
    j+=1

In [67]:
prob.style.background_gradient(
    cmap = sns.palettes.diverging_palette(h_neg=0, h_pos=243, s=75, l=40,
                                          as_cmap=True))


Unnamed: 0_level_0,Atalanta,Bari,Bologna,Cagliari,Catania,Chievo,Fiorentina,Genoa,Inter,Juventus,Lazio,Livorno,Milan,Napoli,Palermo,Parma,Roma,Sampdoria,Siena,Udinese
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Atalanta,0.0,0.264247,0.404999,0.337528,0.359571,0.352876,0.281043,0.315346,0.110109,0.232772,0.338934,0.490906,0.21894,0.288452,0.305914,0.411606,0.184105,0.330501,0.407024,0.305386
Bari,0.472531,0.0,0.532406,0.442425,0.471703,0.464759,0.394636,0.422399,0.158196,0.333363,0.460979,0.619814,0.311436,0.389197,0.416611,0.524883,0.263515,0.446261,0.525215,0.413443
Bologna,0.323964,0.230467,0.0,0.314066,0.332922,0.325608,0.244795,0.287082,0.0932495,0.19907,0.303158,0.462644,0.188354,0.26133,0.273958,0.390219,0.157217,0.297881,0.380766,0.275155
Cagliari,0.3215,0.247184,0.366376,0.0,0.321294,0.31612,0.263122,0.285084,0.105642,0.221146,0.311474,0.438328,0.207111,0.261367,0.28044,0.3615,0.175329,0.301778,0.362085,0.278475
Catania,0.324238,0.243629,0.369957,0.305954,0.0,0.321102,0.259034,0.287677,0.102891,0.21573,0.310702,0.447897,0.202794,0.263288,0.280291,0.372493,0.171114,0.302467,0.369961,0.279355
Chievo,0.333796,0.25156,0.380609,0.314713,0.336305,0.0,0.26753,0.296259,0.106279,0.223001,0.320314,0.45957,0.209508,0.271249,0.288934,0.382512,0.176796,0.311654,0.380156,0.287864
Fiorentina,0.465753,0.358691,0.526172,0.43927,0.467466,0.459975,0.0,0.415771,0.150021,0.31996,0.451198,0.61816,0.299272,0.382215,0.407553,0.524872,0.251988,0.437807,0.522554,0.405329
Genoa,0.375423,0.289408,0.426373,0.351032,0.375618,0.369622,0.308103,0.0,0.123438,0.25893,0.363941,0.506463,0.24242,0.306453,0.32812,0.422011,0.205067,0.352737,0.421909,0.325953
Inter,0.599368,0.531533,0.660474,0.536104,0.577385,0.574115,0.56441,0.542034,0.0,0.504813,0.612695,0.716625,0.467611,0.507524,0.556754,0.602947,0.40725,0.5846,0.623688,0.544801
Juventus,0.517696,0.413821,0.580012,0.482994,0.514655,0.507782,0.44042,0.464427,0.179147,0.0,0.508431,0.665564,0.349987,0.429286,0.460416,0.567032,0.297176,0.491443,0.569338,0.456161


In [42]:
Pred_Data['Result'].value_counts()

H    186
D    102
A     92
Name: Result, dtype: int64

In [68]:
Pred_Data['New_pred_result'].value_counts()

H    276
D     68
A     36
Name: New_pred_result, dtype: int64

In [44]:
p


0.006876549776571544