In [272]:
import numpy as np
import pandas as pd
import calendar
pd.options.display.max_columns = 99
pd.options.display.max_rows = 2000

## Data-processing

In [273]:
# load dataset
E1415 = pd.read_csv('data/E1415.csv')
E1516 = pd.read_csv('data/E1516.csv')
E1617 = pd.read_csv('data/E1617.csv')
E1718 = pd.read_csv('data/E1718test.csv')

# concat dataset
frames = [E1617,E1718]
data = pd.concat(frames)[['Date','HomeTeam','AwayTeam','FTR','FTHG','FTAG','Referee','B365H','B365D','B365A']]

# remove nan
data.dropna(how='any',axis=0,inplace=True)

# get month cat
data['month'] = pd.DatetimeIndex(data['Date']).month.astype(int)
data['month']= data['month'].apply(lambda x: calendar.month_abbr[x])

In [274]:
data.head()

Unnamed: 0,Date,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,month
0,13/08/16,Burnley,Swansea,A,0,1,J Moss,2.4,3.3,3.25,Aug
1,13/08/16,Crystal Palace,West Brom,A,0,1,C Pawson,2.0,3.3,4.5,Aug
2,13/08/16,Everton,Tottenham,D,1,1,M Atkinson,3.2,3.4,2.4,Aug
3,13/08/16,Hull,Leicester,H,2,1,M Dean,4.5,3.6,1.91,Aug
4,13/08/16,Man City,Sunderland,H,2,1,R Madley,1.25,6.5,15.0,Aug


In [275]:
def data_processing(data):
    df_with_dummies = pd.get_dummies(data, columns = ['HomeTeam','AwayTeam','FTR','Referee','month'])
    df_with_dummies.drop(['Date','FTHG','FTAG'], axis=1, inplace=True)
    return(df_with_dummies)

In [276]:
processed_data=data_processing(data)

In [277]:
processed_data.head()

Unnamed: 0,B365H,B365D,B365A,HomeTeam_Arsenal,HomeTeam_Bournemouth,HomeTeam_Brighton,HomeTeam_Burnley,HomeTeam_Chelsea,HomeTeam_Crystal Palace,HomeTeam_Everton,HomeTeam_Huddersfield,HomeTeam_Hull,HomeTeam_Leicester,HomeTeam_Liverpool,HomeTeam_Man City,HomeTeam_Man United,HomeTeam_Middlesbrough,HomeTeam_Newcastle,HomeTeam_Southampton,HomeTeam_Stoke,HomeTeam_Sunderland,HomeTeam_Swansea,HomeTeam_Tottenham,HomeTeam_Watford,HomeTeam_West Brom,HomeTeam_West Ham,AwayTeam_Arsenal,AwayTeam_Bournemouth,AwayTeam_Brighton,AwayTeam_Burnley,AwayTeam_Chelsea,AwayTeam_Crystal Palace,AwayTeam_Everton,AwayTeam_Huddersfield,AwayTeam_Hull,AwayTeam_Leicester,AwayTeam_Liverpool,AwayTeam_Man City,AwayTeam_Man United,AwayTeam_Middlesbrough,AwayTeam_Newcastle,AwayTeam_Southampton,AwayTeam_Stoke,AwayTeam_Sunderland,AwayTeam_Swansea,AwayTeam_Tottenham,AwayTeam_Watford,AwayTeam_West Brom,AwayTeam_West Ham,FTR_A,FTR_D,FTR_H,Referee_A Marriner,Referee_A Taylor,Referee_C Kavanagh,Referee_C Pawson,Referee_G Scott,Referee_J Moss,Referee_K Friend,Referee_L Mason,Referee_L Probert,Referee_M Atkinson,Referee_M Clattenburg,Referee_M Dean,Referee_M Jones,Referee_M Oliver,Referee_N Swarbrick,Referee_P Tierney,Referee_R East,Referee_R Madley,Referee_S Attwell,Referee_l Mason,month_Apr,month_Aug,month_Dec,month_Feb,month_Jan,month_Jul,month_Jun,month_Mar,month_May,month_Nov,month_Oct,month_Sep
0,2.4,3.3,3.25,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,2.0,3.3,4.5,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,3.2,3.4,2.4,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3,4.5,3.6,1.91,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,1.25,6.5,15.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0


In [278]:
processed_data.shape

(460, 84)

In [279]:
E1718.shape

(80, 65)

In [280]:
data_model = processed_data.head(processed_data.shape[0]-E1718.shape[0])

In [281]:
from sklearn.model_selection import train_test_split
train_set, validation_set = train_test_split(data_model, test_size = 0.3)

y_train = train_set[['FTR_A','FTR_D','FTR_H']]
x_train  = train_set.drop(['FTR_A','FTR_D','FTR_H'],axis=1)
y_validation = validation_set[['FTR_A','FTR_D','FTR_H']]
x_validation  = validation_set.drop(['FTR_A','FTR_D','FTR_H'],axis=1)


In [282]:
from keras.models import Sequential
from keras.layers import Dense

In [283]:
model = Sequential()
model.add(Dense(200, input_dim=x_train.shape[1], activation='relu'))
model.add(Dense(500, activation='relu'))
model.add(Dense(1000, activation='relu'))
model.add(Dense(500, activation='relu'))
model.add(Dense(200, activation='relu'))
model.add(Dense(200, activation='relu'))
model.add(Dense(200, activation='relu'))
model.add(Dense(200, activation='relu'))
model.add(Dense(200, activation='relu'))
model.add(Dense(3,activation='softmax'))

In [284]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [285]:
model.fit(x_train.values,y_train.values, epochs=20, batch_size=10)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x12872b550>

In [286]:
scores = model.evaluate(x_validation.values,y_validation.values)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

acc: 49.12%


In [335]:
data_end = processed_data.tail(80)
data_end = data_end[data_end['HomeTeam_Man City']==1]

In [336]:
data_end.head()

Unnamed: 0,B365H,B365D,B365A,HomeTeam_Arsenal,HomeTeam_Bournemouth,HomeTeam_Brighton,HomeTeam_Burnley,HomeTeam_Chelsea,HomeTeam_Crystal Palace,HomeTeam_Everton,HomeTeam_Huddersfield,HomeTeam_Hull,HomeTeam_Leicester,HomeTeam_Liverpool,HomeTeam_Man City,HomeTeam_Man United,HomeTeam_Middlesbrough,HomeTeam_Newcastle,HomeTeam_Southampton,HomeTeam_Stoke,HomeTeam_Sunderland,HomeTeam_Swansea,HomeTeam_Tottenham,HomeTeam_Watford,HomeTeam_West Brom,HomeTeam_West Ham,AwayTeam_Arsenal,AwayTeam_Bournemouth,AwayTeam_Brighton,AwayTeam_Burnley,AwayTeam_Chelsea,AwayTeam_Crystal Palace,AwayTeam_Everton,AwayTeam_Huddersfield,AwayTeam_Hull,AwayTeam_Leicester,AwayTeam_Liverpool,AwayTeam_Man City,AwayTeam_Man United,AwayTeam_Middlesbrough,AwayTeam_Newcastle,AwayTeam_Southampton,AwayTeam_Stoke,AwayTeam_Sunderland,AwayTeam_Swansea,AwayTeam_Tottenham,AwayTeam_Watford,AwayTeam_West Brom,AwayTeam_West Ham,FTR_A,FTR_D,FTR_H,Referee_A Marriner,Referee_A Taylor,Referee_C Kavanagh,Referee_C Pawson,Referee_G Scott,Referee_J Moss,Referee_K Friend,Referee_L Mason,Referee_L Probert,Referee_M Atkinson,Referee_M Clattenburg,Referee_M Dean,Referee_M Jones,Referee_M Oliver,Referee_N Swarbrick,Referee_P Tierney,Referee_R East,Referee_R Madley,Referee_S Attwell,Referee_l Mason,month_Apr,month_Aug,month_Dec,month_Feb,month_Jan,month_Jul,month_Jun,month_Mar,month_May,month_Nov,month_Oct,month_Sep
19,1.33,5.75,10.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
34,1.85,4.0,4.2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
53,1.14,10.0,21.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
73,1.14,9.5,21.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0


In [337]:
y_test = data_end[['FTR_A','FTR_D','FTR_H']]
x_test  = data_end.drop(['FTR_A','FTR_D','FTR_H'],axis=1)

In [338]:
predictions = pd.DataFrame(model.predict(x_test.values))

In [339]:
scores = model.evaluate(x_test.values,y_test.values)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


acc: 75.00%


In [340]:
predictions.columns=['FTR_A','FTR_D','FTR_H']
predictions.round(2).head(10)

Unnamed: 0,FTR_A,FTR_D,FTR_H
0,0.13,0.54,0.33
1,0.78,0.2,0.02
2,0.04,0.39,0.56
3,0.02,0.27,0.71


In [341]:
y_test.head(10)

Unnamed: 0,FTR_A,FTR_D,FTR_H
19,0,1,0
34,0,0,1
53,0,0,1
73,0,0,1


In [342]:
data[data.HomeTeam=='Man City']

Unnamed: 0,Date,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,month
4,13/08/16,Man City,Sunderland,H,2,1,R Madley,1.25,6.5,15.0,Aug
28,28/08/16,Man City,West Ham,H,3,1,A Marriner,1.25,6.0,11.0,Aug
44,17/09/16,Man City,Bournemouth,H,4,0,J Moss,1.29,6.0,12.0,Sep
74,15/10/16,Man City,Everton,D,1,1,M Oliver,1.5,4.75,7.0,Oct
89,23/10/16,Man City,Southampton,D,1,1,M Clattenburg,1.5,4.5,7.5,Oct
103,05/11/16,Man City,Middlesbrough,D,1,1,K Friend,1.22,7.0,15.0,May
131,03/12/16,Man City,Chelsea,A,1,3,A Taylor,2.15,3.6,3.6,Mar
153,14/12/16,Man City,Watford,H,2,0,K Friend,1.29,6.5,11.0,Dec
167,18/12/16,Man City,Arsenal,H,2,1,M Atkinson,2.15,3.6,3.5,Dec
191,02/01/17,Man City,Burnley,H,2,1,L Mason,1.17,8.5,19.0,Feb
