# Imports

In [130]:
import pandas as pd
import matplotlib.pyplot as plt
from ann_visualizer.visualize import ann_viz;

from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score

# Loads Data

In [4]:
df = pd.read_csv(r"League_Result_Data/Encoded_PremierLeague_Stats_From_2014to2021.csv")
df

Unnamed: 0.1,Unnamed: 0,Season,Season Encoding,Date,YearOfSeason,HomeTeam,HomeTeam Encoding,AwayTeam,AwayTeam Encoding,FTHG,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,0,Fall,0,12/09/2020,2020/21,Fulham,8,Arsenal,0,0.0,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,1,Fall,0,12/09/2020,2020/21,Crystal Palace,6,Southampton,21,1.0,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,2,Fall,0,12/09/2020,2020/21,Liverpool,13,Leeds,11,4.0,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,3,Fall,0,12/09/2020,2020/21,West Ham,28,Newcastle,17,0.0,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,4,Fall,0,13/09/2020,2020/21,West Brom,27,Leicester,12,0.0,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,2564,Spring,2,24/05/15,2014/15,Everton,7,Tottenham,25,0.0,...,1.0,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0
2565,2565,Spring,2,24/05/15,2014/15,Hull,10,Man United,15,0.0,...,6.0,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0
2566,2566,Spring,2,24/05/15,2014/15,Leicester,12,QPR,19,5.0,...,7.0,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0
2567,2567,Spring,2,24/05/15,2014/15,Man City,14,Southampton,21,2.0,...,6.0,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0


In [17]:
# Features -- Drops FTR and any categorical value 
X = df.drop(columns=["Season", "YearOfSeason", "Date", "HomeTeam", "AwayTeam", "Referee", "FTR", "FTR Encoding", "HTR", "Referee"])
# Labels
y = df["FTR Encoding"]

X

Unnamed: 0.1,Unnamed: 0,Season Encoding,HomeTeam Encoding,AwayTeam Encoding,FTHG,FTAG,HTHG,HTAG,HTR Encoding,Referee Encoding,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,0,0,8,0,0.0,3.0,0.0,1.0,1,7,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,1,0,6,21,1.0,0.0,1.0,0.0,0,27,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,2,0,13,11,4.0,3.0,3.0,2.0,0,28,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,3,0,28,17,0.0,2.0,0.0,0.0,2,11,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,4,0,27,12,0.0,3.0,0.0,0.0,2,10,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,2564,2,7,25,0.0,1.0,0.0,1.0,1,27,...,1.0,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0
2565,2565,2,10,15,0.0,0.0,0.0,0.0,2,15,...,6.0,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0
2566,2566,2,12,19,5.0,1.0,2.0,0.0,0,28,...,7.0,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0
2567,2567,2,14,21,2.0,0.0,1.0,0.0,0,2,...,6.0,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0


In [18]:
"""
Target Varible - Full Time Results
0 - Home Team Win
1 - Away Team Win
2 - Draw
"""

y

0       1
1       0
2       0
3       1
4       1
       ..
2564    1
2565    2
2566    0
2567    0
2568    0
Name: FTR Encoding, Length: 2569, dtype: int64

# Split the Data

In [312]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.5,random_state=0)

# Scale the Data

In [313]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

# Fitting & Evaluating the Model

In [314]:
nn_clf = MLPClassifier(solver='adam', alpha=1e-3,
                     hidden_layer_sizes=(5, 2), random_state=1,max_iter=400)

nn_clf.fit(X_train, y_train)

score = nn_clf.score(X_test, y_test)

print(score)

1.0


In [315]:
results = nn_clf.predict(X_test)

In [316]:
# Precision_Recall_F-Score
p_r_f = precision_recall_fscore_support(y_test, results, average='macro')
p_r_f

(1.0, 1.0, 1.0, None)

In [317]:
test_acc = accuracy_score(y_test, results)
test_acc

1.0

In [318]:
cv_dict = cross_validate(nn_clf, X, y, return_train_score=True)
cv_dict

{'fit_time': array([0.10399961, 0.09999967, 0.09700084, 0.10799932, 0.10699987]),
 'score_time': array([0.00199986, 0.00200224, 0.00199986, 0.00200152, 0.00199986]),
 'test_score': array([0.44552529, 0.44747082, 0.44747082, 0.44747082, 0.44834308]),
 'train_score': array([0.44768856, 0.44720195, 0.44720195, 0.44720195, 0.44698444])}

In [319]:
# Confusion Matrix

cv_score = cross_val_score(nn_clf, X_train, y_train, cv=3, scoring='accuracy')
print('Accuracy:',cv_score, '\n')

y_train_pred = cross_val_predict(nn_clf, X_train, y_train, cv=3)
print(confusion_matrix(y_train, y_train_pred))

Accuracy: [1. 1. 1.] 

[[587   0   0]
 [  0 395   0]
 [  0   0 302]]


# Evaluating the Model

In [320]:
nn_clf.classes_

array([0, 1, 2], dtype=int64)

In [321]:
nn_clf.loss_

0.0067902240755842586

In [322]:
nn_clf.best_loss_

0.0067902240755842586

In [323]:
nn_clf.loss_curve_

[1.1768594583219123,
 1.1532891043998086,
 1.1315642534298758,
 1.1129522229681554,
 1.0978987726609537,
 1.085180082354743,
 1.075140513582358,
 1.0665721059837414,
 1.059016241826448,
 1.0528383982339773,
 1.0473509797189295,
 1.0422873376421933,
 1.0376962976980655,
 1.0331598552607624,
 1.0289939616095727,
 1.0245579837174867,
 1.0203567969329475,
 1.0162259176791242,
 1.0122506589471627,
 1.007939567263918,
 1.003594557241608,
 0.9990825884284049,
 0.994725152561755,
 0.9899660380730568,
 0.9853508768800978,
 0.9806115025934424,
 0.9758172730880404,
 0.9710888120009656,
 0.9659937986850303,
 0.9605987864925354,
 0.955016095259666,
 0.9489924918042631,
 0.9431215701836667,
 0.9367025991339213,
 0.9299699204384325,
 0.9228105315555487,
 0.9156907266810688,
 0.907982967610881,
 0.9003542098593936,
 0.8925684026247126,
 0.8842785262805075,
 0.8756869402084924,
 0.8665131719921378,
 0.8568534162949533,
 0.8467160795574318,
 0.8360753823479288,
 0.8255439201337715,
 0.814829309328327,
 

In [324]:
str(nn_clf.t_) + " training instances seen during fitting"

'389052 training instances seen during fitting'

In [325]:
str(nn_clf.n_iter_) + "  iterations"

'303  iterations'

In [326]:
str(nn_clf.n_layers_) + " layers"

'4 layers'

In [327]:
str(nn_clf.n_outputs_) + " outputs"

'3 outputs'

In [328]:
"Output Activation: " + str(nn_clf.out_activation_)

'Output Activation: softmax'

In [329]:
nn_clf.get_params(nn_clf)

{'activation': 'relu',
 'alpha': 0.001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (5, 2),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 400,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': 1,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}