# Imports

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
from ann_visualizer.visualize import ann_viz;

from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import plot_roc_curve
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score

# Loads Data

In [4]:
df = pd.read_csv(r"League_Result_Data/Encoded_PremierLeague_Stats_From_2014to2021.csv")
df

Unnamed: 0.1,Unnamed: 0,Season,Season Encoding,Date,YearOfSeason,HomeTeam,HomeTeam Encoding,AwayTeam,AwayTeam Encoding,FTHG,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,0,Fall,0,12/09/2020,2020/21,Fulham,8,Arsenal,0,0.0,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,1,Fall,0,12/09/2020,2020/21,Crystal Palace,6,Southampton,21,1.0,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,2,Fall,0,12/09/2020,2020/21,Liverpool,13,Leeds,11,4.0,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,3,Fall,0,12/09/2020,2020/21,West Ham,28,Newcastle,17,0.0,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,4,Fall,0,13/09/2020,2020/21,West Brom,27,Leicester,12,0.0,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,2564,Spring,2,24/05/15,2014/15,Everton,7,Tottenham,25,0.0,...,1.0,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0
2565,2565,Spring,2,24/05/15,2014/15,Hull,10,Man United,15,0.0,...,6.0,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0
2566,2566,Spring,2,24/05/15,2014/15,Leicester,12,QPR,19,5.0,...,7.0,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0
2567,2567,Spring,2,24/05/15,2014/15,Man City,14,Southampton,21,2.0,...,6.0,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0


In [5]:
# Features -- Drops FTR and any categorical value 
X = df.drop(columns=["Season", "Unnamed: 0", "YearOfSeason", "Date", "FTHG", "FTAG", "HomeTeam", "AwayTeam", "Referee", "FTR", "FTR Encoding", "HTR", "Referee"])
# Labels
y = df["FTR Encoding"]

X

Unnamed: 0,Season Encoding,HomeTeam Encoding,AwayTeam Encoding,HTHG,HTAG,HTR Encoding,Referee Encoding,Fouls Called Per Game,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,0,8,0,0.0,1.0,1,7,14.0,5.0,13.0,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,0,6,21,1.0,0.0,0,27,14.0,5.0,9.0,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,0,13,11,3.0,2.0,0,28,14.0,22.0,6.0,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,0,28,17,0.0,0.0,2,11,13.0,15.0,15.0,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,0,27,12,0.0,0.0,2,10,15.0,7.0,13.0,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,2,7,25,0.0,1.0,1,27,14.0,9.0,16.0,1.0,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0
2565,2,10,15,0.0,0.0,2,15,13.0,16.0,7.0,6.0,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0
2566,2,12,19,2.0,0.0,0,28,14.0,22.0,18.0,7.0,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0
2567,2,14,21,1.0,0.0,0,2,14.0,15.0,13.0,6.0,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0


In [6]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2569 entries, 0 to 2568
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Season Encoding        2569 non-null   int64  
 1   HomeTeam Encoding      2569 non-null   int64  
 2   AwayTeam Encoding      2569 non-null   int64  
 3   HTHG                   2569 non-null   float64
 4   HTAG                   2569 non-null   float64
 5   HTR Encoding           2569 non-null   int64  
 6   Referee Encoding       2569 non-null   int64  
 7   Fouls Called Per Game  2569 non-null   float64
 8   HS                     2569 non-null   float64
 9   AS                     2569 non-null   float64
 10  HST                    2569 non-null   float64
 11  AST                    2569 non-null   float64
 12  HF                     2569 non-null   float64
 13  AF                     2569 non-null   float64
 14  HC                     2569 non-null   float64
 15  AC  

In [7]:
"""
Target Varible - Full Time Results
0 - Home Team Win
1 - Away Team Win
2 - Draw
"""

y

0       1
1       0
2       0
3       1
4       1
       ..
2564    1
2565    2
2566    0
2567    0
2568    0
Name: FTR Encoding, Length: 2569, dtype: int64

# Split the Data

In [8]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.5,random_state=0)

# Scale the Data

In [9]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

# Fitting & Evaluating the Model

In [10]:
nn_clf = MLPClassifier(solver='adam', alpha=1e-3,
                     hidden_layer_sizes=(5, 2), random_state=1,max_iter=500)

nn_clf.fit(X_train, y_train)

score = nn_clf.score(X_test, y_test)

print(score)

0.642023346303502


In [11]:
results = nn_clf.predict(X_test)

In [12]:
# Precision_Recall_F-Score
p_r_f = precision_recall_fscore_support(y_test, results, average='macro')
p_r_f

(0.5989742599854123, 0.6021229259955654, 0.5993298267656001, None)

In [13]:
test_acc = accuracy_score(y_test, results)
test_acc

0.642023346303502

In [14]:
cv_dict = cross_validate(nn_clf, X, y, return_train_score=True)
cv_dict

{'fit_time': array([1.74213862, 2.03204799, 1.83902454, 2.0221889 , 2.03702474]),
 'score_time': array([0.00197458, 0.00097966, 0.00197482, 0.00200009, 0.00200033]),
 'test_score': array([0.64980545, 0.66536965, 0.6692607 , 0.68287938, 0.66276803]),
 'train_score': array([0.66812652, 0.6676399 , 0.66180049, 0.66277372, 0.66731518])}

In [None]:
# Confusion Matrix

cv_score = cross_val_score(nn_clf, X_train, y_train, cv=3, scoring='accuracy')
print('Accuracy:',cv_score, '\n')

y_train_pred = cross_val_predict(nn_clf, X_train, y_train, cv=3)
print(confusion_matrix(y_train, y_train_pred))

# Evaluating the Model

In [None]:
nn_clf.classes_

In [None]:
nn_clf.loss_

In [None]:
nn_clf.best_loss_

In [None]:
nn_clf.loss_curve_

In [None]:
str(nn_clf.t_) + " training instances seen during fitting"

In [None]:
str(nn_clf.n_iter_) + "  iterations"

In [None]:
str(nn_clf.n_layers_) + " layers"

In [None]:
str(nn_clf.n_outputs_) + " outputs"

In [None]:
"Output Activation: " + str(nn_clf.out_activation_)

In [None]:
nn_clf.get_params(nn_clf)

In [None]:
nn_clf.coefs_

In [None]:
len(nn_clf.coefs_)

In [None]:
len(nn_clf.coefs_[0])

In [None]:
len(nn_clf.coefs_[1])

In [None]:
len(nn_clf.coefs_[2])