# Imports

In [37]:
import pandas as pd
import matplotlib.pyplot as plt
from ann_visualizer.visualize import ann_viz;

from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import plot_roc_curve
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score

# Loads Data

In [8]:
df = pd.read_csv(r"League_Result_Data/Encoded_PremierLeague_Stats_From_2014to2021.csv")
df

Unnamed: 0.1,Unnamed: 0,Season,Season Encoding,Date,YearOfSeason,HomeTeam,HomeTeam Encoding,AwayTeam,AwayTeam Encoding,FTHG,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,0,Fall,0,12/09/2020,2020/21,Fulham,8,Arsenal,0,0.0,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,1,Fall,0,12/09/2020,2020/21,Crystal Palace,6,Southampton,21,1.0,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,2,Fall,0,12/09/2020,2020/21,Liverpool,13,Leeds,11,4.0,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,3,Fall,0,12/09/2020,2020/21,West Ham,28,Newcastle,17,0.0,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,4,Fall,0,13/09/2020,2020/21,West Brom,27,Leicester,12,0.0,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,2564,Spring,2,24/05/15,2014/15,Everton,7,Tottenham,25,0.0,...,1.0,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0
2565,2565,Spring,2,24/05/15,2014/15,Hull,10,Man United,15,0.0,...,6.0,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0
2566,2566,Spring,2,24/05/15,2014/15,Leicester,12,QPR,19,5.0,...,7.0,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0
2567,2567,Spring,2,24/05/15,2014/15,Man City,14,Southampton,21,2.0,...,6.0,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0


In [9]:
# Features -- Drops FTR and any categorical value 
X = df.drop(columns=["Season", "Unnamed: 0", "YearOfSeason", "Date", "HomeTeam", "AwayTeam", "Referee", "FTR", "FTR Encoding", "HTR", "Referee"])
# Labels
y = df["FTR Encoding"]

X

Unnamed: 0,Season Encoding,HomeTeam Encoding,AwayTeam Encoding,FTHG,FTAG,HTHG,HTAG,HTR Encoding,Referee Encoding,Fouls Called Per Game,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,0,8,0,0.0,3.0,0.0,1.0,1,7,14.0,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,0,6,21,1.0,0.0,1.0,0.0,0,27,14.0,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,0,13,11,4.0,3.0,3.0,2.0,0,28,14.0,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,0,28,17,0.0,2.0,0.0,0.0,2,11,13.0,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,0,27,12,0.0,3.0,0.0,0.0,2,10,15.0,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,2,7,25,0.0,1.0,0.0,1.0,1,27,14.0,...,1.0,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0
2565,2,10,15,0.0,0.0,0.0,0.0,2,15,13.0,...,6.0,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0
2566,2,12,19,5.0,1.0,2.0,0.0,0,28,14.0,...,7.0,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0
2567,2,14,21,2.0,0.0,1.0,0.0,0,2,14.0,...,6.0,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0


In [10]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2569 entries, 0 to 2568
Data columns (total 22 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Season Encoding        2569 non-null   int64  
 1   HomeTeam Encoding      2569 non-null   int64  
 2   AwayTeam Encoding      2569 non-null   int64  
 3   FTHG                   2569 non-null   float64
 4   FTAG                   2569 non-null   float64
 5   HTHG                   2569 non-null   float64
 6   HTAG                   2569 non-null   float64
 7   HTR Encoding           2569 non-null   int64  
 8   Referee Encoding       2569 non-null   int64  
 9   Fouls Called Per Game  2569 non-null   float64
 10  HS                     2569 non-null   float64
 11  AS                     2569 non-null   float64
 12  HST                    2569 non-null   float64
 13  AST                    2569 non-null   float64
 14  HF                     2569 non-null   float64
 15  AF  

In [11]:
"""
Target Varible - Full Time Results
0 - Home Team Win
1 - Away Team Win
2 - Draw
"""

y

0       1
1       0
2       0
3       1
4       1
       ..
2564    1
2565    2
2566    0
2567    0
2568    0
Name: FTR Encoding, Length: 2569, dtype: int64

# Split the Data

In [12]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.5,random_state=0)

# Scale the Data

In [13]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

# Fitting & Evaluating the Model

In [14]:
nn_clf = MLPClassifier(solver='adam', alpha=1e-3,
                     hidden_layer_sizes=(5, 2), random_state=1,max_iter=500)

nn_clf.fit(X_train, y_train)

score = nn_clf.score(X_test, y_test)

print(score)

1.0


In [15]:
results = nn_clf.predict(X_test)

In [16]:
# Precision_Recall_F-Score
p_r_f = precision_recall_fscore_support(y_test, results, average='macro')
p_r_f

(1.0, 1.0, 1.0, None)

In [17]:
test_acc = accuracy_score(y_test, results)
test_acc

1.0

In [18]:
cv_dict = cross_validate(nn_clf, X, y, return_train_score=True)
cv_dict

{'fit_time': array([1.40206575, 1.51199889, 1.65700078, 1.47799993, 1.99299979]),
 'score_time': array([0.0019989 , 0.0020349 , 0.00200295, 0.00200105, 0.00100088]),
 'test_score': array([1., 1., 1., 1., 1.]),
 'train_score': array([1., 1., 1., 1., 1.])}

In [19]:
# Confusion Matrix

cv_score = cross_val_score(nn_clf, X_train, y_train, cv=3, scoring='accuracy')
print('Accuracy:',cv_score, '\n')

y_train_pred = cross_val_predict(nn_clf, X_train, y_train, cv=3)
print(confusion_matrix(y_train, y_train_pred))



Accuracy: [1.         0.99766355 0.99766355] 





[[586   1   0]
 [  0 394   1]
 [  0   0 302]]




# Evaluating the Model

In [20]:
nn_clf.classes_

array([0, 1, 2], dtype=int64)

In [21]:
nn_clf.loss_

0.017720121022525716

In [22]:
nn_clf.best_loss_

0.017720121022525716

In [23]:
nn_clf.loss_curve_

[1.4343579466087308,
 1.4179859631072311,
 1.400438074634753,
 1.3827040774811414,
 1.3633333757614563,
 1.3428330577915628,
 1.321610768857512,
 1.2993307833035244,
 1.2758962705423083,
 1.2526235284897858,
 1.2292182735266974,
 1.206019798940905,
 1.1822760320898824,
 1.1596437307634273,
 1.1369791560874711,
 1.1150016637289397,
 1.0944484293499557,
 1.0744049128779598,
 1.0545376427496675,
 1.0361554518858755,
 1.0178203810040882,
 1.0005579516472403,
 0.9843018329412062,
 0.9692043413653295,
 0.9547239921826473,
 0.9408630222364794,
 0.9275623344771496,
 0.914812267771263,
 0.9029640993475472,
 0.8908532474165625,
 0.878743586970066,
 0.8664649133420286,
 0.8549892528894637,
 0.8428002851574717,
 0.8319400465729254,
 0.8206288723223978,
 0.8103871798349702,
 0.8009660425733247,
 0.7915694030127332,
 0.7830482640662635,
 0.7743361123347473,
 0.7664228141632119,
 0.758625737744606,
 0.7510250785003418,
 0.7439260234377096,
 0.7372789660530313,
 0.730802910155639,
 0.7242299563185451,

In [24]:
str(nn_clf.t_) + " training instances seen during fitting"

'589356 training instances seen during fitting'

In [25]:
str(nn_clf.n_iter_) + "  iterations"

'459  iterations'

In [26]:
str(nn_clf.n_layers_) + " layers"

'4 layers'

In [27]:
str(nn_clf.n_outputs_) + " outputs"

'3 outputs'

In [28]:
"Output Activation: " + str(nn_clf.out_activation_)

'Output Activation: softmax'

In [29]:
nn_clf.get_params(nn_clf)

{'activation': 'relu',
 'alpha': 0.001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (5, 2),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 500,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': 1,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [30]:
nn_clf.coefs_

[array([[-6.53206485e-02, -1.68373067e-02, -3.50208073e-03,
         -1.82561190e-02, -2.08281952e-02],
        [ 4.59039243e-02, -8.10120174e-03,  1.37616877e-03,
         -5.98283569e-04, -1.50434816e-02],
        [-7.05373811e-02,  3.61230782e-03,  1.71137995e-02,
         -9.32769583e-03, -1.17250678e-02],
        [ 7.50707096e-01,  8.64651143e-01, -8.36582061e-01,
         -1.38690937e+00, -1.23775878e+00],
        [-6.29245501e-01, -6.77240440e-01,  5.47564707e-01,
          1.40649944e+00,  1.20528243e+00],
        [ 5.64478330e-01, -1.23247020e-01, -1.81266101e-01,
          6.74669344e-02, -4.98940015e-02],
        [-6.52215050e-01, -6.63274767e-02,  3.51380069e-01,
         -2.54680482e-01, -1.69544248e-02],
        [-1.67417164e-01,  1.98901033e-01,  2.95230264e-01,
         -5.87393536e-02,  6.73206735e-02],
        [ 8.30234065e-02, -1.21966822e-02,  9.38871061e-03,
          5.46158354e-03, -1.47320521e-02],
        [-7.60505358e-02,  1.95925713e-03, -1.86996300e-02,
    

In [31]:
len(nn_clf.coefs_)

3

In [32]:
len(nn_clf.coefs_[0])

22

In [33]:
len(nn_clf.coefs_[1])

5

In [34]:
len(nn_clf.coefs_[2])

2