# Imports

In [90]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math

from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import plot_roc_curve
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score

# Loads Data

In [11]:
df = pd.read_csv(r"League_Result_Data/Encoded_PremierLeague_Stats_From_2014to2021.csv")
df.dropna()
df

Unnamed: 0.1,Unnamed: 0,Season,Season Encoding,Date,YearOfSeason,HomeTeam,HomeTeam Encoding,AwayTeam,AwayTeam Encoding,FTHG,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,0,Fall,0,12/09/2020,2020/21,Fulham,8,Arsenal,0,0.0,...,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0
1,1,Fall,0,12/09/2020,2020/21,Crystal Palace,6,Southampton,21,1.0,...,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0
2,2,Fall,0,12/09/2020,2020/21,Liverpool,13,Leeds,11,4.0,...,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0
3,3,Fall,0,12/09/2020,2020/21,West Ham,28,Newcastle,17,0.0,...,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0
4,4,Fall,0,13/09/2020,2020/21,West Brom,27,Leicester,12,0.0,...,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,2564,Spring,2,24/05/15,2014/15,Everton,7,Tottenham,25,0.0,...,1.0,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0
2565,2565,Spring,2,24/05/15,2014/15,Hull,10,Man United,15,0.0,...,6.0,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0
2566,2566,Spring,2,24/05/15,2014/15,Leicester,12,QPR,19,5.0,...,7.0,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0
2567,2567,Spring,2,24/05/15,2014/15,Man City,14,Southampton,21,2.0,...,6.0,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0


## # Encodes the YearOfSeason values

In [42]:
df = df.dropna()

In [56]:
seasons = set(df['YearOfSeason'].values)
seasons = list(seasons)
seasons

['2020/21', '2014/15', '2015/16', '2019/20', '2018/19', '2016/17', '2017/18']

In [58]:
# re-orders the list 
seasons = ['2014/15', '2015/16', '2016/17', '2017/18', '2018/19', '2019/20', '2020/21']
seasons

['2014/15', '2015/16', '2016/17', '2017/18', '2018/19', '2019/20', '2020/21']

In [62]:
df['YearOfSeason Encoding'] = -9

for index, value in df['YearOfSeason'].iteritems():
    
    # 2014/15: 0
    if value == seasons[0]:
        df.at[index, 'YearOfSeason Encoding'] = 0
    
    # 2015/16: 1
    if value == seasons[1]:
        df.at[index, 'YearOfSeason Encoding'] = 1
        
    # 2016/17: 2
    if value == seasons[2]:
        df.at[index, 'YearOfSeason Encoding'] = 2
    
    # 2017/18: 3
    if value == seasons[3]:
        df.at[index, 'YearOfSeason Encoding'] = 3
        
    # 2018/19: 4
    if value == seasons[4]:
        df.at[index, 'YearOfSeason Encoding'] = 4
        
    # 2019/20: 5
    if value == seasons[5]:
        df.at[index, 'YearOfSeason Encoding'] = 5
    
    # 2020/21: 6
    if value == seasons[6]:
        df.at[index, 'YearOfSeason Encoding'] = 6 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [63]:
df

Unnamed: 0.1,Unnamed: 0,Season,Season Encoding,Date,YearOfSeason,HomeTeam,HomeTeam Encoding,AwayTeam,AwayTeam Encoding,FTHG,...,AST,HF,AF,HC,AC,HY,AY,HR,AR,YearOfSeason Encoding
0,0,Fall,0,12/09/2020,2020/21,Fulham,8,Arsenal,0,0.0,...,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0,6
1,1,Fall,0,12/09/2020,2020/21,Crystal Palace,6,Southampton,21,1.0,...,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0,6
2,2,Fall,0,12/09/2020,2020/21,Liverpool,13,Leeds,11,4.0,...,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0,6
3,3,Fall,0,12/09/2020,2020/21,West Ham,28,Newcastle,17,0.0,...,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0,6
4,4,Fall,0,13/09/2020,2020/21,West Brom,27,Leicester,12,0.0,...,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,2564,Spring,2,24/05/15,2014/15,Everton,7,Tottenham,25,0.0,...,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0,0
2565,2565,Spring,2,24/05/15,2014/15,Hull,10,Man United,15,0.0,...,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0,0
2566,2566,Spring,2,24/05/15,2014/15,Leicester,12,QPR,19,5.0,...,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0,0
2567,2567,Spring,2,24/05/15,2014/15,Man City,14,Southampton,21,2.0,...,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0,0


In [64]:
# Features -- Drops FTR and any categorical value 
X = df.drop(columns=["Season", "Unnamed: 0", "YearOfSeason", "Date", "HomeTeam", "AwayTeam", "Referee", "FTR", "FTR Encoding", "HTR", "Referee"])
# Labels
y = df["FTR Encoding"]

X

Unnamed: 0,Season Encoding,HomeTeam Encoding,AwayTeam Encoding,FTHG,FTAG,HTHG,HTAG,HTR Encoding,Referee Encoding,Fouls Called Per Game,...,AST,HF,AF,HC,AC,HY,AY,HR,AR,YearOfSeason Encoding
0,0,8,0,0.0,3.0,0.0,1.0,1,7,14.0,...,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0,6
1,0,6,21,1.0,0.0,1.0,0.0,0,27,14.0,...,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0,6
2,0,13,11,4.0,3.0,3.0,2.0,0,28,14.0,...,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0,6
3,0,28,17,0.0,2.0,0.0,0.0,2,11,13.0,...,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0,6
4,0,27,12,0.0,3.0,0.0,0.0,2,10,15.0,...,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564,2,7,25,0.0,1.0,0.0,1.0,1,27,14.0,...,3.0,12.0,8.0,3.0,5.0,1.0,2.0,0.0,0.0,0
2565,2,10,15,0.0,0.0,0.0,0.0,2,15,13.0,...,1.0,12.0,15.0,8.0,1.0,2.0,2.0,0.0,1.0,0
2566,2,12,19,5.0,1.0,2.0,0.0,0,28,14.0,...,2.0,7.0,6.0,5.0,6.0,0.0,0.0,0.0,0.0,0
2567,2,14,21,2.0,0.0,1.0,0.0,0,2,14.0,...,4.0,13.0,8.0,8.0,4.0,1.0,1.0,0.0,0.0,0


In [65]:
X.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2414 entries, 0 to 2568
Data columns (total 23 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Season Encoding        2414 non-null   int64  
 1   HomeTeam Encoding      2414 non-null   int64  
 2   AwayTeam Encoding      2414 non-null   int64  
 3   FTHG                   2414 non-null   float64
 4   FTAG                   2414 non-null   float64
 5   HTHG                   2414 non-null   float64
 6   HTAG                   2414 non-null   float64
 7   HTR Encoding           2414 non-null   int64  
 8   Referee Encoding       2414 non-null   int64  
 9   Fouls Called Per Game  2414 non-null   float64
 10  HS                     2414 non-null   float64
 11  AS                     2414 non-null   float64
 12  HST                    2414 non-null   float64
 13  AST                    2414 non-null   float64
 14  HF                     2414 non-null   float64
 15  AF  

In [66]:
"""
Target Varible - Full Time Results
0 - Home Team Win
1 - Away Team Win
2 - Draw
"""

y

0       1
1       0
2       0
3       1
4       1
       ..
2564    1
2565    2
2566    0
2567    0
2568    0
Name: FTR Encoding, Length: 2414, dtype: int64

# Split the Data

In [67]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.5,random_state=0)

# Scale the Data

In [68]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

# Fitting & Evaluating the Model

In [69]:
nn_clf = MLPClassifier(solver='adam', alpha=1e-3,
                     hidden_layer_sizes=(5, 2), random_state=1,max_iter=500)

nn_clf.fit(X_train, y_train)

score = nn_clf.score(X_test, y_test)

print(score)

1.0


In [70]:
results = nn_clf.predict(X_test)

In [71]:
# Precision_Recall_F-Score
p_r_f = precision_recall_fscore_support(y_test, results, average='macro')
p_r_f

(1.0, 1.0, 1.0, None)

In [72]:
test_acc = accuracy_score(y_test, results)
test_acc

1.0

In [73]:
cv_dict = cross_validate(nn_clf, X, y, return_train_score=True)
cv_dict



{'fit_time': array([2.48006201, 2.5050261 , 2.5510006 , 2.51102352, 2.46600008]),
 'score_time': array([0.00200272, 0.0029757 , 0.00202417, 0.00200057, 0.00199986]),
 'test_score': array([1., 1., 1., 1., 1.]),
 'train_score': array([1., 1., 1., 1., 1.])}

In [74]:
# Confusion Matrix

cv_score = cross_val_score(nn_clf, X_train, y_train, cv=3, scoring='accuracy')
print('Accuracy:',cv_score, '\n')

y_train_pred = cross_val_predict(nn_clf, X_train, y_train, cv=3)
print(confusion_matrix(y_train, y_train_pred))



Accuracy: [1. 1. 1.] 





[[531   0   0]
 [  0 379   0]
 [  0   0 297]]


# Evaluating the Model

In [75]:
nn_clf.classes_

array([0, 1, 2], dtype=int64)

In [76]:
nn_clf.loss_

0.008395337348143296

In [77]:
nn_clf.best_loss_

0.008395337348143296

In [78]:
nn_clf.loss_curve_

[1.0863709868438052,
 1.0739473933714714,
 1.0631249837035954,
 1.0534235445064,
 1.04394523661179,
 1.0351982333254366,
 1.026718884378879,
 1.0184189430379051,
 1.0110581660019584,
 1.00358572168779,
 0.9964019361443167,
 0.989192781031318,
 0.9821539985270626,
 0.9749229244223906,
 0.9682767087731224,
 0.961568572766745,
 0.9554491381132322,
 0.9490728795529789,
 0.9422338297156032,
 0.9359285771302532,
 0.9294101159272196,
 0.922658241363273,
 0.9160637548234422,
 0.9098729551971095,
 0.9037521540562998,
 0.8974939313179291,
 0.89151525191363,
 0.8852192819938511,
 0.8786955501063675,
 0.8721505401178197,
 0.865656826675827,
 0.8595391518622358,
 0.8532700145401788,
 0.8474368310698644,
 0.8412654117556361,
 0.8350349664886523,
 0.8284311474068008,
 0.8220574359753814,
 0.8155470067632993,
 0.8085943723990008,
 0.8019018344034571,
 0.7950679629938823,
 0.7882057214281922,
 0.781619136929207,
 0.7750541252314611,
 0.7679305357582622,
 0.7614164196807494,
 0.7544337120801138,
 0.7475

In [79]:
str(nn_clf.t_) + " training instances seen during fitting"

'388654 training instances seen during fitting'

In [80]:
str(nn_clf.n_iter_) + "  iterations"

'322  iterations'

In [81]:
str(nn_clf.n_layers_) + " layers"

'4 layers'

In [82]:
str(nn_clf.n_outputs_) + " outputs"

'3 outputs'

In [83]:
"Output Activation: " + str(nn_clf.out_activation_)

'Output Activation: softmax'

In [84]:
nn_clf.get_params(nn_clf)

{'activation': 'relu',
 'alpha': 0.001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (5, 2),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 500,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': 1,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [85]:
nn_clf.coefs_

[array([[-2.11746406e-02,  1.49346797e-01, -7.90319676e-02,
         -5.01201693e-02, -2.22428996e-01],
        [-1.12483521e-01, -1.01306133e-01, -7.25894546e-03,
         -4.17104731e-02,  6.62760201e-03],
        [ 9.34531088e-02,  5.20489770e-02, -5.92467522e-02,
         -7.46005387e-04, -1.67440244e-01],
        [ 7.76009464e-01,  9.03668056e-02,  1.25967549e+00,
         -1.58342720e+00, -9.07557605e-01],
        [-5.28039040e-01,  2.48117597e-01, -1.34281732e+00,
          1.31355286e+00,  7.84656588e-01],
        [ 1.82990301e-02, -2.89804654e-01, -8.18221168e-02,
         -1.24878080e-01,  5.08653956e-02],
        [-2.56710230e-01, -2.14872614e-01,  1.20254919e-01,
          9.87819719e-02,  1.95879116e-01],
        [-9.56699472e-02,  2.72841368e-02,  9.06005446e-02,
         -5.12231437e-03,  2.01294055e-01],
        [ 1.65212184e-01, -3.15773993e-02, -3.81626560e-02,
          5.15606902e-02, -1.45554331e-01],
        [-8.87651824e-02,  1.70900588e-01, -4.76160875e-02,
    

In [86]:
len(nn_clf.coefs_)

3

In [87]:
len(nn_clf.coefs_[0])

23

In [88]:
len(nn_clf.coefs_[1])

5

In [89]:
len(nn_clf.coefs_[2])

2