In [4]:
#!pip install scikit-optimize --user --quiet

In [26]:
import pandas as pd
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import confusion_matrix, mean_squared_error, make_scorer, mean_absolute_error

import numpy as np

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

from sklearn.metrics import classification_report

from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA
from sklearn.manifold import LocallyLinearEmbedding

from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

from sklearn.model_selection import GridSearchCV,train_test_split, cross_val_score, TimeSeriesSplit

from yellowbrick.classifier.rocauc import roc_auc
from yellowbrick.target.feature_correlation import feature_correlation
from yellowbrick.classifier import precision_recall_curve

import warnings

from skopt import BayesSearchCV

pd.set_option('display.precision', 3)

In [27]:
from IPython.display import display, HTML
show_html = lambda html: display(HTML(html))

In [28]:
df = pd.read_csv('../../Data/international_matches_clean.csv')
df.head()

Unnamed: 0,away_team_continent,away_team_fifa_rank,away_team_goalkeeper_score,away_team_mean_defense_score,away_team_mean_midfield_score,away_team_mean_offense_score,away_team_total_fifa_points,home_team_continent,home_team_fifa_rank,home_team_goalkeeper_score,home_team_mean_defense_score,home_team_mean_midfield_score,home_team_mean_offense_score,home_team_total_fifa_points,shoot_out,result
0,0.4,0.349,0.729,0.652,0.707,0.601,0.0,0.4,0.012,0.938,0.842,0.898,0.907,0.0,0.0,0.0
1,0.4,0.032,0.812,0.961,0.977,0.871,0.0,0.4,0.517,0.708,0.536,0.473,0.496,0.0,0.0,0.0
2,0.4,0.397,0.521,0.399,0.374,0.48,0.0,0.4,0.14,0.583,0.664,0.61,0.64,0.0,0.0,3.0
3,0.4,0.212,0.604,0.36,0.612,0.601,0.0,0.4,0.459,0.604,0.315,0.341,0.597,0.0,0.0,-2.0
4,0.4,0.196,0.625,0.622,0.626,0.649,0.0,0.4,0.047,1.0,1.0,0.843,0.982,0.0,0.0,1.0


In [49]:
X = df[df.columns.difference(['result'])]
y = np.array(df[['result']])

In [50]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# **Random Forest**

In [None]:
param = {'n_estimators': [5, 10, 25, 40, 50, 75, 100, 200], 
         'criterion': ['gini', 'entropy'], 
         'max_depth': [None, 1, 2, 3, 5, 8, 9,10,15],
         'min_samples_leaf': [1,2,3,5,10]}

cv = 5
iter = 15

rf = RandomForestClassifier(random_state=0);
rf_bs = BayesSearchCV(rf, param, n_iter=iter, cv=cv, n_jobs=-1, refit=True, random_state=0);
rf_bs.fit(X_train, y_train.squeeze());

In [53]:
show_html(pd.DataFrame(rf_bs.cv_results_).loc[:,['params', 'mean_test_score','rank_test_score']].sort_values(by='rank_test_score').head().to_html())

Unnamed: 0,params,mean_test_score,rank_test_score
9,"{'criterion': 'entropy', 'max_depth': 5, 'min_samples_leaf': 2, 'n_estimators': 100}",0.291,1
12,"{'criterion': 'entropy', 'max_depth': 5, 'min_samples_leaf': 1, 'n_estimators': 100}",0.291,2
14,"{'criterion': 'entropy', 'max_depth': 5, 'min_samples_leaf': 2, 'n_estimators': 75}",0.288,3
10,"{'criterion': 'entropy', 'max_depth': 5, 'min_samples_leaf': 2, 'n_estimators': 200}",0.285,4
8,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 5, 'n_estimators': 200}",0.285,5


In [56]:
print(classification_report(rf_bs.predict(X_test), y_test.squeeze()))

              precision    recall  f1-score   support

        -5.0       0.00      0.00      0.00         0
        -4.0       0.00      0.00      0.00         0
        -3.0       0.00      0.00      0.00         0
        -2.0       0.00      0.00      0.00         0
        -1.0       0.07      0.26      0.11        58
         0.0       0.63      0.30      0.41       749
         1.0       0.45      0.26      0.33       479
         2.0       0.01      0.25      0.01         4
         3.0       0.01      1.00      0.03         1
         4.0       0.00      0.00      0.00         0
         5.0       0.00      0.00      0.00         0
         6.0       0.00      0.00      0.00         0
         7.0       0.00      0.00      0.00         0

    accuracy                           0.28      1291
   macro avg       0.09      0.16      0.07      1291
weighted avg       0.54      0.28      0.36      1291



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# **SVM con kernel RBF**

In [None]:
param = {'C':10**np.linspace(-3,3,101), 'gamma':['scale','auto']}

niter = 15

rbsvc =  SVC(kernel='rbf', max_iter=25000, random_state=0)
rbsvc_gs = BayesSearchCV(rbsvc,param,n_iter=niter, cv=cv, n_jobs=-1, refit=True, random_state=0)
rbsvc_gs.fit(X_train, y_train.squeeze());

In [58]:
show_html(pd.DataFrame(rbsvc_gs.cv_results_).loc[:,['params', 'mean_test_score','rank_test_score']].sort_values(by='rank_test_score').head().to_html())

Unnamed: 0,params,mean_test_score,rank_test_score
13,"{'C': 125.89254117941661, 'gamma': 'auto'}",0.288,1
7,"{'C': 1.1481536214968828, 'gamma': 'scale'}",0.288,2
2,"{'C': 1.513561248436207, 'gamma': 'auto'}",0.286,3
0,"{'C': 1.513561248436207, 'gamma': 'scale'}",0.286,4
10,"{'C': 331.13112148259074, 'gamma': 'auto'}",0.282,5


In [59]:
print(classification_report(rbsvc_gs.predict(X_test), y_test.squeeze()))

              precision    recall  f1-score   support

        -5.0       0.00      0.00      0.00         0
        -4.0       0.00      0.00      0.00         0
        -3.0       0.00      0.00      0.00         0
        -2.0       0.00      0.00      0.00         0
        -1.0       0.07      0.33      0.11        42
         0.0       0.67      0.29      0.41       819
         1.0       0.43      0.28      0.34       430
         2.0       0.00      0.00      0.00         0
         3.0       0.00      0.00      0.00         0
         4.0       0.00      0.00      0.00         0
         5.0       0.00      0.00      0.00         0
         6.0       0.00      0.00      0.00         0
         7.0       0.00      0.00      0.00         0

    accuracy                           0.29      1291
   macro avg       0.09      0.07      0.07      1291
weighted avg       0.57      0.29      0.37      1291



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# **MLP**

In [57]:
sdscaler = StandardScaler()

X_train_sd = sdscaler.fit_transform(X_train)
X_test_sd = sdscaler.transform(X_test)

In [None]:
mlp = MLPClassifier(max_iter=10000, early_stopping=True, n_iter_no_change=15, random_state=0)
print(np.mean(cross_val_score(mlp,X_train,y_train.squeeze(),cv=10)))

In [None]:
param = {'hidden_layer_sizes':[10, 50, 100, 200], 
         'activation':['relu', 'logistic', 'identity'], 
         'learning_rate_init': [0.001, 0.01, 0.1]  }

mlp =  MLPClassifier(max_iter=10000, early_stopping=True, n_iter_no_change=20,learning_rate='adaptive',random_state=0)
mlp_gs =  GridSearchCV(mlp,param,cv=cv, n_jobs=-1, refit=True)
mlp_gs.fit(X_train, y_train);

In [62]:
show_html(pd.DataFrame(mlp_gs.cv_results_).loc[:,['params', 'mean_test_score','rank_test_score']].sort_values(by='rank_test_score').head().to_html())

Unnamed: 0,params,mean_test_score,rank_test_score
5,"{'activation': 'relu', 'hidden_layer_sizes': 50, 'learning_rate_init': 0.1}",0.297,1
4,"{'activation': 'relu', 'hidden_layer_sizes': 50, 'learning_rate_init': 0.01}",0.292,2
26,"{'activation': 'identity', 'hidden_layer_sizes': 10, 'learning_rate_init': 0.1}",0.291,3
27,"{'activation': 'identity', 'hidden_layer_sizes': 50, 'learning_rate_init': 0.001}",0.29,4
9,"{'activation': 'relu', 'hidden_layer_sizes': 200, 'learning_rate_init': 0.001}",0.289,5
