## MultiLayer Perceptron

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


df = pd.read_csv('../../FeatureEngineering/MetaData/data6_&_odds.csv')
df.dropna(inplace=True)

In [20]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import time

import warnings
warnings.filterwarnings('ignore')


In [4]:
train_data = df.loc[(df.season < 2016) & (df.season >= 2007)]
test_data = df.loc[df.season >= 2016]

X, y = train_data.drop(columns=['home_team_wins']), train_data.home_team_wins
test_X, test_y = test_data.drop(columns=['home_team_wins']), test_data.home_team_wins

# Split our data
X_train, y_train = train_data.drop(columns=["game_date_est","season","game_id","home_team","visitor_team","home_team_id","visitor_team_id","home_team_wins","conference","conference_visitor"]), train_data.home_team_wins
X_test, y_test = test_data.drop(columns=["game_date_est","season","game_id","home_team","visitor_team","home_team_id","visitor_team_id","home_team_wins","conference","conference_visitor"]), test_data.home_team_wins



In [5]:
def evaluate(model, X_train, X_test, y_train, y_test):
    y_test_pred = model.predict(X_test)
    y_train_pred = model.predict(X_train)

    print("Training Results: \n===============================")
    clf_report = classification_report(y_train, y_train_pred)
    print(f"Confusion Matrix:\n{confusion_matrix(y_train, y_train_pred)}")
    print(f"Accuracy Score:\n{accuracy_score(y_train, y_train_pred):.4f}")
    print(f"Classification Report:\n{clf_report}")

    print("Testing Results: \n===============================")
    clf_report = classification_report(y_test, y_test_pred)
    print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_test_pred)}")
    print(f"Accuracy Score:\n{accuracy_score(y_test, y_test_pred):.4f}")
    print(f"Classification Report:\n{clf_report}")

### MLP with GridSearchCV

In [5]:
# Multilayer Perceptron
model = MLPClassifier(alpha=1e-05, hidden_layer_sizes=(20, 10, 5), max_iter=150, solver='lbfgs')
model.fit(X_train, y_train)

start_time = time.time()
model.fit(X_train, y_train)

preds = model.predict(X_test)
test_score = model.score(X_test, y_test)

target_names = ['home_loss', 'home_win']

print("Συνολικός χρόνος fit και predict: %s seconds" % (time.time() - start_time))
print(classification_report(y_test, preds, target_names=target_names))

y_fit = model.predict(X_test)
print("test score", test_score)


Συνολικός χρόνος fit και predict: 5.904833793640137 seconds
              precision    recall  f1-score   support

   home_loss       0.60      0.54      0.57      1935
    home_win       0.69      0.74      0.71      2648

    accuracy                           0.66      4583
   macro avg       0.65      0.64      0.64      4583
weighted avg       0.65      0.66      0.65      4583

test score 0.6561204451232817


### MLP Classifier with Univariate Feature Selection

In [6]:
X_train_uni = train_data[['diff_curr_win_pct','diff_curr_away_record','odds_home','odds_away','elo_diff']]
y_train_uni = y_train

X_test_uni = test_data[['diff_curr_win_pct','diff_curr_away_record','odds_home','odds_away','elo_diff']]
y_test_uni = y_test

In [7]:
# Multilayer Perceptron
model = MLPClassifier(alpha=1e-05, hidden_layer_sizes=(20, 10, 5), max_iter=250, solver='lbfgs')
model.fit(X_train_uni, y_train_uni)

MLPClassifier(alpha=1e-05, hidden_layer_sizes=(20, 10, 5), max_iter=250,
              solver='lbfgs')

In [8]:
start_time = time.time()
model.fit(X_train_uni, y_train_uni)

preds = model.predict(X_test_uni)
test_score = model.score(X_test_uni, y_test_uni)

target_names = ['home_loss', 'home_win']

print("Συνολικός χρόνος fit και predict: %s seconds" % (time.time() - start_time))
print(classification_report(y_test_uni, preds, target_names=target_names))

y_fit = model.predict(X_test_uni)

print("test score", test_score)


Συνολικός χρόνος fit και predict: 3.868842124938965 seconds
              precision    recall  f1-score   support

   home_loss       0.63      0.50      0.56      1935
    home_win       0.68      0.78      0.73      2648

    accuracy                           0.66      4583
   macro avg       0.66      0.64      0.64      4583
weighted avg       0.66      0.66      0.66      4583

test score 0.6639755618590443


### MLP Classifier with SelectFromModel(LassoCV)

In [7]:
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV
from sklearn.pipeline import Pipeline

In [8]:
pipe_model = Pipeline([
  ('feature_selection', SelectFromModel(LassoCV())),
  ('classification', MLPClassifier(alpha=1e-05, hidden_layer_sizes=(20, 10, 5), max_iter=150, solver='lbfgs'))
])
pipe_model.fit(X_train, y_train)

Pipeline(steps=[('feature_selection', SelectFromModel(estimator=LassoCV())),
                ('classification',
                 MLPClassifier(alpha=1e-05, hidden_layer_sizes=(20, 10, 5),
                               max_iter=150, solver='lbfgs'))])

In [9]:
# Predict
preds = pipe_model.predict(X_test)
test_score = pipe_model.score(X_test, y_test)

target_names=['home loss', 'home win']

print("Συνολικός χρόνος fit και predict: %s seconds" % (time.time() - start_time))
print(classification_report(y_test, preds, target_names=target_names))

print("test score", test_score)


Συνολικός χρόνος fit και predict: 117.09415102005005 seconds
              precision    recall  f1-score   support

   home loss       0.63      0.50      0.56      1935
    home win       0.68      0.79      0.73      2648

    accuracy                           0.66      4583
   macro avg       0.66      0.64      0.64      4583
weighted avg       0.66      0.66      0.66      4583

test score 0.6646301549203578


### MLP with ExtraTreesClassifier

In [9]:
X_train_extra = train_data[['odds_home', 'odds_away', 'home_elo', 'visitor_elo', 'elo_diff',
                    'eff_diff', 'eff_visitor', 'top_player_diff', 'diff_win_pct_prev_season',
                    'diff_home_record_last_season', 'ROAD_RECORD_home',
                    'diff_road_record_last_season', 'diff_win_pct_7_last_games', 'W_PCT_home',
                    'W_PCT_away', 'W_PCT_prev_away', 'diff_curr_away_record', 'HOME_RECORD_home', 'diff_curr_home_record',
                    'diff_curr_win_pct']]

y_train_extra = y_train


X_test_extra = test_data[['odds_home', 'odds_away', 'home_elo', 'visitor_elo', 'elo_diff',
                    'eff_diff', 'eff_visitor', 'top_player_diff', 'diff_win_pct_prev_season',
                    'diff_home_record_last_season', 'ROAD_RECORD_home',
                    'diff_road_record_last_season', 'diff_win_pct_7_last_games', 'W_PCT_home',
                    'W_PCT_away', 'W_PCT_prev_away', 'diff_curr_away_record', 'HOME_RECORD_home', 'diff_curr_home_record',
                    'diff_curr_win_pct']]

y_test_extra = y_test

In [10]:
# Multilayer Perceptron
model = MLPClassifier(alpha=1e-05, hidden_layer_sizes=(20, 10, 5), max_iter=200, solver='lbfgs')
model.fit(X_train_extra, y_train_extra)


MLPClassifier(alpha=1e-05, hidden_layer_sizes=(20, 10, 5), solver='lbfgs')

In [11]:
start_time = time.time()
model.fit(X_train_extra, y_train_extra)

preds = model.predict(X_test_extra)
test_score = model.score(X_test_extra, y_test_extra)

target_names = ['home_loss', 'home_win']

print("Συνολικός χρόνος fit και predict: %s seconds" % (time.time() - start_time))
print(classification_report(y_test_extra, preds, target_names=target_names))

# y_fit = model.predict(X_test_extra)

print("test score", test_score)


Συνολικός χρόνος fit και predict: 3.268824338912964 seconds
              precision    recall  f1-score   support

   home_loss       0.63      0.49      0.55      1935
    home_win       0.68      0.79      0.73      2648

    accuracy                           0.66      4583
   macro avg       0.66      0.64      0.64      4583
weighted avg       0.66      0.66      0.66      4583

test score 0.6639755618590443


### Reduced Extra Trees dataset

In [13]:
X_train_extra = train_data[['odds_home', 'odds_away', 'home_elo', 'visitor_elo',
                    'eff_diff', 'eff_visitor', 'top_player_diff', 'diff_win_pct_prev_season',
                    'diff_win_pct_7_last_games', 'W_PCT_home',
                    'W_PCT_away']]
y_train_extra = y_train

X_test_extra = test_data[['odds_home', 'odds_away', 'home_elo', 'visitor_elo',
                    'eff_diff', 'eff_visitor', 'top_player_diff', 'diff_win_pct_prev_season',
                    'diff_win_pct_7_last_games', 'W_PCT_home',
                    'W_PCT_away']]
y_test_extra = y_test

In [14]:
from sklearn.preprocessing import StandardScaler

# Scaling features
scaler = StandardScaler()
X_train_standard = scaler.fit_transform(X_train_extra)
X_test_standard = scaler.transform(X_test_extra)

In [15]:
model = MLPClassifier(alpha=1e-05, hidden_layer_sizes=(20, 10, 5), max_iter=100, solver='sgd')
model.fit(X_train_standard, y_train_extra)


MLPClassifier(alpha=1e-05, hidden_layer_sizes=(20, 10, 5), max_iter=100,
              solver='sgd')

In [16]:
start_time = time.time()

preds = model.predict(X_test_standard)
test_score = model.score(X_test_standard, y_test_extra)

target_names = ['home_loss', 'home_win']

print("Συνολικός χρόνος fit και predict: %s seconds" % (time.time() - start_time))
print(classification_report(y_test_extra, preds, target_names=target_names))
print("test score", test_score)


Συνολικός χρόνος fit και predict: 0.008679389953613281 seconds
              precision    recall  f1-score   support

   home_loss       0.64      0.51      0.57      1935
    home_win       0.69      0.79      0.74      2648

    accuracy                           0.67      4583
   macro avg       0.66      0.65      0.65      4583
weighted avg       0.67      0.67      0.67      4583

test score 0.672703469343225


In [21]:
evaluate(model, X_train_standard, X_test_standard, y_train, y_test)

Training Results: 
Confusion Matrix:
[[1971 1824]
 [1027 4498]]
Accuracy Score:
0.6941
Classification Report:
              precision    recall  f1-score   support

           0       0.66      0.52      0.58      3795
           1       0.71      0.81      0.76      5525

    accuracy                           0.69      9320
   macro avg       0.68      0.67      0.67      9320
weighted avg       0.69      0.69      0.69      9320

Testing Results: 
Confusion Matrix:
[[ 991  944]
 [ 556 2092]]
Accuracy Score:
0.6727
Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.51      0.57      1935
           1       0.69      0.79      0.74      2648

    accuracy                           0.67      4583
   macro avg       0.66      0.65      0.65      4583
weighted avg       0.67      0.67      0.67      4583



### Backwards

In [10]:
X_train_back_sfs = train_data[['num_possible_outcomes', 'odds_home', 'odds_away', 'HOME_RECORD_home',
                     'W_PCT_away', 'W_PCT_prev_away', 'HOME_RECORD_prev_away',
                     'ROAD_RECORD_prev_away', 'FT_PCT_home_3g', 'FG3_PCT_home_3g', 'PTS_away_3g',
                     'FG_PCT_away_3g', 'FT_PCT_away_3g', 'FG3_PCT_away_3g', 'PTS_home_7g',
                     'FG_PCT_home_7g', 'AST_home_7g', 'AST_away_7g', 'REB_away_7g',
                     'diff_avg_pts_away', 'diff_avg_ast_home', 'diff_avg_ast_away',
                     'diff_avg_fg3_pct_home', 'top_players', 'top_players_visitor', 'eff_visitor',
                     'G_7days', 'back2back', 'HG_7days_VISITOR', 'AG_7days_VISITOR',
                     'G_7days_VISITOR', 'back2back_visitor', 'home_elo', 'elo_diff',
                     'missing_player_diff', 'eff_diff', 'Home_Last_5_Avg_AST_home',
                     'Home_Last_5_Avg_REB_home', 'Home_Last_5_Avg_REB_away',
                     'Home_Last_5_Avg_FG3_PCT_away', 'Away_Last_5_Avg_PTS_home',
                     'Away_Last_5_Avg_FG3_PCT_home', 'Away_Last_5_Avg_AST_home',
                     'Away_Last_5_Avg_FT_PCT_away', 'diff_fg3_pct_last_3_games',
                     'diff_fg3_pct_last_7_games', 'diff_ft_pct_last_3_games',
                     'diff_ast_last_7_games', 'diff_reb_last_3_games',
                     'diff_win_pct_3_last_games']]

y_train_back_sfs = y_train


X_test_back_sfs = test_data[['num_possible_outcomes', 'odds_home', 'odds_away', 'HOME_RECORD_home',
                     'W_PCT_away', 'W_PCT_prev_away', 'HOME_RECORD_prev_away',
                     'ROAD_RECORD_prev_away', 'FT_PCT_home_3g', 'FG3_PCT_home_3g', 'PTS_away_3g',
                     'FG_PCT_away_3g', 'FT_PCT_away_3g', 'FG3_PCT_away_3g', 'PTS_home_7g',
                     'FG_PCT_home_7g', 'AST_home_7g', 'AST_away_7g', 'REB_away_7g',
                     'diff_avg_pts_away', 'diff_avg_ast_home', 'diff_avg_ast_away',
                     'diff_avg_fg3_pct_home', 'top_players', 'top_players_visitor', 'eff_visitor',
                     'G_7days', 'back2back', 'HG_7days_VISITOR', 'AG_7days_VISITOR',
                     'G_7days_VISITOR', 'back2back_visitor', 'home_elo', 'elo_diff',
                     'missing_player_diff', 'eff_diff', 'Home_Last_5_Avg_AST_home',
                     'Home_Last_5_Avg_REB_home', 'Home_Last_5_Avg_REB_away',
                     'Home_Last_5_Avg_FG3_PCT_away', 'Away_Last_5_Avg_PTS_home',
                     'Away_Last_5_Avg_FG3_PCT_home', 'Away_Last_5_Avg_AST_home',
                     'Away_Last_5_Avg_FT_PCT_away', 'diff_fg3_pct_last_3_games',
                     'diff_fg3_pct_last_7_games', 'diff_ft_pct_last_3_games',
                     'diff_ast_last_7_games', 'diff_reb_last_3_games',
                     'diff_win_pct_3_last_games']]

y_test_back_sfs = y_test

In [15]:
# Multilayer Perceptron
model = MLPClassifier()
model.fit(X_train_back_sfs, y_train_back_sfs)

# defining parameter range
param_grid = {'max_iter': list(range(100, 300, 50)),
              # 'activation':['identity', 'logistic', 'tanh', 'relu'],
              'hidden_layer_sizes': [(20, 10, 5)],
              'solver': ['lbfgs', 'sgd', 'adam'],
              'alpha': [1e-05],
              }


grid = GridSearchCV(model, param_grid, scoring='accuracy', n_jobs=-1)


In [17]:
start_time = time.time()
grid.fit(X_train_back_sfs, y_train_back_sfs)

preds = grid.predict(X_test_back_sfs)
test_score = grid.score(X_test_back_sfs, y_test_back_sfs)

target_names = ['home loss', 'home win']

print("Συνολικός χρόνος fit και predict: %s seconds" % (time.time() - start_time))
print(classification_report(y_test_back_sfs, preds, target_names=target_names))

model = grid.best_estimator_
y_fit = model.predict(X_test_back_sfs)

print(grid.best_params_)
print("best score:", grid.best_score_)
print("test score", test_score)


Συνολικός χρόνος fit και predict: 32.0255286693573 seconds
              precision    recall  f1-score   support

   home loss       0.68      0.37      0.48      1935
    home win       0.65      0.87      0.75      2648

    accuracy                           0.66      4583
   macro avg       0.67      0.62      0.61      4583
weighted avg       0.66      0.66      0.63      4583

val score: 0.6826446280991736
{'alpha': 1e-05, 'hidden_layer_sizes': (20, 10, 5), 'max_iter': 200, 'solver': 'adam'}
best score: 0.6755072463768116
test score 0.6598298058040585
