## SVM Classifier

In [10]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, StratifiedKFold
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import time

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

# use seaborn plotting defaults
import seaborn as sns; sns.set()

import warnings


In [2]:
warnings.filterwarnings('ignore')


In [3]:
df = pd.read_csv('../../FeatureEngineering/MetaData/data6_&_odds.csv')
df.dropna(inplace=True)

In [4]:
train_data = df.loc[(df.season < 2016) & (df.season >= 2007)]
test_data = df.loc[df.season >= 2016]

X, y = train_data.drop(columns=['home_team_wins']), train_data.home_team_wins
test_X, test_y = test_data.drop(columns=['home_team_wins']), test_data.home_team_wins

# Split our data
X_train, y_train = train_data.drop(columns=["game_date_est","season","game_id","home_team","visitor_team","home_team_id","visitor_team_id","home_team_wins","conference","conference_visitor"]), train_data.home_team_wins
X_test, y_test = test_data.drop(columns=["game_date_est","season","game_id","home_team","visitor_team","home_team_id","visitor_team_id","home_team_wins","conference","conference_visitor"]), test_data.home_team_wins



In [5]:
def evaluate(model, X_train, X_test, y_train, y_test):
    y_test_pred = model.predict(X_test)
    y_train_pred = model.predict(X_train)

    print("Training Results: \n===============================")
    clf_report = classification_report(y_train, y_train_pred)
    print(f"Confusion Matrix:\n{confusion_matrix(y_train, y_train_pred)}")
    print(f"Accuracy Score:\n{accuracy_score(y_train, y_train_pred):.4f}")
    print(f"Classification Report:\n{clf_report}")

    print("Testing Results: \n===============================")
    clf_report = classification_report(y_test, y_test_pred)
    print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_test_pred)}")
    print(f"Accuracy Score:\n{accuracy_score(y_test, y_test_pred):.4f}")
    print(f"Classification Report:\n{clf_report}")

### GridSearchCV

In [6]:
# Support vector classifier
model = SVC(C=1, gamma=0.0001, kernel='rbf')
model.fit(X_train, y_train)


start_time = time.time()
model.fit(X_train, y_train)

SVC(C=1, gamma=0.0001)

In [7]:
preds = model.predict(X_test)
test_score = model.score(X_test, y_test)

target_names = ['home_loss', 'home_win']


print("Συνολικός χρόνος fit και predict: %s seconds" % (time.time() - start_time))
print(classification_report(y_test, preds, target_names=target_names))
print("test score", test_score)


Συνολικός χρόνος fit και predict: 85.15830206871033 seconds
              precision    recall  f1-score   support

   home_loss       0.50      0.00      0.01      1935
    home_win       0.58      1.00      0.73      2648

    accuracy                           0.58      4583
   macro avg       0.54      0.50      0.37      4583
weighted avg       0.55      0.58      0.43      4583

test score 0.5777874754527602


### SVM with RFECV

In [6]:
X_train_rcv = train_data[['missing_player_diff','odds_home','odds_away','visitor_elo']]
y_train_rcv = y_train

X_test_rcv = test_data[['missing_player_diff','odds_home','odds_away','visitor_elo']]
y_test_rcv = y_test

In [7]:
# Support vector classifier
model = SVC(C=10, gamma=0.01, kernel='rbf')
model.fit(X_train_rcv, y_train_rcv)

SVC(C=10, gamma=0.01)

In [8]:
start_time = time.time()
model.fit(X_train_rcv, y_train_rcv)

preds = model.predict(X_test_rcv)
test_score = model.score(X_test_rcv, y_test_rcv)
target_names = ['home_loss', 'home_win']


print("Συνολικός χρόνος fit και predict: %s seconds" % (time.time() - start_time))
print(classification_report(y_test_rcv, preds, target_names=target_names))
print("test score", test_score)


Συνολικός χρόνος fit και predict: 14.251168251037598 seconds
              precision    recall  f1-score   support

   home_loss       0.64      0.46      0.54      1935
    home_win       0.67      0.81      0.74      2648

    accuracy                           0.66      4583
   macro avg       0.66      0.64      0.64      4583
weighted avg       0.66      0.66      0.65      4583

test score 0.6637573641719398


In [11]:
evaluate(model, X_train_rcv, X_test_rcv, y_train_rcv, y_test_rcv)

Training Results: 
Confusion Matrix:
[[1878 1917]
 [ 865 4660]]
Accuracy Score:
0.7015
Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.49      0.57      3795
           1       0.71      0.84      0.77      5525

    accuracy                           0.70      9320
   macro avg       0.70      0.67      0.67      9320
weighted avg       0.70      0.70      0.69      9320

Testing Results: 
Confusion Matrix:
[[ 896 1039]
 [ 502 2146]]
Accuracy Score:
0.6638
Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.46      0.54      1935
           1       0.67      0.81      0.74      2648

    accuracy                           0.66      4583
   macro avg       0.66      0.64      0.64      4583
weighted avg       0.66      0.66      0.65      4583



### SVM with Univariate Feature Selection

In [13]:
X_train_uni = train_data[['diff_curr_win_pct','diff_curr_away_record','odds_home','odds_away','elo_diff']]
y_train_uni = y_train


X_test_uni = test_data[['diff_curr_win_pct','diff_curr_away_record','odds_home','odds_away','elo_diff']]
y_test_uni = y_test


In [14]:
svc = SVC(C=1000, gamma=0.0001, kernel='rbf')
svc.fit(X_train_uni, y_train_uni)

SVC(C=1000, gamma=0.0001)

In [16]:
start_time = time.time()

preds = svc.predict(X_test_uni)
test_score = svc.score(X_test_uni, y_test_uni)

target_names = ['home_loss', 'home_win']


print("Συνολικός χρόνος fit και predict: %s seconds" % (time.time() - start_time))
print(classification_report(y_test_uni, preds, target_names=target_names))
print("test score", test_score)


Συνολικός χρόνος fit και predict: 6.803206443786621 seconds
              precision    recall  f1-score   support

   home_loss       0.63      0.50      0.55      1935
    home_win       0.68      0.79      0.73      2648

    accuracy                           0.66      4583
   macro avg       0.66      0.64      0.64      4583
weighted avg       0.66      0.66      0.66      4583

test score 0.6639755618590443


### SVM with SelectFromModel(LassoCV)

In [20]:
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV
from sklearn.pipeline import Pipeline

In [21]:
pipe_model = Pipeline([
  ('feature_selection', SelectFromModel(LassoCV())),
  ('classification', SVC(C=1, gamma=0.0001, kernel='rbf'))
])
pipe_model.fit(X_train, y_train)

Pipeline(steps=[('feature_selection', SelectFromModel(estimator=LassoCV())),
                ('classification', SVC(C=1, gamma=0.0001))])

In [22]:
# Predict
preds = pipe_model.predict(X_test)
test_score = pipe_model.score(X_test, y_test)

target_names=['home loss', 'home win']

print("Συνολικός χρόνος fit και predict: %s seconds" % (time.time() - start_time))
print(classification_report(y_test, preds, target_names=target_names))

print("test score", test_score)


Συνολικός χρόνος fit και predict: 558.7029147148132 seconds
              precision    recall  f1-score   support

   home loss       0.61      0.46      0.53      1935
    home win       0.67      0.79      0.72      2648

    accuracy                           0.65      4583
   macro avg       0.64      0.63      0.63      4583
weighted avg       0.65      0.65      0.64      4583

test score 0.6506655029456688


#### Use lasso_data

In [17]:
X_train_lasso = train_data[['elo_diff', 'odds_away', 'odds_home', 'eff_diff', 'eff_visitor',
              'missing_players', 'top_players', 'FT_PCT_home_7g', 'diff_avg_reb_away',
              'home_elo']]
y_train_lasso = y_train


X_test_lasso = test_data[['elo_diff', 'odds_away', 'odds_home', 'eff_diff', 'eff_visitor',
              'missing_players', 'top_players', 'FT_PCT_home_7g', 'diff_avg_reb_away',
              'home_elo']]
y_test_lasso = y_test

In [18]:
svc = SVC(C=1, gamma=0.0001, kernel='rbf')
svc.fit(X_train_lasso, y_train_lasso)

SVC(C=1, gamma=0.0001)

In [19]:
start_time = time.time()

preds = svc.predict(X_test_lasso)
test_score = svc.score(X_test_lasso, y_test_lasso)

target_names = ['home loss', 'home win']

print("Συνολικός χρόνος fit και predict: %s seconds" % (time.time() - start_time))
print(classification_report(y_test_lasso, preds, target_names=target_names))
print("test score", test_score)

Συνολικός χρόνος fit και predict: 7.458824157714844 seconds
              precision    recall  f1-score   support

   home loss       0.62      0.49      0.55      1935
    home win       0.68      0.78      0.72      2648

    accuracy                           0.66      4583
   macro avg       0.65      0.63      0.63      4583
weighted avg       0.65      0.66      0.65      4583

test score 0.6556840497490727
