# Gaussian Naive Bayes Classifier

In [3]:
import pandas as pd
# Load the engineered game/odds table and discard every row that has a missing value.
df = pd.read_csv('../FeatureEngineering/MetaData/data6_&_odds.csv').dropna()

In [4]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation/numerical warnings from the libraries used below.
warnings.simplefilter('ignore')

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report

In [6]:
# Columns removed from the model inputs (this includes the target, home_team_wins).
NON_FEATURE_COLUMNS = [
    "game_date_est", "season", "game_id", "home_team", "visitor_team",
    "home_team_id", "visitor_team_id", "home_team_wins",
    "conference", "conference_visitor",
]


def _split_features_target(frame):
    """Return (feature frame, home_team_wins target) for one season split."""
    return frame.drop(columns=NON_FEATURE_COLUMNS), frame.home_team_wins


# Chronological split by season: 2007-2013 train, 2014-2015 validation, 2016+ test.
season = df.season
train_data = df.loc[season.between(2007, 2013)]
valid_data = df.loc[(season > 2013) & (season < 2016)]
test_data = df.loc[season >= 2016]
# Train and validation rows stacked together.
full_train_data = pd.concat([train_data, valid_data], axis=0)

X_train, y_train = _split_features_target(train_data)
X_val, y_val = _split_features_target(valid_data)
X_test, y_test = _split_features_target(test_data)



### GNB with Low Correlation Data

In [6]:
# Feature subset for the "low correlation" experiment; the same columns are
# taken from the training and the validation frame.
LOW_CORR_FEATURES = [
    'HG_7days', 'AG_7days', 'back2back',
    'HG_7days_VISITOR', 'AG_7days_VISITOR', 'back2back_visitor',
    'home_elo', 'visitor_elo', 'odds_home', 'odds_away', 'elo_diff',
    'diff_avg_pts_home', 'diff_avg_pts_away',
    'diff_avg_fg3_pct_away', 'diff_avg_reb_home',
]

X_train_cor = train_data[LOW_CORR_FEATURES]
y_train_cor = y_train

X_val_cor = valid_data[LOW_CORR_FEATURES]
y_val_cor = y_val

# Start a fresh results dictionary (classifier name -> accuracy).
win_accuracy = {}

In [7]:
# Train Gaussian Naive Bayes: fitting computes the mean and variance of every
# feature for each class on the training set.
gnb = GaussianNB()
model = gnb.fit(X_train_cor, y_train_cor)

# GaussianNB has a built-in accuracy method; store its value in the results
# dictionary so it can be listed alongside other classifiers.
win_accuracy['gaussian naive bayes'] = gnb.score(X_val_cor, y_val_cor)

# NOTE(review): the header text says "40% test set", but the score above is
# computed on X_val_cor / y_val_cor (the validation split).
print("Classification Accuracy on the NBA Games Dataset (40% test set)\n")

# Print the stored results again, highest accuracy first.
sorted_accuracy = sorted(win_accuracy.items(), key=lambda entry: entry[1], reverse=True)
for clf_name, clf_accuracy in sorted_accuracy:
    print(clf_name, clf_accuracy)

Classification Accuracy on the NBA Games Dataset (40% test set)

gaussian naive bayes 0.6925619834710743


In [8]:
# Predict the outcome of every validation game with the fitted model.
y_pred = gnb.predict(X_val_cor)
target_names = ['home_loss', 'home_win']

# Compare the true validation labels (y_val_cor) with the predictions (y_pred).
# NOTE(review): the label says "in %", but the printed value is a fraction (e.g. 0.69).
val_accuracy = accuracy_score(y_val_cor, y_pred)
print("Gaussian Naive Bayes model accuracy(in %):", val_accuracy)
print("These are the results for the model\n")
val_report = classification_report(y_val_cor, y_pred, target_names=target_names)
print(val_report)

Gaussian Naive Bayes model accuracy(in %): 0.6925619834710743
These are the results for the model

              precision    recall  f1-score   support

   home_loss       0.63      0.65      0.64      1011
    home_win       0.74      0.72      0.73      1409

    accuracy                           0.69      2420
   macro avg       0.68      0.69      0.69      2420
weighted avg       0.69      0.69      0.69      2420



### GNB with Univariate Statistical Tests for Feature Selection

Μετά από εφαρμογή των tests σε ξεχωριστό αρχείο με όνομα `univariate_selection`, καταλήξαμε μέσω grid search και χρήσης της `SelectKBest` στα εξής αποτελέσματα: 5 features με συνάρτηση `mutual_info_classif`.

In [9]:
# The five columns used for the univariate-selection experiment, applied to
# both the training and the validation frame.
UNIVARIATE_FEATURES = [
    'diff_curr_win_pct',
    'diff_curr_away_record',
    'odds_home',
    'odds_away',
    'elo_diff',
]

X_train_uni = train_data[UNIVARIATE_FEATURES]
y_train_uni = y_train

X_val_uni = valid_data[UNIVARIATE_FEATURES]
y_val_uni = y_val

# Start a fresh results dictionary (classifier name -> accuracy).
win_accuracy = {}

In [10]:
# Train Gaussian Naive Bayes: fitting computes the mean and variance of every
# feature for each class on the training set.
gnb = GaussianNB()
model = gnb.fit(X_train_uni, y_train_uni)

# GaussianNB has a built-in accuracy method; store its value in the results
# dictionary so it can be listed alongside other classifiers.
win_accuracy['gaussian naive bayes'] = gnb.score(X_val_uni, y_val_uni)

# NOTE(review): the header text says "40% test set", but the score above is
# computed on X_val_uni / y_val_uni (the validation split).
print("Classification Accuracy on the NBA Games Dataset (40% test set)\n")

# Print the stored results again, highest accuracy first.
sorted_accuracy = sorted(win_accuracy.items(), key=lambda entry: entry[1], reverse=True)
for clf_name, clf_accuracy in sorted_accuracy:
    print(clf_name, clf_accuracy)

Classification Accuracy on the NBA Games Dataset (40% test set)

gaussian naive bayes 0.8198347107438017


In [11]:
# Predict the outcome of every validation game with the fitted model.
y_pred = gnb.predict(X_val_uni)
target_names = ['home_loss', 'home_win']

# Compare the true validation labels (y_val_uni) with the predictions (y_pred).
# NOTE(review): the label says "in %", but the printed value is a fraction (e.g. 0.82).
val_accuracy = accuracy_score(y_val_uni, y_pred)
print("Gaussian Naive Bayes model accuracy(in %):", val_accuracy)
print("These are the results for the model\n")
val_report = classification_report(y_val_uni, y_pred, target_names=target_names)
print(val_report)

Gaussian Naive Bayes model accuracy(in %): 0.8198347107438017
These are the results for the model

              precision    recall  f1-score   support

   home_loss       0.79      0.78      0.78      1011
    home_win       0.84      0.85      0.85      1409

    accuracy                           0.82      2420
   macro avg       0.82      0.81      0.81      2420
weighted avg       0.82      0.82      0.82      2420



### GNB with ExtraTreesClassifier

#### Scaled data

In [12]:
# Feature subset for the ExtraTreesClassifier-based experiment; the same
# columns are taken from the training and the validation frame.
# NOTE(review): the section heading says "Scaled data", but no scaling is
# applied in this cell - confirm whether the frames were meant to be scaled.
# NOTE(review): score_home / score_away may be final game scores, which would
# leak the outcome into the features - confirm what these columns hold.
EXTRA_TREES_FEATURES = [
    'score_home', 'score_away', 'odds_home', 'odds_away',
    'home_elo', 'visitor_elo', 'elo_diff', 'eff_diff', 'eff_visitor',
    'diff_home_record_last_season', 'ROAD_RECORD_away', 'ROAD_RECORD_home',
    'diff_road_record_last_season', 'diff_win_pct_prev_season', 'W_PCT_home',
    'W_PCT_away', 'diff_curr_away_record', 'HOME_RECORD_home',
    'diff_curr_home_record', 'diff_curr_win_pct',
]

X_train_extra = train_data[EXTRA_TREES_FEATURES]
y_train_extra = y_train

X_val_extra = valid_data[EXTRA_TREES_FEATURES]
y_val_extra = y_val

# Start a fresh results dictionary (classifier name -> accuracy).
win_accuracy = {}

In [13]:
# Train Gaussian Naive Bayes: fitting computes the mean and variance of every
# feature for each class on the training set.
gnb = GaussianNB()
model = gnb.fit(X_train_extra, y_train_extra)

# GaussianNB has a built-in accuracy method; store its value in the results
# dictionary so it can be listed alongside other classifiers.
win_accuracy['gaussian naive bayes'] = gnb.score(X_val_extra, y_val_extra)

# NOTE(review): the header text says "40% test set", but the score above is
# computed on X_val_extra / y_val_extra (the validation split).
print("Classification Accuracy on the NBA Games Dataset (40% test set)\n")

# Print the stored results again, highest accuracy first.
sorted_accuracy = sorted(win_accuracy.items(), key=lambda entry: entry[1], reverse=True)
for clf_name, clf_accuracy in sorted_accuracy:
    print(clf_name, clf_accuracy)

Classification Accuracy on the NBA Games Dataset (40% test set)

gaussian naive bayes 0.7256198347107438


In [14]:
# Predict the outcome of every validation game with the fitted model.
y_pred = gnb.predict(X_val_extra)
target_names = ['home_loss', 'home_win']

# Compare the true validation labels (y_val_extra) with the predictions (y_pred).
# NOTE(review): the label says "in %", but the printed value is a fraction (e.g. 0.73).
val_accuracy = accuracy_score(y_val_extra, y_pred)
print("Gaussian Naive Bayes model accuracy(in %):", val_accuracy)
print("These are the results for the model\n")
val_report = classification_report(y_val_extra, y_pred, target_names=target_names)
print(val_report)

Gaussian Naive Bayes model accuracy(in %): 0.7256198347107438
These are the results for the model

              precision    recall  f1-score   support

   home_loss       0.66      0.72      0.69      1011
    home_win       0.78      0.73      0.76      1409

    accuracy                           0.73      2420
   macro avg       0.72      0.72      0.72      2420
weighted avg       0.73      0.73      0.73      2420



#### Initial data (unchanged)

In [16]:
# Second ExtraTreesClassifier-based feature subset ("initial data" variant).
# This rebinds X_train_extra / X_val_extra, replacing the frames built for the
# previous experiment.
EXTRA_TREES_INITIAL_FEATURES = [
    'home_elo', 'visitor_elo', 'elo_diff', 'eff_diff', 'eff_visitor',
    'diff_win_pct_7_last_games', 'top_player_diff',
    'diff_home_record_last_season', 'HOME_RECORD_away', 'ROAD_RECORD_home',
    'diff_road_record_last_season', 'diff_win_pct_prev_season', 'W_PCT_home',
    'W_PCT_away', 'diff_curr_away_record', 'HOME_RECORD_home',
    'diff_curr_home_record', 'diff_curr_win_pct',
    'WIN_PRCT_away_7g', 'WIN_PRCT_home_7g',
]

X_train_extra = train_data[EXTRA_TREES_INITIAL_FEATURES]
y_train_extra = y_train

X_val_extra = valid_data[EXTRA_TREES_INITIAL_FEATURES]
y_val_extra = y_val

# Start a fresh results dictionary (classifier name -> accuracy).
win_accuracy = {}

In [17]:
# Train Gaussian Naive Bayes: fitting computes the mean and variance of every
# feature for each class on the training set.
gnb = GaussianNB()
model = gnb.fit(X_train_extra, y_train_extra)

# GaussianNB has a built-in accuracy method; store its value in the results
# dictionary so it can be listed alongside other classifiers.
win_accuracy['gaussian naive bayes'] = gnb.score(X_val_extra, y_val_extra)

# NOTE(review): the header text says "40% test set", but the score above is
# computed on X_val_extra / y_val_extra (the validation split).
print("Classification Accuracy on the NBA Games Dataset (40% test set)\n")

# Print the stored results again, highest accuracy first.
sorted_accuracy = sorted(win_accuracy.items(), key=lambda entry: entry[1], reverse=True)
for clf_name, clf_accuracy in sorted_accuracy:
    print(clf_name, clf_accuracy)

Classification Accuracy on the NBA Games Dataset (40% test set)

gaussian naive bayes 0.6789256198347108


In [18]:
# Predict the outcome of every validation game with the fitted model.
y_pred = gnb.predict(X_val_extra)
target_names = ['home_loss', 'home_win']

# Compare the true validation labels (y_val_extra) with the predictions (y_pred).
# NOTE(review): the label says "in %", but the printed value is a fraction (e.g. 0.68).
val_accuracy = accuracy_score(y_val_extra, y_pred)
print("Gaussian Naive Bayes model accuracy(in %):", val_accuracy)
print("These are the results for the model\n")
val_report = classification_report(y_val_extra, y_pred, target_names=target_names)
print(val_report)

Gaussian Naive Bayes model accuracy(in %): 0.6789256198347108
These are the results for the model

              precision    recall  f1-score   support

   home_loss       0.61      0.65      0.63      1011
    home_win       0.74      0.70      0.72      1409

    accuracy                           0.68      2420
   macro avg       0.67      0.68      0.67      2420
weighted avg       0.68      0.68      0.68      2420



### GNB with PCA

In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [8]:
# Learn the standardization statistics from the training features only, then
# apply the same transformation to all three splits.
# NOTE: this rebinds X_train / X_val / X_test to the scaled arrays, so running
# the cell a second time would re-fit the scaler on already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train, X_val, X_test = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

In [26]:
# Number of principal components to keep; 30 was chosen by a separate
# feature-selection run that was executed beforehand.
n_components = 30

# Fit PCA on the training features only and project every split with it.
# random_state is fixed because PCA's default solver choice may pick the
# randomized SVD depending on the data shape, which would otherwise make the
# components (and the accuracy reported below) vary between runs.
pca = PCA(n_components=n_components, random_state=42).fit(X_train)
X_train_pca = pca.transform(X_train)
X_val_pca = pca.transform(X_val)
X_test_pca = pca.transform(X_test)

In [27]:
# Start a fresh results dictionary (classifier name -> accuracy).
win_accuracy = {}

# Train Gaussian Naive Bayes on the PCA projection: fitting computes the mean
# and variance of every component for each class on the training set.
gnb = GaussianNB()
model = gnb.fit(X_train_pca, y_train)

# GaussianNB has a built-in accuracy method; store its value in the results
# dictionary so it can be listed alongside other classifiers.
win_accuracy['gaussian naive bayes'] = gnb.score(X_val_pca, y_val)

# NOTE(review): the header text says "40% test set", but the score above is
# computed on X_val_pca / y_val (the validation split).
print("Classification Accuracy on the NBA Games Dataset (40% test set)\n")

# Print the stored results again, highest accuracy first.
sorted_accuracy = sorted(win_accuracy.items(), key=lambda entry: entry[1], reverse=True)
for clf_name, clf_accuracy in sorted_accuracy:
    print(clf_name, clf_accuracy)

Classification Accuracy on the NBA Games Dataset (40% test set)

gaussian naive bayes 0.702892561983471


In [28]:
# Predict the outcome of every validation game from its PCA projection.
y_pred = gnb.predict(X_val_pca)
target_names = ['home_loss', 'home_win']

# Compare the true validation labels (y_val) with the predictions (y_pred).
# NOTE(review): the label says "in %", but the printed value is a fraction (e.g. 0.70).
val_accuracy = accuracy_score(y_val, y_pred)
print("Gaussian Naive Bayes model accuracy(in %):", val_accuracy)
print("These are the results for the model\n")
val_report = classification_report(y_val, y_pred, target_names=target_names)
print(val_report)

Gaussian Naive Bayes model accuracy(in %): 0.702892561983471
These are the results for the model

              precision    recall  f1-score   support

   home_loss       0.69      0.53      0.60      1011
    home_win       0.71      0.82      0.76      1409

    accuracy                           0.70      2420
   macro avg       0.70      0.68      0.68      2420
weighted avg       0.70      0.70      0.70      2420



### GNB with SelectFromModel(LassoCV)

The SelectFromModel(LassoCV) selection was already executed separately; the features it returned are hard-coded in the cell below.

In [30]:
# Feature subset for the SelectFromModel(LassoCV) experiment; the same columns
# are taken from the training and the validation frame.
# NOTE(review): score_home / score_away may be final game scores, which would
# leak the outcome into the features - confirm what these columns hold.
SFM_FEATURES = [
    'score_home', 'score_away', 'eff', 'diff_curr_win_pct',
    'diff_road_record_last_season', 'odds_home', 'W_PCT_prev_away',
    'diff_avg_reb_home', 'ROAD_RECORD_prev_away', 'diff_avg_pts_home',
]

X_train_sfm = train_data[SFM_FEATURES]
y_train_sfm = y_train

X_val_sfm = valid_data[SFM_FEATURES]
y_val_sfm = y_val

# Start a fresh results dictionary (classifier name -> accuracy).
win_accuracy = {}

In [31]:
# Train Gaussian Naive Bayes: fitting computes the mean and variance of every
# feature for each class on the training set.
gnb = GaussianNB()
model = gnb.fit(X_train_sfm, y_train_sfm)

# GaussianNB has a built-in accuracy method; store its value in the results
# dictionary so it can be listed alongside other classifiers.
win_accuracy['gaussian naive bayes'] = gnb.score(X_val_sfm, y_val_sfm)

# NOTE(review): the header text says "40% test set", but the score above is
# computed on X_val_sfm / y_val_sfm (the validation split).
print("Classification Accuracy on the NBA Games Dataset (40% test set)\n")

# Print the stored results again, highest accuracy first.
sorted_accuracy = sorted(win_accuracy.items(), key=lambda entry: entry[1], reverse=True)
for clf_name, clf_accuracy in sorted_accuracy:
    print(clf_name, clf_accuracy)

Classification Accuracy on the NBA Games Dataset (40% test set)

gaussian naive bayes 0.7838842975206611


In [32]:
# Predict the outcome of every validation game with the fitted model.
y_pred = gnb.predict(X_val_sfm)
target_names = ['home_loss', 'home_win']

# Compare the true validation labels (y_val_sfm) with the predictions (y_pred).
# NOTE(review): the label says "in %", but the printed value is a fraction (e.g. 0.78).
val_accuracy = accuracy_score(y_val_sfm, y_pred)
print("Gaussian Naive Bayes model accuracy(in %):", val_accuracy)
print("These are the results for the model\n")
val_report = classification_report(y_val_sfm, y_pred, target_names=target_names)
print(val_report)

Gaussian Naive Bayes model accuracy(in %): 0.7838842975206611
These are the results for the model

              precision    recall  f1-score   support

   home_loss       0.79      0.66      0.72      1011
    home_win       0.78      0.87      0.82      1409

    accuracy                           0.78      2420
   macro avg       0.78      0.77      0.77      2420
weighted avg       0.78      0.78      0.78      2420



### GNB with Sequential Feature Selection

#### Forward

In [34]:
# Feature subset for the forward sequential-selection experiment; the same
# columns are taken from the training and the validation frame.
# NOTE(review): score_home / score_away may be final game scores, which would
# leak the outcome into the features - confirm what these columns hold.
FORWARD_SFS_FEATURES = [
    'num_possible_outcomes','score_home','score_away','HG_7days_VISITOR','Home_Last_5_Avg_PTS_home',
    'Home_Last_5_Avg_FG3_PCT_home','Home_Last_5_Avg_FG3_PCT_away','Home_Last_5_Avg_AST_home','Away_Last_5_Avg_FG3_PCT_home',
    'Away_Last_5_Avg_FG_PCT_home','Away_Last_5_Avg_FG_PCT_away','Away_Last_5_Avg_PTS_home','Away_Last_5_Avg_AST_away',
    'Away_Last_5_Avg_REB_home','Away_Last_5_Avg_REB_away','odds_away','diff_avg_pts_home','diff_avg_fg3_pct_away',
    'diff_avg_reb_home','G_home','HOME_RECORD_away','WIN_PRCT_home_3g','PTS_home_3g','FT_PCT_home_3g','REB_home_3g',
    'PTS_away_3g','FG_PCT_away_3g','FG3_PCT_away_3g','WIN_PRCT_home_7g','PTS_home_7g','FT_PCT_home_7g','AST_home_7g','REB_home_7g',
    'PTS_away_7g','FG_PCT_away_7g','FT_PCT_away_7g','FG3_PCT_away_7g','REB_away_7g','diff_avg_fg3_pct_home','diff_avg_ft_pct_home',
    'diff_avg_ft_pct_away','top_players_visitor','eff','missing_player_diff','month','diff_pts_last_3_games','diff_ft_pct_last_7_games',
    'diff_ast_last_3_games','diff_reb_last_7_games','diff_win_pct_prev_season','diff_curr_win_pct','diff_curr_away_record',
]

X_train_for_sfs = train_data[FORWARD_SFS_FEATURES]
y_train_for_sfs = y_train

X_val_for_sfs = valid_data[FORWARD_SFS_FEATURES]
y_val_for_sfs = y_val

# Start a fresh results dictionary (classifier name -> accuracy).
win_accuracy = {}

In [35]:
# Train Gaussian Naive Bayes: fitting computes the mean and variance of every
# feature for each class on the training set.
gnb = GaussianNB()
model = gnb.fit(X_train_for_sfs, y_train_for_sfs)

# GaussianNB has a built-in accuracy method; store its value in the results
# dictionary so it can be listed alongside other classifiers.
win_accuracy['gaussian naive bayes'] = gnb.score(X_val_for_sfs, y_val_for_sfs)

# NOTE(review): the header text says "40% test set", but the score above is
# computed on X_val_for_sfs / y_val_for_sfs (the validation split).
print("Classification Accuracy on the NBA Games Dataset (40% test set)\n")

# Print the stored results again, highest accuracy first.
sorted_accuracy = sorted(win_accuracy.items(), key=lambda entry: entry[1], reverse=True)
for clf_name, clf_accuracy in sorted_accuracy:
    print(clf_name, clf_accuracy)

Classification Accuracy on the NBA Games Dataset (40% test set)

gaussian naive bayes 0.7553719008264462


In [36]:
# Predict the outcome of every validation game with the fitted model.
y_pred = gnb.predict(X_val_for_sfs)
target_names = ['home_loss', 'home_win']

# Compare the true validation labels (y_val_for_sfs) with the predictions (y_pred).
# NOTE(review): the label says "in %", but the printed value is a fraction (e.g. 0.76).
val_accuracy = accuracy_score(y_val_for_sfs, y_pred)
print("Gaussian Naive Bayes model accuracy(in %):", val_accuracy)
print("These are the results for the model\n")
val_report = classification_report(y_val_for_sfs, y_pred, target_names=target_names)
print(val_report)

Gaussian Naive Bayes model accuracy(in %): 0.7553719008264462
These are the results for the model

              precision    recall  f1-score   support

   home_loss       0.69      0.74      0.72      1011
    home_win       0.81      0.76      0.78      1409

    accuracy                           0.76      2420
   macro avg       0.75      0.75      0.75      2420
weighted avg       0.76      0.76      0.76      2420



#### Backwards

In [37]:
# Feature subset for the backward sequential-selection experiment; the same
# columns are taken from the training and the validation frame.
# NOTE(review): score_home / score_away may be final game scores, which would
# leak the outcome into the features - confirm what these columns hold.
BACKWARD_SFS_FEATURES = [
    'num_possible_outcomes','score_home','score_away','G_7days','HG_7days','AG_7days','HG_7days_VISITOR',
    'Home_Last_5_Avg_FG_PCT_home','Home_Last_5_Avg_FT_PCT_home','Home_Last_5_Avg_FG3_PCT_home','Home_Last_5_Avg_REB_away',
    'Away_Last_5_Avg_FG3_PCT_home','Away_Last_5_Avg_FT_PCT_home','Away_Last_5_Avg_FG_PCT_away','Away_Last_5_Avg_PTS_home',
    'Away_Last_5_Avg_AST_away','Away_Last_5_Avg_REB_away','HOME_RECORD_away','HOME_RECORD_home','ROAD_RECORD_away','HOME_RECORD_prev_home',
    'HOME_RECORD_prev_away','FG3_PCT_home_3g','FT_PCT_home_7g',
    'REB_home_7g','diff_avg_fg_pct_home','diff_avg_reb_away','top_players',
    'FT_PCT_away_7g','REB_away_7g','diff_avg_fg3_pct_home','diff_avg_ft_pct_home',
    'top_players_visitor','eff','eff_visitor','eff_diff','missing_player_diff','month','diff_pts_last_3_games',
    'diff_ft_pct_last_7_games','missing_players','missing_players_visitor','top_player_diff','diff_fg3_pct_last_3_games','diff_ast_last_7_games',
    'diff_ast_last_3_games','diff_reb_last_7_games','diff_win_pct_prev_season','diff_road_record_last_season','diff_curr_home_record',
]

X_train_back_sfs = train_data[BACKWARD_SFS_FEATURES]
y_train_back_sfs = y_train

X_val_back_sfs = valid_data[BACKWARD_SFS_FEATURES]
y_val_back_sfs = y_val

# Start a fresh results dictionary (classifier name -> accuracy).
win_accuracy = {}

In [38]:
# Train Gaussian Naive Bayes: fitting computes the mean and variance of every
# feature for each class on the training set.
gnb = GaussianNB()
model = gnb.fit(X_train_back_sfs, y_train_back_sfs)

# GaussianNB has a built-in accuracy method; store its value in the results
# dictionary so it can be listed alongside other classifiers.
win_accuracy['gaussian naive bayes'] = gnb.score(X_val_back_sfs, y_val_back_sfs)

# NOTE(review): the header text says "40% test set", but the score above is
# computed on X_val_back_sfs / y_val_back_sfs (the validation split).
print("Classification Accuracy on the NBA Games Dataset (40% test set)\n")

# Print the stored results again, highest accuracy first.
sorted_accuracy = sorted(win_accuracy.items(), key=lambda entry: entry[1], reverse=True)
for clf_name, clf_accuracy in sorted_accuracy:
    print(clf_name, clf_accuracy)

Classification Accuracy on the NBA Games Dataset (40% test set)

gaussian naive bayes 0.7417355371900827


In [39]:
# Predict the outcome of every validation game with the fitted model.
y_pred = gnb.predict(X_val_back_sfs)
target_names = ['home_loss', 'home_win']

# Compare the true validation labels (y_val_back_sfs) with the predictions (y_pred).
# NOTE(review): the label says "in %", but the printed value is a fraction (e.g. 0.74).
val_accuracy = accuracy_score(y_val_back_sfs, y_pred)
print("Gaussian Naive Bayes model accuracy(in %):", val_accuracy)
print("These are the results for the model\n")
val_report = classification_report(y_val_back_sfs, y_pred, target_names=target_names)
print(val_report)

Gaussian Naive Bayes model accuracy(in %): 0.7417355371900827
These are the results for the model

              precision    recall  f1-score   support

   home_loss       0.69      0.71      0.70      1011
    home_win       0.78      0.77      0.78      1409

    accuracy                           0.74      2420
   macro avg       0.73      0.74      0.74      2420
weighted avg       0.74      0.74      0.74      2420



### GNB with Recursive Feature Elimination (RFE)

Έχουμε τα επιλεγμένα features από το αποτέλεσμα του κώδικα στο 'rfe_with_log_regression'

In [44]:
# Feature subset for the recursive-feature-elimination experiment; the same
# columns are taken from the training and the validation frame.
# NOTE(review): score_home / score_away may be final game scores, which would
# leak the outcome into the features - confirm what these columns hold.
RFE_FEATURES = [
    'num_possible_outcomes', 'odds_home', 'odds_away', 'score_home',
    'score_away', 'G_home', 'W_PCT_home', 'HOME_RECORD_home',
    'ROAD_RECORD_home', 'G_away', 'W_PCT_away', 'ROAD_RECORD_away',
    'W_PCT_prev_home', 'ROAD_RECORD_prev_home', 'W_PCT_prev_away',
    'HOME_RECORD_prev_away', 'ROAD_RECORD_prev_away', 'AST_home_3g',
    'REB_home_3g', 'WIN_PRCT_away_3g', 'PTS_away_3g', 'FG3_PCT_away_3g',
    'AST_home_7g', 'REB_home_7g', 'PTS_away_7g', 'AST_away_7g',
    'diff_avg_pts_home', 'diff_avg_pts_away', 'diff_avg_ast_home',
    'diff_avg_fg3_pct_home', 'diff_avg_reb_home', 'diff_avg_reb_away',
    'top_players', 'HG_7days', 'AG_7days', 'G_7days', 'HG_7days_VISITOR',
    'back2back_visitor', 'missing_players', 'missing_players_visitor',
    'top_player_diff', 'missing_player_diff', 'month',
    'Home_Last_5_Avg_AST_home', 'Home_Last_5_Avg_REB_home',
    'Home_Last_5_Avg_PTS_away', 'Home_Last_5_Avg_FT_PCT_away',
    'Home_Last_5_Avg_REB_away', 'Home_Last_5_Avg_AST_away',
    'Away_Last_5_Avg_REB_home', 'Away_Last_5_Avg_REB_away',
    'Away_Last_5_Avg_AST_away', 'diff_pts_last_3_games',
    'diff_fg3_pct_last_3_games', 'diff_ft_pct_last_3_games',
    'diff_ast_last_3_games', 'diff_ast_last_7_games',
    'diff_reb_last_7_games', 'diff_win_pct_3_last_games',
    'diff_curr_win_pct', 'diff_curr_home_record',
]

X_train_rfe = train_data[RFE_FEATURES]
y_train_rfe = y_train

X_val_rfe = valid_data[RFE_FEATURES]
y_val_rfe = y_val

# Start a fresh results dictionary (classifier name -> accuracy).
win_accuracy = {}

In [45]:
# Train Gaussian Naive Bayes: fitting computes the mean and variance of every
# feature for each class on the training set.
gnb = GaussianNB()
model = gnb.fit(X_train_rfe, y_train_rfe)

# GaussianNB has a built-in accuracy method; store its value in the results
# dictionary so it can be listed alongside other classifiers.
win_accuracy['gaussian naive bayes'] = gnb.score(X_val_rfe, y_val_rfe)

# NOTE(review): the header text says "40% test set", but the score above is
# computed on X_val_rfe / y_val_rfe (the validation split).
print("Classification Accuracy on the NBA Games Dataset (40% test set)\n")

# Print the stored results again, highest accuracy first.
sorted_accuracy = sorted(win_accuracy.items(), key=lambda entry: entry[1], reverse=True)
for clf_name, clf_accuracy in sorted_accuracy:
    print(clf_name, clf_accuracy)

Classification Accuracy on the NBA Games Dataset (40% test set)

gaussian naive bayes 0.7301652892561984


In [46]:
# Predict the outcome of every validation game with the fitted model.
y_pred = gnb.predict(X_val_rfe)
target_names = ['home_loss', 'home_win']

# Compare the true validation labels (y_val_rfe) with the predictions (y_pred).
# NOTE(review): the label says "in %", but the printed value is a fraction (e.g. 0.73).
val_accuracy = accuracy_score(y_val_rfe, y_pred)
print("Gaussian Naive Bayes model accuracy(in %):", val_accuracy)
print("These are the results for the model\n")
val_report = classification_report(y_val_rfe, y_pred, target_names=target_names)
print(val_report)

Gaussian Naive Bayes model accuracy(in %): 0.7301652892561984
These are the results for the model

              precision    recall  f1-score   support

   home_loss       0.67      0.70      0.69      1011
    home_win       0.78      0.75      0.76      1409

    accuracy                           0.73      2420
   macro avg       0.72      0.73      0.72      2420
weighted avg       0.73      0.73      0.73      2420

