# Model Evaluation 1

---

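__This Notebook__

Cross-validates candidate spam/ham classifiers on the preprocessed training data and checks them against the held-out test set, adapting the approach from [Quickly test multiple models](https://towardsdatascience.com/quickly-test-multiple-models-a98477476f0).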


## Setup

In [1]:
import re
import os
import sys
import time
import joblib 

import numpy as np
import pandas as pd
import scipy.sparse as sp
import matplotlib.pyplot as plt

from datetime import datetime

from sklearn.ensemble import AdaBoostClassifier, \
    RandomForestClassifier, GradientBoostingClassifier

import custom.evaluate_models as E

# Display options: show arrays and long text columns in full.
pd.options.display.max_colwidth = 999
np.set_printoptions(threshold=sys.maxsize)

# Print the date of the current run as the revision stamp.
dt_object = datetime.fromtimestamp(time.time())
day, T = dt_object.isoformat(sep=' ', timespec='seconds').split(' ')
print('Revised on: ' + day)

Revised on: 2021-02-23


## Load Raw Data

In [7]:
def load_raw(data, raw_dir=None):
    """Load the first column of a raw CSV split as a 1-D NumPy array.

    Parameters
    ----------
    data : str
        Base name of the CSV file without the ".csv" extension
        (e.g. "X_train").
    raw_dir : str or path-like, optional
        Directory that holds the CSV. Defaults to ``data/1_raw`` relative to
        the current working directory.

    Returns
    -------
    numpy.ndarray
        A copy of the values in the file's first column; any other columns
        are ignored.
    """
    if raw_dir is None:
        raw_dir = os.path.join("data", "1_raw")
    out_dfm = pd.read_csv(os.path.join(raw_dir, data + ".csv"))
    # Series.ravel() is deprecated in recent pandas releases; a Series is
    # already 1-D, so to_numpy() is the supported conversion. copy=True keeps
    # the result independent of the DataFrame, as np.array(...) did.
    return out_dfm.iloc[:, 0].to_numpy(copy=True)

# Read the four raw splits in one pass; the order matches the unpacking targets.
X_train_raw, X_test_raw, y_train_array, y_test_array = (
    load_raw(split_name)
    for split_name in ("X_train", "X_test", "y_train", "y_test")
)

def make_int(y_array):
    """Return an integer copy of a label array with 'ham' -> 0 and 'spam' -> 1.

    The input array is left untouched; the substitutions are applied to a copy,
    which is then cast with ``astype('int')``.
    """
    labels = y_array.copy()
    for text_label, code in (('ham', 0), ('spam', 1)):
        labels[labels == text_label] = code
    return labels.astype('int')

# Encode both label vectors as integers (ham -> 0, spam -> 1).
y_train, y_test = make_int(y_train_array), make_int(y_test_array)

## Load Preprocessed Data

In [3]:
def load_X(filename):
    """Read a SciPy sparse matrix from ``data/2_processed/<filename>.npz``.

    ``filename`` is the base name without the ".npz" extension.
    """
    npz_path = os.path.join("data", "2_processed", filename + '.npz')
    return sp.load_npz(npz_path)

# Load the preprocessed sparse feature matrices for both splits.
X_train_processed, X_test_processed = (
    load_X(split_name)
    for split_name in ('X_train_processed', 'X_test_processed')
)

## Instantiate Candidate Models

In [22]:
# Previously chosen candidates; every model is seeded with random_state=42.
ada_clf = AdaBoostClassifier(
    n_estimators=10,
    learning_rate=0.001,
    random_state=42,
)

# Two random forests that differ only in max_features (150 vs 300).
rnd_clf1 = RandomForestClassifier(
    n_estimators=100,
    max_features=150,
    max_depth=8,
    min_samples_split=3,
    n_jobs=1,
    random_state=42,
)

rnd_clf2 = RandomForestClassifier(
    n_estimators=100,
    max_features=300,
    max_depth=8,
    min_samples_split=3,
    n_jobs=1,
    random_state=42,
)

# Gradient boosting variants: 1a uses depth-1 trees with no feature cap,
# 2a and 2c cap max_features at 300 and differ in tree count and depth.
gboost_1a = GradientBoostingClassifier(
    n_estimators=50,
    max_features=None,
    max_depth=1,
    min_samples_split=2,
    random_state=42,
)

gboost_2a = GradientBoostingClassifier(
    n_estimators=100,
    max_features=300,
    max_depth=8,
    min_samples_split=5,
    random_state=42,
)

gboost_2c = GradientBoostingClassifier(
    n_estimators=50,
    max_features=300,
    max_depth=3,
    min_samples_split=5,
    random_state=42,
)

In [34]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from xgboost import XGBClassifier

# New candidates, built with library defaults unless noted.
multi_nb = MultinomialNB()
gauss_nb = GaussianNB()
knn_clf = KNeighborsClassifier()
# XGBoost: log-loss evaluation metric, built-in label encoder switched off.
xgboost = XGBClassifier(use_label_encoder=False, eval_metric='logloss')

In [35]:
from sklearn import model_selection
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix

def run_exps(X_train, y_train, X_test, y_test, models=None) -> pd.DataFrame:
    """
    Cross-validate each candidate model, then refit it and report on the test split.

    For every (name, model) pair the function:
      1. runs 5-fold shuffled cross-validation on the training split,
      2. refits the model itself on the full training split (in place),
      3. prints a classification report for the test split.

    :param X_train: training features (the notebook passes a sparse matrix)
    :param y_train: training target vector (0 = ham, 1 = spam)
    :param X_test: test features
    :param y_test: test target vector
    :param models: optional list of (name, estimator) pairs; defaults to the
        XGBoost candidate defined in this notebook
    :return: DataFrame with one row per CV fold and model, holding the
        cross_validate timings and test scores plus a 'model' column
    :raises ValueError: if ``models`` is empty
    """
    if models is None:
        # Notes from earlier runs on the other candidates:
        #   ('AdaBoost', ada_clf)
        #   ('KNN', knn_clf)          -- recall for spam is bad
        #   ('GaussianNB', gauss_nb)  -- needs dense arrays
        #   ('MultinomNB', multi_nb)  -- needs non-negative data
        models = [('XGboost', xgboost)]
    models = list(models)
    if not models:
        raise ValueError("models must contain at least one (name, model) pair")

    scoring = ['accuracy', 'precision_weighted',
               'recall_weighted', 'f1_weighted',
               'roc_auc']
    target_names = ['ham', 'spam']

    # The splitter is seeded, so a single instance yields the same folds for
    # every model; there is no need to rebuild it inside the loop.
    kfold = model_selection.KFold(n_splits=5,
                                  shuffle=True,
                                  random_state=42)

    dfs = []
    for name, model in models:
        cv_results = model_selection.cross_validate(model,
                                                    X_train,
                                                    y_train,
                                                    cv=kfold,
                                                    scoring=scoring)
        # cross_validate works on clones, so the model itself is fitted here
        # before predicting on the held-out test split.
        clf = model.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        print(name)
        print(classification_report(y_test,
                                    y_pred,
                                    digits=3,
                                    target_names=target_names))

        # one row per fold, tagged with the model name
        df = pd.DataFrame(cv_results)
        df['model'] = name
        dfs.append(df)

    return pd.concat(dfs, ignore_index=True)

In [36]:
# Cross-validate on the training split and report on the held-out test split.
df_splits = run_exps(
    X_train=X_train_processed,
    y_train=y_train,
    X_test=X_test_processed,
    y_test=y_test,
)

XGboost
              precision    recall  f1-score   support

         ham      0.995     0.993     0.994      1442
        spam      0.957     0.970     0.963       230

    accuracy                          0.990      1672
   macro avg      0.976     0.981     0.979      1672
weighted avg      0.990     0.990     0.990      1672



In [37]:
df_splits

Unnamed: 0,fit_time,score_time,test_accuracy,test_precision_weighted,test_recall_weighted,test_f1_weighted,test_roc_auc,model
0,10.411941,0.763245,0.994872,0.994935,0.994872,0.99489,0.993751,XGboost
1,11.054469,0.739355,0.985897,0.98585,0.985897,0.98587,0.998318,XGboost
2,6.761108,0.753396,0.99359,0.993572,0.99359,0.993547,0.999718,XGboost
3,11.34067,0.768135,0.996154,0.996146,0.996154,0.996146,0.999932,XGboost
4,10.275149,0.750562,0.994872,0.994853,0.994872,0.994846,0.999619,XGboost


In [38]:
def eval_classifier(clf, sets):
    """Fit ``clf`` on the training split and evaluate its test predictions.

    ``sets`` is a 4-item sequence ``(X_train, y_train, X_test, y_test)``.
    Fitting and reporting are delegated to the project helpers ``E.fit_clf``
    and ``E.eval_clf``; nothing is returned.
    """
    train_features, train_labels, test_features, test_labels = sets
    E.fit_clf(clf, train_features, train_labels)
    predictions = clf.predict(test_features)
    E.eval_clf(test_labels, predictions)

In [41]:
# Bundle the splits in the order eval_classifier unpacks them.
sets = (
    X_train_processed,
    y_train,
    X_test_processed,
    y_test,
)

In [42]:
# Fit and score the k-nearest-neighbours candidate.
eval_classifier(clf=knn_clf, sets=sets)

Elapsed: 0m 0s
          pred_neg  pred_pos
cond_neg      1442         0
cond_pos       137        93
acc: 0.9181
tpr: 0.4043
tnr: 1.0000


In [46]:
# Fit and score the XGBoost candidate.
eval_classifier(clf=xgboost, sets=sets)

Elapsed: 0m 15s
          pred_neg  pred_pos
cond_neg      1432        10
cond_pos         7       223
acc: 0.9898
tpr: 0.9696
tnr: 0.9931


In [None]:
# Bootstrap the per-fold CV scores: draw 30 rows with replacement per model.
# `df_splits` holds the per-fold cross-validation results returned by run_exps.
bootstraps = []
for model in df_splits['model'].unique():  # unique() keeps first-seen order
    model_df = df_splits.loc[df_splits['model'] == model]
    # fixed seed so the resample is reproducible across kernel restarts
    bootstrap = model_df.sample(n=30, replace=True, random_state=42)
    bootstraps.append(bootstrap)

bootstrap_df = pd.concat(bootstraps, ignore_index=True)
# long format: one row per (model, metric, value)
results_long = pd.melt(bootstrap_df, id_vars=['model'], var_name='metrics', value_name='values')
time_metrics = ['fit_time', 'score_time']  # timing columns
is_time_metric = results_long['metrics'].isin(time_metrics)

## PERFORMANCE METRICS
results_long_nofit = results_long.loc[~is_time_metric]  # scores only, no timings
results_long_nofit = results_long_nofit.sort_values(by='values')

## TIME METRICS
results_long_fit = results_long.loc[is_time_metric]  # timings only
results_long_fit = results_long_fit.sort_values(by='values')

In [45]:
import matplotlib.pyplot as plt
#import seaborn as sns
#plt.figure(figsize=(20, 12))
#sns.set(font_scale=2.5)
#g = sns.boxplot(x="model", y="values", hue="metrics", data=results_long_nofit, palette="Set3")
#plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
#plt.title('Comparison of Model by Classification Metric')
#plt.savefig('./benchmark_models_performance.png',dpi=300)

---