# Model Evaluation 1

---

__This Notebook__

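This notebook cross-validates several candidate classifiers (KNN, AdaBoost, XGBoost) on the preprocessed ham/spam data and prints a test-set classification report for each, following the approach described in [Quickly test multiple models](https://towardsdatascience.com/quickly-test-multiple-models-a98477476f0).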


## Setup

In [1]:
import re
import os
import sys
import time
import joblib 

import numpy as np
import pandas as pd
import scipy.sparse as sp
import matplotlib.pyplot as plt

from datetime import datetime

from sklearn.ensemble import AdaBoostClassifier, \
    RandomForestClassifier, GradientBoostingClassifier

import custom.evaluate_models as E

# Display settings: print arrays in full and allow very wide text columns.
pd.options.display.max_colwidth = 999
np.set_printoptions(threshold=sys.maxsize)

# Stamp the notebook with today's date; the time-of-day part is kept in T.
dt_object = datetime.fromtimestamp(time.time())
day, T = dt_object.isoformat(sep=' ', timespec='seconds').split(' ')
print('Revised on: ' + day)

Revised on: 2021-02-23


## Load Raw Data

In [2]:
def load_raw(data, raw_dir=os.path.join("data", "1_raw")):
    """
    Load the first column of a raw CSV file as a 1-D NumPy array.

    :param data: file name without the ".csv" extension (e.g. "X_train")
    :param raw_dir: directory containing the raw CSV files;
        defaults to data/1_raw relative to the working directory
    :return: np.ndarray holding the values of the file's first column
    """
    csv_path = os.path.join(raw_dir, data + ".csv")
    first_column = pd.read_csv(csv_path).iloc[:, 0]
    # Series.ravel() is deprecated in recent pandas releases; to_numpy(copy=True)
    # returns the same independent 1-D array without the FutureWarning.
    return first_column.to_numpy(copy=True)

# Read the first column of each raw CSV split, in the same order as before.
X_train_raw, X_test_raw, y_train_array, y_test_array = (
    load_raw(split) for split in ("X_train", "X_test", "y_train", "y_test")
)

def make_int(y_array):
    """
    Encode a label array as integers: 'ham' -> 0, 'spam' -> 1.

    Works on a copy, so the input array is left untouched. Any value other
    than 'ham'/'spam' is passed through unchanged to ``astype('int')``.

    :param y_array: array of labels supporting ``.copy()`` and boolean-mask assignment
    :return: integer array of the same shape
    """
    encoded = y_array.copy()
    for label, code in (('ham', 0), ('spam', 1)):
        encoded[encoded == label] = code
    return encoded.astype('int')

# Integer-encode both target vectors with make_int.
y_train, y_test = (make_int(labels) for labels in (y_train_array, y_test_array))

## Load Preprocessed Data

In [3]:
def load_X(filename):
    """
    Load a SciPy sparse matrix stored as data/2_processed/<filename>.npz.

    :param filename: file name without the ".npz" extension
    :return: the sparse matrix returned by ``scipy.sparse.load_npz``
    """
    npz_path = os.path.join("data", "2_processed", filename + '.npz')
    return sp.load_npz(npz_path)

# Load the preprocessed train/test feature matrices from their .npz files.
X_train_processed, X_test_processed = (
    load_X(stem) for stem in ('X_train_processed', 'X_test_processed')
)

## Instantiate Candidate Models

In [4]:
# previously chosen
ada_clf = AdaBoostClassifier(
    n_estimators=10,
    learning_rate=0.001,
    random_state=42,
)

# the two random forests differ only in max_features (150 vs 300)
rnd_clf1 = RandomForestClassifier(
    n_estimators=100,
    max_features=150,
    max_depth=8,
    min_samples_split=3,
    n_jobs=1,
    random_state=42,
)

rnd_clf2 = RandomForestClassifier(
    n_estimators=100,
    max_features=300,
    max_depth=8,
    min_samples_split=3,
    n_jobs=1,
    random_state=42,
)

# gradient boosting variants: depth-1 stumps, deep trees, and a middle ground
gboost_1a = GradientBoostingClassifier(
    n_estimators=50,
    max_features=None,
    max_depth=1,
    min_samples_split=2,
    random_state=42,
)

gboost_2a = GradientBoostingClassifier(
    n_estimators=100,
    max_features=300,
    max_depth=8,
    min_samples_split=5,
    random_state=42,
)

gboost_2c = GradientBoostingClassifier(
    n_estimators=50,
    max_features=300,
    max_depth=3,
    min_samples_split=5,
    random_state=42,
)

In [6]:
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from xgboost import XGBClassifier

# new models -- candidates that were tried but left out of the comparison below:
#radn_clf = RadiusNeighborsClassifier()
#gauss_nb = GaussianNB()
#multi_nb = MultinomialNB()
#, ('RadN', radn_clf)
#   -> UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels
#      with no predicted samples. Use `zero_division` parameter to control this behavior
#, ('GaussianNB', gauss_nb) # needs dense arrays
#, ('MultinomNB', multi_nb) # needs non-negative feature values

In [17]:
# (display name, unfitted estimator) pairs consumed by run_exps
models = [
    # n_neighbors: default=5 had terrible spam recall
    ('KNN', KNeighborsClassifier(n_neighbors=3)),
    ('AdaBoost', AdaBoostClassifier(n_estimators=10,
                                    learning_rate=0.001,
                                    random_state=42)),
    # eval_metric: try logloss
    ('XGboost', XGBClassifier(seed=42,
                              eval_metric='error',
                              use_label_encoder=False)),
]

In [18]:
from sklearn import model_selection
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix

def run_exps(models: list, X_train, y_train, X_test, y_test) -> pd.DataFrame:
    """
    Lightweight script to test many models and find winners.

    For every (name, estimator) pair this runs 5-fold shuffled
    cross-validation on the training split, then fits the estimator on the
    whole training split and prints a classification report for the test
    split. The estimators in ``models`` are fitted in place.

    :param models: list of (name, estimator) tuples
    :param X_train: training feature matrix (array-like or sparse matrix)
    :param y_train: training target vector
    :param X_test: test feature matrix
    :param y_test: test target vector
    :return: DataFrame with one row per (model, fold) holding the
        cross_validate timings and scores plus a 'model' column;
        an empty DataFrame when ``models`` is empty
    """
    # Nothing to evaluate: return an empty frame instead of letting
    # pd.concat fail with "No objects to concatenate".
    if not models:
        return pd.DataFrame()

    scoring = ['accuracy', 'precision_weighted',
               'recall_weighted', 'f1_weighted', 'roc_auc']
    target_names = ['ham', 'spam']

    # With an integer random_state the splitter yields the same folds on every
    # split() call, so one instance is shared by all models.
    kfold = model_selection.KFold(n_splits=5,
                                  shuffle=True,
                                  random_state=42)

    fold_frames = []
    for name, model in models:
        cv_results = model_selection.cross_validate(model,
                                                    X_train,
                                                    y_train,
                                                    cv=kfold,
                                                    scoring=scoring)

        # Refit on the full training split for the hold-out report.
        clf = model.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        print(name)
        print(classification_report(y_test,
                                    y_pred,
                                    digits=3,
                                    target_names=target_names))

        # collect per-fold results, tagged with the model name
        fold_scores = pd.DataFrame(cv_results)
        fold_scores['model'] = name
        fold_frames.append(fold_scores)

    return pd.concat(fold_frames, ignore_index=True)

In [20]:
df_splits = run_exps(
    models=models,
    X_train=X_train_processed,
    y_train=y_train,
    X_test=X_test_processed,
    y_test=y_test,
)

KNN
              precision    recall  f1-score   support

         ham      0.929     1.000     0.963      1442
        spam      1.000     0.517     0.682       230

    accuracy                          0.934      1672
   macro avg      0.964     0.759     0.822      1672
weighted avg      0.938     0.934     0.924      1672

AdaBoost
              precision    recall  f1-score   support

         ham      0.994     0.993     0.994      1442
        spam      0.957     0.965     0.961       230

    accuracy                          0.989      1672
   macro avg      0.976     0.979     0.977      1672
weighted avg      0.989     0.989     0.989      1672

XGboost
              precision    recall  f1-score   support

         ham      0.995     0.993     0.994      1442
        spam      0.957     0.970     0.963       230

    accuracy                          0.990      1672
   macro avg      0.976     0.981     0.979      1672
weighted avg      0.990     0.990     0.990      1672

In [21]:
# per-fold cross-validation timings and scores returned by run_exps, tagged with the model name
df_splits

Unnamed: 0,fit_time,score_time,test_accuracy,test_precision_weighted,test_recall_weighted,test_f1_weighted,test_roc_auc,model
0,0.021942,5.275063,0.923077,0.927776,0.923077,0.911401,0.824853,KNN
1,0.031277,5.323851,0.920513,0.927236,0.920513,0.906472,0.797741,KNN
2,0.014986,5.215429,0.923077,0.9272,0.923077,0.908714,0.810261,KNN
3,0.015626,5.260679,0.917949,0.925093,0.917949,0.902767,0.787678,KNN
4,0.03656,5.221972,0.944872,0.946737,0.944872,0.937167,0.859672,KNN
5,2.717947,0.06622,0.997436,0.997436,0.997436,0.997436,0.994826,AdaBoost
6,2.910082,0.06649,0.984615,0.984615,0.984615,0.984615,0.968006,AdaBoost
7,2.698767,0.078107,0.99359,0.993572,0.99359,0.993547,0.979064,AdaBoost
8,2.963183,0.062483,0.994872,0.994859,0.994872,0.994852,0.985493,AdaBoost
9,2.7718,0.068849,0.99359,0.993566,0.99359,0.99354,0.97629,AdaBoost


In [22]:
def eval_classifier(clf, sets):
    """
    Fit a classifier and evaluate it using the helpers in custom.evaluate_models.

    :param clf: estimator exposing ``predict``; handed to ``E.fit_clf`` for fitting
    :param sets: 4-tuple ordered as (X_train, y_train, X_test, y_test)
    :return: None; the test labels and predictions are passed to ``E.eval_clf``
    """
    train_features, train_labels, test_features, test_labels = sets
    E.fit_clf(clf, train_features, train_labels)
    predictions = clf.predict(test_features)
    E.eval_clf(test_labels, predictions)

In [23]:
# bundled in the order eval_classifier unpacks: X_train, y_train, X_test, y_test
sets = (X_train_processed, y_train, X_test_processed, y_test)

In [26]:
# each entry of `models` is a (name, estimator) tuple, not a bare estimator
models[0]

('KNN', KNeighborsClassifier(n_neighbors=3))

In [28]:
#eval_classifier(models[0], sets) # nope: models[0] is a (name, estimator) tuple -- pass models[0][1] instead

In [29]:
#eval_classifier(ada_clf, sets)

In [30]:
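#eval_classifier(xgboost, sets) # NOTE: no variable named `xgboost` is defined in the cells above; the XGBClassifier instance is models[2][1]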

In [None]:
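# NOTE: `final` is only a local name inside run_exps; presumably the DataFrame it returns
# (`df_splits` in this notebook) is what is meant here -- confirm before re-enabling.
#bootstraps = []
#for model in list(set(final.model.values)):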
#    model_df = final.loc[final.model == model]
#    bootstrap = model_df.sample(n=30, replace=True)
#    bootstraps.append(bootstrap)
#        
#bootstrap_df = pd.concat(bootstraps, ignore_index=True)
#results_long = pd.melt(bootstrap_df,id_vars=['model'],var_name='metrics', value_name='values')
#time_metrics = ['fit_time','score_time'] # fit time metrics
#
### PERFORMANCE METRICS
#results_long_nofit = results_long.loc[~results_long['metrics'].isin(time_metrics)] # get df without fit times
#results_long_nofit = results_long_nofit.sort_values(by='values')
#
### TIME METRICS
#results_long_fit = results_long.loc[results_long['metrics'].isin(time_metrics)] # df with fit times
#results_long_fit = results_long_fit.sort_values(by='values')

In [None]:
#import matplotlib.pyplot as plt
#import seaborn as sns
#plt.figure(figsize=(20, 12))
#sns.set(font_scale=2.5)
#g = sns.boxplot(x="model", y="values", hue="metrics", data=results_long_nofit, palette="Set3")
#plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
#plt.title('Comparison of Model by Classification Metric')
#plt.savefig('./benchmark_models_performance.png',dpi=300)

---