### Problem
Practice with metrics for evaluating model performance.

Techniques used and implemented :

    1. Accuracy score
    
    2. Precision score
    
    3. Recall score
    
    4. f1 score

#### Import

In [137]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.metrics import fbeta_score

#### Load Dataset and Preprocessing

In [24]:
# Read the tab-separated SMS corpus; the file has no header row, so the
# column names (label, sms_message) are supplied explicitly
dataset = pd.read_csv(
    '../dataset/sms-spam-collection',
    sep='\t',
    header=None,
    names=['label', 'sms_message'],
)

# Encode the text labels as integers: ham -> 0, spam -> 1
dataset['label'] = dataset['label'].map({'ham': 0, 'spam': 1})

# Hold out a test set; the fixed seed keeps the split the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    dataset['sms_message'],
    dataset['label'],
    random_state=1,
)

# Learn the vocabulary from the training messages only, then encode both
# splits with that same vocabulary
count_vector = CountVectorizer().fit(X_train)
training_data = count_vector.transform(X_train)
testing_data = count_vector.transform(X_test)

#### Models

In [35]:
# Create models
# The three ensemble models are randomized; pinning random_state (same seed as
# the train/test split) makes their scores reproducible across re-runs.
naive_bayes = MultinomialNB()
bag_mod = BaggingClassifier(n_estimators=200, random_state=1)
rf_mod = RandomForestClassifier(n_estimators=200, random_state=1)
ada_mod = AdaBoostClassifier(n_estimators=300, learning_rate=0.2, random_state=1)
# NOTE(review): in the recorded run this SVC scored 0.0 precision and recall,
# i.e. it never predicted spam -- gamma='scale' may be worth trying; confirm.
svm_mod = SVC(gamma='auto')

# Fit every model on the same training data
for model in (naive_bayes, bag_mod, rf_mod, ada_mod, svm_mod):
    model.fit(training_data, y_train)

# Predictions on the held-out test set
naive_bayes_preds = naive_bayes.predict(testing_data)
bag_mod_preds = bag_mod.predict(testing_data)
rf_mod_preds = rf_mod.predict(testing_data)
ada_mod_preds = ada_mod.predict(testing_data)
svm_mod_preds = svm_mod.predict(testing_data)

#### Custom Metrics Implementation

In [116]:
# Calculates the model accuracy
# Reminder: accuracy is not a good metric when the classes are imbalanced
def accuracy(actual, preds):
    """Return the fraction of predictions that match the true labels.

    Parameters
    ----------
    actual : array-like
        True labels.
    preds : array-like
        Predicted labels, in the same order as ``actual``.

    Returns
    -------
    float
        Number of matching positions divided by ``len(actual)``.
    """
    # Coerce to arrays first: on plain Python lists ``preds == actual`` is a
    # single bool rather than an element-wise comparison, which would silently
    # produce a wrong score. It also makes the comparison purely positional.
    actual = np.asarray(actual)
    preds = np.asarray(preds)
    # Summing the boolean mask counts the positions where the labels agree
    return np.sum(preds == actual)/len(actual)

# Calculates model precision
# Reminder: when we identify something as positive, we want to be sure it is truly positive
def precision(actual, preds):
    """Return the share of predicted positives (label 1) that are truly positive.

    Parameters
    ----------
    actual : array-like
        True labels; 1 marks the positive class.
    preds : array-like
        Predicted labels, in the same order as ``actual``.

    Returns
    -------
    float
        true positives / predicted positives, or 0.0 when nothing was
        predicted positive (the ratio would otherwise be 0/0).
    """
    # Arrays give element-wise comparisons even when lists are passed in
    actual = np.asarray(actual)
    preds = np.asarray(preds)

    predicted_positive = preds == 1
    preds_positives = predicted_positive.sum()
    if preds_positives == 0:
        # No positive predictions at all: report 0.0 instead of NaN
        return 0.0

    # Positions flagged positive by both the model and the ground truth
    true_positives = np.sum(predicted_positive & (actual == 1))
    return true_positives/preds_positives

# Calculates model recall
# Reminder: what matters most is capturing all positive cases, even if that means
# identifying some negatives as positives
def recall(actual, preds):
    """Return the share of actual positives (label 1) that were predicted positive.

    Parameters
    ----------
    actual : array-like
        True labels; 1 marks the positive class.
    preds : array-like
        Predicted labels, in the same order as ``actual``.

    Returns
    -------
    float
        true positives / actual positives, or 0.0 when there are no actual
        positives (the ratio would otherwise be 0/0).
    """
    # Arrays give element-wise comparisons even when lists are passed in
    actual = np.asarray(actual)
    preds = np.asarray(preds)

    actually_positive = actual == 1
    actual_positives = actually_positive.sum()
    if actual_positives == 0:
        # Nothing to find: report 0.0 instead of NaN
        return 0.0

    # Positions flagged positive by both the model and the ground truth
    true_positives = np.sum(actually_positive & (preds == 1))
    return true_positives/actual_positives
    
# Calculates model f1 score
# Reminder: F1 is the harmonic mean of precision and recall, so both are weighted
# equally; true negatives are not taken into account
def f1(actual, preds):
    """Return the F1 score: the harmonic mean of precision and recall.

    Parameters
    ----------
    actual : array-like
        True labels; 1 marks the positive class.
    preds : array-like
        Predicted labels, in the same order as ``actual``.

    Returns
    -------
    float
        2 * precision * recall / (precision + recall), or 0.0 when that
        ratio is undefined.
    """
    _precision = precision(actual, preds)
    _recall = recall(actual, preds)
    denominator = _precision + _recall
    # `not (x > 0)` is true for a zero sum and also for a NaN sum (a component
    # that came back as 0/0), so both undefined cases yield 0.0 rather than NaN.
    if not denominator > 0:
        return 0.0
    return 2*(_precision*_recall)/denominator

#### Comparison : Custom vs. Sklearn metrics functions

In [124]:
# One row per metric: (custom format, custom function, sklearn format, sklearn function)
metric_pairs = (
    ("Custom Accuracy: {}", accuracy, "Sklearn Accuracy: {}", accuracy_score),
    ("Custom Precision: {}", precision, "Sklearn Precision: {}", precision_score),
    ("Custom Recall: {}", recall, "Sklearn Recall: {}", recall_score),
    ("Custom f1: {}", f1, "Sklearn f1: {}", f1_score),
)

# Print each custom/sklearn pair on the naive Bayes predictions, with a blank
# separator between (but not after) the pairs
for row, (custom_fmt, custom_fn, sklearn_fmt, sklearn_fn) in enumerate(metric_pairs):
    if row > 0:
        print("\n")
    print(custom_fmt.format(custom_fn(y_test, naive_bayes_preds)))
    print(sklearn_fmt.format(sklearn_fn(y_test, naive_bayes_preds)))

Custom Accuracy: 0.9885139985642498
Sklearn Accuracy: 0.9885139985642498


Custom Precision: 0.9720670391061452
Sklearn Precision: 0.9720670391061452


Custom Recall: 0.9405405405405406
Sklearn Recall: 0.9405405405405406


Custom f1: 0.9560439560439562
Sklearn f1: 0.9560439560439562


#### Displaying metrics

In [135]:
def print_metrics(y_true, preds, model_name=None):
    """Print sklearn's accuracy, precision, recall and f1 scores for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        True labels.
    preds : array-like
        Predicted labels, in the same order as ``y_true``.
    model_name : str, optional
        When given, each line reads "<metric> for <model_name>: <score>";
        otherwise "<metric>: <score>".

    Returns
    -------
    None
        The scores are only printed, followed by blank lines as a separator.
    """
    # Identity check: `is None` is the idiomatic test and cannot be fooled by
    # an object that overrides `__eq__`.
    if model_name is None:
        templates = (
            'Accuracy Score: {}',
            'Precision Score: {}',
            'Recall Score: {}',
            'f1 Score: {}',
        )
        leading_args = ()
    else:
        templates = (
            'Accuracy Score for {}: {}',
            'Precision Score for {}: {}',
            'Recall Score for {}: {}',
            'f1 Score for {}: {}',
        )
        leading_args = (model_name,)

    # Same order as the templates above
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    for template, scorer in zip(templates, scorers):
        print(template.format(*leading_args, scorer(y_true, preds)))
    print("\n\n")

In [136]:
# Report the four sklearn metrics for every fitted model, in a fixed order
predictions_by_model = (
    (bag_mod_preds, 'bagging'),
    (rf_mod_preds, 'random forest'),
    (ada_mod_preds, 'adaboost'),
    (naive_bayes_preds, 'naive bayes'),
    (svm_mod_preds, 'svm'),
)
for model_preds, model_label in predictions_by_model:
    print_metrics(y_test, model_preds, model_label)

Accuracy Score for bagging: 0.9748743718592965
Precision Score for bagging: 0.9166666666666666
Recall Score for bagging: 0.8918918918918919
f1 Score for bagging: 0.9041095890410958



Accuracy Score for random forest: 0.9827709978463748
Precision Score for random forest: 1.0
Recall Score for random forest: 0.8702702702702703
f1 Score for random forest: 0.930635838150289



Accuracy Score for adaboost: 0.9770279971284996
Precision Score for adaboost: 0.9693251533742331
Recall Score for adaboost: 0.8540540540540541
f1 Score for adaboost: 0.9080459770114943



Accuracy Score for naive bayes: 0.9885139985642498
Precision Score for naive bayes: 0.9720670391061452
Recall Score for naive bayes: 0.9405405405405406
f1 Score for naive bayes: 0.9560439560439562



Accuracy Score for svm: 0.8671931083991385
Precision Score for svm: 0.0
Recall Score for svm: 0.0
f1 Score for svm: 0.0





#### Checking fbeta score

In [141]:
# Compute the same score three ways on the bagging predictions
sklearn_fbeta = fbeta_score(y_test, bag_mod_preds, beta=1)  # Sklearn fbeta implementation
sklearn_f1 = f1_score(y_test, bag_mod_preds)                # Sklearn f1 score implementation
custom_f1 = f1(y_test, bag_mod_preds)                       # Custom f1 score implementation

# Print them in that order, one per line, for comparison
for score in (sklearn_fbeta, sklearn_f1, custom_f1):
    print(score)

0.9041095890410958
0.9041095890410958
0.9041095890410958
