### Problem
Practice with metrics for evaluating model performance.

Some techniques used:

    1.
    
    2.
    
    3.
    
    4.

#### Import

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import SVC

#### Load Dataset and Preprocessing

In [24]:
# Read the tab-separated, header-less SMS file into two named columns
dataset = pd.read_csv(
    '../dataset/sms-spam-collection',
    sep='\t',
    header=None,
    names=['label', 'sms_message'],
)

# Encode the text labels as integers: ham -> 0, spam -> 1
dataset['label'] = dataset['label'].map({'ham': 0, 'spam': 1})

# Hold out a test split; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    dataset['sms_message'],
    dataset['label'],
    random_state=1,
)

# Learn the vocabulary from the training messages only, then encode both
# splits with that same vocabulary
count_vector = CountVectorizer()
training_data = count_vector.fit_transform(X_train)
testing_data = count_vector.transform(X_test)

#### Models

In [35]:
# Seed for the randomized ensemble models. Without it, bagging, random forest
# and AdaBoost draw different samples on every run, so the predictions (and
# every metric computed from them) change on each Restart & Run All.
MODEL_RANDOM_STATE = 1

# Create models
naive_bayes = MultinomialNB()
bag_mod = BaggingClassifier(n_estimators=200, random_state=MODEL_RANDOM_STATE)
rf_mod = RandomForestClassifier(n_estimators=200, random_state=MODEL_RANDOM_STATE)
ada_mod = AdaBoostClassifier(n_estimators=300, learning_rate=0.2, random_state=MODEL_RANDOM_STATE)
svm_mod = SVC(gamma='auto')

# Fit models to the training data
naive_bayes.fit(training_data, y_train)
bag_mod.fit(training_data, y_train)
rf_mod.fit(training_data, y_train)
ada_mod.fit(training_data, y_train)
svm_mod.fit(training_data, y_train)

# Predictions on the held-out test data, one array per model
naive_bayes_preds = naive_bayes.predict(testing_data)
bag_mod_preds = bag_mod.predict(testing_data)
rf_mod_preds = rf_mod.predict(testing_data)
ada_mod_preds = ada_mod.predict(testing_data)
svm_mod_preds = svm_mod.predict(testing_data)

#### Metrics

In [114]:
# Calculates the model accuracy
# Reminder: accuracy is not a good metric when we have imbalanced classes
def accuracy(actual, preds):
    """Return the fraction of predictions that match the actual labels.

    Parameters
    ----------
    actual : array-like
        True labels.
    preds : array-like
        Predicted labels, in the same order and of the same length as `actual`.

    Returns
    -------
    numpy.float64
        Number of matching positions divided by the number of labels, or NaN
        when `actual` is empty (accuracy is undefined without any samples).
    """
    # Coerce to arrays so the comparison is element-wise and positional.
    # Plain lists would otherwise compare as whole objects (a single bool),
    # and two pandas Series would be matched on index labels.
    actual = np.asarray(actual)
    preds = np.asarray(preds)

    if len(actual) == 0:
        return np.float64('nan')

    # preds == actual is a boolean array; summing it counts the True entries
    return np.sum(preds == actual) / len(actual)

# Calculates model precision
# Reminder: when we identify something as positive, we want to be sure it is truly positive
def precision(actual, preds):
    """Return the share of predicted positives that are actually positive.

    The positive class is the label 1.

    Parameters
    ----------
    actual : array-like
        True labels.
    preds : array-like
        Predicted labels, in the same order and of the same length as `actual`.

    Returns
    -------
    numpy.float64
        true positives / predicted positives. When nothing is predicted
        positive the ratio is undefined; 0.0 is returned in that case (the
        same value scikit-learn's `precision_score` uses by default).
    """
    # Coerce to arrays so lists are accepted and the comparison is positional
    actual = np.asarray(actual)
    preds = np.asarray(preds)

    predicted_positive = preds == 1
    preds_positives = np.sum(predicted_positive)
    if preds_positives == 0:
        # Avoid 0/0 (NaN plus a RuntimeWarning)
        return np.float64(0.0)

    # Positions that are both predicted positive and actually positive
    true_positives = np.sum(predicted_positive & (actual == 1))
    return true_positives / preds_positives

# Calculates model recall
# Reminder: the priority is to capture all positive cases, even if that means
# identifying some negatives as positives
def recall(actual, preds):
    """Return the share of actual positives that were predicted positive.

    The positive class is the label 1.

    Parameters
    ----------
    actual : array-like
        True labels.
    preds : array-like
        Predicted labels, in the same order and of the same length as `actual`.

    Returns
    -------
    numpy.float64
        true positives / actual positives. When there are no actual positives
        the ratio is undefined; 0.0 is returned in that case (the same value
        scikit-learn's `recall_score` uses by default).
    """
    # Coerce to arrays so lists are accepted and the comparison is positional
    actual = np.asarray(actual)
    preds = np.asarray(preds)

    actually_positive = actual == 1
    actual_positives = np.sum(actually_positive)
    if actual_positives == 0:
        # Avoid 0/0 (NaN plus a RuntimeWarning)
        return np.float64(0.0)

    # Positions that are both predicted positive and actually positive
    true_positives = np.sum((preds == 1) & actually_positive)
    return true_positives / actual_positives
    
# Calculates model f1 score
# Reminder: F1 is the harmonic mean of precision and recall; use it when both
# matter equally. It does not take true negatives into account.
def f1(actual, preds):
    """Return the F1 score: 2 * precision * recall / (precision + recall).

    Parameters
    ----------
    actual : array-like
        True labels.
    preds : array-like
        Predicted labels, in the same order and of the same length as `actual`.

    Returns
    -------
    float
        The harmonic mean of `precision(actual, preds)` and
        `recall(actual, preds)`, or 0.0 when that mean is undefined
        (precision + recall is zero or NaN).
    """
    _precision = precision(actual, preds)
    _recall = recall(actual, preds)
    denominator = _precision + _recall

    # `not (x > 0)` is True for both 0 and NaN, so this one guard covers
    # "no true positives" as well as an undefined precision or recall.
    if not denominator > 0:
        return 0.0

    return 2*(_precision*_recall)/denominator

In [115]:
recall(y_test, naive_bayes_preds)

0.9405405405405406