In [None]:
%matplotlib notebook
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits

# Load the digits dataset and separate the feature matrix from the labels.
dataset = load_digits()
X = dataset.data
y = dataset.target

# Print each class name next to the number of samples carrying that label.
for class_name, class_count in zip(dataset.target_names, np.bincount(y)):
    print(class_name, class_count)

In [None]:
# Display the class names stored with the dataset.
dataset.target_names

In [None]:
# Let's create a dataset with imbalanced binary classes:
# Negative class (0) is 'not digit 1'
# Positive class (1) is 'digit 1'
# The boolean mask is cast back to the label dtype, giving 1 where the
# original label is 1 and 0 everywhere else.
y_binary_imbalanced = (y == 1).astype(y.dtype)

print('Original labels:\t', y[1:30])
print('New binary labels:\t', y_binary_imbalanced[1:30])
# Sample count per binary class (index 0 = negative, index 1 = positive).
np.bincount(y_binary_imbalanced)

In [None]:
from sklearn.svm import SVC

# Hold out a test split of the binary-labelled data; the seed is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary_imbalanced, random_state=0)

# Fit an RBF-kernel support vector classifier and show its score on the test split.
svm = SVC(kernel='rbf', C=1)
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

In [None]:
from sklearn.dummy import DummyClassifier

# The negative class (0) is the most frequent one, therefore the
# 'most_frequent' dummy classifier always predicts class 0.
dummy_majority = DummyClassifier(strategy='most_frequent')
dummy_majority.fit(X_train, y_train)

# Show the baseline's predictions for the test split.
y_dummy_predictions = dummy_majority.predict(X_test)
y_dummy_predictions

In [None]:
# Refit the SVM with a linear kernel and show its test score next to the
# majority-class dummy baseline. (A separate leading call to
# dummy_majority.score() was dropped: its result was never displayed and the
# same value is computed in the tuple below.)
svm = SVC(kernel = 'linear', C = 1).fit(X_train, y_train)
svm.score(X_test, y_test), dummy_majority.score(X_test, y_test)
# From the output we can observe that SVC with linear kernel can perform better than the simple dummy classifier
# as test score on SVC with linear kernel is much higher than the dummy classifier's test score

# Confusion Matrices

In [None]:
from sklearn.metrics import confusion_matrix

# Majority-class baseline again: the negative class (0) is the most frequent class.
dummy_majority = DummyClassifier(strategy='most_frequent')
dummy_majority.fit(X_train, y_train)
y_majority_predicted = dummy_majority.predict(X_test)

# confusion_matrix takes the true labels first, then the predicted labels.
confusion = confusion_matrix(y_test, y_majority_predicted)
print('Most frequent class (dummy classifier)\n', confusion)
# The confusion matrix shows that the positive class is never predicted,
# because the negative class is predominant in this imbalanced dataset.

In [None]:
from sklearn.metrics import confusion_matrix 
from sklearn.dummy import DummyClassifier
# Two more dummy strategies are evaluated for comparison.
# The first one ('stratified') follows the class distribution of the training set.
stratified_dummy = DummyClassifier(strategy='stratified', random_state=0)
stratified_dummy.fit(X_train, y_train)
stratified_predicted = stratified_dummy.predict(X_test)

confusion = confusion_matrix(y_test, stratified_predicted)
print('Stratified dummy classifier\n', confusion)

In [None]:
# The second strategy ('uniform') makes the dummy classifier pick a class
# uniformly at random. random_state is fixed, as for the stratified dummy,
# so the printed matrix is the same on every run.
uniform_dummy = DummyClassifier(strategy = 'uniform', random_state = 0).fit(X_train, y_train)
uniform_predicted = uniform_dummy.predict(X_test)
confusion = confusion_matrix(y_test, uniform_predicted)

print('Uniform strategy for the dummy classifier\n', confusion)

In [None]:
# Dummy baseline that produces random predictions with the same class
# proportions as the training set.
dummy_classprop = DummyClassifier(strategy='stratified', random_state=0)
dummy_classprop.fit(X_train, y_train)
y_classprop_predicted = dummy_classprop.predict(X_test)

confusion = confusion_matrix(y_test, y_classprop_predicted)
print('Random class-proportional prediction (dummy classifier)\n', confusion)

In [None]:
# Linear-kernel SVM: fit on the training split, predict the test split.
svm = SVC(kernel='linear', C=1)
svm.fit(X_train, y_train)
svm_predicted = svm.predict(X_test)

confusion = confusion_matrix(y_test, svm_predicted)
print('Support vector machine classifier (linear kernel, C=1)\n', confusion)

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with its default settings.
lr = LogisticRegression()
lr.fit(X_train, y_train)
lr_predicted = lr.predict(X_test)

confusion = confusion_matrix(y_test, lr_predicted)
print('Logistic regression classifier (default settings)\n', confusion)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Shallow decision tree, limited to a depth of 2.
dt = DecisionTreeClassifier(max_depth=2)
dt.fit(X_train, y_train)
tree_predicted = dt.predict(X_test)

confusion = confusion_matrix(y_test, tree_predicted)
print('Decision tree classifier (max_depth = 2)\n', confusion)

# Evaluation metrics for binary classification

In [None]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

# Accuracy = (TP + TN) / (TP + TN + FN + FP)
# Precision = TP / (TP + FP)
# Recall = TP / (TP + FN)  Also known as Sensitivity, or True Positive Rate
#          (Specificity is the different quantity TN / (TN + FP))
# F1-score = 2 * Precision * Recall / (Precision + Recall)

# All four metrics are computed for the decision tree's test predictions.
print('Accuracy: {:.2f}'.format(accuracy_score(y_test, tree_predicted)) )
print('Recall: {:.2f}'.format(recall_score(y_test, tree_predicted)) )
print('Precision: {:.2f}'.format(precision_score(y_test, tree_predicted)) )
print('F1: {:.2f}'.format(f1_score(y_test, tree_predicted)) )

In [None]:
from sklearn.metrics import classification_report
# Combined per-class report of the metrics above for the decision tree.
print(
    classification_report(
        y_test,
        tree_predicted,
        target_names=['not 1', '1'],
    )
)

In [None]:
# Print the per-class report of each classifier fitted above, in the order:
# class-proportional dummy, SVM, logistic regression, decision tree.
for report_title, model_predictions in [
        ('Random class-proportional (dummy)\n', y_classprop_predicted),
        ('SVM\n', svm_predicted),
        ('Logistic regression\n', lr_predicted),
        ('Decision tree\n', tree_predicted)]:
    print(report_title,
          classification_report(y_test, model_predictions, target_names=['not 1', '1']))

## Decision Functions

In [None]:
# Same split as before (same data, same seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary_imbalanced, random_state=0)

# Refit logistic regression and take the decision-function score of every test sample.
lr.fit(X_train, y_train)
y_scores_lr = lr.decision_function(X_test)

# Pair the true label with its decision_function score for the first 20 instances.
y_score_list = list(zip(y_test[:20], y_scores_lr[:20]))
y_score_list

In [None]:
# Same split as before (same data, same seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary_imbalanced, random_state=0)

# Refit logistic regression and get the class probabilities of every test sample.
lr.fit(X_train, y_train)
y_proba_lr = lr.predict_proba(X_test)

# Column 1 holds the probability of the positive class; pair it with the
# true label for the first 20 instances.
y_proba_list = list(zip(y_test[:20], y_proba_lr[:20, 1]))
y_proba_list

## Precision-recall curves

In [None]:
from sklearn.metrics import precision_recall_curve

# Precision and recall at every threshold over the logistic-regression scores.
precision, recall, thresholds = precision_recall_curve(y_test, y_scores_lr)

# Locate the threshold closest to zero and read precision/recall at that index.
closest_zero = np.abs(thresholds).argmin()
closest_zero_p, closest_zero_r = precision[closest_zero], recall[closest_zero]
closest_zero_r, closest_zero_p

In [None]:
# Plot the precision-recall curve (precision on x, recall on y) and circle
# the point that corresponds to the threshold closest to zero.
plt.figure()
plt.xlim([0, 1.01])
plt.ylim([0, 1.01])
plt.plot(precision, recall, label = 'Precision-Recall Curve')
plt.plot(closest_zero_p, closest_zero_r, 'o', markersize = 12, fillstyle = 'none', c = 'r', mew = 3)
plt.xlabel('Precision', fontsize = 16)
plt.ylabel('Recall', fontsize = 16)
# gca() returns the axes that was just drawn on; a bare plt.axes() call adds
# a new, empty axes on top of the plot in current Matplotlib releases.
plt.gca().set_aspect('equal')
plt.show()

## ROC curves, Area-Under-Curve (AUC)

In [None]:
from sklearn.metrics import roc_curve, auc

# Same split as before; refit logistic regression and compute its ROC curve
# and the area under it from the decision-function scores.
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state = 0)
y_scores_lr = lr.fit(X_train, y_train).decision_function(X_test)
fpr_lr, tpr_lr, thresholds = roc_curve(y_test, y_scores_lr)
roc_auc_lr = auc(fpr_lr, tpr_lr)

plt.figure()
plt.xlim([-0.01, 1.00])
plt.ylim([-0.01, 1.01])
plt.plot(fpr_lr, tpr_lr, lw = 3, label = 'LogRegr ROC curve (area = {:.2f})'.format(roc_auc_lr))
plt.xlabel('False positive rate', fontsize = 16)
plt.ylabel('True positive rate', fontsize = 16)
plt.title('ROC curve (1-of-10 digits classifier)', fontsize = 16)
plt.legend(loc = 'lower right', fontsize = 13)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color = 'navy', lw = 3, linestyle = '--')
# gca() returns the axes that was just drawn on; a bare plt.axes() call adds
# a new, empty axes on top of the plot in current Matplotlib releases.
plt.gca().set_aspect('equal')
plt.show()


In [None]:
from matplotlib import cm

X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state = 0)

plt.figure()
plt.xlim([-0.01, 1.00])
plt.ylim([-0.01, 1.01])

# Fit one SVC per gamma value, print its test accuracy and AUC, and overlay
# its ROC curve on the shared figure.
for g in [0.01, 0.1, 0.20, 1]:
    svm = SVC(gamma = g).fit(X_train, y_train)
    y_score_svm = svm.decision_function(X_test)
    fpr_svm, tpr_svm, thresholds_svm = roc_curve(y_test, y_score_svm)
    roc_auc_svm = auc(fpr_svm, tpr_svm)
    accuracy_svm = svm.score(X_test, y_test)
    print('gamma = {:.2f}  accuracy = {:.2f}  AUC = {:.2f}'.format(g, accuracy_svm,
                                                                  roc_auc_svm))
    plt.plot(fpr_svm, tpr_svm, lw = 3, alpha = 0.7,
            label = 'SVM (gamma = {:.2f}, area = {:.2f})'.format(g, roc_auc_svm))

plt.xlabel('False positive rate', fontsize = 16)
plt.ylabel('True positive rate (Recall)', fontsize = 16)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color = 'k', lw = 0.5, linestyle = '--')
plt.legend(loc = 'lower right', fontsize = 11)
plt.title('ROC curve: (1-of-10 digits classifier)', fontsize = 16)
# gca() returns the axes that was just drawn on; a bare plt.axes() call adds
# a new, empty axes on top of the plot in current Matplotlib releases.
plt.gca().set_aspect('equal')
plt.show()


## Evaluation measures for multi-class classification

In [None]:
# Back to the full 10-class problem: reload the digits and split them.
dataset = load_digits()

X, y = dataset.data, dataset.target
X_train_mc, X_test_mc, y_train_mc, y_test_mc = train_test_split(X, y, random_state = 0)

# Fit one SVM per kernel and draw its confusion matrix as a heatmap.
# 'rbf' is processed last, so svm, svm_predicted_mc, confusion_mc and df_mc
# hold the RBF-kernel results once this cell has finished.
digit_labels = list(range(10))
for kernel, kernel_title in [('linear', 'linear'), ('rbf', 'RBF')]:
    svm = SVC(kernel = kernel).fit(X_train_mc, y_train_mc)
    svm_predicted_mc = svm.predict(X_test_mc)
    confusion_mc = confusion_matrix(y_test_mc, svm_predicted_mc)
    df_mc = pd.DataFrame(confusion_mc, index = digit_labels, columns = digit_labels)

    plt.figure(figsize = (5.5, 4))
    # fmt = 'd' prints the counts as plain integers; the default '.2g'
    # format would render a count of 100 or more as e.g. '1e+02'.
    sns.heatmap(df_mc, annot = True, fmt = 'd')
    plt.title('SVM {} kernel \nAccuracy: {:.3f}'.format(
        kernel_title, accuracy_score(y_test_mc, svm_predicted_mc)))
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

plt.show()


In [None]:
# svm_predicted_mc was last assigned from the RBF-kernel SVM, so this
# classification report describes the RBF-kernel classifier.
print(
    classification_report(y_test_mc, svm_predicted_mc)
)

In [None]:
# Refit the linear-kernel SVM so that the classification report below
# (precision, recall, F1 per class) describes the linear-kernel classifier.
svm = SVC(kernel='linear')
svm.fit(X_train_mc, y_train_mc)
svm_predicted_mc = svm.predict(X_test_mc)
print(
    classification_report(y_test_mc, svm_predicted_mc)
)

## Micro- vs. Macro-averaged metrics

In [None]:
# These micro- and macro-averaged metrics correspond to the linear-kernel SVM
# classifier (svm_predicted_mc is expected to hold its predictions here).
# The macro-precision message now has the space before the parenthesis that
# the other three messages already had.
print('Micro-averaged precision = {:.2f} (treat instances equally)'
     .format(precision_score(y_test_mc, svm_predicted_mc, average = 'micro')))
print('Macro-averaged precision = {:.2f} (treat classes equally)'
     .format(precision_score(y_test_mc, svm_predicted_mc, average = 'macro')))

print('Micro-averaged f1 = {:.2f} (treat instances equally)'
     .format(f1_score(y_test_mc, svm_predicted_mc, average = 'micro')))
print('Macro-averaged f1 = {:.2f} (treat classes equally)'
     .format(f1_score(y_test_mc, svm_predicted_mc, average = 'macro')))

In [None]:
# Refit the RBF-kernel SVM so the metrics below describe that classifier.
svm = SVC(kernel = 'rbf').fit(X_train_mc, y_train_mc)
svm_predicted_mc = svm.predict(X_test_mc)

# These micro- and macro-averaged metrics correspond to the RBF-kernel SVM classifier.
# The macro-precision message now has the space before the parenthesis that
# the other three messages already had.
print('Micro-averaged precision = {:.2f} (treat instances equally)'
     .format(precision_score(y_test_mc, svm_predicted_mc, average = 'micro')))
print('Macro-averaged precision = {:.2f} (treat classes equally)'
     .format(precision_score(y_test_mc, svm_predicted_mc, average = 'macro')))

print('Micro-averaged f1 = {:.2f} (treat instances equally)'
     .format(f1_score(y_test_mc, svm_predicted_mc, average = 'micro')))
print('Macro-averaged f1 = {:.2f} (treat classes equally)'
     .format(f1_score(y_test_mc, svm_predicted_mc, average = 'macro')))

In [None]:
# Manual computation: 1 where the predicted label equals the true label,
# 0 otherwise; the mean of these flags is the share of correctly classified
# test samples. The element-wise comparison replaces the Python index loop.
matches = (y_test_mc == svm_predicted_mc).astype(int)
micro_averaged_precision = matches.mean()

### Regression evaluation metrics 

In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.dummy import DummyRegressor

diabetes = datasets.load_diabetes()

# Keep only the column at index 6, as a 2-D array with a single feature column.
X = diabetes.data[:, None, 6]
y = diabetes.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0)

# Fit a linear model and a dummy regressor that always predicts the training mean.
lm = LinearRegression().fit(X_train, y_train)
lm_dummy_mean = DummyRegressor(strategy = 'mean').fit(X_train, y_train)

y_predict = lm.predict(X_test)
y_predict_dummy_mean = lm_dummy_mean.predict(X_test)

print('Linear model, coefficients: ', lm.coef_)
print('Mean squared error (dummy): {:.2f}'.format(mean_squared_error(y_test,
                                                                     y_predict_dummy_mean)))
# Label corrected from 'linear mode' to 'linear model' to match the r2 line below.
print('Mean squared error (linear model): {:.2f}'.format(mean_squared_error(y_test,
                                                                           y_predict)))
print('r2_score (dummy): {:.2f}'.format(r2_score(y_test, y_predict_dummy_mean)))
print('r2_score (linear model): {:.2f}'.format(r2_score(y_test, y_predict)))



In [None]:
# Plot outputs: test points, the linear fit and the constant dummy prediction.
# A new figure is opened first, as in the other plotting cells; without it the
# artists are added to whichever figure is still current from an earlier cell.
plt.figure()
plt.scatter(X_test, y_test, color = 'black')
plt.plot(X_test, y_predict, color = 'green', linewidth = 2,
        label = 'linear fit')
plt.plot(X_test, y_predict_dummy_mean, color = 'red', linestyle = 'dashed',
        linewidth = 2, label = 'dummy')
plt.legend(loc = 'best')
plt.show()

## Model selection using evaluation metrics


### Cross-validation example

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

dataset = load_digits()
# Again a binary problem: 'digit 1' is the positive class and
# 'not 1' is the negative class.
X = dataset.data
y = dataset.target == 1
clf = SVC(kernel='linear', C=1)

# 5-fold cross-validation under three scoring metrics. scoring=None is the
# parameter's default value, i.e. the estimator's own score (accuracy);
# the other two rows select AUC and recall.
for cv_label, cv_scoring in [
        ('Cross-validation (Accuracy)', None),
        ('Cross-validation (AUC)', 'roc_auc'),
        ('Cross-validation (recall)', 'recall')]:
    print(cv_label, cross_val_score(clf, X, y, cv=5, scoring=cv_scoring))

### Grid search example

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score

# Binary problem again: the target is True for digit 1, False otherwise.
dataset = load_digits()
X = dataset.data
y = dataset.target == 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# One RBF-kernel SVC, searched over a grid of gamma values.
clf = SVC(kernel='rbf')
param_values = {'gamma': [0.001, 0.01, 0.05, 0.1, 1, 10, 100]}

# Search 1: no scoring argument, so the estimator's own score (accuracy) is used.
grid_clf_acc = GridSearchCV(clf, param_grid=param_values)
grid_clf_acc.fit(X_train, y_train)
y_decision_fn_scores_acc = grid_clf_acc.decision_function(X_test)

print('Grid best parameter (max. accuracy): ', grid_clf_acc.best_params_)
print('Grid best score (accuracy): ', grid_clf_acc.best_score_)

# Search 2: same grid, but candidates are ranked by ROC AUC.
grid_clf_auc = GridSearchCV(clf, param_grid=param_values, scoring='roc_auc')
grid_clf_auc.fit(X_train, y_train)
y_decision_fn_scores_auc = grid_clf_auc.decision_function(X_test)

print('Test set AUC: ', roc_auc_score(y_test, y_decision_fn_scores_auc))
print('Grid best parameter (max. AUC): ', grid_clf_auc.best_params_)
print('Grid best score (AUC): ', grid_clf_auc.best_score_)

In [None]:
# List the names accepted by the `scoring=` parameter.
# The private module sklearn.metrics.scorer was removed in scikit-learn 0.24
# and the SCORERS dict in 1.3; get_scorer_names() (added in 1.0) is the
# supported replacement. Older releases fall back to the public SCORERS dict.
try:
    from sklearn.metrics import get_scorer_names
    scorer_names = get_scorer_names()
except ImportError:
    from sklearn.metrics import SCORERS  # scikit-learn < 1.0
    scorer_names = SCORERS.keys()
print(sorted(scorer_names))