In [1]:
%load_ext pycodestyle_magic
%flake8_on 

In [2]:
%matplotlib notebook
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits

# Loading a dataset that has an equal representation of 0-9 digits

dataset = load_digits()
X, y = dataset.data, dataset.target

for class_name, class_count in zip(dataset.target_names, np.bincount(dataset.target)):
    print(class_name,class_count)

0 178
1 182
2 177
3 183
4 181
5 182
6 181
7 179
8 174
9 180


In [3]:
# Creating a dataset with imbalanced binary classes:
# Negative class (0) is 'not digit 1'
# Positive class (1) is 'digit 1'

y_binary_imbalanced = y.copy()
y_binary_imbalanced[y_binary_imbalanced != 1] = 0

print('Original labels:\t', y[1:30])
print('New binary labels:\t', y_binary_imbalanced[1:30])

Original labels:	 [1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
New binary labels:	 [1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]


In [4]:
np.bincount(y_binary_imbalanced)    # Negative class (0) is the most frequent class

array([1615,  182])

In [5]:
# Accuracy of Support Vector Machine classifier
from sklearn.svm import SVC

X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary_imbalanced, random_state=0)

svm = SVC(kernel='rbf', C=1).fit(X_train, y_train)
svm.score(X_test, y_test)

0.9955555555555555

## Dummy Classifiers
DummyClassifier is a classifier that makes predictions using simple rules, which can be useful as a baseline for comparison against actual classifiers, especially with imbalanced classes.

In [6]:
from sklearn.dummy import DummyClassifier

# Negative class (0) is most frequent
# This strategy will always predict the most frequent class
# Taking the majority from fit y_train

dummy_majority = DummyClassifier(
    strategy='most_frequent').fit(X_train, y_train)

# Therefore the dummy 'most_frequent' classifier always predicts class 0
y_dummy_predictions = dummy_majority.predict(X_test)

y_dummy_predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [7]:
# This classifier provides a null metric

dummy_majority.score(X_test, y_test)

0.9044444444444445

In [8]:
svm = SVC(kernel='linear', C=1).fit(X_train, y_train)
svm.score(X_test, y_test)

0.9777777777777777

## Confusion matrices
Binary (two-class) confusion matrix

In [9]:
from sklearn.metrics import confusion_matrix

# Negative class (0) is most frequent
dummy_majority = DummyClassifier(
    strategy='most_frequent').fit(X_train, y_train)

y_majority_predicted = dummy_majority.predict(X_test)

confusion = confusion_matrix(y_test, y_majority_predicted)

print('Most frequent class (dummy classifier)\n\n', confusion)


Most frequent class (dummy classifier)

 [[407   0]
 [ 43   0]]


In [10]:
# produces random predictions w/ same class proportion as training set

dummy_classprop = DummyClassifier(strategy='stratified').fit(X_train, y_train)
y_classprop_predicted = dummy_classprop.predict(X_test)
confusion = confusion_matrix(y_test, y_classprop_predicted)

print('Random class-proportional prediction (dummy classifier)\n\n', confusion)

Random class-proportional prediction (dummy classifier)

 [[357  50]
 [ 39   4]]


In [11]:
svm = SVC(kernel='linear', C=1).fit(X_train, y_train)
svm_predicted = svm.predict(X_test)
confusion = confusion_matrix(y_test, svm_predicted)

print('Support vector machine classifier (linear kernel, C=1)\n\n', confusion)

Support vector machine classifier (linear kernel, C=1)

 [[402   5]
 [  5  38]]


In [12]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression().fit(X_train, y_train)
lr_predicted = lr.predict(X_test)
confusion = confusion_matrix(y_test, lr_predicted)

print('Logistic regression classifier (default settings)\n\n', confusion)

Logistic regression classifier (default settings)

 [[401   6]
 [  8  35]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [13]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(max_depth=2).fit(X_train, y_train)
tree_predicted = dt.predict(X_test)
confusion = confusion_matrix(y_test, tree_predicted)

print('Decision tree classifier (max_depth = 2)\n\n', confusion)

Decision tree classifier (max_depth = 2)

 [[400   7]
 [ 17  26]]


## Evaluation metrics for binary classification

In [14]:
from sklearn.metrics import (accuracy_score,
                            precision_score, recall_score, f1_score)

# Accuracy = TP + TN / (TP + TN + FP + FN)
# Precision = TP / (TP + FP)
# Recall = TP / (TP + FN)  Also known as sensitivity, or True Positive Rate
# F1 = 2 * Precision * Recall / (Precision + Recall)

print('Accuracy: {:.2f}'.format(accuracy_score(y_test, tree_predicted)))
print('Precision: {:.2f}'.format(precision_score(y_test, tree_predicted)))
print('Recall: {:.2f}'.format(recall_score(y_test, tree_predicted)))
print('F1: {:.2f}'.format(f1_score(y_test, tree_predicted)))

2:29: E128 continuation line under-indented for visual indent


Accuracy: 0.95
Precision: 0.79
Recall: 0.60
F1: 0.68


In [15]:
# A handy combination of all these scores in one

from sklearn.metrics import classification_report

print('Example for classification report\n\n',
      classification_report(y_test, tree_predicted,
                            target_names=['not 1', '1']))

# support is giving the number of instances that have that (true) label

Example for classification report

               precision    recall  f1-score   support

       not 1       0.96      0.98      0.97       407
           1       0.79      0.60      0.68        43

    accuracy                           0.95       450
   macro avg       0.87      0.79      0.83       450
weighted avg       0.94      0.95      0.94       450



In [16]:
print('Random class-proportional (dummy)\n\n',
      classification_report(y_test, y_classprop_predicted,
                            target_names=['not 1', '1']))

print('SVM\n',
      classification_report(y_test, svm_predicted,
                            target_names=['not 1', '1']))


print('Logistic Regression\n',
      classification_report(y_test, lr_predicted, target_names=['not 1', '1']))


print('Decision Tree\n',
      classification_report(y_test, tree_predicted, target_names=['not 1', '1']))

Random class-proportional (dummy)

               precision    recall  f1-score   support

       not 1       0.90      0.88      0.89       407
           1       0.07      0.09      0.08        43

    accuracy                           0.80       450
   macro avg       0.49      0.49      0.49       450
weighted avg       0.82      0.80      0.81       450

SVM
               precision    recall  f1-score   support

       not 1       0.99      0.99      0.99       407
           1       0.88      0.88      0.88        43

    accuracy                           0.98       450
   macro avg       0.94      0.94      0.94       450
weighted avg       0.98      0.98      0.98       450

Logistic Regression
               precision    recall  f1-score   support

       not 1       0.98      0.99      0.98       407
           1       0.85      0.81      0.83        43

    accuracy                           0.97       450
   macro avg       0.92      0.90      0.91       450
weighted avg

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y_binary_imbalanced, random_state=0)

y_proba_lr = lr.fit(X_train, y_train).predict_proba(X_test)
y_proba_list = list(zip(y_test[0:20], y_proba_lr[0:20, 1]))

y_proba_list

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[(0, 1.1105347039890248e-13),
 (0, 3.821031299662777e-09),
 (0, 2.0851240904302122e-13),
 (0, 3.594393014894706e-10),
 (0, 1.4674302094015812e-10),
 (0, 7.461184884542288e-06),
 (1, 0.9984927975270779),
 (0, 7.197432837593348e-11),
 (0, 1.0904024157627175e-12),
 (0, 2.101857187536468e-12),
 (0, 1.4517103586022847e-14),
 (0, 1.7157595958393444e-10),
 (0, 1.0104107939704357e-11),
 (0, 1.5389021892036701e-06),
 (0, 1.283774952801467e-06),
 (0, 1.6607161806689274e-06),
 (1, 0.9999948741777424),
 (0, 1.1929828128242475e-15),
 (0, 1.79300810474413e-06),
 (0, 1.4043003289732896e-13)]

2:80: E501 line too long (88 > 79 characters)


### Figuring out the "perfect" decision rule 
#### Plotting a Recall-Precision-Curve

Scitic Learn has a built in function for this. 

Two functions: 

Precision-recal curve (precision vs recall)
ROC curves (FP vs TP curve) 

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y_binary_imbalanced, random_state=0)
y_scores_lr = lr.fit(X_train, y_train).decision_function(X_test)
y_score_list = list(zip(y_test[0:20], y_scores_lr[0:20]))

# show the decision_function scores for first 20 instances
y_score_list

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[(0, -29.828764594280383),
 (0, -19.382745473209535),
 (0, -29.19877783964875),
 (0, -21.746475794139076),
 (0, -22.64233821568212),
 (0, -11.805788863440224),
 (1, 6.495991673959203),
 (0, -23.35471161075396),
 (0, -27.544474299046676),
 (0, -26.88819978700888),
 (0, -31.86344888295895),
 (0, -22.485995034182142),
 (0, -25.318079048058713),
 (0, -13.384439720997293),
 (0, -13.565704354686222),
 (0, -13.308259953781302),
 (1, 12.18121447946495),
 (0, -34.36231965858414),
 (0, -13.231614050127543),
 (0, -29.59406701666908)]

2:80: E501 line too long (88 > 79 characters)


In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)
y_proba_lr = lr.fit(X_train, y_train).predict_proba(X_test)
y_proba_list = list(zip(y_test[0:20], y_proba_lr[0:20,1]))

# show the probability of positive class for first 20 instances
y_proba_list

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[(0, 1.1105347039890248e-13),
 (0, 3.821031299662777e-09),
 (0, 2.0851240904302122e-13),
 (0, 3.594393014894706e-10),
 (0, 1.4674302094015812e-10),
 (0, 7.461184884542288e-06),
 (1, 0.9984927975270779),
 (0, 7.197432837593348e-11),
 (0, 1.0904024157627175e-12),
 (0, 2.101857187536468e-12),
 (0, 1.4517103586022847e-14),
 (0, 1.7157595958393444e-10),
 (0, 1.0104107939704357e-11),
 (0, 1.5389021892036701e-06),
 (0, 1.283774952801467e-06),
 (0, 1.6607161806689274e-06),
 (1, 0.9999948741777424),
 (0, 1.1929828128242475e-15),
 (0, 1.79300810474413e-06),
 (0, 1.4043003289732896e-13)]

1:80: E501 line too long (91 > 79 characters)
3:54: E231 missing whitespace after ','


In [20]:
# Precision recall curves
from sklearn.metrics import precision_recall_curve

precision, recall, thresholds = precision_recall_curve(y_test, y_scores_lr)
closest_zero = np.argmin(np.abs(thresholds))
closest_zero_p = precision[closest_zero]
closest_zero_r = recall[closest_zero]

plt.figure()
plt.xlim([0.0, 1.01])
plt.ylim([0.0, 1.01])
plt.plot(precision, recall, label='Precision-Recall Curve')
plt.plot(closest_zero_p, closest_zero_r, 'o', markersize = 12, fillstyle = 'none', c='r', mew=3)
plt.xlabel('Precision', fontsize=16)
plt.ylabel('Recall', fontsize=16)
plt.axes().set_aspect('equal')
plt.show()

<IPython.core.display.Javascript object>

  app.launch_new_instance()
13:57: E251 unexpected spaces around keyword / parameter equals
13:59: E251 unexpected spaces around keyword / parameter equals
13:73: E251 unexpected spaces around keyword / parameter equals
13:75: E251 unexpected spaces around keyword / parameter equals
13:80: E501 line too long (96 > 79 characters)


### ROC curves, Area-Under-Curve (AUC)

In [21]:
from sklearn.metrics import roc_curve, auc

X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)

y_score_lr = lr.fit(X_train, y_train).decision_function(X_test)
fpr_lr, tpr_lr, _ = roc_curve(y_test, y_score_lr)
roc_auc_lr = auc(fpr_lr, tpr_lr)

plt.figure()
plt.xlim([-0.01, 1.00])
plt.ylim([-0.01, 1.01])
plt.plot(fpr_lr, tpr_lr, lw=3, label='LogRegr ROC curve (area = {:0.2f})'.format(roc_auc_lr))
plt.xlabel('False Positive Rate', fontsize=16)
plt.ylabel('True Positive Rate', fontsize=16)
plt.title('ROC curve (1-of-10 digits classifier)', fontsize=16)
plt.legend(loc='lower right', fontsize=13)
plt.plot([0, 1], [0, 1], color='navy', lw=3, linestyle='--')
plt.axes().set_aspect('equal')
plt.show()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


<IPython.core.display.Javascript object>

3:80: E501 line too long (91 > 79 characters)
12:80: E501 line too long (93 > 79 characters)


In [59]:
from matplotlib import cm

X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)

plt.figure()
plt.xlim([-0.01, 1.00])
plt.ylim([-0.01, 1.01])
for g in [0.01, 0.1, 0.20, 1]:
    svm = SVC(gamma=g).fit(X_train, y_train)
    y_score_svm = svm.decision_function(X_test)
    fpr_svm, tpr_svm, _ = roc_curve(y_test, y_score_svm)
    roc_auc_svm = auc(fpr_svm, tpr_svm)
    accuracy_svm = svm.score(X_test, y_test)
    print("gamma = {:.2f}  accuracy = {:.2f}   AUC = {:.2f}".format(g, accuracy_svm, 
                                                                    roc_auc_svm))
    plt.plot(fpr_svm, tpr_svm, lw=3, alpha=0.7, 
             label='SVM (gamma = {:0.2f}, area = {:0.2f})'.format(g, roc_auc_svm))

plt.xlabel('False Positive Rate', fontsize=16)
plt.ylabel('True Positive Rate (Recall)', fontsize=16)
plt.plot([0, 1], [0, 1], color='k', lw=0.5, linestyle='--')
plt.legend(loc="lower right", fontsize=11)
plt.title('ROC curve: (1-of-10 digits classifier)', fontsize=16)
plt.axes().set_aspect('equal')

plt.show()

<IPython.core.display.Javascript object>

gamma = 0.01  accuracy = 0.91   AUC = 1.00
gamma = 0.10  accuracy = 0.90   AUC = 0.98
gamma = 0.20  accuracy = 0.90   AUC = 0.66
3:80: E501 line too long (91 > 79 characters)
14:80: E501 line too long (84 > 79 characters)
14:85: W291 trailing whitespace
15:80: E501 line too long (81 > 79 characters)
16:48: W291 trailing whitespace
17:80: E501 line too long (82 > 79 characters)
gamma = 1.00  accuracy = 0.90   AUC = 0.50


### Evaluation measures for multi-class classification

#### Multi-class confusion matrix

In [60]:
dataset = load_digits()
X, y = dataset.data, dataset.target
X_train_mc, X_test_mc, y_train_mc, y_test_mc = train_test_split(X, y, random_state=0)


svm = SVC(kernel = 'linear').fit(X_train_mc, y_train_mc)
svm_predicted_mc = svm.predict(X_test_mc)
confusion_mc = confusion_matrix(y_test_mc, svm_predicted_mc)
df_cm = pd.DataFrame(confusion_mc, 
                     index = [i for i in range(0,10)], columns = [i for i in range(0,10)])

plt.figure(figsize=(5.5,4))
sns.heatmap(df_cm, annot=True)
plt.title('SVM Linear Kernel \nAccuracy:{0:.3f}'.format(accuracy_score(y_test_mc, 
                                                                       svm_predicted_mc)))
plt.ylabel('True label')
plt.xlabel('Predicted label')


svm = SVC(kernel = 'rbf').fit(X_train_mc, y_train_mc)
svm_predicted_mc = svm.predict(X_test_mc)
confusion_mc = confusion_matrix(y_test_mc, svm_predicted_mc)
df_cm = pd.DataFrame(confusion_mc, index = [i for i in range(0,10)],
                  columns = [i for i in range(0,10)])

plt.figure(figsize = (5.5,4))
sns.heatmap(df_cm, annot=True)
plt.title('SVM RBF Kernel \nAccuracy:{0:.3f}'.format(accuracy_score(y_test_mc, 
                                                                    svm_predicted_mc)))
plt.ylabel('True label')
plt.xlabel('Predicted label');

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

3:80: E501 line too long (85 > 79 characters)
6:17: E251 unexpected spaces around keyword / parameter equals
6:19: E251 unexpected spaces around keyword / parameter equals
9:35: W291 trailing whitespace
10:27: E251 unexpected spaces around keyword / parameter equals
10:29: E251 unexpected spaces around keyword / parameter equals
10:49: E231 missing whitespace after ','
10:63: E251 unexpected spaces around keyword / parameter equals
10:65: E251 unexpected spaces around keyword / parameter equals
10:80: E501 line too long (90 > 79 characters)
10:85: E231 missing whitespace after ','
12:24: E231 missing whitespace after ','
14:80: E501 line too long (81 > 79 characters)
14:82: W291 trailing whitespace
15:80: E501 line too long (90 > 79 characters)
20:17: E251 unexpected spaces around keyword / parameter equals
20:19: E251 unexpected spaces around keyword / parameter equals
23:41: E251 unexpected spaces around keyword / parameter equals
23:43: E251 unexpected spaces around keyword / parame

#### Multi-class classification report

In [62]:
print("Result\n", classification_report(y_test_mc, svm_predicted_mc))

Result
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        37
           1       0.98      1.00      0.99        43
           2       1.00      1.00      1.00        44
           3       1.00      0.98      0.99        45
           4       1.00      1.00      1.00        38
           5       0.98      0.98      0.98        48
           6       1.00      1.00      1.00        52
           7       1.00      1.00      1.00        48
           8       0.98      0.98      0.98        48
           9       0.98      0.98      0.98        47

    accuracy                           0.99       450
   macro avg       0.99      0.99      0.99       450
weighted avg       0.99      0.99      0.99       450

