## Evaluacion de Modelos

### Preamble

In [3]:
%matplotlib notebook
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits

dataset = load_digits()
X, y = dataset.data, dataset.target

for class_name, class_count in zip(dataset.target_names, np.bincount(dataset.target)):
    print(class_name,class_count)

0 178
1 182
2 177
3 183
4 181
5 182
6 181
7 179
8 174
9 180


In [4]:
# Creating a dataset with imbalanced binary classes:  
# Negative class (0) is 'not digit 1' 
# Positive class (1) is 'digit 1'
y_binary_imbalanced = y.copy()
y_binary_imbalanced[y_binary_imbalanced != 1] = 0

print('Original labels:\t', y[1:30])
print('New binary labels:\t', y_binary_imbalanced[1:30])

Original labels:	 [1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
New binary labels:	 [1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]


In [5]:
np.bincount(y_binary_imbalanced)    # Negative class (0) is the most frequent class

array([1615,  182], dtype=int64)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)

# Accuracy of Support Vector Machine classifier
from sklearn.svm import SVC

svm = SVC(kernel='rbf', C=1, gamma=1).fit(X_train, y_train)
svm.score(X_test, y_test)

0.9044444444444445

### Dummy Classifiers

DummyClassifier is a classifier that makes predictions using simple rules, which can be useful as a baseline for comparison against actual classifiers, especially with imbalanced classes.

In [7]:
from sklearn.dummy import DummyClassifier

# Negative class (0) is most frequent
dummy_majority = DummyClassifier(strategy = 'most_frequent').fit(X_train, y_train)
# Therefore the dummy 'most_frequent' classifier always predicts class 0
y_dummy_predictions = dummy_majority.predict(X_test)



In [8]:
dummy_majority.score(X_test, y_test)

0.9044444444444445

In [9]:
#svm = SVC(kernel='rbf', C=1, gamma=1).fit(X_train, y_train)
svm = SVC(kernel='linear', C=1, gamma=1).fit(X_train, y_train)
svm.score(X_test, y_test)

0.9777777777777777

### Confusion matrices

#### Binary (two-class) confusion matrix

In [10]:
from sklearn.metrics import confusion_matrix

# Negative class (0) is most frequent
dummy_majority = DummyClassifier(strategy = 'most_frequent').fit(X_train, y_train)
y_majority_predicted = dummy_majority.predict(X_test)
confusion = confusion_matrix(y_test, y_majority_predicted)

print('Most frequent class (dummy classifier)\n', confusion)

Most frequent class (dummy classifier)
 [[407   0]
 [ 43   0]]


In [11]:
# produces random predictions w/ same class proportion as training set
dummy_classprop = DummyClassifier(strategy='stratified').fit(X_train, y_train)
y_classprop_predicted = dummy_classprop.predict(X_test)
confusion = confusion_matrix(y_test, y_classprop_predicted)

print('Random class-proportional prediction (dummy classifier)\n', confusion)

Random class-proportional prediction (dummy classifier)
 [[354  53]
 [ 35   8]]


In [12]:
svm = SVC(kernel='linear', C=1).fit(X_train, y_train)
svm_predicted = svm.predict(X_test)
confusion = confusion_matrix(y_test, svm_predicted)

print('Support vector machine classifier (linear kernel, C=1)\n', confusion)

Support vector machine classifier (linear kernel, C=1)
 [[402   5]
 [  5  38]]


In [13]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)
lr_predicted = lr.predict(X_test)
confusion = confusion_matrix(y_test, lr_predicted)

print('Logistic regression classifier (default settings)\n', confusion)

Logistic regression classifier (default settings)
 [[403   4]
 [  7  36]]


In [14]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(max_depth=2).fit(X_train, y_train)
tree_predicted = dt.predict(X_test)
confusion = confusion_matrix(y_test, tree_predicted)

print('Decision tree classifier (max_depth = 2)\n', confusion)

Decision tree classifier (max_depth = 2)
 [[400   7]
 [ 17  26]]


### Evalucion de Metricas para Clasificacion Binaria

In [15]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Accuracy = TP + TN / (TP + TN + FP + FN)
# Precision = TP / (TP + FP)
# Recall = TP / (TP + FN)  Also known as sensitivity, or True Positive Rate
# F1 = 2 * Precision * Recall / (Precision + Recall) 
print('Accuracy: {:.2f}'.format(accuracy_score(y_test, tree_predicted)))
print('Precision: {:.2f}'.format(precision_score(y_test, tree_predicted)))
print('Recall: {:.2f}'.format(recall_score(y_test, tree_predicted)))
print('F1: {:.2f}'.format(f1_score(y_test, tree_predicted)))

Accuracy: 0.95
Precision: 0.79
Recall: 0.60
F1: 0.68


In [16]:
# Combined report with all above metrics
from sklearn.metrics import classification_report

print(classification_report(y_test, tree_predicted, target_names=['not 1', '1']))

              precision    recall  f1-score   support

       not 1       0.96      0.98      0.97       407
           1       0.79      0.60      0.68        43

    accuracy                           0.95       450
   macro avg       0.87      0.79      0.83       450
weighted avg       0.94      0.95      0.94       450



In [17]:
print('Random class-proportional (dummy)\n', 
      classification_report(y_test, y_classprop_predicted, target_names=['not 1', '1']))
print('SVM\n', 
      classification_report(y_test, svm_predicted, target_names = ['not 1', '1']))
print('Logistic regression\n', 
      classification_report(y_test, lr_predicted, target_names = ['not 1', '1']))
print('Decision tree\n', 
      classification_report(y_test, tree_predicted, target_names = ['not 1', '1']))

Random class-proportional (dummy)
               precision    recall  f1-score   support

       not 1       0.91      0.87      0.89       407
           1       0.13      0.19      0.15        43

    accuracy                           0.80       450
   macro avg       0.52      0.53      0.52       450
weighted avg       0.84      0.80      0.82       450

SVM
               precision    recall  f1-score   support

       not 1       0.99      0.99      0.99       407
           1       0.88      0.88      0.88        43

    accuracy                           0.98       450
   macro avg       0.94      0.94      0.94       450
weighted avg       0.98      0.98      0.98       450

Logistic regression
               precision    recall  f1-score   support

       not 1       0.98      0.99      0.99       407
           1       0.90      0.84      0.87        43

    accuracy                           0.98       450
   macro avg       0.94      0.91      0.93       450
weighted avg 

### Probabilidad de la Clasificacion / UNcertainty

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)#y_scores_lr = lr.fit(X_train, y_train).decision_function(X_test)

y_scores_lr = lr.decision_function(X_test)

y_scores_svm = svm.decision_function(X_test)

y_score_list = list(zip(y_test[0:20], y_scores_lr[0:20]))

# show the decision_function scores for first 20 instances
y_score_list

[(0, -34.623990877075684),
 (0, -18.00808307484269),
 (0, -20.992543317973343),
 (0, -36.813388500540924),
 (0, -39.333171719298946),
 (0, -18.86102495864273),
 (1, 6.004206187289107),
 (0, -32.72088256722887),
 (0, -35.48461746167787),
 (0, -37.3524950803254),
 (0, -25.039018562547056),
 (0, -21.401433504229473),
 (0, -36.5033850944784),
 (0, -15.92155914060798),
 (0, -12.839479768638835),
 (0, -13.725129270476636),
 (1, 14.76520754474975),
 (0, -39.40182576143243),
 (0, -26.10268452796355),
 (0, -28.43607500740053)]

In [19]:
y_scores_lr

array([-34.62399088, -18.00808307, -20.99254332, -36.8133885 ,
       -39.33317172, -18.86102496,   6.00420619, -32.72088257,
       -35.48461746, -37.35249508, -25.03901856, -21.4014335 ,
       -36.50338509, -15.92155914, -12.83947977, -13.72512927,
        14.76520754, -39.40182576, -26.10268453, -28.43607501,
       -30.27571209, -24.77020923, -27.35397178, -19.00166346,
       -43.53568846, -28.84102471, -27.99923993, -42.33509357,
       -34.73262351, -34.01686634, -28.28476277, -43.16619521,
       -25.52784251, -16.27230862, -19.41817362, -28.23397091,
       -30.77468554,  -3.08918772, -33.14115111,  19.07593509,
        -6.07642583, -27.38785105, -33.49039831, -44.44991063,
        -2.14012131, -49.68152331, -38.64062169, -28.30807648,
       -29.76607278,  12.43838191, -33.14510211, -12.78463489,
       -34.57179879, -31.12155193, -53.51261613, -45.55657898,
       -18.68855952,   2.20726414, -42.29358743, -41.39456673,
       -28.41566229,  -6.66347688, -24.89128404, -22.78

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)
#y_proba_lr = lr.fit(X_train, y_train).predict_proba(X_test)

y_proba_lr = lr.predict_proba(X_test)


y_proba_list = list(zip(y_test[0:20], y_proba_lr[0:20,1]))

# show the probability of positive class for first 20 instances
y_proba_list

[(0, 9.183153010235858e-16),
 (0, 1.5107370646252717e-08),
 (0, 7.639312492244669e-10),
 (0, 1.0283679051181369e-16),
 (0, 8.276001611424916e-18),
 (0, 6.43814768290449e-09),
 (1, 0.9975377297626462),
 (0, 6.15887313192343e-15),
 (0, 3.883528028677131e-16),
 (0, 5.998152472576815e-17),
 (0, 1.3356491927724637e-11),
 (0, 5.075461339866245e-10),
 (0, 1.40210740406291e-16),
 (0, 1.2171796115737647e-07),
 (0, 2.653894371621569e-06),
 (0, 1.0945903600852344e-06),
 (1, 0.9999996131419209),
 (0, 7.726885822897216e-18),
 (0, 4.610501276939604e-12),
 (0, 4.4706383298191456e-13)]

In [22]:
print(lr.classes_)
y_proba_lr

[0 1]


array([[1.00000000e+00, 9.18315301e-16],
       [9.99999985e-01, 1.51073706e-08],
       [9.99999999e-01, 7.63931249e-10],
       [1.00000000e+00, 1.02836791e-16],
       [1.00000000e+00, 8.27600161e-18],
       [9.99999994e-01, 6.43814768e-09],
       [2.46227024e-03, 9.97537730e-01],
       [1.00000000e+00, 6.15887313e-15],
       [1.00000000e+00, 3.88352803e-16],
       [1.00000000e+00, 5.99815247e-17],
       [1.00000000e+00, 1.33564919e-11],
       [9.99999999e-01, 5.07546134e-10],
       [1.00000000e+00, 1.40210740e-16],
       [9.99999878e-01, 1.21717961e-07],
       [9.99997346e-01, 2.65389437e-06],
       [9.99998905e-01, 1.09459036e-06],
       [3.86858079e-07, 9.99999613e-01],
       [1.00000000e+00, 7.72688582e-18],
       [1.00000000e+00, 4.61050128e-12],
       [1.00000000e+00, 4.47063833e-13],
       [1.00000000e+00, 7.10272996e-14],
       [1.00000000e+00, 1.74757095e-11],
       [1.00000000e+00, 1.31923146e-12],
       [9.99999994e-01, 5.59348414e-09],
       [1.000000

### Precision-recall curves (Clasificadores Binarios)

In [23]:
from sklearn.metrics import precision_recall_curve

precision, recall, thresholds = precision_recall_curve(y_test, y_scores_lr)

# o con y_scores_lr
# o con y_scores_lr   y_proba_lr[:,1]


#precision, recall, thresholds = precision_recall_curve(y_test, y_scores_svm)

#dinf threshold closes to zero
closest_zero = np.argmin(np.abs(thresholds))
closest_zero_p = precision[closest_zero]
closest_zero_r = recall[closest_zero]

plt.figure()
plt.xlim([0.0, 1.01])
plt.ylim([0.0, 1.01])
plt.plot(precision, recall, label='Precision-Recall Curve')
plt.plot(closest_zero_p, closest_zero_r, 'o', markersize = 12, fillstyle = 'none', c='r', mew=3)
plt.xlabel('Precision', fontsize=16)
plt.ylabel('Recall', fontsize=16)
#plt.axes().set_aspect('equal')
plt.show()

<IPython.core.display.Javascript object>

In [24]:
thresholds

array([-7.50917635, -6.76031864, -6.73503154, -6.66347688, -6.28978072,
       -6.07642583, -6.07423401, -6.05775632, -5.38360783, -5.00820258,
       -4.35105819, -4.14040449, -3.84468289, -3.68201169, -3.08918772,
       -2.80926018, -2.14012131, -1.76791708, -1.44490303, -1.05709146,
       -0.99490626, -0.7946741 , -0.39388215, -0.14042904,  0.82627715,
        1.60036479,  1.97917477,  2.20726414,  2.79290384,  3.24424974,
        3.56886338,  5.40081252,  5.60752884,  5.74998705,  5.90162202,
        6.00420619,  6.0181442 ,  6.5502193 ,  6.56802754,  6.57145111,
        7.00181476,  7.31445023,  7.62479059,  7.78595428,  7.79311679,
        8.109191  ,  8.11016451,  8.22010639,  8.22094275,  8.31298888,
        9.01963796,  9.83068766, 10.68118196, 11.0374443 , 11.0457981 ,
       12.43838191, 13.59018714, 14.121181  , 14.1826806 , 14.46517228,
       14.76520754, 19.07593509, 19.35435808, 24.57706133])

### ROC curves, Area-Under-Curve (AUC)

In [38]:
from sklearn.metrics import roc_curve, auc
from sklearn.linear_model import LogisticRegression



#X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)

lr = LogisticRegression(C=1, max_iter=1000000).fit(X_train, y_train)

y_score_lr = lr.decision_function(X_test)
fpr_lr, tpr_lr, treshold = roc_curve(y_test, y_score_lr)
roc_auc_lr = auc(fpr_lr, tpr_lr)

print(roc_auc_lr)

#CAMBIAR C SUBIRLO Y BAJARLO PARA VER CON CUAL QUEDARSE ... IGUAL QUE CON ACCURACY
#CAMBIAR C SUBIRLO Y BAJARLO PARA VER CON CUAL QUEDARSE ... IGUAL QUE CON ACCURACY


plt.figure()
#plt.xlim([-0.01, 1.00])
#plt.ylim([-0.01, 1.01])
plt.plot(fpr_lr, tpr_lr, lw=3, label='LogRegr ROC curve (area = {:0.2f})'.format(roc_auc_lr))
plt.xlabel('False Positive Rate', fontsize=16)
plt.ylabel('True Positive Rate', fontsize=16)
plt.title('ROC curve (1-of-10 digits classifier)', fontsize=16)
plt.legend(loc='lower right', fontsize=13)
plt.plot([0, 1], [0, 1], color='navy', lw=3, linestyle='--')
#plt.axes().set_aspect('equal')
plt.show()

0.9927432718130392


<IPython.core.display.Javascript object>

In [39]:
from matplotlib import cm

X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)

plt.figure()
plt.xlim([-0.01, 1.00])
plt.ylim([-0.01, 1.01])
for g in [0.01, 0.1, 0.20, 1]:
    svm = SVC(gamma=g).fit(X_train, y_train)
    y_score_svm = svm.decision_function(X_test)
    fpr_svm, tpr_svm, _ = roc_curve(y_test, y_score_svm)
    roc_auc_svm = auc(fpr_svm, tpr_svm)
    accuracy_svm = svm.score(X_test, y_test)
    print("gamma = {:.2f}  accuracy = {:.2f}   AUC = {:.2f}".format(g, accuracy_svm, 
                                                                    roc_auc_svm))
    plt.plot(fpr_svm, tpr_svm, lw=3, alpha=0.7, 
             label='SVM (gamma = {:0.2f}, area = {:0.2f})'.format(g, roc_auc_svm))

plt.xlabel('False Positive Rate', fontsize=16)
plt.ylabel('True Positive Rate (Recall)', fontsize=16)
plt.plot([0, 1], [0, 1], color='k', lw=0.5, linestyle='--')
plt.legend(loc="lower right", fontsize=11)
plt.title('ROC curve: (1-of-10 digits classifier)', fontsize=16)
#plt.axes().set_aspect('equal')

plt.show()

<IPython.core.display.Javascript object>

gamma = 0.01  accuracy = 0.91   AUC = 1.00
gamma = 0.10  accuracy = 0.90   AUC = 0.98
gamma = 0.20  accuracy = 0.90   AUC = 0.66
gamma = 1.00  accuracy = 0.90   AUC = 0.50


### Evaluation measures for multi-class classification

#### Multi-class confusion matrix

In [25]:
dataset = load_digits()
X, y = dataset.data, dataset.target
X_train_mc, X_test_mc, y_train_mc, y_test_mc = train_test_split(X, y, random_state=0)


svm = SVC(kernel = 'linear').fit(X_train_mc, y_train_mc)
svm_predicted_mc = svm.predict(X_test_mc)
confusion_mc = confusion_matrix(y_test_mc, svm_predicted_mc)

print(confusion_mc)


df_cm = pd.DataFrame(confusion_mc)
                     #index = [i for i in range(0,10)], columns = [i for i in range(0,10)])

plt.figure(figsize=(5.5,4))
sns.heatmap(df_cm, annot=True)
plt.title('SVM Linear Kernel \nAccuracy:{0:.3f}'.format(accuracy_score(y_test_mc, 
                                                                       svm_predicted_mc)))
plt.ylabel('True label')
plt.xlabel('Predicted label')



[[37  0  0  0  0  0  0  0  0  0]
 [ 0 42  0  0  0  0  0  0  1  0]
 [ 0  0 44  0  0  0  0  0  0  0]
 [ 0  0  0 43  0  0  0  0  1  1]
 [ 0  0  0  0 38  0  0  0  0  0]
 [ 0  0  0  0  0 47  0  0  0  1]
 [ 0  1  0  0  0  0 51  0  0  0]
 [ 0  0  0  0  1  0  0 47  0  0]
 [ 0  3  1  0  0  0  0  0 44  0]
 [ 0  0  0  1  0  1  0  0  1 44]]


<IPython.core.display.Javascript object>

Text(0.5, 14.722222222222216, 'Predicted label')

In [28]:
dataset = load_digits()
X, y = dataset.data, dataset.target
X_train_mc, X_test_mc, y_train_mc, y_test_mc = train_test_split(X, y, random_state=0)


svm = SVC(kernel = 'rbf',C=0.01).fit(X_train_mc, y_train_mc)
svm_predicted_mc1 = svm.predict(X_test_mc)
confusion_mc1 = confusion_matrix(y_test_mc, svm_predicted_mc1)

print(confusion_mc1)


df_cm = pd.DataFrame(confusion_mc1)
                     #index = [i for i in range(0,10)], columns = [i for i in range(0,10)])

plt.figure(figsize=(5.5,4))
sns.heatmap(df_cm, annot=True)
plt.title('SVM Linear Kernel \nAccuracy:{0:.3f}'.format(accuracy_score(y_test_mc, 
                                                                       svm_predicted_mc1)))
plt.ylabel('True label')
plt.xlabel('Predicted label')

[[11  0  0  0 26  0  0  0  0  0]
 [ 0  0  0  0 43  0  0  0  0  0]
 [ 2  0  0  0 42  0  0  0  0  0]
 [19  0  0 11 15  0  0  0  0  0]
 [ 0  0  0  0 38  0  0  0  0  0]
 [ 0  0  0  0 48  0  0  0  0  0]
 [ 0  0  0  0 52  0  0  0  0  0]
 [ 0  0  0  0 48  0  0  0  0  0]
 [ 0  0  0  0 48  0  0  0  0  0]
 [ 1  0  0  0 46  0  0  0  0  0]]


<IPython.core.display.Javascript object>

Text(0.5, 14.722222222222216, 'Predicted label')

In [62]:
confusion_mc

array([[37,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0, 42,  0,  0,  0,  0,  0,  0,  1,  0],
       [ 0,  0, 44,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 43,  0,  0,  0,  0,  1,  1],
       [ 0,  0,  0,  0, 38,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0, 47,  0,  0,  0,  1],
       [ 0,  1,  0,  0,  0,  0, 51,  0,  0,  0],
       [ 0,  0,  0,  0,  1,  0,  0, 47,  0,  0],
       [ 0,  3,  1,  0,  0,  0,  0,  0, 44,  0],
       [ 0,  0,  0,  1,  0,  1,  0,  0,  1, 44]], dtype=int64)

#### Multi-class classification report

In [63]:
print(classification_report(y_test_mc, svm_predicted_mc))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        37
           1       0.91      0.98      0.94        43
           2       0.98      1.00      0.99        44
           3       0.98      0.96      0.97        45
           4       0.97      1.00      0.99        38
           5       0.98      0.98      0.98        48
           6       1.00      0.98      0.99        52
           7       1.00      0.98      0.99        48
           8       0.94      0.92      0.93        48
           9       0.96      0.94      0.95        47

    accuracy                           0.97       450
   macro avg       0.97      0.97      0.97       450
weighted avg       0.97      0.97      0.97       450



#### Micro- vs. macro-averaged metrics

In [64]:
print('Micro-averaged precision = {:.2f} (treat instances equally)'
      .format(precision_score(y_test_mc, svm_predicted_mc, average = 'micro')))
print('Macro-averaged precision = {:.2f} (treat classes equally)'
      .format(precision_score(y_test_mc, svm_predicted_mc, average = 'macro')))

Micro-averaged precision = 0.97 (treat instances equally)
Macro-averaged precision = 0.97 (treat classes equally)


In [65]:
print('Micro-averaged f1 = {:.2f} (treat instances equally)'
      .format(f1_score(y_test_mc, svm_predicted_mc, average = 'micro')))
print('Macro-averaged f1 = {:.2f} (treat classes equally)'
      .format(f1_score(y_test_mc, svm_predicted_mc, average = 'macro')))

Micro-averaged f1 = 0.97 (treat instances equally)
Macro-averaged f1 = 0.97 (treat classes equally)


### Regression evaluation metrics

In [66]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.dummy import DummyRegressor

diabetes = datasets.load_diabetes()

X = diabetes.data[:, None, 6]
y = diabetes.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

lm = LinearRegression().fit(X_train, y_train)
lm_dummy_mean = DummyRegressor(strategy = 'mean').fit(X_train, y_train)

y_predict = lm.predict(X_test)
y_predict_dummy_mean = lm_dummy_mean.predict(X_test)

print('Linear model, coefficients: ', lm.coef_)
print("Mean squared error (dummy): {:.2f}".format(mean_squared_error(y_test, 
                                                                     y_predict_dummy_mean)))
print("Mean squared error (linear model): {:.2f}".format(mean_squared_error(y_test, y_predict)))
print("r2_score (dummy): {:.2f}".format(r2_score(y_test, y_predict_dummy_mean)))
print("r2_score (linear model): {:.2f}".format(r2_score(y_test, y_predict)))

# Plot outputs
plt.scatter(X_test, y_test,  color='black')
plt.plot(X_test, y_predict, color='green', linewidth=2)
plt.plot(X_test, y_predict_dummy_mean, color='red', linestyle = 'dashed', 
         linewidth=2, label = 'dummy')

plt.show()

Linear model, coefficients:  [-698.80206267]
Mean squared error (dummy): 4965.13
Mean squared error (linear model): 4646.74
r2_score (dummy): -0.00
r2_score (linear model): 0.06


<IPython.core.display.Javascript object>

### Model selection using evaluation metrics

#### Cross-validation example

In [29]:
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

dataset = load_digits()
# again, making this a binary problem with 'digit 1' as positive class 
# and 'not 1' as negative class
X, y = dataset.data, dataset.target == 1
clf = SVC(kernel='linear', C=1)

scores=cross_val_score(clf, X, y, cv=5, scoring = 'roc_auc')

print(np.mean(scores))
# accuracy is the default scoring metric
print('Cross-validation (accuracy)', cross_val_score(clf, X, y, cv=5))

# use AUC as scoring metric
print('Cross-validation (AUC)', cross_val_score(clf, X, y, cv=5, scoring = 'roc_auc'))
# use recall as scoring metric
print('Cross-validation (recall)', cross_val_score(clf, X, y, cv=5, scoring = 'recall'))

0.9878624754785745
Cross-validation (accuracy) [0.91944444 0.98611111 0.97214485 0.97493036 0.96935933]
Cross-validation (AUC) [0.9641871  0.9976571  0.99372205 0.99699002 0.98675611]
Cross-validation (recall) [0.81081081 0.89189189 0.83333333 0.83333333 0.83333333]


#### Grid search example

In [74]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score

dataset = load_digits()
X, y = dataset.data, dataset.target == 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = SVC(kernel='rbf')
grid_values = {'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.05, 0.1, 1, 10]}

# default metric to optimize over grid parameters: accuracy
grid_clf_acc = GridSearchCV(clf, param_grid = grid_values, cv=5)
grid_clf_acc.fit(X_train, y_train)
y_decision_fn_scores_acc = grid_clf_acc.decision_function(X_test) 

print('Grid best parameter (max. accuracy): ', grid_clf_acc.best_params_)
print('Grid best score (accuracy): ', grid_clf_acc.best_score_)



Grid best parameter (max. accuracy):  {'gamma': 0.001}
Grid best score (accuracy):  0.9985157648354676


In [80]:

# alternative metric to optimize over grid parameters: AUC
grid_clf_auc = GridSearchCV(clf, param_grid = grid_values, scoring = 'recall',cv=10)
grid_clf_auc.fit(X_train, y_train)
y_decision_fn_scores_auc = grid_clf_auc.decision_function(X_test) 

print('Test set AUC: ', roc_auc_score(y_test, y_decision_fn_scores_auc))
print('Grid best parameter (max. AUC): ', grid_clf_auc.best_params_)
print('Grid best score (AUC): ', grid_clf_auc.best_score_)

Test set AUC:  0.99982858122393
Grid best parameter (max. AUC):  {'gamma': 0.001}
Grid best score (AUC):  0.9928571428571429


### Two-feature classification example using the digits dataset

#### Optimizing a classifier using different evaluation metrics

In [53]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from adspy_shared_utilities import plot_class_regions_for_classifier_subplot
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV


dataset = load_digits()
X, y = dataset.data, dataset.target == 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Create a two-feature input vector matching the example plot above
# We jitter the points (add a small amount of random noise) in case there are areas
# in feature space where many instances have the same features.
jitter_delta = 0.25
X_twovar_train = X_train[:,[20,59]]+ np.random.rand(X_train.shape[0], 2) - jitter_delta
X_twovar_test  = X_test[:,[20,59]] + np.random.rand(X_test.shape[0], 2) - jitter_delta

clf = SVC(kernel = 'linear').fit(X_twovar_train, y_train)
grid_values = {'class_weight':['balanced', {1:2},{1:3},{1:4},{1:5},{1:10},{1:20},{1:50}]}
plt.figure(figsize=(9,6))
for i, eval_metric in enumerate(('precision','recall', 'f1','roc_auc')):
    grid_clf_custom = GridSearchCV(clf, param_grid=grid_values, scoring=eval_metric)
    grid_clf_custom.fit(X_twovar_train, y_train)
    print('Grid best parameter (max. {0}): {1}'
          .format(eval_metric, grid_clf_custom.best_params_))
    print('Grid best score ({0}): {1}'
          .format(eval_metric, grid_clf_custom.best_score_))
    plt.subplots_adjust(wspace=0.3, hspace=0.3)
    plot_class_regions_for_classifier_subplot(grid_clf_custom, X_twovar_test, y_test, None,
                                             None, None,  plt.subplot(2, 2, i+1))
    
    plt.title(eval_metric+'-oriented SVC')
plt.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

Grid best parameter (max. precision): {'class_weight': {1: 2}}
Grid best score (precision): 0.5291666666666667
Grid best parameter (max. recall): {'class_weight': {1: 50}}
Grid best score (recall): 0.9285714285714285
Grid best parameter (max. f1): {'class_weight': {1: 3}}
Grid best score (f1): 0.5537859855866596
Grid best parameter (max. roc_auc): {'class_weight': {1: 4}}
Grid best score (roc_auc): 0.8943662478460668
