# Model Evaluation & Improvement


## Splitter (Cross-Validation)


### Importing Libraries


In [217]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set()

### Importing Dataset


In [218]:
from sklearn.datasets import load_breast_cancer

In [219]:
# Load the breast cancer dataset bundled with scikit-learn; the returned
# object exposes the feature matrix as `.data` and the labels as `.target`.
cancer = load_breast_cancer()

### Selecting Model


In [220]:
from sklearn.svm import SVC

In [221]:
# Support-vector classifier with all default hyperparameters.
# NOTE(review): the features are passed to SVC unscaled throughout this
# notebook; SVMs are scale-sensitive, so consider a StandardScaler pipeline.
clf = SVC()

### Hold-out Validation with `train_test_split`


In [222]:
# Feature matrix and target vector used by the hold-out splits that follow.
X, y = cancer.data, cancer.target

In [223]:
# Alternative (not executed): a single 70/30 train/test hold-out split.
# The cells that follow use a three-way train/validation/test split instead.
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [224]:
from sklearn.model_selection import train_test_split

In [225]:
# First split: keep 80% of the rows for training and set the remaining 20%
# aside as a temporary pool.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [226]:
# Second split: halve the temporary pool into a validation set and a test set.
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

In [227]:
# Full feature matrix, before any splitting.
X.shape

(569, 30)

In [228]:
# Training portion of the first split (test_size=0.2 leaves 80% here).
X_train.shape

(455, 30)

In [229]:
# Held-out 20% of the first split; this pool was halved into validation and test.
X_temp.shape

(114, 30)

In [230]:
# Validation half of X_temp.
X_valid.shape

(57, 30)

In [231]:
# Test half of X_temp.
X_test.shape

(57, 30)

In [232]:
# Training: fit the classifier on the training split only, so the validation
# and test splits stay unseen by the model.
clf.fit(X_train, y_train)

#### Evaluating Model


In [233]:
from sklearn.metrics import accuracy_score

In [234]:
# Validation: predict labels for the validation split and display them so
# they can be compared with the true labels shown in the next cell.
y_pred_valid = clf.predict(X_valid)
y_pred_valid

array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0])

In [235]:
# True validation labels, for side-by-side comparison with y_pred_valid.
y_valid

array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0])

In [236]:
# Fraction of validation samples whose predicted label matches the true label.
print(accuracy_score(y_valid, y_pred_valid))

0.9122807017543859


In [237]:
# Testing: predict on the test split, which was used neither for fitting
# nor for the validation check above.
y_pred_test = clf.predict(X_test)

In [238]:
# True test labels.
y_test

array([0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1])

In [239]:
# Fraction of test samples classified correctly.
# NOTE(review): the test split holds only 57 samples, so one misclassification
# moves this score by about 1.75 points; compare it with the validation score
# with that granularity in mind.
print(accuracy_score(y_test, y_pred_test))

0.9824561403508771


### Cross-Validation with `cross_val_score`


In [240]:
from sklearn.model_selection import cross_val_score, train_test_split

In [241]:
# Fresh 80/20 train/test split taken directly from the dataset.
# Note: this rebinds X_train, X_test, y_train and y_test, replacing the
# values produced by the earlier three-way split.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.2,
    random_state=42,
)

In [242]:
# Cross-validate on the training portion only; `cv` is left at its default
# (the recorded output contains five fold scores).
scores = cross_val_score(clf, X_train, y_train)
scores

array([0.92307692, 0.87912088, 0.96703297, 0.86813187, 0.87912088])

In [243]:
# Mean of the per-fold scores.
scores.mean()

0.9032967032967033

In [244]:
# Same estimator, now with 3-fold cross-validation over the full dataset.
# Note: this overwrites the `scores` array from the previous run.
scores = cross_val_score(clf, cancer.data, cancer.target, cv=3)
scores

array([0.85263158, 0.93157895, 0.94708995])

In [245]:
# Mean of the three fold scores.
scores.mean()

0.9104334911352455

### Cross-Validation with `cross_validate`


In [246]:
from sklearn.model_selection import cross_validate

In [247]:
# 10-fold cross-validation that reports two metrics per fold (a single metric
# string such as scoring='accuracy' is accepted as well).
# Note: `scores` is rebound here from an array to the dict that
# cross_validate returns, which also carries fit and score timings.
scores = cross_validate(
    clf,
    cancer.data,
    cancer.target,
    scoring=['accuracy', 'average_precision'],
    cv=10,
)
scores

{'fit_time': array([0.00298405, 0.00397539, 0.00398493, 0.00302935, 0.00206661,
        0.00498343, 0.00298905, 0.00298953, 0.00298738, 0.00298905]),
 'score_time': array([0.00399613, 0.00199509, 0.00099683, 0.0010469 , 0.00291657,
        0.00199437, 0.00199342, 0.00199604, 0.00199389, 0.00099921]),
 'test_accuracy': array([0.89473684, 0.84210526, 0.89473684, 0.92982456, 0.92982456,
        0.92982456, 0.94736842, 0.92982456, 0.92982456, 0.91071429]),
 'test_average_precision': array([0.97979197, 0.97076669, 0.99210049, 0.98058628, 0.99348603,
        0.98882819, 0.98557846, 0.9727489 , 0.99089446, 0.9942693 ])}

In [248]:
# Tabulate the per-fold results: one row per fold, one column per dict key.
pd.DataFrame(scores)

Unnamed: 0,fit_time,score_time,test_accuracy,test_average_precision
0,0.002984,0.003996,0.894737,0.979792
1,0.003975,0.001995,0.842105,0.970767
2,0.003985,0.000997,0.894737,0.9921
3,0.003029,0.001047,0.929825,0.980586
4,0.002067,0.002917,0.929825,0.993486
5,0.004983,0.001994,0.929825,0.988828
6,0.002989,0.001993,0.947368,0.985578
7,0.00299,0.001996,0.929825,0.972749
8,0.002987,0.001994,0.929825,0.990894
9,0.002989,0.000999,0.910714,0.994269


In [249]:
# Mean accuracy over the 10 folds.
pd.DataFrame(scores)['test_accuracy'].mean()

0.9138784461152882

In [250]:
# Mean average precision over the 10 folds.
pd.DataFrame(scores)['test_average_precision'].mean()

0.9849050763460452