# Model evaluation

In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

dataset = load_breast_cancer()
x, y = dataset.data, dataset.target

In [4]:
# count classification
for class_name, class_count in zip(dataset.target_names, np.bincount(dataset.target)):
    print(class_name, class_count)

malignant 212
benign 357


# Evaluation for binary classification

In [8]:
# without specifiying test_size, training set is set as 0.75
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state = 0)
print(x_train.size, x_test.size, y_train.size, y_test.size)
print('Number of test set', y_test.size)
print('Number of malignant samples', sum(y_test==0))
print('Number of benign sanples', sum(y_test==1))

12780 4290 426 143
Number of test set 143
Number of malignant samples 53
Number of benign sanples 90


In [25]:
# Accuracy of support vector machine classifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
svm = SVC(kernel='linear', C=2).fit(x_train,y_train)
svm.score(x_test, y_test)

0.965034965034965

In [31]:
# Support Vetor Machine
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
svm_predicted = svm.predict(x_test)
confusion = confusion_matrix(y_test, svm_predicted)
print('SVM classifier\n', confusion)
tn, fp, fn, tp = confusion_matrix(y_test, svm_predicted).ravel()
print('tn=', tn, 'fp=', fp, 'fn=', fn, 'tp=', tp)

SVM classifier
 [[52  1]
 [ 4 86]]
tn= 52 fp= 1 fn= 4 tp= 86


In [40]:
# LogisticRegrassion
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(solver='liblinear').fit(x_train,y_train)
lr_predicted = lr.predict(x_test)
çonfusion = confusion_matrix(y_test, lr_predicted)
print('Logistic Regrassion classifier (default setting)\n', confusion)

Logistic Regrassion classifier (default setting)
 [[52  1]
 [ 4 86]]


In [49]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier(max_depth=2).fit(x_train,y_train)
tree_predicted = dt.predict(x_test)
confusion = confusion_matrix(y_test, tree_predicted)
print('Decision tree classifier (max_depth = 2)\n', confusion)

Decision tree classifier (max_depth = 2)
 [[48  5]
 [ 4 86]]


In [46]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
print('Accuracy: {:.2f}'.format(accuracy_score(y_test, lr_predicted)))
print('Precision: {:.2f}'.format(precision_score(y_test, lr_predicted)))
print('Recall: {:.2f}'.format(recall_score(y_test, lr_predicted)))
print('F1: {:.2f}'.format(f1_score(y_test, lr_predicted)))

Accuracy: 0.96
Precision: 0.99
Recall: 0.94
F1: 0.97


In [51]:
# combined report with all above metrics
from sklearn.metrics import classification_report

print('SVM\n',
      classification_report(y_test, svm_predicted, target_names = ['malignant', 'benign']))
print('LogisticRegrssion\n',
      classification_report(y_test, lr_predicted, target_names = ['malignant', 'benign']))
print('Decision tree\n',
      classification_report(y_test, tree_predicted, target_names = ['malignant', 'benign']))

SVM
               precision    recall  f1-score   support

   malignant       0.93      0.98      0.95        53
      benign       0.99      0.96      0.97        90

    accuracy                           0.97       143
   macro avg       0.96      0.97      0.96       143
weighted avg       0.97      0.97      0.97       143

LogisticRegrssion
               precision    recall  f1-score   support

   malignant       0.91      0.98      0.95        53
      benign       0.99      0.94      0.97        90

    accuracy                           0.96       143
   macro avg       0.95      0.96      0.96       143
weighted avg       0.96      0.96      0.96       143

Decision tree
               precision    recall  f1-score   support

   malignant       0.92      0.91      0.91        53
      benign       0.95      0.96      0.95        90

    accuracy                           0.94       143
   macro avg       0.93      0.93      0.93       143
weighted avg       0.94      0.94  

# Regression Metrics
# correlation coefficient

In [53]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.datasets import load_boston

boston = load_boston()

x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target)

ridge = Ridge(normalize=True).fit(x_train,y_train)
y_predict = ridge.predict(x_test)

print('Ridge model, coefficients', ridge.coef_)
print('Mean squared error (ridge model): {:.2f}'.format(mean_squared_error(y_test, y_predict)))
print('Mean absolute error (ridge model): {:.2f}'.format(mean_absolute_error(y_test, y_predict)))
print('R2_score (ridge model): {:.2f}'.format(r2_score(y_test, y_predict)))

Ridge model, coefficients [-6.46192162e-02  1.50846781e-02 -7.05119494e-02  2.74513967e+00
 -3.46545597e+00  3.06531611e+00 -1.03108368e-02 -2.87236857e-01
  3.59549231e-03 -2.18161321e-03 -4.98177306e-01  6.83772236e-03
 -2.84364775e-01]
Mean squared error (ridge model): 22.69
Mean absolute error (ridge model): 3.48
R2_score (ridge model): 0.71
