# Data Evaluation

In [1]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_curve, auc, roc_auc_score

In [2]:
def _print_metric(label, value):
    """Print a metric as a heading line, its value, and a trailing blank line."""
    print(label)
    print(value)
    print()


def _plot_labeled_matrix(matrix, title, classes):
    """Draw an integer-valued 2-D array as an annotated heat map on a new figure.

    Parameters:
    matrix  (2d array): Integer values to display, one cell per (row, column)
    title   (str):      Figure title
    classes (list):     Tick labels used on both axes
    """
    plt.figure()
    plt.imshow(matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells above half of the maximum are drawn dark by the colour map, so
    # their annotation switches to white to stay readable.
    thresh = matrix.max() / 2.
    for i, j in itertools.product(range(matrix.shape[0]), range(matrix.shape[1])):
        # The matrices plotted here hold integers, hence the 'd' format.
        plt.text(j, i, format(matrix[i, j], 'd'),
                 horizontalalignment="center",
                 color="white" if matrix[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()


def show_evaluation(y_true, y_pred, y_score=None):
    """Print and plot the evaluation of a binary classifier.

    Prints accuracy, precision, recall, F1-score and a misclassification
    cost, and plots the confusion matrix and the cost matrix. When
    ``y_score`` is given, the ROC curve is plotted and the ROC AUC score is
    printed as well.

    Parameters:
    y_true  (1d array): Ground truth (correct) target values
    y_pred  (1d array): Estimated targets as returned by a classifier
    y_score (1d array): Target scores, can either be probability estimates of
                        the positive class, confidence values, or
                        non-thresholded measure of decisions. Optional; the
                        ROC part is skipped when it is None.

    Returns:
    None. All results are printed or plotted.

    Raises:
    ValueError: if the confusion matrix is not 2x2 (the plots and the cost
                calculation are written for exactly two classes).
    """
    _print_metric('Accuracy Score:', accuracy_score(y_true, y_pred))
    _print_metric('Precision Score:', precision_score(y_true, y_pred))
    _print_metric('Recall Score:', recall_score(y_true, y_pred))
    _print_metric('F1-Score:', f1_score(y_true, y_pred))

    # compute and plot the confusion matrix
    classes = ['Healthy', 'Sick']
    calc_confusion_matrix = confusion_matrix(y_true, y_pred)
    if calc_confusion_matrix.shape != (len(classes), len(classes)):
        raise ValueError(
            'show_evaluation expects exactly two classes, got a confusion '
            'matrix of shape %s' % (calc_confusion_matrix.shape,)
        )
    _plot_labeled_matrix(calc_confusion_matrix, 'Confusion Matrix', classes)

    # cost matrix
    # NOTE(review): scikit-learn's confusion_matrix puts true labels on the
    # rows, so position [0][1] counts samples of the first class predicted as
    # the second one and [1][0] the reverse. If 'Sick' is the positive class,
    # that makes [0][1] the false positives although it is weighted with
    # cost_fn here. The weights are kept at their original positions; confirm
    # which kind of error is meant to cost more.
    cost_tp = 0
    cost_tn = 0
    cost_fn = 1
    cost_fp = 2
    cost_matrix = np.array([[cost_tn, cost_fn], [cost_fp, cost_tp]])
    _plot_labeled_matrix(cost_matrix, 'Cost Matrix', classes)

    # calculate and print cost: every confusion-matrix cell weighted by the
    # cost at the same position
    calc_cost = (calc_confusion_matrix * cost_matrix).sum()
    _print_metric('Cost:', calc_cost)

    if y_score is None:
        # Without scores there is no ROC curve; still display the two
        # matrix figures created above.
        plt.show()
        return

    # Compute the ROC curve and the area under it
    fpr, tpr, _ = roc_curve(y_true, y_score)
    roc_auc = auc(fpr, tpr)

    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--', label='Random Guess')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc='lower right')
    plt.show()
    print()

    # compute and print roc_score
    roc_score = roc_auc_score(y_true, y_score)
    print('ROC Score:')
    print(roc_score)