In [1]:
import os
import shutil

import tensorflow as tf

import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [2]:
def true_positive(y_true, y_pred):
    """Count the positions where both the true and the predicted label are 1."""
    return sum(
        1
        for actual, predicted in zip(y_true, y_pred)
        if actual == 1 and predicted == 1
    )

def true_negative(y_true, y_pred):
    """Count the positions where both the true and the predicted label are 0."""
    return sum(
        1
        for actual, predicted in zip(y_true, y_pred)
        if actual == 0 and predicted == 0
    )

def false_positive(y_true, y_pred):
    """Count the positions where the true label is 0 but the prediction is 1."""
    return sum(
        1
        for actual, predicted in zip(y_true, y_pred)
        if actual == 0 and predicted == 1
    )

def false_negative(y_true, y_pred):
    """Count the positions where the true label is 1 but the prediction is 0."""
    return sum(
        1
        for actual, predicted in zip(y_true, y_pred)
        if actual == 1 and predicted == 0
    )


def accuracy(y_true, y_pred):
    """Return the fraction of positions where the prediction equals the true label.

    The count of matching pairs is divided by ``len(y_true)``.
    """
    matches = sum(
        1 for actual, predicted in zip(y_true, y_pred) if actual == predicted
    )
    return matches / len(y_true)


# Precision = TP / (TP + FP)
# Macro averaged precision: calculate precision for all classes individually and then average them
# Micro averaged precision: sum the class-wise true positives and false positives over all classes, then compute one overall precision from those totals
def macro_precision(y_true, y_pred):
    """Return the macro-averaged precision, rounded to two decimals.

    Precision (TP / (TP + FP)) is computed one-vs-rest for every class that
    occurs in ``y_true``; the per-class values are then averaged with equal
    weight.

    Parameters
    ----------
    y_true, y_pred : list, NumPy array or pandas Series
        True and predicted labels, compared position by position.

    Returns
    -------
    float
        Mean per-class precision rounded to 2 decimals; 0.0 for empty input.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.

    Notes
    -----
    Labels that appear only in ``y_pred`` are not treated as classes of their
    own, so they contribute no term to the average.
    """
    true_labels = np.asarray(y_true)
    pred_labels = np.asarray(y_pred)
    if true_labels.shape != pred_labels.shape:
        raise ValueError("y_true and y_pred must have the same shape")

    # np.unique accepts any array-like, so plain lists and arrays work too
    # (Series.unique() is pandas-only).
    classes = np.unique(true_labels)
    if len(classes) == 0:
        return 0.0

    precision = 0.0
    for class_ in classes:
        # one-vs-rest: the current class is positive, everything else negative
        actual_is_class = true_labels == class_
        predicted_as_class = pred_labels == class_
        tp = int(np.count_nonzero(actual_is_class & predicted_as_class))
        fp = int(np.count_nonzero(~actual_is_class & predicted_as_class))
        # the epsilon keeps the ratio defined when the class is never predicted
        precision += tp / (tp + fp + 1e-6)

    return round(precision / len(classes), 2)

def micro_precision(y_true, y_pred):
    """Return the micro-averaged precision, rounded to six decimals.

    True positives and false positives are counted one-vs-rest for every
    class that occurs in ``y_true`` and summed over all classes; the result
    is ``TP_total / (TP_total + FP_total)``.

    Parameters
    ----------
    y_true, y_pred : list, NumPy array or pandas Series
        True and predicted labels, compared position by position.

    Returns
    -------
    float
        Overall precision rounded to 6 decimals; 0.0 when there is nothing
        to divide by (no prediction falls into any class of ``y_true``).

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    true_labels = np.asarray(y_true)
    pred_labels = np.asarray(y_pred)
    if true_labels.shape != pred_labels.shape:
        raise ValueError("y_true and y_pred must have the same shape")

    tp = 0
    fp = 0
    # np.unique accepts any array-like, so plain lists and arrays work too
    for class_ in np.unique(true_labels):
        # one-vs-rest: the current class is positive, everything else negative
        actual_is_class = true_labels == class_
        predicted_as_class = pred_labels == class_
        # accumulate the overall true-positive count
        tp += int(np.count_nonzero(actual_is_class & predicted_as_class))
        # accumulate the overall false-positive count
        fp += int(np.count_nonzero(~actual_is_class & predicted_as_class))

    if tp + fp == 0:
        # avoid ZeroDivisionError; mirrors the 0 the macro variant yields
        return 0.0
    return round(tp / (tp + fp), 6)


# Recall = TP / (TP + FN)
def macro_recall(y_true, y_pred):
    """Return the macro-averaged recall, rounded to two decimals.

    Recall (TP / (TP + FN)) is computed one-vs-rest for every class that
    occurs in ``y_true``; the per-class values are then averaged with equal
    weight.

    Parameters
    ----------
    y_true, y_pred : list, NumPy array or pandas Series
        True and predicted labels, compared position by position.

    Returns
    -------
    float
        Mean per-class recall rounded to 2 decimals; 0.0 for empty input.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    true_labels = np.asarray(y_true)
    pred_labels = np.asarray(y_pred)
    if true_labels.shape != pred_labels.shape:
        raise ValueError("y_true and y_pred must have the same shape")

    # np.unique accepts any array-like, so plain lists and arrays work too
    # (Series.unique() is pandas-only).
    classes = np.unique(true_labels)
    if len(classes) == 0:
        return 0.0

    recall = 0.0
    for class_ in classes:
        # one-vs-rest: the current class is positive, everything else negative
        actual_is_class = true_labels == class_
        predicted_as_class = pred_labels == class_
        tp = int(np.count_nonzero(actual_is_class & predicted_as_class))
        fn = int(np.count_nonzero(actual_is_class & ~predicted_as_class))
        # the epsilon mirrors the precision helpers and guards the division
        recall += tp / (tp + fn + 1e-6)

    return round(recall / len(classes), 2)


def micro_recall(y_true, y_pred):
    """Return the micro-averaged recall, rounded to six decimals.

    True positives and false negatives are counted one-vs-rest for every
    class that occurs in ``y_true`` and summed over all classes; the result
    is ``TP_total / (TP_total + FN_total)``.

    Parameters
    ----------
    y_true, y_pred : list, NumPy array or pandas Series
        True and predicted labels, compared position by position.

    Returns
    -------
    float
        Overall recall rounded to 6 decimals; 0.0 for empty input.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    true_labels = np.asarray(y_true)
    pred_labels = np.asarray(y_pred)
    if true_labels.shape != pred_labels.shape:
        raise ValueError("y_true and y_pred must have the same shape")

    tp = 0
    fn = 0
    # np.unique accepts any array-like, so plain lists and arrays work too
    for class_ in np.unique(true_labels):
        # one-vs-rest: the current class is positive, everything else negative
        actual_is_class = true_labels == class_
        predicted_as_class = pred_labels == class_
        # accumulate the overall true-positive count
        tp += int(np.count_nonzero(actual_is_class & predicted_as_class))
        # accumulate the overall false-negative count
        fn += int(np.count_nonzero(actual_is_class & ~predicted_as_class))

    if tp + fn == 0:
        # only reachable for empty input; avoid ZeroDivisionError
        return 0.0
    return round(tp / (tp + fn), 6)


# F1 = 2PR / (P + R)
# Macro averaged F1 Score: calculate f1 score of every class and then average them
# Micro averaged F1 Score: calculate micro-averaged precision score and micro-averaged recall score and then take their harmonic mean
def macro_f1(y_true, y_pred):
    """Return the macro-averaged F1 score, rounded to two decimals.

    For every class that occurs in ``y_true`` the one-vs-rest precision and
    recall are combined into ``2PR / (P + R)``; the per-class F1 values are
    then averaged with equal weight.

    Parameters
    ----------
    y_true, y_pred : list, NumPy array or pandas Series
        True and predicted labels, compared position by position.

    Returns
    -------
    float
        Mean per-class F1 rounded to 2 decimals; 0.0 for empty input.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    true_labels = np.asarray(y_true)
    pred_labels = np.asarray(y_pred)
    if true_labels.shape != pred_labels.shape:
        raise ValueError("y_true and y_pred must have the same shape")

    # np.unique accepts any array-like, so plain lists and arrays work too
    # (Series.unique() is pandas-only).
    classes = np.unique(true_labels)
    if len(classes) == 0:
        return 0.0

    f1 = 0.0
    for class_ in classes:
        # one-vs-rest: the current class is positive, everything else negative
        actual_is_class = true_labels == class_
        predicted_as_class = pred_labels == class_
        tp = int(np.count_nonzero(actual_is_class & predicted_as_class))
        fn = int(np.count_nonzero(actual_is_class & ~predicted_as_class))
        fp = int(np.count_nonzero(~actual_is_class & predicted_as_class))
        # the epsilons keep every ratio defined when a count is zero
        temp_recall = tp / (tp + fn + 1e-6)
        temp_precision = tp / (tp + fp + 1e-6)
        f1 += 2 * temp_precision * temp_recall / (temp_precision + temp_recall + 1e-6)

    return round(f1 / len(classes), 2)

def micro_f1(y_true, y_pred):
    """Return the micro-averaged F1 score, rounded to six decimals.

    The score is the harmonic mean ``2PR / (P + R)`` of the values returned
    by ``micro_precision`` and ``micro_recall`` for the same inputs.

    Returns 0.0 when both precision and recall are 0 (for example when no
    prediction is correct) instead of raising ZeroDivisionError.
    """
    # micro-averaged precision score
    P = micro_precision(y_true, y_pred)
    # micro-averaged recall score
    R = micro_recall(y_true, y_pred)
    if P + R == 0:
        # harmonic mean is undefined here; report the worst possible score
        return 0.0
    # micro-averaged f1 score
    f1 = 2 * P * R / (P + R)
    return round(f1, 6)


from sklearn.metrics import roc_auc_score
def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
    """Compute a one-vs-rest ROC AUC for every class found in ``actual_class``.

    For each class the labels are binarised (1 for the current class, 0 for
    everything else) and handed to ``sklearn.metrics.roc_auc_score``.

    Parameters
    ----------
    actual_class : iterable of labels
        True labels; its distinct values define the classes that are scored.
    pred_class : iterable of labels
        Predicted labels, in the same order as ``actual_class``.
    average : str, default "macro"
        Passed through unchanged to ``roc_auc_score``.

    Returns
    -------
    dict
        Maps each class label to its one-vs-rest ROC AUC.

    Notes
    -----
    The binarised predictions are hard 0/1 labels, not scores or
    probabilities, so each AUC is computed from those labels alone.
    """
    roc_auc_dict = {}
    for per_class in set(actual_class):
        # Compare against the current class directly. Testing membership in
        # "all other classes" would mark a predicted label that never occurs
        # in actual_class as positive for every class.
        new_actual_class = [1 if x == per_class else 0 for x in actual_class]
        new_pred_class = [1 if x == per_class else 0 for x in pred_class]
        # using the sklearn metrics method to calculate the roc_auc_score
        roc_auc_dict[per_class] = roc_auc_score(new_actual_class, new_pred_class, average = average)
    return roc_auc_dict

In [21]:
# Load the saved predictions. The cells that follow in this notebook read only
# the "y_test" (true label) and "y_pred" (predicted label) columns.
# The former rename mapped both columns onto their own names (a no-op), so it
# has been dropped.
result_df = pd.read_csv("y_preds_hy_5.csv")
result_df.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,y_pred,y_test
0,0,0.903952,0.095999,4.95e-05,1,2
1,1,0.034686,0.965305,8.8e-06,2,2
2,2,0.999055,0.000945,1.08e-07,1,1
3,3,0.024263,0.975735,1.34e-06,2,2
4,4,0.969356,0.030628,1.6e-05,1,1


In [22]:
# Class distribution of the true labels.
result_df.loc[:, "y_test"].value_counts()

1    1446
2     425
3     288
Name: y_test, dtype: int64

In [23]:
# Class distribution of the predicted labels.
result_df.loc[:, "y_pred"].value_counts()

1    1384
2     772
3       3
Name: y_pred, dtype: int64

In [24]:
# Evaluate the saved predictions with the hand-written metric helpers.

# Stored as `acc_score` (not `accuracy_score`) so the `accuracy_score`
# function imported from sklearn.metrics is not overwritten by a float.
acc_score = round(accuracy(result_df["y_test"], result_df["y_pred"]), 2)

# The macro_* helpers already round to 2 decimals and the micro_* helpers to
# 6 decimals, so no second round() is applied here.
mac_precision = macro_precision(result_df["y_test"], result_df["y_pred"])
mic_precision = micro_precision(result_df["y_test"], result_df["y_pred"])

mac_recall = macro_recall(result_df["y_test"], result_df["y_pred"])
mic_recall = micro_recall(result_df["y_test"], result_df["y_pred"])

mac_f1 = macro_f1(result_df["y_test"], result_df["y_pred"])
mic_f1 = micro_f1(result_df["y_test"], result_df["y_pred"])

# One-vs-rest ROC AUC per class, keyed by class label.
roc_auc_dict = roc_auc_score_multiclass(result_df["y_test"], result_df["y_pred"])

In [25]:
# Shorthand names for the true and predicted label columns.
y_true = result_df["y_test"]
y_pred = result_df["y_pred"]

In [27]:
# Confusion matrix for the three classes: rows are the true class, columns the
# predicted class. Each cell counts the samples with that (true, predicted)
# pair; labels outside `class_labels` are ignored, as before.
class_labels = [1, 2, 3]
cell_counts = np.array([
    [int(((y_true == true_cls) & (y_pred == pred_cls)).sum()) for pred_cls in class_labels]
    for true_cls in class_labels
])

confusion_matrix = pd.DataFrame(
    cell_counts,
    columns = ["pred{}".format(cls) for cls in class_labels],
    index = ["true{}".format(cls) for cls in class_labels],
)

# NOTE(review): recall and F1 use average='weighted' while precision uses
# average='macro', so the three numbers are not directly comparable.
# Confirm whether the mix is intentional.
print("Recall:", recall_score(y_true, y_pred, average = 'weighted'))
print("Precision:",precision_score(y_true, y_pred, average = 'macro'))
print("F1_Score:",f1_score(y_true, y_pred, average = 'weighted'))

confusion_matrix

Recall: 0.8179712830013895
Precision: 0.8347022971637367
F1_Score: 0.7791657084800875


Unnamed: 0,pred1,pred2,pred3
true1,1361,85,0
true2,23,402,0
true3,0,285,3
