In [4]:
import numpy as np
import os
import inspect
import sys
import pandas as pd
from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, roc_auc_score, classification_report

In [5]:
# Root directory of the training run being evaluated; the evaluation loop reads
# `fold_<k>/test_prediction.csv` beneath it for each fold.
# Defaults to the original cluster path. Set the SEIZURENET_RESULTS_DIR
# environment variable to evaluate a different run without editing this cell.
results_dir = os.environ.get(
    'SEIZURENET_RESULTS_DIR',
    '/share/pi/rubin/siyitang/eeg/output/SeizureNet/train/train-05',
)

In [6]:
# Number of folds to evaluate; the evaluation loop iterates fold indices
# 0 .. NUM_FOLDS - 1.
NUM_FOLDS = 5

In [29]:
# Per-fold scores (macro-averaged F1 / precision / recall, plus accuracy).
# One entry is appended per fold.
all_f1 = []
all_precision = []
all_recall = []
all_acc = []

for fold_idx in range(NUM_FOLDS):
    print('Fold {}...'.format(fold_idx))

    # Get true labels. Each record is a comma-separated line: field 1 is the
    # integer class label, and fields 0 and 2 joined by '_' form the file
    # identifier that is matched against the predictions' 'file' column.
    curr_true_txt = '../data/fold' + str(fold_idx) + '_testSet_seizure_files.txt'
    with open(curr_true_txt, 'r') as f:
        # Skip stray blank lines so they are not mistaken for records.
        true_str = [line for line in f if line.strip()]

    # Get predicted labels
    preds_file = os.path.join(results_dir, 'fold_' + str(fold_idx), 'test_prediction.csv')
    # dropna() keeps the original row labels. Re-number the rows so that the
    # label-based lookups `pred_files[i]` / `pred_labels[i]` below stay valid
    # even when an all-empty row is dropped from the middle of the CSV
    # (otherwise they would raise KeyError at the dropped label).
    df_preds = pd.read_csv(preds_file).dropna(how='all').reset_index(drop=True)
    pred_labels = df_preds['seizure_class']
    pred_files = df_preds['file']

    # Sanity check: ground truth and predictions must have the same length.
    assert len(true_str) == len(pred_files), (
        'Fold {}: {} ground-truth records but {} predictions'.format(
            fold_idx, len(true_str), len(pred_files)))

    true_labels_list = []
    pred_labels_list = []
    for i, line in enumerate(true_str):
        # Strip the line terminator only (handles both LF and CRLF files).
        tup = line.rstrip("\r\n").split(",")
        curr_true_file = tup[0] + '_' + tup[2]

        # Double check that both sources list the same file at this position.
        assert curr_true_file == pred_files[i], (
            'Fold {}, row {}: ground-truth file {!r} != predicted file {!r}'.format(
                fold_idx, i, curr_true_file, pred_files[i]))

        true_labels_list.append(int(tup[1]))
        pred_labels_list.append(pred_labels[i])

    f1 = f1_score(y_true=true_labels_list, y_pred=pred_labels_list, average='macro')
    all_f1.append(f1)
    print('F1: {}'.format(f1))

    precision = precision_score(y_true=true_labels_list, y_pred=pred_labels_list, average='macro')
    all_precision.append(precision)
    print('Precision: {}'.format(precision))

    recall = recall_score(y_true=true_labels_list, y_pred=pred_labels_list, average='macro')
    all_recall.append(recall)
    print('Recall: {}'.format(recall))

    acc = accuracy_score(y_true=true_labels_list, y_pred=pred_labels_list)
    all_acc.append(acc)
    print('Acc: {}'.format(acc))

    # Per-class breakdown for this fold.
    print(classification_report(y_true=true_labels_list, y_pred=pred_labels_list))

Fold 0...
F1: 0.6632688447835299
Precision: 0.6666492519993309
Recall: 0.6979257772060181
Acc: 0.6732673267326733
              precision    recall  f1-score   support

           0       0.72      0.83      0.78       199
           1       0.68      0.46      0.55        83
           2       0.75      0.67      0.71         9
           3       0.46      0.38      0.42        69
           4       0.76      0.65      0.70        20
           5       0.88      1.00      0.93        14
           6       0.41      0.90      0.56        10

    accuracy                           0.67       404
   macro avg       0.67      0.70      0.66       404
weighted avg       0.67      0.67      0.66       404

Fold 1...
F1: 0.7596842218239839
Precision: 0.7444347853711156
Recall: 0.7808891467382418
Acc: 0.7326732673267327
              precision    recall  f1-score   support

           0       0.77      0.79      0.78       199
           1       0.72      0.73      0.73        83
           2

In [30]:
# Reduce each list of per-fold scores to its mean across folds.
mean_f1, mean_precision, mean_recall, mean_acc = (
    np.mean(fold_scores)
    for fold_scores in (all_f1, all_precision, all_recall, all_acc)
)

# Report the four averages, one per line.
print(
    'Averaged F1: {}'.format(mean_f1),
    'Averaged precision: {}'.format(mean_precision),
    'Averaged recall: {}'.format(mean_recall),
    'Averaged acc: {}'.format(mean_acc),
    sep='\n',
)


Averaged F1: 0.6921798757009849
Averaged precision: 0.6858278746574531
Averaged recall: 0.7308029106083668
Averaged acc: 0.6888281665621796
