In [None]:
from sklearn.metrics import r2_score, f1_score
import pandas as pd
from statistics import harmonic_mean
import os

In [None]:
def return_class_dct(csv_file):
    """Read a classification CSV and map each ``ID`` to its ``Label``.

    The file must contain the columns ``ID`` and ``Label``. When an ID
    occurs more than once, the label from the last occurrence is kept.
    """
    table = pd.read_csv(csv_file)
    return {
        sample_id: label
        for sample_id, label in zip(table['ID'], table['Label'])
    }

In [None]:
def return_mixed_dct(csv_file):
    """Read a "mixed" CSV and map each ``FileName`` to its ``Label``.

    The file must contain the columns ``FileName`` and ``Label``. When a
    file name occurs more than once, the label from the last row wins.
    """
    frame = pd.read_csv(csv_file)
    file_names, labels = frame['FileName'], frame['Label']
    return dict(zip(file_names, labels))

In [None]:
def return_lists(dct_pred, dct_true):
    """Align predictions with ground truth, following the ground-truth key order.

    Returns ``(pred, true)``: two lists of equal length in which position
    ``i`` of each refers to the same key of ``dct_true``. Keys present only
    in ``dct_pred`` are ignored; a key of ``dct_true`` that is missing from
    ``dct_pred`` raises ``KeyError``.
    """
    pred = [dct_pred[key] for key in dct_true]
    true = list(dct_true.values())
    return pred, true

In [None]:
def SeperateFiles(files):
    """Split submission file names into "mixed" and "tel/no-tel" groups.

    A file is recognised purely by its name prefix:

    * ``test_mixed_<name>.<ext>``      -> ``mixed[<name>]``
    * ``test_tel_no-tel_<name>.<ext>`` -> ``tel_no_tel[<name>]``

    ``<name>`` is the text between the prefix and the first ``.``.

    Files matching neither prefix (``.DS_Store``, ``Results.csv``,
    ``.ipynb_checkpoints`` ...) are ignored, so stray directory entries
    cannot be mistaken for a tel/no-tel submission.

    Parameters
    ----------
    files : iterable of str
        Bare file names, e.g. the output of ``os.listdir``.

    Returns
    -------
    (dict, dict)
        ``(mixed, tel_no_tel)``; each maps ``<name>`` to the original file name.
    """
    mixed_prefix = 'test_mixed_'
    tel_no_tel_prefix = 'test_tel_no-tel_'

    mixed = {}
    tel_no_tel = {}
    for file in files:
        if file.startswith(mixed_prefix):
            group, prefix = mixed, mixed_prefix
        elif file.startswith(tel_no_tel_prefix):
            group, prefix = tel_no_tel, tel_no_tel_prefix
        else:
            # Not a submission file: skip it instead of guessing a group.
            continue
        name = file[len(prefix):].split('.')[0]
        group[name] = file
    return mixed, tel_no_tel

In [None]:
def R_squared(true, pred):
    """Coefficient of determination (R^2) of ``pred`` measured against ``true``.

    Thin wrapper around ``r2_score``. The score is returned unmodified, so
    negative values are passed through rather than clamped to 0.
    """
    return r2_score(true, pred)

In [None]:
def Accuracy(pred_length, pred_classification, true_length, true_classification):
    """Combine regression and classification quality into a single score.

    * ``R_squared`` -- R^2 of the predicted lengths against the true lengths.
    * ``F1_score``  -- ``f1_score`` of the predicted labels against the true labels.
    * ``Accuracy``  -- harmonic mean of the two when R^2 is strictly
      positive, otherwise 0.

    Returns a dict with exactly those three keys.
    """
    r2 = R_squared(true_length, pred_length)
    f1 = f1_score(true_classification, pred_classification)
    # A non-positive R^2 is scored as 0 instead of entering the harmonic mean.
    combined = harmonic_mean([r2, f1]) if r2 > 0 else 0
    return {'R_squared': r2, 'F1_score': f1, 'Accuracy': combined}

In [None]:
# Ground-truth lookups, loaded once and used as the default references below.
TRUE_LENGTH_dct = return_mixed_dct('test_mixed_labeled.csv')
TRUE_TELOMERES_dct = return_class_dct('test_data_labeled.csv')

def Accuracy_i(
    mixed_file,
    tel_no_tel_file,
    true_length_dct=TRUE_LENGTH_dct,
    true_classification_dct=TRUE_TELOMERES_dct,
):
    """Score one submission (a "mixed" file plus a tel/no-tel file).

    Both prediction files are read, aligned with their ground-truth dicts
    via ``return_lists`` and handed to ``Accuracy``; its result dict is
    returned unchanged.

    Note: the default ground-truth dicts are bound when this ``def`` is
    executed, so re-run this cell after reloading the ground truth.
    """
    predicted_lengths = return_mixed_dct(mixed_file)
    predicted_classes = return_class_dct(tel_no_tel_file)

    lengths_pred, lengths_true = return_lists(predicted_lengths, true_length_dct)
    classes_pred, classes_true = return_lists(predicted_classes, true_classification_dct)

    return Accuracy(
        pred_length=lengths_pred,
        pred_classification=classes_pred,
        true_length=lengths_true,
        true_classification=classes_true,
    )

In [None]:
def Results(path, output_file="Results.csv"):
    """Score every submission found in ``path`` and write a summary CSV.

    The directory listing is split with ``SeperateFiles`` and each
    submission (one "mixed" file plus one tel/no-tel file sharing the same
    name) is scored with ``Accuracy_i``. Progress is printed per submission.

    Parameters
    ----------
    path : str
        Directory holding the submission files; a trailing separator is
        optional.
    output_file : str, optional
        Destination of the summary table (columns ``Name``, ``R_squared``,
        ``F1_score``, ``Accuracy``). Defaults to ``"Results.csv"`` in the
        current working directory.

    Returns
    -------
    str or None
        ``"Files are messed up!"`` when the two file groups do not cover the
        same submission names (nothing is scored or written in that case);
        ``None`` after the CSV has been written.
    """
    mixed_files, tel_no_tel_files = SeperateFiles(os.listdir(path))

    # Every submission needs both files. Comparing the name sets (not only
    # the counts) also rejects e.g. {A, B} vs {A, C}, which would otherwise
    # fail with a KeyError in the middle of the loop below.
    if set(mixed_files) != set(tel_no_tel_files):
        return "Files are messed up!"

    # os.listdir yields entries in arbitrary order; sort so the report is
    # reproducible from run to run.
    names = sorted(mixed_files)

    r2_values = []
    f1_values = []
    accuracy_values = []

    print(names)
    for name in names:
        print(name)  # progress marker, printed before the submission is scored

        # os.path.join works whether or not `path` ends with a separator.
        result_i = Accuracy_i(os.path.join(path, mixed_files[name]),
                              os.path.join(path, tel_no_tel_files[name]))

        r2_values.append(result_i['R_squared'])
        f1_values.append(result_i['F1_score'])
        accuracy_values.append(result_i['Accuracy'])

        print(name, ":", result_i['R_squared'], " ", result_i['F1_score'], " ", result_i['Accuracy'])

    results = pd.DataFrame({"Name": names, "R_squared": r2_values,
                            "F1_score": f1_values, "Accuracy": accuracy_values})
    results.to_csv(output_file, index=False)

In [None]:
# Score every submission in the submissions folder. If Results returns a
# message instead of None, it is shown as this cell's output.
path = 'Submissions/'
Results(path)

### NiTrio - classification

In [None]:
# Load NiTrio's tel/no-tel predictions as an ID -> Label dict.
nitrio_class = 'Submissions_all/Submissions_Dec_10_2019/NiTrio/test_tel_no-tel_NiTrio.csv'
nitrio_dct = return_class_dct(nitrio_class)

In [None]:
# Align NiTrio's predictions with the ground-truth labels (ground-truth key order).
nitrio_pred, nitrio_true = return_lists(nitrio_dct, TRUE_TELOMERES_dct)

In [None]:
# F1 score of NiTrio's classification against the ground truth (cell output).
f1_score(nitrio_true, nitrio_pred)