In [1]:
import pandas as pd
import numpy as np
import heapq
from sklearn.metrics import f1_score
from sklearn.metrics import balanced_accuracy_score

def Select_Threshold(df):
    """Sweep decision thresholds over prediction scores and pick the best one.

    For every threshold in 0.00, 0.05, ..., 1.00 the scores in
    ``df['prediction']`` are binarised (``score >= threshold`` -> 1, else 0)
    and compared with ``df['y_true']`` using ``f1_score`` and
    ``balanced_accuracy_score`` (both called with their default arguments).

    Selection rule: take the three thresholds with the highest F1 score and,
    among those three only, return the one with the highest balanced accuracy.
    F1 ties go to the lower threshold; balanced-accuracy ties among the three
    candidates go to the candidate with the higher F1 rank.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``prediction`` column (scores compared against the thresholds)
        and a ``y_true`` column (ground-truth labels). The frame is only read;
        no ``y_pred`` column is added to it.

    Returns
    -------
    opt_threshold : numpy.float64
        The selected threshold.
    df_varying_threshold : pandas.DataFrame
        One row per threshold with the columns ``threshold``, ``f1_score``
        and ``balanced_accuracy``.
    """
    scores = df['prediction'].values
    y_true = df['y_true'].values

    # Build the grid from integers: a float step (np.arange(0, 1.05, 0.05))
    # yields values such as 0.15000000000000002, so a score of exactly 0.15
    # would wrongly fall below the "0.15" threshold.
    thresholds = np.arange(0, 105, 5) / 100

    rows = []
    for threshold in thresholds:
        # Work on a local array instead of writing a 'y_pred' column into the
        # caller's DataFrame.
        y_pred = (scores >= threshold).astype(int)
        rows.append([
            threshold,
            f1_score(y_true, y_pred),
            balanced_accuracy_score(y_true, y_pred),
        ])

    df_varying_threshold = pd.DataFrame(
        rows, columns=['threshold', 'f1_score', 'balanced_accuracy']
    )

    # Select the three highest F1 scores, then the highest balanced accuracy
    # among them.
    f1_scores = df_varying_threshold['f1_score'].values
    bal_acc_values = df_varying_threshold['balanced_accuracy'].values
    top_f1_idx = heapq.nlargest(3, range(len(f1_scores)), key=f1_scores.__getitem__)

    # Pick the winner by index within the candidates. Looking the winning
    # balanced-accuracy *value* up in the full list could return an earlier
    # threshold that is not among the top-F1 candidates.
    best_idx = max(top_f1_idx, key=bal_acc_values.__getitem__)
    opt_threshold = thresholds[best_idx]

    return opt_threshold, df_varying_threshold


def threshold_tuning(model_name, path = "", n_runs = 3):
    """Tune the decision threshold on each run of a model and average the results.

    For ``i`` in ``1..n_runs`` this reads ``path + model_name + str(i) + '.csv'``,
    passes the frame to ``Select_Threshold``, writes the returned per-threshold
    table to ``path + 'threshold_tuning-' + model_name + str(i) + '.csv'``
    (without the index) and prints the threshold selected for that run.

    Parameters
    ----------
    model_name : str
        File-name prefix of the prediction files, e.g. ``'LSTM-'``.
    path : str, optional
        String prepended verbatim to every file name. It is concatenated, not
        joined, so a directory must include its trailing separator.
    n_runs : int, optional
        Number of numbered prediction files to process (default 3).

    Returns
    -------
    float
        Arithmetic mean of the per-run selected thresholds.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    selected = []
    for i in range(1, n_runs + 1):
        name = model_name + str(i)
        df = pd.read_csv(path + name + '.csv')
        th, th_df = Select_Threshold(df)
        th_df.to_csv(path + "threshold_tuning-" + name + ".csv", index=False)
        selected.append(th)
        print(th)

    return sum(selected) / n_runs
    

## Base CW-LSTM model threshold

In [None]:
# Tune the threshold on CW-LSTM-1.csv ... CW-LSTM-3.csv (working directory)
# and print the mean of the three selected thresholds. threshold_tuning prints
# each per-file threshold and writes threshold_tuning-CW-LSTM-<i>.csv, so the
# loop does not need to be repeated in this cell.
print(threshold_tuning('CW-LSTM-'))

## Base LR model threshold

In [None]:
# Tune the threshold on LR-1.csv ... LR-3.csv (working directory) and print
# the mean of the three selected thresholds. threshold_tuning prints each
# per-file threshold and writes threshold_tuning-LR-<i>.csv, so the loop does
# not need to be repeated in this cell.
print(threshold_tuning('LR-'))

## Base LSTM model threshold

In [None]:
# Tune the threshold on LSTM-1.csv ... LSTM-3.csv (working directory) and
# print the mean of the three selected thresholds. threshold_tuning prints
# each per-file threshold and writes threshold_tuning-LSTM-<i>.csv, so the
# loop does not need to be repeated in this cell.
print(threshold_tuning('LSTM-'))