# Threshold Analysis

In [5]:
import os
import pandas as pd

# user defined methods
import sys
sys.path.append('../utils')

from threshold_analysis import extract_info_folder, extract_log, create_heatmap

# Root folder of the threshold-search results for the 2023-11-13 run
# (relative to this notebook's working directory).
log_path = '../../results/threshold/2023-11-13/'
# Names of the entries directly under log_path.
# NOTE(review): not referenced by the cells visible here — confirm whether a later cell needs it.
log_dir = os.listdir(log_path)


In [8]:
# Select, for every (threshold_j, threshold_i) pair, the single best run found
# under log_path. Ranking, in order of priority:
#   1. lowest false-negative rate
#   2. highest F1 on the test set
#   3. highest F1 on the validation set
# Maps (threshold_j, threshold_i) -> (fn_rate, f1_test, f1_val, model, j, i).
best_models = {}

# os.listdir returns entries in arbitrary order. Iterating over a sorted listing
# makes the winner of an exact tie on all three metrics reproducible across
# machines and re-runs (the first folder in sorted order is kept).
for folder in sorted(os.listdir(log_path)):
    folder_path = os.path.join(log_path, folder)
    if not os.path.isdir(folder_path):
        continue

    log_file_path = os.path.join(folder_path, 'find_threshold.log')
    if not os.path.isfile(log_file_path):
        continue

    # Only model and the two thresholds are used below; dataset, target and cm
    # are unpacked solely because the helpers return them.
    dataset, model, target, threshold_j, threshold_i = extract_info_folder(folder)
    f1_test, f1_val, fn_rate, cm = extract_log(log_file_path)

    # presumably None means the log held no false-negative rate — confirm against extract_log
    if fn_rate is None:
        continue

    key = (threshold_j, threshold_i)
    incumbent = best_models.get(key)
    if incumbent is not None:
        best_fn_rate, best_f1_test, best_f1_val = incumbent[:3]
        # Lexicographic comparison; a later criterion is only consulted when
        # every earlier one is exactly tied.
        improves = (
            fn_rate < best_fn_rate
            or (fn_rate == best_fn_rate and f1_test > best_f1_test)
            or (fn_rate == best_fn_rate and f1_test == best_f1_test and f1_val > best_f1_val)
        )
        if not improves:
            continue

    best_models[key] = (fn_rate, f1_test, f1_val, model, threshold_j, threshold_i)

# One row per (j, i) pair; the tuple key becomes the row index.
df = pd.DataFrame.from_dict(best_models, orient='index',
                            columns=['False Negative Rate', 'F1 Test Score', 'F1 Val Score', 'Model', 'j_value', 'i_value'])

# Ordered view for display; df itself is left untouched for the pivots below.
df_sorted = df.sort_values(by=['j_value', 'i_value'])

In [9]:
# Preview the first rows of the best-model table, ordered by j_value then i_value.
df_sorted.head()

Unnamed: 0,False Negative Rate,F1 Test Score,F1 Val Score,Model,j_value,i_value
"(1, 1)",0.003681,0.996933,0.99343,xgboost,1,1
"(1, 2)",0.003681,0.996933,0.99343,xgboost,1,2
"(1, 3)",0.003681,0.996933,0.99343,xgboost,1,3
"(1, 4)",0.003681,0.996933,0.99343,xgboost,1,4
"(1, 5)",0.003681,0.996933,0.99343,xgboost,1,5


In [10]:
# Create and save heatmaps individually as images.
# Each pivot reshapes df into an i_value (rows) x j_value (columns) grid of one metric.
# DataFrame.pivot arguments are passed by keyword: positional use is deprecated
# (it emits a FutureWarning) and raises TypeError on pandas >= 2.0.
pivot_f1_test = df.pivot(index="i_value", columns="j_value", values="F1 Test Score")
create_heatmap(pivot_f1_test, 'F1 Test Score for Each i, j Combination', '../../visualizations/f1_test_score_heatmap.png')

pivot_f1_val = df.pivot(index="i_value", columns="j_value", values="F1 Val Score")
create_heatmap(pivot_f1_val, 'F1 Validation Score for Each i, j Combination', '../../visualizations/f1_val_score_heatmap.png')

pivot_fn_rate = df.pivot(index="i_value", columns="j_value", values="False Negative Rate")
create_heatmap(pivot_fn_rate, 'False Negative Rate for Each i, j Combination', '../../visualizations/fn_rate_heatmap.png')

In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
In a future version of pandas all arguments of DataFrame.pivot will be keyword-only.
