# Analysis of the results

In [1]:
import os
import json
import numpy as np
from scipy import stats



## Get best results for each fold

In [2]:
def construct_path_from_parent(root_dir, subdir_list):
    """Return ``root_dir`` joined with every entry of ``subdir_list``.

    The components are combined with ``os.path.join``, so the platform's
    path separator is used between them.
    """
    path_parts = [root_dir, *subdir_list]
    return os.path.join(*path_parts)

In [3]:
# Directory the notebook is executed from.
current_dir = os.getcwd()

# Dataset and models whose results are analysed.
dataset_name = "NucleiSeg"
model_names = [
    "UNet_vanilla",
    "UNet_bcnn",
    "UNet_e2cnn",
]

# Names of the per-fold result directories: fold_0 ... fold_4.
folds = [f'fold_{fold_idx}' for fold_idx in range(5)]

# Metric minimised to select the best entry, and the metrics reported for it.
criteria = 'loss_dice'
perf_metrics = [
    'dice_score',
    'IoU_score',
    'precision',
    'recall',
    'accuracy',
]

In [4]:
def get_best_metrics(summary_loc: str, fold_nb: str, result_dict: dict,
                     perf_metrics: list, criteria: str) -> None:
    """Store the metrics of the best entry of one fold in ``result_dict``.

    The JSON file at ``summary_loc`` is loaded and its ``'test'`` section is
    searched for the index at which ``criteria`` is minimal. The value of
    every metric in ``perf_metrics`` at that index, plus the index itself
    (under the key ``'epoch'``), is written to ``result_dict[fold_nb]``.

    Args:
        summary_loc: Path of the ``summary.json`` file to read.
        fold_nb: Key under which the fold is stored (e.g. ``'fold_0'``).
        result_dict: Dictionary updated in place with the fold's metrics.
        perf_metrics: Names of the metrics to extract from the ``'test'``
            section; each is expected to be an indexable sequence.
        criteria: Name of the ``'test'`` entry that is minimised to pick
            the best index.

    Raises:
        KeyError: If ``fold_nb`` is already present in ``result_dict``, or
            if ``'test'``, ``criteria`` or a metric is missing from the file.
    """
    # Fail fast, before any file I/O, if this fold was already recorded.
    if fold_nb in result_dict:
        raise KeyError(f"Key '{fold_nb}' already present in dictionary.")

    with open(summary_loc, encoding="utf-8") as file:
        test_data = json.load(file)['test']

    # np.argmin returns a NumPy integer; cast to a builtin int so the stored
    # value stays JSON-serializable and compares as a plain Python int.
    best_idx = int(np.argmin(test_data[criteria]))

    fold_dict: dict = {metric: test_data[metric][best_idx] for metric in perf_metrics}
    fold_dict['epoch'] = best_idx

    result_dict[fold_nb] = fold_dict

In [5]:
def build_path(dataset_name: str,
               model_name: str,
               fold_nb: str,
               root_dir: str = '/home/rob/Documents/3_projects/bench/_results') -> str:
    """Return the path ``root_dir/dataset_name/model_name/fold_nb/summary.json``.

    A message is printed stating whether a file exists at that location.
    The path is returned in both cases; a missing file is only reported,
    not treated as an error.
    """
    summary_loc = construct_path_from_parent(
        root_dir, [dataset_name, model_name, fold_nb, "summary.json"]
    )

    # Report on the file's presence without interrupting the caller.
    message = (
        f'summary.json loc: {summary_loc}'
        if os.path.exists(summary_loc)
        else f'No file at the path you provided: {summary_loc}.'
    )
    print(message)

    return summary_loc

In [6]:
# For every model: collect the best metrics of each fold, then write the mean
# and the 95% Student-t confidence interval of every metric to a JSON file.
for model_name in model_names:
    # Best metrics of each fold for this model, keyed by fold name.
    result_dict: dict = {}

    for fold in folds:
        summary_loc = build_path(dataset_name, model_name, fold)
        get_best_metrics(summary_loc, fold, result_dict, perf_metrics, criteria)

    results = {}

    for metric in perf_metrics:
        # Iterate over `folds` itself so the aggregation always matches the
        # folds that were actually loaded above.
        data = np.array([result_dict[fold][metric] for fold in folds])
        mean = np.mean(data)
        # The confidence level is passed positionally: SciPy renamed this
        # keyword from `alpha` to `confidence`, and the positional form is
        # accepted by both old and new releases.
        ci = stats.t.interval(0.95, df=len(data) - 1, loc=mean, scale=stats.sem(data))
        # Store builtin floats so the result is serialized as plain JSON
        # numbers without relying on a fallback encoder.
        results[metric] = {
            "mean": float(mean),
            "95% CI": [float(bound) for bound in ci]
        }

    output_path = f'/home/rob/Documents/3_projects/bench/analysis/{dataset_name}'
    os.makedirs(output_path, exist_ok=True)

    json_name = f'/{dataset_name}_{model_name}_data.json'
    with open(f'{output_path}{json_name}', 'w', encoding='utf-8') as fp:
        json.dump(results, fp, indent=4)

summary.json loc: /home/rob/Documents/3_projects/bench/_results/NucleiSeg/UNet_vanilla/fold_0/summary.json
summary.json loc: /home/rob/Documents/3_projects/bench/_results/NucleiSeg/UNet_vanilla/fold_1/summary.json
summary.json loc: /home/rob/Documents/3_projects/bench/_results/NucleiSeg/UNet_vanilla/fold_2/summary.json
summary.json loc: /home/rob/Documents/3_projects/bench/_results/NucleiSeg/UNet_vanilla/fold_3/summary.json
summary.json loc: /home/rob/Documents/3_projects/bench/_results/NucleiSeg/UNet_vanilla/fold_4/summary.json
summary.json loc: /home/rob/Documents/3_projects/bench/_results/NucleiSeg/UNet_bcnn/fold_0/summary.json
summary.json loc: /home/rob/Documents/3_projects/bench/_results/NucleiSeg/UNet_bcnn/fold_1/summary.json
summary.json loc: /home/rob/Documents/3_projects/bench/_results/NucleiSeg/UNet_bcnn/fold_2/summary.json
summary.json loc: /home/rob/Documents/3_projects/bench/_results/NucleiSeg/UNet_bcnn/fold_3/summary.json
summary.json loc: /home/rob/Documents/3_projects/