# Notebook overview

Evaluates the performance of the DKNN (OOD) classifier by calculating metrics for different threshold (classifier) variants.

- Loads prediction results containing OOD detection flags.
- Calculates classification metrics: TPR, FPR, AUROC and FPR95.
- Saves the calculated metrics to CSV files.

The notebook was used for both datasets (“original” and “resized”); only the path variables need to be adapted.

# Preparation

### Import

In [55]:
import pandas as pd
import numpy as np
from pathlib import Path

import ast

from sklearn import metrics

### Path - prediction_dir_path, result_dir_path

In [56]:
# Dataset variant to score. Swap the two assignments below to switch between
# the "resized" and the "origin" predictions; both paths follow automatically.
use_dataset = 'resized'
# use_dataset = 'origin'

# Folder the prediction CSV files are loaded from.
PREDICTION_DIR_PATH = rf'/home/stud/jleick/masterArbeitProjekt/models/ood/predictions/{use_dataset}'
prediction_dir_path = Path(PREDICTION_DIR_PATH)
# is_dir() instead of exists(): a regular file at this location is not a usable folder.
if not prediction_dir_path.is_dir():
    raise FileNotFoundError(f"Folder does not exist: {PREDICTION_DIR_PATH}")

# Folder the calculated scores are written to (single leading slash, like
# PREDICTION_DIR_PATH above).
RESULT_DIR_PATH = rf'/home/stud/jleick/masterArbeitProjekt/models/ood/scores_ood_detection_(corrected)/{use_dataset}'
result_dir_path = Path(RESULT_DIR_PATH)
if not result_dir_path.is_dir():
    raise FileNotFoundError(f"Folder does not exist: {RESULT_DIR_PATH}")

# Functions

### Function - predict_id_scores_over_percentile

In [57]:
def predict_id_scores_over_percentile(prediction_df:pd.DataFrame, percentiles: pd.Series) -> dict[float,float]:
    """Return, per percentile, the share of samples that were NOT flagged as OOD.

    A sample counts as flagged when its ``ood_<percentile>`` column equals -1.
    OOD is the positive class of the detector. Each test set holds either
    only ID or only OOD samples, so one data set fills just one half of the
    binary confusion matrix:

    * ID-only data:  unflagged = TN, flagged = FP  -> the score is the TNR.
    * OOD-only data: unflagged = FN, flagged = TP  -> the score is the FNR.

    Args:
        prediction_df: Predictions with one ``ood_<percentile>`` column per
            entry of ``percentiles``.
        percentiles: Threshold variants to evaluate, in the desired order.

    Returns:
        Mapping ``percentile -> score`` in insertion order, framed by the
        fixed anchor points ``0 -> 0`` and ``1 -> 1`` that close the curve
        for visualisation / AUROC. The anchors use numeric keys like every
        other entry, so the mapping can be sorted and plotted. As a
        consequence, a percentile equal to 0 or 1 shares its key with the
        anchor: the value computed at 0 replaces the anchor, and the anchor
        at 1 replaces the value computed at 1.

    Raises:
        KeyError: If an ``ood_<percentile>`` column is missing.
    """
    id_scores = {0: 0}  # lower anchor of the curve

    for percentile in percentiles:
        flagged = prediction_df[f'ood_{percentile}'] == -1
        n_flagged = flagged.sum()
        n_unflagged = (~flagged).sum()
        # Unflagged share: TN / (TN + FP) on ID data, FN / (FN + TP) on OOD data.
        id_scores[percentile] = n_unflagged / (n_flagged + n_unflagged)

    id_scores[1] = 1  # upper anchor of the curve
    return id_scores

### Function - predict_ood_scores_over_percentile

In [58]:
def predict_ood_scores_over_percentile(prediction_df:pd.DataFrame, percentiles: pd.Series) -> dict[float,float]:
    """Return, per percentile, the share of samples that WERE flagged as OOD.

    A sample counts as flagged when its ``ood_<percentile>`` column equals -1.
    OOD is the positive class of the detector. Each test set holds either
    only OOD or only ID samples, so one data set fills just one half of the
    binary confusion matrix:

    * OOD-only data: flagged = TP, unflagged = FN  -> the score is the TPR.
    * ID-only data:  flagged = FP, unflagged = TN  -> the score is the FPR.

    Args:
        prediction_df: Predictions with one ``ood_<percentile>`` column per
            entry of ``percentiles``.
        percentiles: Threshold variants to evaluate, in the desired order.

    Returns:
        Mapping ``percentile -> score`` in insertion order, framed by the
        fixed anchor points ``0 -> 1`` and ``1 -> 0`` used for visualisation.
        A percentile equal to 0 or 1 shares its key with the anchor: the
        value computed at 0 replaces the anchor, and the anchor at 1 replaces
        the value computed at 1.
    """
    def flagged_share(column_name):
        # Count both outcomes explicitly so a column with only one of them
        # still yields a valid ratio.
        flagged = prediction_df[column_name] == -1
        n_flagged = flagged.sum()
        n_unflagged = (~flagged).sum()
        return n_flagged / (n_flagged + n_unflagged)

    ood_scores = {0: 1}  # lower anchor of the curve
    for threshold in percentiles:
        ood_scores[threshold] = flagged_share(f'ood_{threshold}')
    ood_scores[1] = 0  # upper anchor of the curve
    return ood_scores

### Function - calculate_fpr95 - get_closest_fpr95

In [59]:
def calculate_fpr95( tpr: pd.Series, fpr: pd.Series) -> float:
    """Linearly interpolate the FPR at a TPR of 0.95.

    Both series are reversed before interpolation because ``np.interp``
    expects increasing x-coordinates; the TPR values are therefore assumed
    to be ordered from high to low on input.

    Args:
        tpr: True-positive rates, one per threshold.
        fpr: False-positive rates, aligned position by position with ``tpr``.

    Returns:
        The interpolated FPR at TPR = 0.95.
    """
    return np.interp(0.95, xp=tpr.values[::-1], fp=fpr.values[::-1])


def get_closest_fpr95( tpr: pd.Series, fpr: pd.Series) -> tuple[float,float]:
    """Return the measured (TPR, FPR) pair whose TPR lies closest to 0.95.

    No interpolation is done: the result is one of the evaluated operating
    points. The lookup is purely positional, so it does not depend on the
    index labels of the series. NaN entries in ``tpr`` are skipped.

    Args:
        tpr: True-positive rates, one per threshold.
        fpr: False-positive rates, aligned position by position with ``tpr``.

    Returns:
        ``(tpr_at_95, fpr_at_95_tpr)`` taken at the selected position. On
        ties the first position wins.

    Raises:
        ValueError: If ``tpr`` is empty or contains only NaN.
    """
    tpr_values = np.asarray(tpr)
    fpr_values = np.asarray(fpr)
    # argmin yields a position; index the plain arrays with it instead of the
    # Series, where [] would be interpreted as a label lookup.
    idx_95 = np.nanargmin(np.abs(tpr_values - 0.95))
    return tpr_values[idx_95], fpr_values[idx_95]

### Function - run_predict_scores_over_percentile

In [60]:
def _score_prediction_pair( prefix: str, prediction_load_path: Path, score_save_path: Path, percentiles: pd.Series) -> None:
    """Score the ID/OOD prediction pair whose file names start with ``prefix``."""
    read_options = {"index_col": False, "converters": {"k_distances": ast.literal_eval}}
    id_test_prediction_df = pd.read_csv( prediction_load_path / f'{prefix}_id_test_prediction_ood.csv', **read_options)
    ood_test_prediction_df = pd.read_csv( prediction_load_path / f'{prefix}_ood_test_prediction_ood.csv', **read_options)

    # Share of flagged samples: on the OOD-only set this is the TPR,
    # on the ID-only set it is the FPR.
    tpr_scores_dict = predict_ood_scores_over_percentile( ood_test_prediction_df, percentiles )
    fpr_scores_dict = predict_ood_scores_over_percentile( id_test_prediction_df, percentiles )

    # One row per percentile, columns TPR_scores / FPR_scores.
    scores_df = pd.DataFrame( [tpr_scores_dict, fpr_scores_dict], index=['TPR_scores', 'FPR_scores'] ).T
    scores_df.to_csv( score_save_path / f'{prefix}_id_ood_scores.csv', index=True)

    tpr_values = list(tpr_scores_dict.values())
    fpr_values = list(fpr_scores_dict.values())

    # AUROC: area under the (FPR, TPR) curve.
    score_auroc = metrics.auc(fpr_values, tpr_values)
    with open(score_save_path / f'{prefix}_auroc_score.txt', 'w') as f:
        f.write(str(score_auroc))

    # FPR95, both interpolated and taken at the closest measured TPR.
    tpr_series = pd.Series(tpr_values)
    fpr_series = pd.Series(fpr_values)
    fpr95_interpolate = calculate_fpr95(tpr_series, fpr_series)
    tpr_closest, fpr95_closest = get_closest_fpr95(tpr_series, fpr_series)

    with open(score_save_path / f'{prefix}_fpr95_score.txt', 'w') as f:
        f.write(f'FPR95 (interpolated): {fpr95_interpolate:.4f}\n')
        f.write(f'TPR closest to 0.95: {tpr_closest:.4f}\n')
        f.write(f'FPR95 (closest): {fpr95_closest:.4f}\n')


def run_predict_scores_over_percentile( prediction_load_path: Path, score_save_path: Path, percentiles:pd.Series):
    """Calculate and save TPR/FPR curves, AUROC and FPR95 for the 'high' and 'low' predictions.

    For each prefix (``high``, then ``low``) the function reads
    ``<prefix>_id_test_prediction_ood.csv`` and
    ``<prefix>_ood_test_prediction_ood.csv`` from ``prediction_load_path``
    and writes three files to ``score_save_path``:

    * ``<prefix>_id_ood_scores.csv``  - TPR and FPR per percentile
    * ``<prefix>_auroc_score.txt``    - area under the (FPR, TPR) curve
    * ``<prefix>_fpr95_score.txt``    - interpolated FPR95 plus the closest
      measured operating point

    Args:
        prediction_load_path: Folder containing the four prediction CSV files.
        score_save_path: Existing folder for the result files; it is not
            created here.
        percentiles: Threshold variants; each needs an ``ood_<percentile>``
            column in the prediction files.
    """
    for prefix in ('high', 'low'):
        _score_prediction_pair(prefix, prediction_load_path, score_save_path, percentiles)

# Apply

### Apply - over_all_examples

In [61]:
# Threshold grid 0.00, 0.01, ..., 1.00. Rounding to two decimals removes the
# floating-point drift of arange so the values format cleanly into column names.
percentiles = np.round(np.arange(0.0, 1.001, 0.01), 2)

In [62]:
# Score the predictions stored in the 'over_all_examples' sub-folder.
run_predict_scores_over_percentile(
    prediction_load_path=prediction_dir_path / 'over_all_examples',
    score_save_path=result_dir_path / 'over_all_examples',
    percentiles=percentiles,
)

# knn

### Apply - for_each_species

In [63]:
# Score the predictions stored in the 'knn/for_each_species' sub-folder.
run_predict_scores_over_percentile(
    prediction_load_path=prediction_dir_path / 'knn/for_each_species',
    score_save_path=result_dir_path / 'knn/for_each_species',
    percentiles=percentiles,
)

### Apply - in each species

In [64]:
# Score the predictions stored in the 'knn/in_each_species' sub-folder.
run_predict_scores_over_percentile(
    prediction_load_path=prediction_dir_path / 'knn/in_each_species',
    score_save_path=result_dir_path / 'knn/in_each_species',
    percentiles=percentiles,
)

# mlp

### Apply - for_each_species

In [65]:
# Score the predictions stored in the 'mlp/for_each_species' sub-folder.
run_predict_scores_over_percentile(
    prediction_load_path=prediction_dir_path / 'mlp/for_each_species',
    score_save_path=result_dir_path / 'mlp/for_each_species',
    percentiles=percentiles,
)

### Apply - in each species

In [66]:
# Score the predictions stored in the 'mlp/in_each_species' sub-folder.
run_predict_scores_over_percentile(
    prediction_load_path=prediction_dir_path / 'mlp/in_each_species',
    score_save_path=result_dir_path / 'mlp/in_each_species',
    percentiles=percentiles,
)