# Import necessary packages

In [1]:
import os
import sys
import math
import numpy as np
import pandas as pd
from tqdm import tqdm
import datetime
from pprint import pprint
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pathlib import PurePath
import warnings
warnings.filterwarnings("ignore")

# Defining the parameter space

- threshold: [1, 2, 4] (representing 1/2/4 times the experimental area radius for wind transpose model)
- ignore_duration: [30, 60, 120] (indicating event lengths below 30/60/120 seconds to be ignored)
- short_stack: [0, 1] (0 for analyzing all experimental dates, 1 for analyzing only the short stack scenario)

In [2]:
# Candidate values of each tunable parameter (the full parameter space).
threshold = [1, 2, 4]
ignore_duration = [30, 60, 120]
short_stack = [0, 1]

# Parameter sets analysed in this notebook, each laid out as
# [threshold, ignore_duration, short_stack].
inputs = [[1, 60, 0], [2, 60, 0], [4, 60, 0]]

# One left-aligned, 20-character-wide cell per parameter.
row_template = "{:<20} {:<20} {:<20}"

print("Input Parameters:")
print(row_template.format('Threshold', 'Ignore Duration', 'Short Stack'))
for parameter_set in inputs:
    print(row_template.format(*parameter_set))

Input Parameters:
Threshold            Ignore Duration      Short Stack         
1                    60                   0                   
2                    60                   0                   
4                    60                   0                   


# Load Confusion Matrix for Reliability metrics

For the inputs below, load the Time-based Confusion Matrix data, which provides the following metrics:

- `TP(%)`
- `FP(%)`
- `FN(%)`
- `TN(%)`
- `sample_size`
- `FPR(%)`
- `TPR(%)`
- `TNR(%)`
- `FNR(%)`
- `Accuracy(%)`
- `Precision(%)`

| threshold | ignore_duration | short_stack |
| :-: | :-: | :-: |
| 1 | 60 | 0 | 
| 2 | 60 | 0 |
| 4 | 60 | 0 |


In [3]:
# Folder that holds the time-based confusion-matrix CSV files.
data_dir = PurePath("../../results/03_DetectionAnalysis") / "Time-based ConfusionMatrix"
# File-name template, filled in with the threshold and the ignore duration via str.format.
path_pattern = "threshold={}xradius_duration={}xseconds.csv"

# Sensors whose rows are looked up in every CSV file.
sensor_names = [
    "Andium",
    "Canary",
    "Ecoteco",
    "Kuva",
    "Oiler",
    "Qube",
    "Sensirion",
    "Soofie",
]

def get_time_based_difference(data_dir, path_pattern_inputs, sensor_names, parameter_sets=None):
    """
    Collect the time-based confusion-matrix metrics of every sensor for each parameter set.

    For every parameter set one CSV file is read, and for every sensor the first
    row whose ``Sensor`` column matches is copied into that sensor's table.

    Args:
        data_dir: Directory that holds the time-based confusion-matrix CSV files.
        path_pattern_inputs: ``str.format`` pattern of the CSV file names. It is
            formatted with ``(threshold, ignore_duration, prefix)`` where ``prefix``
            is ``""`` for ``short_stack == 0`` and ``"ss"`` otherwise. ``str.format``
            ignores surplus positional arguments, so a pattern with only two
            placeholders resolves to the same file for both short-stack settings.
        sensor_names: Sensor names to look up in the ``Sensor`` column.
        parameter_sets: Optional iterable of ``[threshold, ignore_duration,
            short_stack]`` entries. When omitted, the notebook-level ``inputs``
            list is used.
    Returns:
        A dict mapping each sensor name to a DataFrame with one row per parameter
        set and the columns ``threshold``, ``samples`` (taken from ``sample_size``)
        and the confusion-matrix metrics. ``threshold`` and ``samples`` are
        integer typed.
    Raises:
        IndexError: If a sensor has no row in one of the CSV files.
    """
    metric_columns = ["TP(%)", "FP(%)", "FN(%)", "TN(%)",
                      "TPR(%)", "FPR(%)", "FNR(%)", "TNR(%)", "Accuracy(%)", "Precision(%)"]
    columns = ["threshold", "samples"] + metric_columns

    if parameter_sets is None:
        # Fall back to the parameter sets defined at notebook level.
        parameter_sets = inputs

    # Gather plain dict records first and build each DataFrame once at the end:
    # DataFrame.append was removed in pandas 2.0 and row-wise growth is quadratic.
    records = {sensor_name: [] for sensor_name in sensor_names}

    for parameter_set in parameter_sets:
        threshold, ignore_duration, short_stack = parameter_set[:3]

        print("Threshold: {}, Ignore Duration: {}, Short Stack: {}".format(threshold, ignore_duration, short_stack))

        pre_str = "" if short_stack == 0 else "ss"

        csv_path = os.path.join(data_dir, path_pattern_inputs.format(threshold, ignore_duration, pre_str))
        df_time_based = pd.read_csv(csv_path)

        for sensor_name in sensor_names:
            print("\t Sensor: {}".format(sensor_name))
            # Filter once per sensor instead of once per metric.
            sensor_rows = df_time_based[df_time_based["Sensor"] == sensor_name]
            if sensor_rows.empty:
                raise IndexError(
                    "Sensor {!r} not found in {}".format(sensor_name, csv_path)
                )

            record = {"threshold": threshold, "samples": sensor_rows["sample_size"].iloc[0]}
            for metric in metric_columns:
                record[metric] = sensor_rows[metric].iloc[0]
            records[sensor_name].append(record)

    difference_dict = {}
    for sensor_name, sensor_records in records.items():
        frame = pd.DataFrame(sensor_records, columns=columns)
        # Cast once, after all rows are collected.
        difference_dict[sensor_name] = frame.astype({"threshold": int, "samples": int})

    return difference_dict

# Build the per-sensor metric tables. The function prints one progress line per
# parameter set and one per sensor while it reads the CSV files.
print("Get the time-based difference of the confusion matrix.")
difference_dict = get_time_based_difference(data_dir, path_pattern, sensor_names)
print("Done.")
        

Get the time-based difference of the confusion matrix.
Threshold: 1, Ignore Duration: 60, Short Stack: 0
	 Sensor: Andium
	 Sensor: Canary
	 Sensor: Ecoteco
	 Sensor: Kuva
	 Sensor: Oiler
	 Sensor: Qube
	 Sensor: Sensirion
	 Sensor: Soofie
Threshold: 2, Ignore Duration: 60, Short Stack: 0
	 Sensor: Andium
	 Sensor: Canary
	 Sensor: Ecoteco
	 Sensor: Kuva
	 Sensor: Oiler
	 Sensor: Qube
	 Sensor: Sensirion
	 Sensor: Soofie
Threshold: 4, Ignore Duration: 60, Short Stack: 0
	 Sensor: Andium
	 Sensor: Canary
	 Sensor: Ecoteco
	 Sensor: Kuva
	 Sensor: Oiler
	 Sensor: Qube
	 Sensor: Sensirion
	 Sensor: Soofie
Done.


# Save results


In [4]:
save_path = PurePath("../../results/05_SensitivityAnalysis/", "Time-based Difference")
# exist_ok=True creates the folder when missing and avoids the race between a
# separate existence check and the creation.
os.makedirs(save_path, exist_ok=True)

# NOTE(review): save_pattern is not referenced in this cell; confirm whether it is still needed.
save_pattern = "{}_difference.csv"
filename = "time_based_results.csv"

save_columns = ["sensor", "threshold", "samples", "TP(%)", "FP(%)", "FN(%)", "TN(%)",
                "TPR(%)", "FPR(%)", "FNR(%)", "TNR(%)", "Accuracy(%)", "Precision(%)"]

# Tag each sensor's table with the sensor name and stack them in one concat.
# Converting rows with list(df.iloc[i]) would upcast the integer columns
# ("threshold", "samples") to float whenever the metric columns are float;
# concatenating whole frames keeps every column's dtype.
sensor_frames = [
    difference_dict[sensor_name].assign(sensor=sensor_name)
    for sensor_name in sensor_names
]
if sensor_frames:
    save_df = pd.concat(sensor_frames, ignore_index=True)[save_columns]
else:
    # pd.concat rejects an empty list; still write a header-only file.
    save_df = pd.DataFrame(columns=save_columns)

save_df.to_csv(PurePath(save_path, filename), index=False)