# Import necessary packages

In [1]:
import os
import sys
import math
import numpy as np
import pandas as pd
from tqdm import tqdm
import datetime
from pprint import pprint
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pathlib import PurePath
import warnings
warnings.filterwarnings("ignore")

# Defining the parameter space

- threshold: [1, 2, 4] (representing 1/2/4 times the experimental area radius for wind transpose model)
- ignore_duration: [30, 60, 120] (indicating event lengths below 30/60/120 seconds to be ignored)
- short_stack: [0, 1] (0 for analyzing all experimental dates, 1 for analyzing only the short stack scenario)

In [2]:
# Candidate values for each parameter of the sensitivity analysis.
threshold = [1, 2, 4]
ignore_duration = [30, 60, 120]
short_stack = [0, 1]

# Parameter combinations analysed below; each row is
# [threshold, ignore_duration, short_stack].
inputs = [[1, 60, 0], [2, 60, 0], [4, 60, 0]]

# Print the header and every combination with one shared, left-aligned layout.
table_rows = [('Threshold', 'Ignore Duration', 'Short Stack')]
table_rows.extend(inputs)

print("Input Parameters:")
for cells in table_rows:
    print("{:<20} {:<20} {:<20}".format(*cells))

Input Parameters:
Threshold            Ignore Duration      Short Stack         
1                    60                   0                   
2                    60                   0                   
4                    60                   0                   


# Load Confusion Matrix for Reliability metrics

For the inputs below, load the event-based confusion matrix data for the following metrics:

- "Detection Rate (%)"
- "Non-Emission Accuracy (%)"
- "Reliability of Identifications (%)"
- "Reliability of Non-Emission Identifications (%)"

| threshold | ignore_duration | short_stack |
| :-: | :-: | :-: |
| 1 | 60 | 0 | 
| 2 | 60 | 0 |
| 4 | 60 | 0 |


In [3]:

# Directories holding the event-based confusion matrix CSV files, one for the
# Stanford-defined events and one for the team-defined events.
data_dir_stanford_defined = PurePath("../../results/03_DetectionAnalysis/Event-based ConfusionMatrix/Stanford Defined Events_Reliability/")
data_dir_team_defined = PurePath("../../results/03_DetectionAnalysis/Event-based ConfusionMatrix/Team Defined Events_Reliability/")
# File-name template of a confusion matrix CSV; the two placeholders are filled
# with the threshold and the ignore duration (in that order).
path_pattern = "threshold={}xradius_duration={}xseconds.csv"

# Output directory for the results of this analysis.
save_path = PurePath("../../results/05_SensitivityAnalysis/Event-based Difference Analysis/")
# NOTE(review): save_pattern is not referenced by the code shown here - confirm
# whether it is still needed.
save_pattern = "{}_difference.csv"


# Names of the sensors to analyse; they are used to select rows of the
# confusion matrix CSV files and to build the match-events file names.
sensor_names = ["Andium", "Canary", "Ecoteco", "Kuva", "Oiler", "Qube", "Sensirion"]


def get_number_of_events_for_stanford(sensor_name, threshold, ignore_duration, short_stack, is_team_defined=False):
    """
    Count the matched events stored for one sensor and one parameter setting.

    The count is the number of rows of the sensor's "match events" CSV file
    that contain no missing value.
    Args:
        sensor_name: the sensor name used in the CSV file name
        threshold: the radius multiplier used in the CSV file name
        ignore_duration: the duration in seconds used in the CSV file name
        short_stack: truthy to read the file carrying the "ss_" prefix
        is_team_defined: True to read from the "Team Defined Events" folder,
            False (default) to read from the "Stanford Defined Events" folder
    Returns:
        number_of_events: the number of rows without any missing value
    """
    if is_team_defined:
        events_dir = "../../results/03_DetectionAnalysis/Test-case Matching Data/Team Defined Events/"
    else:
        events_dir = "../../results/03_DetectionAnalysis/Test-case Matching Data/Stanford Defined Events/"

    prefix = "ss_" if short_stack else ""
    file_name = "{}{}xradius_{}xseconds_{}_match_events.csv".format(
        prefix, threshold, ignore_duration, sensor_name
    )

    matched_events = pd.read_csv(PurePath(events_dir, file_name))
    # Rows with at least one missing value are not counted as events.
    return matched_events.dropna().shape[0]

def get_event_based_difference_for_Reliability(data_dir, path_pattern, inputs, sensor_names):
    """
    Collect, per sensor, the reliability metrics for every parameter combination.

    For each entry of inputs the two event-based confusion matrix CSV files
    (Stanford-defined and team-defined events) are loaded. "Detection Rate (%)"
    and "Non-Emission Accuracy (%)" are taken from the Stanford-defined file,
    "Reliability of Identifications (%)" and
    "Reliability of Non-Emission Identifications (%)" from the team-defined
    file. Every metric is rounded to two decimals. The number of matched
    events for both event definitions is added to each row.
    Args:
        data_dir: sequence of two directories, [stanford_defined, team_defined]
        path_pattern: file-name template formatted with (threshold, ignore_duration)
        inputs: iterable of [threshold, ignore_duration, short_stack] entries
        sensor_names: the list of all sensor names
    Returns:
        difference_dict: dictionary mapping each sensor name to a DataFrame
            with one row per entry of inputs. The frames always carry the
            same seven columns, even when inputs is empty.
    Raises:
        IndexError: if a sensor has no row in one of the confusion matrix files
    """
    columns = [
        "threshold",
        "Number of Stanford Defined Events",
        "Number of Team Defined Events",
        "Detection Rate (%)",
        "Non-Emission Accuracy (%)",
        "Reliability of Identifications (%)",
        "Reliability of Non-Emission Identifications (%)",
    ]
    integer_columns = columns[:3]

    def metric_2dp(df, sensor_name, column):
        # First value of `column` among the rows indexed by the sensor name,
        # rounded to two decimals through string formatting.
        value = df.loc[df.index == sensor_name, column].values[0]
        return float("{:.2f}".format(value))

    # Rows are gathered in plain lists and turned into DataFrames once at the
    # end: DataFrame.append was removed in pandas 2.0, and growing a frame
    # row by row copies it on every step.
    rows_by_sensor = {sensor_name: [] for sensor_name in sensor_names}

    for params in inputs:
        threshold, ignore_duration, short_stack = params[0], params[1], params[2]
        print("\tcurrent parameter: threshold = {}, ignore_duration = {}, short_stack = {}".format(threshold, ignore_duration, short_stack))

        # Short-stack results are stored in files carrying the "ss_" prefix.
        prev_save_str = "" if short_stack == 0 else "ss_"
        file_name = prev_save_str + path_pattern.format(threshold, ignore_duration)

        # load the event-based confusion matrix data, indexed by the first column
        df_stanford_defined = pd.read_csv(PurePath(data_dir[0], file_name), index_col=0)
        df_team_defined = pd.read_csv(PurePath(data_dir[1], file_name), index_col=0)

        for sensor_name in sensor_names:
            print("\t\tcurrent sensor: {}".format(sensor_name))

            # metrics read from the Stanford Defined Events file
            detection_rate = metric_2dp(df_stanford_defined, sensor_name, "Detection Rate (%)")
            non_emission_accuracy = metric_2dp(df_stanford_defined, sensor_name, "Non-Emission Accuracy (%)")

            # metrics read from the Team Defined Events file
            reliability = metric_2dp(df_team_defined, sensor_name, "Reliability of Identifications (%)")
            non_emission_reliability = metric_2dp(df_team_defined, sensor_name, "Reliability of Non-Emission Identifications (%)")

            number_of_stanford_defined_events = get_number_of_events_for_stanford(sensor_name, threshold, ignore_duration, short_stack)
            number_of_team_defined_events = get_number_of_events_for_stanford(sensor_name, threshold, ignore_duration, short_stack, True)

            rows_by_sensor[sensor_name].append({
                "threshold": threshold,
                "Number of Stanford Defined Events": number_of_stanford_defined_events,
                "Number of Team Defined Events": number_of_team_defined_events,
                "Detection Rate (%)": detection_rate,
                "Non-Emission Accuracy (%)": non_emission_accuracy,
                "Reliability of Identifications (%)": reliability,
                "Reliability of Non-Emission Identifications (%)": non_emission_reliability,
            })

    # Build each sensor's table once, with a fixed column order and integer
    # dtype for the threshold and the two event counts.
    difference_dict = {}
    for sensor_name, rows in rows_by_sensor.items():
        sensor_df = pd.DataFrame(rows, columns=columns)
        difference_dict[sensor_name] = sensor_df.astype({column: int for column in integer_columns})
    return difference_dict

# Build the per-sensor metric tables for every parameter combination in `inputs`.
print("Reading metrics data...")
# The directory list is ordered [Stanford-defined, team-defined]; the function
# reads index 0 and index 1 in that order.
difference_dict = get_event_based_difference_for_Reliability([data_dir_stanford_defined, data_dir_team_defined], path_pattern, inputs, sensor_names)
print("Done")


Reading metrics data...
	current parameter: threshold = 1, ignore_duration = 60, short_stack = 0
		current sensor: Andium
		current sensor: Canary
		current sensor: Ecoteco
		current sensor: Kuva
		current sensor: Oiler
		current sensor: Qube
		current sensor: Sensirion
	current parameter: threshold = 2, ignore_duration = 60, short_stack = 0
		current sensor: Andium
		current sensor: Canary
		current sensor: Ecoteco
		current sensor: Kuva
		current sensor: Oiler
		current sensor: Qube
		current sensor: Sensirion
	current parameter: threshold = 4, ignore_duration = 60, short_stack = 0
		current sensor: Andium
		current sensor: Canary
		current sensor: Ecoteco
		current sensor: Kuva
		current sensor: Oiler
		current sensor: Qube
		current sensor: Sensirion
Done


# Save results

In [4]:
# Combine the per-sensor tables into a single table (one row per sensor and
# parameter combination) and write it to "event_based_results.csv".

save_columns = [
    "sensor",
    "threshold",
    "Number of Stanford Defined Events",
    "Number of Team Defined Events",
    "Detection Rate (%)",
    "Non-Emission Accuracy (%)",
    "Reliability of Identifications (%)",
    "Reliability of Non-Emission Identifications (%)",
]

# Tag every sensor's table with its sensor name. Whole frames are concatenated
# instead of enlarging save_df one row at a time, which also removes the
# dependency on each table having a default 0..n-1 index.
sensor_frames = []
for sensor_name in sensor_names:
    sensor_df = difference_dict[sensor_name][save_columns[1:]].copy()
    sensor_df.insert(0, "sensor", sensor_name)
    sensor_frames.append(sensor_df)

if sensor_frames:
    save_df = pd.concat(sensor_frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep an empty table with the right columns.
    save_df = pd.DataFrame(columns=save_columns)

# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(save_path, exist_ok=True)

save_df = save_df.astype({
    "threshold": int,
    "Number of Stanford Defined Events": int,
    "Number of Team Defined Events": int,
})
save_df.to_csv(PurePath(save_path, "event_based_results.csv"))