# 1. Import necessary packages

In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import warnings
import re
from pathlib import PurePath

# Ignore warnings
# NOTE(review): called without a category, this filter suppresses every
# warning for the rest of the notebook, which presumably includes pandas
# deprecation/FutureWarning messages — consider narrowing it.
warnings.filterwarnings("ignore")

# Pre-defined parameters


- **threshold**: Represents the distance parameter used by the wind transpose model (1/2/4 times the experimental area radius).
- **duration**: Denotes the minimum event length in Stanford Defined Events that should be disregarded (30/60/120 seconds).

- **short_stack**: Indicates whether only the short-stack dates are considered (1) or the full measurement period is used (0).

In [2]:
# Minimum event length in seconds: Stanford-defined events shorter than this
# are ignored. Supported values: 30, 60 (1 minute) or 120 (2 minutes).
duration = 60

# Distance parameter as a multiple of the radius. Supported values: 1, 2 or 4.
threshold = 2

# 0 -> use every date; 1 -> later cells keep only the short-stack dates
# (2022-10-31 and 2022-11-15 .. 2022-11-30).
short_stack = 0


# Echo the chosen configuration so it is visible in the cell output.
print("Input Parameters: ")
for _label, _value in (
    ("ignore_duration: ", duration),
    ("threshold: ", threshold),
    ("short_stack: ", short_stack),
):
    print(_label, _value)

Input Parameters: 
ignore_duration:  60
threshold:  2
short_stack:  0


# 1. Load Data

## 1.1 Stanford-defined events dataset

- Stanford-Defined Dataset (Camera-Based): Categorized with Positive and Negative labels
- Stanford-Defined Dataset (Sensor-Based): Categorized as Positive or Negative

In [3]:
def _load_stanford_events(csv_path):
    """Read one Stanford-defined events CSV and add a ``ManualDate`` column.

    :param csv_path: path of the CSV file to read
    :return: the loaded DataFrame, where ``ManualDate`` holds the calendar
             date of each row's ``ManualStartDateTime``
    """
    events = pd.read_csv(csv_path)
    events["ManualDate"] = events["ManualStartDateTime"].map(lambda x: pd.to_datetime(x).date())
    return events


def _keep_short_stack_dates(events):
    """Keep only the rows that fall on the short-stack dates.

    A row is kept when its ``ManualDate`` is 2022-10-31 or lies between
    2022-11-15 and 2022-11-30 (both inclusive).

    :param events: DataFrame with a ``ManualDate`` column of ``datetime.date``
    :return: a filtered copy whose index is renumbered from 0
    """
    single_day = pd.to_datetime("2022-10-31").date()
    range_start = pd.to_datetime("2022-11-15").date()
    range_end = pd.to_datetime("2022-11-30").date()
    manual_date = events["ManualDate"]
    keep = (manual_date == single_day) | ((manual_date >= range_start) & (manual_date <= range_end))
    return events[keep].reset_index(drop=True)


# 1. Stanford-Defined Events Dataset(Camera-based)
# saved in the files: assets/events_PN/candidate_event_duration=60xseconds_[P/N/NA].csv

print("Loading data for camera-based stanford-defined events...")
camera_based_stanford_defined_events_p = _load_stanford_events(
    f'../../assets/events_PN/candidate_event_duration={duration}xseconds_P.csv')
camera_based_stanford_defined_events_n = _load_stanford_events(
    f'../../assets/events_PN/candidate_event_duration={duration}xseconds_N.csv')
camera_based_stanford_defined_events_na = _load_stanford_events(
    f'../../assets/events_PN/candidate_event_duration={duration}xseconds_NA.csv')
print("Loaded data for camera-based stanford-defined events")

# 2. Stanford-Defined Events Dataset(Sensor-based)
# saved in the files: assets/events_PN/true_event_threshold=2xradius_duration=60xseconds_[P/N/NA].csv

print("Loading data for sensor-based stanford-defined events...")
sensor_based_stanford_defined_events_p = _load_stanford_events(
    f'../../assets/events_PN/true_event_threshold={threshold}xradius_duration={duration}xseconds_P.csv')
sensor_based_stanford_defined_events_n = _load_stanford_events(
    f'../../assets/events_PN/true_event_threshold={threshold}xradius_duration={duration}xseconds_N.csv')
sensor_based_stanford_defined_events_na = _load_stanford_events(
    f'../../assets/events_PN/true_event_threshold={threshold}xradius_duration={duration}xseconds_NA.csv')
print("Loaded data for sensor-based stanford-defined events")


if short_stack:
    # Every P / N / NA frame is *filtered* to the short-stack dates.
    # (Previously the N and NA frames appended the filtered rows to the full
    # frame, which kept all dates and duplicated the short-stack rows; that
    # code also relied on DataFrame.append, which pandas 2.0 removed.)
    camera_based_stanford_defined_events_p = _keep_short_stack_dates(camera_based_stanford_defined_events_p)
    camera_based_stanford_defined_events_n = _keep_short_stack_dates(camera_based_stanford_defined_events_n)
    camera_based_stanford_defined_events_na = _keep_short_stack_dates(camera_based_stanford_defined_events_na)

    sensor_based_stanford_defined_events_p = _keep_short_stack_dates(sensor_based_stanford_defined_events_p)
    sensor_based_stanford_defined_events_n = _keep_short_stack_dates(sensor_based_stanford_defined_events_n)
    sensor_based_stanford_defined_events_na = _keep_short_stack_dates(sensor_based_stanford_defined_events_na)



Loading data for camera-based stanford-defined events...
Loaded data for camera-based stanford-defined events
Loading data for sensor-based stanford-defined events...
Loaded data for sensor-based stanford-defined events


## 1.2 Team-defined events dataset

- Team-Defined Events Dataset(Camera-based): Categorized with Positive, Negative, and N/A labels for camera-based operators: Andium, Oiler, Kuva
- Team-Defined Events Dataset(Sensor-based): Categorized with Positive, Negative, and N/A labels for sensor-based operators: Canary, Ecoteco, Qube, Sensirion, Soofie

In [4]:
# 3. Team-Defined Events Dataset
# stored in the files: assets/sensor_data/[sensor_name]_validdata.csv

# Teams grouped by measurement technology.
camera_based_sensor_names = ["Andium", "Kuva", "Oiler"]
sensor_based_sensor_names = ["Canary", "Ecoteco", "Qube", "Sensirion", "Soofie"]

# Every team, in the (alphabetical) order used for all per-team loops.
sensor_names = [
    "Andium", "Canary", "Ecoteco", "Kuva",
    "Oiler", "Qube", "Sensirion", "Soofie",
]

_event_columns = ["EmissionStartDateTime", "EmissionEndDateTime", "ReportLabel"]

df_team_defined_events = {}
for sn in sensor_names:
    print("Loading data for sensor: " + sn, end="\t")
    _team_events = pd.read_csv(
        '../../assets/sensor_data/' + sn + '_validdata.csv',
        parse_dates=['EmissionStartDateTime', "EmissionEndDateTime"],
    )
    # Only the event boundaries and the reported label are kept.
    _team_events = _team_events[_event_columns]
    print("Done.")

    if short_stack:
        # Keep only events starting on 2022-10-31 or within
        # 2022-11-15 .. 2022-11-30 (both inclusive).
        _team_events["ReportDate"] = _team_events["EmissionStartDateTime"].map(
            lambda x: pd.to_datetime(x).date())
        _report_date = _team_events["ReportDate"]
        _on_single_day = _report_date == pd.to_datetime("2022-10-31").date()
        _in_range = (
            (_report_date >= pd.to_datetime("2022-11-15").date())
            & (_report_date <= pd.to_datetime("2022-11-30").date())
        )
        _team_events = _team_events[_on_single_day | _in_range].reset_index(drop=True)

    df_team_defined_events[sn] = _team_events

Loading data for sensor: Andium	Done.
Loading data for sensor: Canary	Done.
Loading data for sensor: Ecoteco	Done.
Loading data for sensor: Kuva	Done.
Loading data for sensor: Oiler	Done.
Loading data for sensor: Qube	Done.
Loading data for sensor: Sensirion	Done.
Loading data for sensor: Soofie	Done.


# 2.1 Continuous Time Series: Stanford Defined Time Series
1. Time Range: 2022-10-10 00:00:00 to 2022-11-30 23:59:59
2. Initial Category: All timestamps set to N/A
3. Final Assignment: Timestamps categorized as Positive, Negative, or remain N/A
4. For each event in the Stanford-defined events dataset, find its start time, end time and label. In the continuous time series, assign the label of the current event to every timestamp that falls within that event.

In [5]:
# camera-based stanford-defined time series

# Make sure the event boundaries of all three label groups are datetimes.
for _events in (
    camera_based_stanford_defined_events_p,
    camera_based_stanford_defined_events_n,
    camera_based_stanford_defined_events_na,
):
    for _column in ("ManualStartDateTime", "ManualEndDateTime"):
        _events[_column] = pd.to_datetime(_events[_column])

# One row per second from 2022-10-10 00:00:00 to 2022-11-30 23:59:59;
# every timestamp starts out labelled "NA".
camera_based_stanford_defined_time_series = pd.DataFrame({
    "Datetime (UTC)": pd.date_range(
        start="2022-10-10 00:00:00", end="2022-11-30 23:59:59", freq="S"),
})
camera_based_stanford_defined_time_series["TrueLabel"] = "NA"


print("Generating camera-based stanford-defined time series...")
# Positive events are written first, then Negative ones, so a timestamp
# covered by both ends up labelled "N".
for _message, _label, _events in (
    ("Generating camera-based stanford-defined time series for Positive events",
     "P", camera_based_stanford_defined_events_p),
    ("Generating camera-based stanford-defined time series for Negative events",
     "N", camera_based_stanford_defined_events_n),
):
    print(_message, end="\t")
    for _event_start, _event_end in zip(_events["ManualStartDateTime"],
                                        _events["ManualEndDateTime"]):
        # Both event boundaries are inclusive.
        _inside_event = camera_based_stanford_defined_time_series["Datetime (UTC)"].between(
            _event_start, _event_end)
        camera_based_stanford_defined_time_series.loc[_inside_event, "TrueLabel"] = _label
    print("Done.")

Generating camera-based stanford-defined time series...
Generating camera-based stanford-defined time series for Positive events	Done.
Generating camera-based stanford-defined time series for Negative events	Done.


In [6]:
# sensor-based stanford-defined time series

# Make sure the event boundaries of all three label groups are datetimes.
for _events in (
    sensor_based_stanford_defined_events_p,
    sensor_based_stanford_defined_events_n,
    sensor_based_stanford_defined_events_na,
):
    for _column in ("ManualStartDateTime", "ManualEndDateTime"):
        _events[_column] = pd.to_datetime(_events[_column])


# One row per second from 2022-10-10 00:00:00 to 2022-11-30 23:59:59;
# every timestamp starts out labelled "NA".
sensor_based_stanford_defined_time_series = pd.DataFrame({
    "Datetime (UTC)": pd.date_range(
        start="2022-10-10 00:00:00", end="2022-11-30 23:59:59", freq="S"),
})
sensor_based_stanford_defined_time_series["TrueLabel"] = "NA"

print("Generating sensor-based stanford-defined time series...")
# Positive events are written first, then Negative ones, so a timestamp
# covered by both ends up labelled "N".
for _message, _label, _events in (
    ("Generating sensor-based stanford-defined time series for Positive events",
     "P", sensor_based_stanford_defined_events_p),
    ("Generating sensor-based stanford-defined time series for Negative events",
     "N", sensor_based_stanford_defined_events_n),
):
    print(_message, end="\t")
    for _event_start, _event_end in zip(_events["ManualStartDateTime"],
                                        _events["ManualEndDateTime"]):
        # Both event boundaries are inclusive.
        _inside_event = sensor_based_stanford_defined_time_series["Datetime (UTC)"].between(
            _event_start, _event_end)
        sensor_based_stanford_defined_time_series.loc[_inside_event, "TrueLabel"] = _label
    print("Done.")

Generating sensor-based stanford-defined time series...
Generating sensor-based stanford-defined time series for Positive events	Done.
Generating sensor-based stanford-defined time series for Negative events	Done.


# 2.2 Continuous Time Series: Team Defined Time Series

In [7]:
# Per-team continuous time series: one row per second, labelled with the
# ReportLabel of the team-defined event covering that second ("NA" otherwise).
team_defined_time_series = {}

# The 1-second timeline is identical for every team, so build it only once.
_team_timeline = pd.date_range(
    start="2022-10-10 00:00:00", end="2022-11-30 23:59:59", freq="S")

for sn in sensor_names:

    print("Generating team-defined time series for sensor: " + sn, end="\t")
    team_events = df_team_defined_events[sn]
    team_events["EmissionStartDateTime"] = pd.to_datetime(
        team_events["EmissionStartDateTime"])
    team_events["EmissionEndDateTime"] = pd.to_datetime(
        team_events["EmissionEndDateTime"])

    # Only "Datetime (UTC)" and "ReportLabel" are created. The previous
    # version also declared a "TrueLabel" column that was never filled and
    # stayed entirely NaN.
    current_team_defined_time_series = pd.DataFrame({"Datetime (UTC)": _team_timeline})
    current_team_defined_time_series["ReportLabel"] = "NA"

    # Events are applied in file order with inclusive boundaries, so a later
    # event overwrites an earlier one where they overlap.
    for start, end, report_label in zip(
        team_events["EmissionStartDateTime"],
        team_events["EmissionEndDateTime"],
        team_events["ReportLabel"],
    ):
        current_team_defined_time_series.loc[
            (current_team_defined_time_series["Datetime (UTC)"] >= start) &
            (current_team_defined_time_series["Datetime (UTC)"] <= end),
            "ReportLabel"] = report_label

    team_defined_time_series[sn] = current_team_defined_time_series
    print("Done.")

Generating team-defined time series for sensor: Andium	Done.
Generating team-defined time series for sensor: Canary	Done.
Generating team-defined time series for sensor: Ecoteco	Done.
Generating team-defined time series for sensor: Kuva	Done.
Generating team-defined time series for sensor: Oiler	Done.
Generating team-defined time series for sensor: Qube	Done.
Generating team-defined time series for sensor: Sensirion	Done.
Generating team-defined time series for sensor: Soofie	Done.


# 3. Merge Continuous Time Series

Merge Stanford and Team-Defined Time Series: Create a continuous timeline from 2022-10-10 00:00:00 to 2022-11-30 23:59:59. Each timestamp has two attributes:
- GT (Ground Truth): Derived from the Stanford-defined category
- Pred (Prediction): Derived from the team-defined category

In [8]:
# For every team, pair the ground-truth label (GT) with the team's reported
# label (Pred) on the shared per-second timeline.
combine_time_series = {}

for sn in sensor_names:
    print("Combining time series for sensor: " + sn, end="\t")

    # Camera-based teams are compared against the camera-based Stanford
    # series; every other team against the sensor-based one.
    stanford_defined_time_series = (
        camera_based_stanford_defined_time_series
        if sn in camera_based_sensor_names
        else sensor_based_stanford_defined_time_series
    )

    # Columns are matched row by row through the shared integer index.
    combine_time_series[sn] = pd.DataFrame({
        "Datetime (UTC)": stanford_defined_time_series["Datetime (UTC)"],
        "GT": stanford_defined_time_series["TrueLabel"],
        "Pred": team_defined_time_series[sn]["ReportLabel"],
    })
    print("Done.")

Combining time series for sensor: Andium	Done.
Combining time series for sensor: Canary	Done.
Combining time series for sensor: Ecoteco	Done.
Combining time series for sensor: Kuva	Done.
Combining time series for sensor: Oiler	Done.
Combining time series for sensor: Qube	Done.
Combining time series for sensor: Sensirion	Done.
Combining time series for sensor: Soofie	Done.


3. Calculation of time-based metrics:
- Total sample size: The number of timestamps in the time-based sequence where GT is not N/A and Pred is not N/A, marked as Ntotal
- TP: The number of timestamps in the time-based sequence where GT is P and Pred is P, marked as NTP
- FP: The number of timestamps in the time-based sequence where GT is N and Pred is P, marked as NFP
- TN: The number of timestamps in the time-based sequence where GT is N and Pred is N, marked as NTN
- FN: The number of timestamps in the time-based sequence where GT is P and Pred is N, marked as NFN
- TP(%): NTP / Ntotal * 100 
- FP(%): NFP / Ntotal * 100 
- FN(%): NFN / Ntotal * 100 
- TN(%): NTN / Ntotal * 100 
- FPR(%): NFP / (NFP +NTN) * 100
- TNR(%): NTN / (NFP +NTN) * 100
- TPR(%): NTP / (NTP +NFN) * 100
- FNR(%): NFN / (NFN +NTP) * 100
- Accuracy(%): (NTP + NTN) / Ntotal * 100
- Precision(%): NTP/(NFP +NTP) * 100

In [9]:
# Time-based confusion-matrix metrics, computed per team over all timestamps:
# - Total sample size: number of timestamps where GT is not N/A and Pred is not N/A, marked as Ntotal
# - TP: number of timestamps where GT is P and Pred is P, marked as NTP
# - FP: number of timestamps where GT is N and Pred is P, marked as NFP
# - TN: number of timestamps where GT is N and Pred is N, marked as NTN
# - FN: number of timestamps where GT is P and Pred is N, marked as NFN
# - TP(%): NTP / Ntotal * 100
# - FP(%): NFP / Ntotal * 100
# - FN(%): NFN / Ntotal * 100
# - TN(%): NTN / Ntotal * 100
# - FPR(%): NFP / (NFP + NTN) * 100
# - TNR(%): NTN / (NFP + NTN) * 100
# - TPR(%): NTP / (NTP + NFN) * 100
# - FNR(%): NFN / (NFN + NTP) * 100
# - Accuracy(%): (NTP + NTN) / Ntotal * 100
# - Precision(%): NTP / (NFP + NTP) * 100

metric_columns = [
    "Sensor",
    "TP(%)",
    "FP(%)",
    "FN(%)",
    "TN(%)",
    "sample_size",
    "FPR(%)",
    "TPR(%)",
    "TNR(%)",
    "FNR(%)",
    "Accuracy(%)",
    "Precision(%)"
]

# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
metric_records = []

print("Calculating metrics...")
for sn in sensor_names:
    print("Calculating metrics for sensor: " + sn)
    cur_time_series = combine_time_series[sn]
    gt = cur_time_series["GT"]
    pred = cur_time_series["Pred"]

    # A timestamp counts only when both labels are present: neither the
    # string "NA" nor a missing value.
    total_sample_size = ((gt != "NA") & (pred != "NA") &
                         (pd.notna(gt)) & (pd.notna(pred))
                         ).sum()
    TP = ((gt == "P") & (pred == "P")).sum()
    FP = ((gt == "N") & (pred == "P")).sum()
    FN = ((gt == "P") & (pred == "N")).sum()
    TN = ((gt == "N") & (pred == "N")).sum()

    # NOTE: the counts are numpy integers, so a zero denominator yields
    # nan/inf (with a suppressed RuntimeWarning) rather than an exception.
    TP_percent = TP / total_sample_size * 100
    FP_percent = FP / total_sample_size * 100
    FN_percent = FN / total_sample_size * 100
    TN_percent = TN / total_sample_size * 100

    FPR_percent = FP / (FP + TN) * 100
    TNR_percent = TN / (FP + TN) * 100
    TPR_percent = TP / (TP + FN) * 100
    FNR_percent = FN / (FN + TP) * 100

    Accuracy_percent = (TP + TN) / total_sample_size * 100
    Precision_percent = TP / (FP + TP) * 100

    print("\tTP, FP, FN, TN: ", TP, FP, FN, TN)
    print("\tTP(%), FP(%), FN(%), TN(%): ", TP_percent, FP_percent, FN_percent, TN_percent)
    print("\tFPR(%), TPR(%), TNR(%), FNR(%): ", FPR_percent, TPR_percent, TNR_percent, FNR_percent)
    print("\tAccuracy(%), Precision(%): ", Accuracy_percent, Precision_percent)

    # The four count columns are stored as "count(percent)" strings; the
    # heatmap cell later parses the count back out of them.
    metric_records.append({
        "Sensor": sn,
        "TP(%)": "{}({:.2f})".format(TP, float(TP_percent)),
        "FP(%)": "{}({:.2f})".format(FP, float(FP_percent)),
        "FN(%)": "{}({:.2f})".format(FN, float(FN_percent)),
        "TN(%)": "{}({:.2f})".format(TN, float(TN_percent)),
        "sample_size": total_sample_size,
        "FPR(%)": "{:.2f}".format(float(FPR_percent)),
        "TPR(%)": "{:.2f}".format(float(TPR_percent)),
        "TNR(%)": "{:.2f}".format(float(TNR_percent)),
        "FNR(%)": "{:.2f}".format(float(FNR_percent)),
        "Accuracy(%)": "{:.2f}".format(float(Accuracy_percent)),
        "Precision(%)": "{:.2f}".format(float(Precision_percent))
    })
    print("Done.")

# One row per team, columns in the documented order.
metrics_df = pd.DataFrame(metric_records, columns=metric_columns)



Calculating metrics...
Calculating metrics for sensor: Andium
	TP, FP, FN, TN:  191798 4379 141025 2791778
	TP(%), FP(%), FN(%), TN(%):  6.129729176920274 0.13994975998568224 4.507059808627732 89.22326125446631
	FPR(%), TPR(%), TNR(%), FNR(%):  0.15660780135021032 57.62762789831232 99.8433921986498 42.37237210168768
	Accuracy(%), Precision(%):  95.35299043138659 97.7678321107979
Done.
Calculating metrics for sensor: Canary
	TP, FP, FN, TN:  139394 19236 5487 1190630
	TP(%), FP(%), FN(%), TN(%):  10.289301249606014 1.41989611344406 0.40502027315801403 87.8857823637919
	FPR(%), TPR(%), TNR(%), FNR(%):  1.58992814080237 96.2127539152822 98.41007185919763 3.7872460847178027
	Accuracy(%), Precision(%):  98.17508361339793 87.87366828468764
Done.
Calculating metrics for sensor: Ecoteco
	TP, FP, FN, TN:  25948 22202 241302 2349707
	TP(%), FP(%), FN(%), TN(%):  0.9831919941163074 0.8412528384989308 9.143139916920504 89.03241525046425
	FPR(%), TPR(%), TNR(%), FNR(%):  0.9360392831259546 9.709260

# 4. Save Result

In [10]:
# Save the metrics to csv file

# File-name prefix: "ss_" marks results restricted to the short-stack dates.
if short_stack:
    pre_str = "ss_"
else:
    pre_str = ""

metrics_dir = "../../results/03_DetectionAnalysis/Time-based ConfusionMatrix"
match_events_dir = "../../results/03_DetectionAnalysis/Test-case Matching Data/Time-based Events"

# to_csv does not create missing parent directories, so make sure both
# output folders exist before writing.
os.makedirs(metrics_dir, exist_ok=True)
os.makedirs(match_events_dir, exist_ok=True)

metrics_df.to_csv(f"{metrics_dir}/{pre_str}threshold={threshold}xradius_duration={duration}xseconds.csv", index=False)


# Save match events to csv file
for sn in sensor_names:
    print("Saving match events for sensor: " + sn, end="\t")
    cur_time_series = combine_time_series[sn]

    # Classify every timestamp. Rows where GT or Pred is not P/N keep the
    # default "NA" and are still written to the file.
    # This adds a "norm_class" column to combine_time_series[sn] in place.
    cur_time_series["norm_class"] = "NA"
    for gt_label, pred_label, norm_class in (
        ("P", "P", "TP"),
        ("N", "P", "FP"),
        ("P", "N", "FN"),
        ("N", "N", "TN"),
    ):
        cur_time_series.loc[
            (cur_time_series["GT"] == gt_label) & (cur_time_series["Pred"] == pred_label),
            "norm_class"] = norm_class

    cur_time_series.to_csv(f"{match_events_dir}/{pre_str}{threshold}xradius_{duration}xseconds_{sn}_match_events.csv", index=False)
    print("Done.")

Saving match events for sensor: Andium	Done.
Saving match events for sensor: Canary	Done.
Saving match events for sensor: Ecoteco	Done.
Saving match events for sensor: Kuva	Done.
Saving match events for sensor: Oiler	Done.
Saving match events for sensor: Qube	Done.
Saving match events for sensor: Sensirion	Done.
Saving match events for sensor: Soofie	Done.


## 5. Plot a heatmap for Confusion Matrix

In this section, we will use a heatmap to visualize the number of TP/FP/FN/TN for each team.

In [11]:
def plot_heatmap(ax, sensor:str, confusion_data: np.ndarray, save_path: PurePath):
    """
    Plot a confusion matrix as an annotated heatmap, save it, and close the figure.

    Rows are the reported class and columns the released class, so
    ``confusion_data`` is expected as ``[[TP, FP], [FN, TN]]``.

    :param ax: matplotlib Axes to draw on; when None the current Axes is used
    :param sensor: sensor name, shown in the title
    :param confusion_data: confusion matrix data, a numpy array of shape (2, 2)
    :param save_path: save path of the image file
    """
    # Draw on the Axes supplied by the caller rather than on pyplot's
    # implicit "current" Axes.
    if ax is None:
        ax = plt.gca()
    fig = ax.figure

    image = ax.imshow(confusion_data, cmap=plt.cm.Oranges, interpolation='nearest')
    ax.set_title("Confusion Matrix for {}".format(sensor), fontsize=16)
    ax.set_xlabel("Released", fontsize=14)
    ax.set_ylabel("Reported", fontsize=14)

    fig.colorbar(image, ax=ax)

    # Add x and y ticks
    ax.set_xticks([0, 1])
    ax.set_xticklabels(["Positive", "Negative"], fontsize=12)
    ax.set_yticks([0, 1])
    ax.set_yticklabels(["Positive", "Negative"], fontsize=12)

    classifier = [["TP", "FP"], ["FN", "TN"]]
    # Share of each cell in the overall total; an all-zero matrix is shown
    # as 0.00% instead of dividing by zero.
    total = confusion_data.sum()
    if total:
        rates = confusion_data / total
    else:
        rates = np.zeros(confusion_data.shape)

    # Add text annotations: class name, count with thousands separators,
    # and percentage of the total.
    for i in range(confusion_data.shape[0]):
        for j in range(confusion_data.shape[1]):
            ax.text(j, i,
                    "{}\n".format(classifier[i][j]) +
                    format(confusion_data[i, j], ",") + "\n" +
                    "{:.2f}%".format(rates[i, j] * 100),
                    ha="center", va="center", color="black", fontsize=12)

    # Save the confusion matrix heatmap. The keyword is ``bbox_inches``;
    # the former ``box_inches`` spelling is not a savefig option.
    fig.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

# Folder that receives one heatmap image per team; created on first use.
save_dir = PurePath("../../results/03_DetectionAnalysis/Test-case Matching Data/Time-based Events/Heatmap/")
if not os.path.exists(save_dir):
    os.makedirs(save_dir)


for i in range(len(metrics_df)):
    metrics_row = metrics_df.loc[i]
    sn = metrics_row["Sensor"]
    sensor_name = sn
    save_path = PurePath(save_dir, f"./{pre_str}{threshold}xradius_{duration}xseconds_{sn}_heatmap.jpg")
    print("Plotting heatmap for {}...".format(sensor_name))

    # The metric cells look like "count(percent)"; keep the text before "(".
    TP, FP, FN, TN = (
        metrics_row[column].split("(")[0]
        for column in ("TP(%)", "FP(%)", "FN(%)", "TN(%)")
    )
    print("statics: TP: {}, FP: {}, FN: {}, TN: {}".format(TP, FP, FN, TN))

    # Matrix layout: first row [TP, FP], second row [FN, TN].
    confusion_data = np.array([int(count) for count in (TP, FP, FN, TN)]).reshape(2, 2)
    fig, ax = plt.subplots(figsize=(8, 8))
    plot_heatmap(ax, sensor_name, confusion_data, save_path)
    print("Done.")

Plotting heatmap for Andium...
statics: TP: 191798, FP: 4379, FN: 141025, TN: 2791778
Done.
Plotting heatmap for Canary...
statics: TP: 139394, FP: 19236, FN: 5487, TN: 1190630
Done.
Plotting heatmap for Ecoteco...
statics: TP: 25948, FP: 22202, FN: 241302, TN: 2349707
Done.
Plotting heatmap for Kuva...
statics: TP: 267398, FP: 9048, FN: 57178, TN: 580518
Done.
Plotting heatmap for Oiler...
statics: TP: 72248, FP: 2404, FN: 57696, TN: 949495
Done.
Plotting heatmap for Qube...
statics: TP: 188323, FP: 20255, FN: 169929, TN: 2768740
Done.
Plotting heatmap for Sensirion...
statics: TP: 333671, FP: 92137, FN: 38007, TN: 3275171
Done.
Plotting heatmap for Soofie...
statics: TP: 182773, FP: 201239, FN: 29244, TN: 2115234
Done.
