In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import re
import itertools

In [2]:
# Read the results table from <current working directory>/results/result.csv
# `column` lists the fields that the next selection step keeps.
column = ['Date_Time', 'Dataset', 'Epoch', 'Slide_Win', 'FC_Layer_Num', 'Topk']
path = Path.cwd() / 'results' / 'result.csv'

df = pd.read_csv(path)
df

Unnamed: 0,Date_Time,Dataset,Epoch,Slide_Win,Dim,Slide_Stride,FC_Layer_Num,FC_Dim,Decay,Val_Ratio,Topk,F1 Score,Precision,Recall,Confusion Matrix,Anomalies
0,"28-Sep-2022, 12:20:28",anomaly,100,30,64,2,2,128,0.10,0.1,2,0.059182,0.032661,0.317757,"[[33604, 1007], [73, 34]]",0.0031
1,"28-Sep-2022, 12:52:18",anomaly,100,30,64,2,2,128,0.10,0.1,20,0.081119,0.047776,0.271028,"[[34033, 578], [78, 29]]",0.0031
2,"28-Sep-2022, 13:24:07",anomaly,100,30,64,2,2,128,0.10,0.1,25,0.079618,0.048077,0.233645,"[[34116, 495], [82, 25]]",0.0031
3,"28-Sep-2022, 13:56:01",anomaly,100,30,64,2,2,128,0.01,0.1,2,0.060160,0.032421,0.420561,"[[33268, 1343], [62, 45]]",0.0031
4,"28-Sep-2022, 14:27:02",anomaly,100,30,64,2,2,128,0.01,0.1,20,0.054422,0.029630,0.336449,"[[33432, 1179], [71, 36]]",0.0031
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1629,"26-Oct-2022, 14:00:51",swat_ori_all_90,100,5,64,1,3,128,0.00,0.1,10,0.487855,0.371516,0.710288,"[[296512, 59168], [14266, 34976]]",0.1216
1630,"26-Oct-2022, 14:09:43",swat_ori_all_90,100,5,64,1,2,128,0.00,0.1,20,0.784041,0.987960,0.649913,"[[355290, 390], [17239, 32003]]",0.1216
1631,"26-Oct-2022, 21:06:23",swat_ori_all_90,100,5,64,1,3,128,0.00,0.1,20,0.836949,0.983920,0.728159,"[[355094, 586], [13386, 35856]]",0.1216
1632,"27-Oct-2022, 02:48:12",swat_ori_all_90,100,5,64,1,3,128,0.00,0.1,5,0.503018,0.390539,0.706511,"[[301388, 54292], [14452, 34790]]",0.1216


In [3]:
# Narrow the results to the fields named in `column`
# (Date_Time, Dataset, Epoch, Slide_Win, FC_Layer_Num and Topk).
df1 = df.reindex(columns=column)
df1

Unnamed: 0,Date_Time,Dataset,Epoch,Slide_Win,FC_Layer_Num,Topk
0,"28-Sep-2022, 12:20:28",anomaly,100,30,2,2
1,"28-Sep-2022, 12:52:18",anomaly,100,30,2,20
2,"28-Sep-2022, 13:24:07",anomaly,100,30,2,25
3,"28-Sep-2022, 13:56:01",anomaly,100,30,2,2
4,"28-Sep-2022, 14:27:02",anomaly,100,30,2,20
...,...,...,...,...,...,...
1629,"26-Oct-2022, 14:00:51",swat_ori_all_90,100,5,3,10
1630,"26-Oct-2022, 14:09:43",swat_ori_all_90,100,5,2,20
1631,"26-Oct-2022, 21:06:23",swat_ori_all_90,100,5,3,20
1632,"27-Oct-2022, 02:48:12",swat_ori_all_90,100,5,3,5


In [4]:
# Keep only the runs that satisfy one of:
#   * dataset name starts with swat_ori and Epoch >= 150, or
#   * dataset name starts with smap_m or msl_m and Epoch == 500.

smap_msl = re.compile('msl_m.*|smap_m.*')
swat = re.compile('swat_ori.*')
sort_col = ['Dataset', 'Epoch', 'Slide_Win', 'FC_Layer_Num', 'Topk']

# Vectorised name tests. Series.str.match anchors at the start of the string,
# the same way re.match does, and na=False treats a missing name as "no match".
is_smap_msl = df1['Dataset'].str.match(smap_msl.pattern, na=False)
is_swat = df1['Dataset'].str.match(swat.pattern, na=False)

# A boolean mask does not rely on df1 carrying a 0..n-1 index, and when no row
# qualifies it still yields a frame with every column, so the sort cannot fail.
keep = (is_smap_msl & (df1['Epoch'] == 500)) | (is_swat & (df1['Epoch'] >= 150))

df2 = df1.loc[keep].sort_values(by=sort_col).reset_index(drop=True)
df2

Unnamed: 0,Date_Time,Dataset,Epoch,Slide_Win,FC_Layer_Num,Topk
0,"12-Oct-2022, 10:21:17",msl_m,500,5,2,5
1,"13-Oct-2022, 21:42:45",msl_m,500,5,2,10
2,"15-Oct-2022, 20:48:49",msl_m,500,5,2,20
3,"18-Oct-2022, 12:50:30",msl_m,500,5,3,5
4,"19-Oct-2022, 08:36:25",msl_m,500,5,3,10
...,...,...,...,...,...,...
332,"22-Oct-2022, 03:22:45",swat_ori_90,150,20,2,10
333,"23-Oct-2022, 15:11:49",swat_ori_90,150,20,2,20
334,"20-Oct-2022, 01:26:37",swat_ori_90,150,20,3,5
335,"20-Oct-2022, 17:16:59",swat_ori_90,150,20,3,10


In [5]:
combi_list = []   # (win, layer, topk) tuple of every df2 row, in row order
combi_dict = {}   # dataset name -> list of the (win, layer, topk) tuples found for it

# Walk the four columns in lockstep instead of doing one label lookup per cell.
# .tolist() hands back plain Python scalars rather than numpy scalar objects.
rows = zip(
    df2['Dataset'].tolist(),
    df2['Slide_Win'].tolist(),
    df2['FC_Layer_Num'].tolist(),
    df2['Topk'].tolist(),
)

for dataset, win, layer, topk in rows:
    combi = (win, layer, topk)
    combi_list.append(combi)
    # setdefault creates the per-dataset list the first time a dataset is seen
    combi_dict.setdefault(dataset, []).append(combi)

# Attach the tuple to each row so the combination is visible in the table
df2['combi'] = combi_list
df2
    

Unnamed: 0,Date_Time,Dataset,Epoch,Slide_Win,FC_Layer_Num,Topk,combi
0,"12-Oct-2022, 10:21:17",msl_m,500,5,2,5,"(5, 2, 5)"
1,"13-Oct-2022, 21:42:45",msl_m,500,5,2,10,"(5, 2, 10)"
2,"15-Oct-2022, 20:48:49",msl_m,500,5,2,20,"(5, 2, 20)"
3,"18-Oct-2022, 12:50:30",msl_m,500,5,3,5,"(5, 3, 5)"
4,"19-Oct-2022, 08:36:25",msl_m,500,5,3,10,"(5, 3, 10)"
...,...,...,...,...,...,...,...
332,"22-Oct-2022, 03:22:45",swat_ori_90,150,20,2,10,"(20, 2, 10)"
333,"23-Oct-2022, 15:11:49",swat_ori_90,150,20,2,20,"(20, 2, 20)"
334,"20-Oct-2022, 01:26:37",swat_ori_90,150,20,3,5,"(20, 3, 5)"
335,"20-Oct-2022, 17:16:59",swat_ori_90,150,20,3,10,"(20, 3, 10)"


In [6]:
# Reduce each dataset's combinations to the distinct ones, in ascending order
for name, combos in combi_dict.items():
    combi_dict[name] = sorted(set(combos))

combi_dict

{'msl_m': [(5, 2, 5),
  (5, 2, 10),
  (5, 2, 20),
  (5, 3, 5),
  (5, 3, 10),
  (5, 3, 20),
  (10, 2, 5),
  (10, 2, 10),
  (10, 2, 20),
  (10, 3, 5),
  (10, 3, 10),
  (10, 3, 20),
  (20, 2, 5),
  (20, 2, 10),
  (20, 2, 20),
  (20, 3, 5),
  (20, 3, 10),
  (20, 3, 20)],
 'msl_m_10': [(5, 2, 5),
  (5, 2, 10),
  (5, 2, 20),
  (5, 3, 5),
  (5, 3, 10),
  (5, 3, 20),
  (10, 2, 5),
  (10, 2, 10),
  (10, 2, 20),
  (10, 3, 5),
  (10, 3, 10),
  (10, 3, 20),
  (20, 2, 5),
  (20, 2, 10),
  (20, 2, 20),
  (20, 3, 5),
  (20, 3, 10),
  (20, 3, 20)],
 'msl_m_30': [(5, 2, 5),
  (5, 2, 10),
  (5, 2, 20),
  (5, 3, 5),
  (5, 3, 10),
  (5, 3, 20),
  (10, 2, 5),
  (10, 2, 10),
  (10, 2, 20),
  (10, 3, 5),
  (10, 3, 10),
  (10, 3, 20),
  (20, 2, 5),
  (20, 2, 10),
  (20, 2, 20),
  (20, 3, 5),
  (20, 3, 10),
  (20, 3, 20)],
 'msl_m_50': [(5, 2, 5),
  (5, 2, 10),
  (5, 2, 20),
  (5, 3, 5),
  (5, 3, 10),
  (5, 3, 20),
  (10, 2, 5),
  (10, 2, 10),
  (10, 2, 20),
  (10, 3, 5),
  (10, 3, 10),
  (10, 3, 20),
  (20, 2

In [7]:
win = [5, 10, 20]
layer = [2, 3]
topk = [5, 10, 20]

# For every dataset, flag each required (win, layer, topk) combination:
# 1 when it is among the dataset's recorded combinations, otherwise 0.
# Flags follow the order produced by itertools.product(win, layer, topk).
stat = {
    name: [int(exp in completed) for exp in itertools.product(win, layer, topk)]
    for name, completed in combi_dict.items()
}

stat

{'msl_m': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'msl_m_10': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'msl_m_30': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'msl_m_50': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'msl_m_70': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'msl_m_90': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'smap_m': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'smap_m_10': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'smap_m_30': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'smap_m_50': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'smap_m_60': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'smap_m_70': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'smap_m_90': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'swat_ori': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 'swat_ori_10': [1, 1, 1, 1, 1, 1

In [8]:
# One row per required (win, layer, topk) combination, one column per dataset.
df_stat = pd.DataFrame(stat, index=list(itertools.product(win, layer, topk)))

# smap_m_60 is left out of the report. errors='ignore' keeps the cell runnable
# when that dataset is absent from the results, and dropping without inplace
# avoids mutating the frame after it has been built.
df_stat = df_stat.drop(columns=['smap_m_60'], errors='ignore')

# Persist the status table next to the notebook (relative to the working directory)
df_stat.to_csv('stat.csv')
df_stat

Unnamed: 0,msl_m,msl_m_10,msl_m_30,msl_m_50,msl_m_70,msl_m_90,smap_m,smap_m_10,smap_m_30,smap_m_50,smap_m_70,smap_m_90,swat_ori,swat_ori_10,swat_ori_30,swat_ori_50,swat_ori_70,swat_ori_90
"(5, 2, 5)",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
"(5, 2, 10)",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
"(5, 2, 20)",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
"(5, 3, 5)",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
"(5, 3, 10)",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
"(5, 3, 20)",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
"(10, 2, 5)",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
"(10, 2, 10)",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
"(10, 2, 20)",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
"(10, 3, 5)",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [9]:
# Show the (win, layer, topk) combinations recorded for the smap_m_70 dataset
combi_dict['smap_m_70']

[(5, 2, 5),
 (5, 2, 10),
 (5, 2, 20),
 (5, 3, 5),
 (5, 3, 10),
 (5, 3, 20),
 (10, 2, 5),
 (10, 2, 10),
 (10, 2, 20),
 (10, 3, 5),
 (10, 3, 10),
 (10, 3, 20),
 (20, 2, 5),
 (20, 2, 10),
 (20, 2, 20),
 (20, 3, 5),
 (20, 3, 10),
 (20, 3, 20)]