In [1]:
import pandas as pd
from tqdm import tqdm
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "plotly_mimetype"
import tsfel
import numpy as np
import mt_utils as ut
import importlib
import re

In [2]:
# Re-import the local helper module so that edits made to mt_utils.py
# are picked up without restarting the kernel.
importlib.reload(ut)

<module 'mt_utils' from 'E:\\Melli\\HU\\Masterarbeit Save\\Implementation\\mt_utils.py'>

In [11]:
# Load the stored main-experiment result tables for both benchmarks.
# Later cells read their orig_TS_ID, Algorithm and score columns.
# NOTE(review): unpickling can execute arbitrary code - only load files
# that were produced by this project itself.
main_exp_tssb = pd.read_pickle('results/zwischenergebnisse/main_experiment_tssb.pkl')
main_exp_hasc = pd.read_pickle('results/zwischenergebnisse/main_experiment_hasc.pkl')

In [13]:
from claspy.data_loader import load_tssb_dataset
from claspy.data_loader import load_has_dataset

In [14]:
# Load the two benchmark collections via claspy. The helper functions below
# index these frames positionally (iloc) and read their `dataset` column.
tssb = load_tssb_dataset()
hasc = load_has_dataset()

In [3]:
# Display order of the algorithms; score_per_type_matrix reindexes by it.
algo_order=["EveryNth","MinMax","M4","LTTB","MinMaxLTTB","LTD"]
# Column names of the result tables.
# NOTE(review): `groups` is not used by any cell visible in this notebook - confirm it is still needed.
groups = ['Algorithm','TS_Length', 'statistical','temporal','spectral','window_size','score']

# Functions

In [4]:
def add_dataset_name_tssb(row):
    """Return the TSSB data set name that a result row belongs to.

    ``row.orig_TS_ID`` is used as a positional index into the module-level
    ``tssb`` frame; the ``dataset`` entry of that record is returned.
    """
    position = row.orig_TS_ID
    source_record = tssb.iloc[position]
    return source_record.dataset

def add_dataset_name_hasc(row):
    """Return the HASC data set name that a result row belongs to.

    ``row.orig_TS_ID`` is used as a positional index into the module-level
    ``hasc`` frame; the ``dataset`` entry of that record is returned.
    """
    position = row.orig_TS_ID
    source_record = hasc.iloc[position]
    return source_record.dataset

In [5]:
def add_types_tssb(row):
    """Return the ``Type`` label of the TSSB series a result row refers to.

    The lookup is positional: ``row.orig_TS_ID`` selects the record of the
    module-level ``tssb`` frame, which must already carry a ``Type`` column.
    """
    position = row.orig_TS_ID
    source_record = tssb.iloc[position]
    return source_record.Type

def add_types_hasc(row):
    """Derive the HASC type label from a row's ``dataset`` name.

    The routine/id suffix is stripped so that only the indoor/outdoor part
    and the subject remain (e.g. ``indoor_subject1``). Names without such a
    suffix are returned unchanged.
    """
    routine_suffix = r'_routine\d+ \(id\d+\)'
    return re.sub(routine_suffix, '', row.dataset)

In [6]:
def get_best_algo_for_type(df):
    """Map every row label of ``df`` to the column(s) holding that row's maximum.

    Parameters
    ----------
    df : DataFrame
        Score matrix, one row per type and one column per algorithm.

    Returns
    -------
    dict
        ``{row label: [column names whose value equals the row maximum]}``.
        Ties produce more than one column name.
    """
    rows = tqdm(df.iterrows(), total=len(df), desc="Processing rows")
    return {
        label: scores[scores == scores.max()].index.tolist()
        for label, scores in rows
    }

In [7]:
def score_per_type_matrix(df):
    """Build a type x algorithm matrix of mean scores.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns ``type``, ``Algorithm`` and ``score``.

    Returns
    -------
    DataFrame
        Rows are types, columns are the algorithms in the order given by the
        module-level ``algo_order``. Type/algorithm pairs without any score
        are filled with 0.
    """
    mean_scores = (
        df[['type', 'Algorithm', 'score']]
        .groupby(["Algorithm", "type"])["score"]
        .mean()
        .reset_index()
    )
    by_algorithm = mean_scores.pivot(index='Algorithm', columns='type', values='score')
    # Fill missing pairs first, then bring the algorithms into display order.
    return by_algorithm.fillna(0).reindex(algo_order).T

In [8]:
def add_best_as_per_type_tssb(row):
    """Return 1 if the row's algorithm is (one of) the best for its TSSB type, else 0.

    The best algorithms per type are read from the module-level
    ``algo_matcher_tssb`` mapping.
    """
    winners = algo_matcher_tssb[row.type]
    return int(row.Algorithm in winners)

def add_best_as_per_type_hasc(row):
    """Return 1 if the row's algorithm is (one of) the best for its HASC type, else 0.

    The best algorithms per type are read from the module-level
    ``algo_matcher_hasc`` mapping.
    """
    winners = algo_matcher_hasc[row.type]
    return int(row.Algorithm in winners)

In [9]:
# summary statistics
def score_comparison_matrix(df, savepath=None):
    """Summarise the ``score`` column per algorithm.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns ``Algorithm`` and ``score``. Any further
        columns are ignored, so the frame does not have to be trimmed first.
    savepath : str or path-like, optional
        If given (and truthy), the resulting table is also written to this
        location with ``DataFrame.to_excel``.

    Returns
    -------
    DataFrame
        Rows ``mean``, ``median`` and ``std``; one column per algorithm plus
        a final column ``mean`` holding the average of each row across all
        algorithms.
    """
    # A single aggregation on the score column replaces three whole-frame
    # group-bys and two merges; it also keeps unrelated (e.g. string)
    # columns from breaking or polluting the result.
    per_algorithm = df.groupby('Algorithm')['score'].agg(['mean', 'median', 'std'])

    out = per_algorithm.transpose()
    # Average every statistic over the algorithms.
    out['mean'] = out.mean(axis=1)

    if savepath:
        out.to_excel(savepath)
    return out

In [10]:
# summary statistics
def score_comparison_matrix_all_algos(tssb_df, hasc_df, savepath=None):
    """Compare overall score statistics of the two benchmarks.

    Parameters
    ----------
    tssb_df, hasc_df : DataFrame
        Result tables with a ``score`` column.
    savepath : str or path-like, optional
        If given (and truthy), the table is also written with
        ``DataFrame.to_excel``.

    Returns
    -------
    DataFrame
        Rows ``mean``, ``median`` and ``std``; columns ``TSSB`` and ``HASC``.
    """
    summary = {}
    for label, frame in (('TSSB', tssb_df), ('HASC', hasc_df)):
        scores = frame.score
        summary[label] = {
            'mean': scores.mean(),
            'median': scores.median(),
            'std': scores.std(),
        }

    out = pd.DataFrame(summary)

    if savepath:
        out.to_excel(savepath)
    return out

# Grouping

## TSSB

In [16]:
# Attach the `Type` label from the reference table to every TSSB series.
tssb_ref = pd.read_csv('datasets/DataSummary.csv', index_col=0)
tssb = pd.merge(
    # Drop a `Type` column left over from an earlier run of this cell so that
    # re-running it does not produce Type_x / Type_y and break add_types_tssb.
    tssb.drop(columns='Type', errors='ignore'),
    tssb_ref[['Type', 'Name']],
    left_on="dataset",
    right_on='Name',
    how="left",
    sort=False,
    # Rows of `tssb` are looked up by position later on; a duplicated Name in
    # the reference table would silently duplicate rows and shift positions.
    validate='many_to_one',
).drop('Name', axis=1)

In [17]:
# Annotate every TSSB result row with the name and the type of its source series.
main_exp_tssb = main_exp_tssb.assign(
    dataset=lambda frame: frame.apply(add_dataset_name_tssb, axis=1),
    type=lambda frame: frame.apply(add_types_tssb, axis=1),
)

## HASC

In [18]:
# Annotate every HASC result row with its data set name and the type derived
# from that name. The keyword order matters: `type` reads the `dataset`
# column created just before it.
main_exp_hasc = main_exp_hasc.assign(
    dataset=lambda frame: frame.apply(add_dataset_name_hasc, axis=1),
    type=lambda frame: frame.apply(add_types_hasc, axis=1),
)

# Results

## TSSB

In [19]:
# Mean score per TSSB type (rows) and algorithm (columns).
# score_per_type_matrix picks the columns it needs itself.
score_matrix_tssb = score_per_type_matrix(main_exp_tssb)
score_matrix_tssb

Algorithm,EveryNth,MinMax,M4,LTTB,MinMaxLTTB,LTD
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Device,0.4165,0.565,0.965,0.5525,0.5525,0.747
ECG,0.8812,0.7014,0.64,0.7682,0.7682,0.7672
EOG,0.6185,0.529,0.683,0.4995,0.4995,0.4975
Image,0.860185,0.761,0.66663,0.818926,0.818481,0.829407
Motion,0.748167,0.715417,0.600333,0.777667,0.777667,0.702333
Sensor,0.775,0.750143,0.663143,0.712571,0.712571,0.81
Simulated,0.8446,0.842,0.8456,0.9016,0.9016,0.942
Spectro,0.6115,0.541833,0.546167,0.524333,0.524333,0.494833
Traffic,0.752,0.71,0.7085,0.8585,0.8585,0.826


In [20]:
# For every TSSB type, collect the algorithm(s) with the highest mean score
# (ties yield more than one name), then display the mapping.
algo_matcher_tssb=get_best_algo_for_type(score_matrix_tssb)
algo_matcher_tssb

Processing rows: 100%|█████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 3008.11it/s]


{'Device': ['M4'],
 'ECG': ['EveryNth'],
 'EOG': ['M4'],
 'Image': ['EveryNth'],
 'Motion': ['LTTB', 'MinMaxLTTB'],
 'Sensor': ['LTD'],
 'Simulated': ['LTD'],
 'Spectro': ['EveryNth'],
 'Traffic': ['LTTB', 'MinMaxLTTB']}

In [21]:
# Flag the rows whose algorithm is (one of) the best for the row's type and
# keep only those rows for the summary below.
main_exp_tssb = main_exp_tssb.assign(
    is_best=lambda frame: frame.apply(add_best_as_per_type_tssb, axis=1)
)
best_for_type_tssb = main_exp_tssb.loc[main_exp_tssb['is_best'] == 1]

In [22]:
# Mean / median / std of the score per algorithm, restricted to the TSSB rows
# where that algorithm is the best one for the row's type.
score_comparison_matrix(best_for_type_tssb[['Algorithm','score']])

Algorithm,EveryNth,LTD,LTTB,M4,MinMaxLTTB,mean
mean,0.823684,0.844737,0.789214,0.824,0.789214,0.81417
median,0.9635,0.932,0.851,0.841,0.851,0.8877
std,0.236314,0.163158,0.219317,0.167376,0.219317,0.201096


## HASC

In [23]:
# Mean score per HASC type (rows) and algorithm (columns).
# score_per_type_matrix picks the columns it needs itself.
score_matrix_hasc = score_per_type_matrix(main_exp_hasc)
score_matrix_hasc

Algorithm,EveryNth,MinMax,M4,LTTB,MinMaxLTTB,LTD
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
indoor_subject1,0.609231,0.560385,0.508462,0.576538,0.576538,0.531462
indoor_subject2,0.610625,0.606875,0.606375,0.549375,0.549375,0.636375
indoor_subject3,0.719636,0.716409,0.657136,0.671682,0.671682,0.647318
indoor_subject4,0.723632,0.758316,0.722947,0.720263,0.720263,0.666895
indoor_subject5,0.769333,0.820667,0.838,0.7505,0.7505,0.824333
indoor_subject6,0.830625,0.732375,0.774,0.756875,0.756875,0.78375
indoor_subject7,0.697368,0.705632,0.653684,0.694053,0.694053,0.694895
indoor_subject8,0.796545,0.829364,0.686273,0.704818,0.704818,0.697818
outdoor_subject1,0.598636,0.601273,0.626364,0.616,0.616,0.626545
outdoor_subject2,0.7515,0.743889,0.761778,0.790111,0.790111,0.789056


In [24]:
# For every HASC type, collect the algorithm(s) with the highest mean score
# (ties yield more than one name), then display the mapping.
algo_matcher_hasc=get_best_algo_for_type(score_matrix_hasc)
algo_matcher_hasc

Processing rows: 100%|███████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 5013.51it/s]


{'indoor_subject1': ['EveryNth'],
 'indoor_subject2': ['LTD'],
 'indoor_subject3': ['EveryNth'],
 'indoor_subject4': ['MinMax'],
 'indoor_subject5': ['M4'],
 'indoor_subject6': ['EveryNth'],
 'indoor_subject7': ['MinMax'],
 'indoor_subject8': ['MinMax'],
 'outdoor_subject1': ['LTD'],
 'outdoor_subject2': ['LTTB', 'MinMaxLTTB'],
 'outdoor_subject3': ['LTTB', 'MinMaxLTTB'],
 'outdoor_subject4': ['M4'],
 'outdoor_subject5': ['LTD'],
 'outdoor_subject6': ['LTTB', 'MinMaxLTTB'],
 'outdoor_subject7': ['M4']}

In [25]:
# Flag the rows whose algorithm is (one of) the best for the row's type and
# keep only those rows for the summary below.
main_exp_hasc = main_exp_hasc.assign(
    is_best=lambda frame: frame.apply(add_best_as_per_type_hasc, axis=1)
)
best_for_type_hasc = main_exp_hasc.loc[main_exp_hasc['is_best'] == 1]

In [26]:
# Mean / median / std of the score per algorithm, restricted to the HASC rows
# where that algorithm is the best one for the row's type.
score_comparison_matrix(best_for_type_hasc[['Algorithm','score']])

Algorithm,EveryNth,LTD,LTTB,M4,MinMax,MinMaxLTTB,mean
mean,0.706907,0.711927,0.769265,0.783241,0.753837,0.769265,0.749074
median,0.782,0.785,0.859,0.8355,0.749,0.859,0.811583
std,0.209905,0.237409,0.215415,0.207378,0.197394,0.215415,0.213819


## Summary

In [27]:
# Overall mean / median / std of the best-per-type scores for both benchmarks;
# the table is also written to an Excel file.
# NOTE(review): when several algorithms tie for a type, the rows of every tied
# algorithm are flagged as best, so such types contribute more rows to these
# statistics - confirm this is intended.
score_comparison_matrix_all_algos(best_for_type_tssb,best_for_type_hasc,savepath='results/tables/best_algo_per_type_summary.xlsx')

Unnamed: 0,TSSB,HASC
mean,0.817348,0.749746
median,0.941,0.8
std,0.211009,0.214842
