In [None]:
import pandas as pd
from tqdm import tqdm
import plotly.graph_objects as go
import tsfel
import numpy as np
import mt_utils as ut
import importlib
import re

In [None]:
# Re-import the local helper module (mt_utils, bound to `ut`) so that edits to it
# are picked up without restarting the kernel.
importlib.reload(ut)

In [None]:
# Load the pickled intermediate results of the main experiment for both benchmarks.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
main_exp_tssb = pd.read_pickle('results/zwischenergebnisse/main_experiment_tssb.pkl')
main_exp_hasc = pd.read_pickle('results/zwischenergebnisse/main_experiment_hasc.pkl')

In [None]:
from claspy.data_loader import load_tssb_dataset
from claspy.data_loader import load_has_dataset

In [None]:
# Benchmark collections from claspy. The add_dataset_name_* / add_types_tssb helpers
# index into these frames by position (via `.iloc`).
tssb = load_tssb_dataset()
hasc = load_has_dataset()

In [None]:
# Algorithm order used by score_per_type_matrix when reindexing its result.
algo_order=["EveryNth","MinMax","M4","LTTB","MinMaxLTTB","LTD"]
# NOTE(review): `groups` is not referenced by any cell visible here — confirm it is still needed.
groups = ['Algorithm','TS_Length', 'statistical','temporal','spectral','window_size','score']

# Functions

In [None]:
def add_dataset_name_tssb(row):
    """Return the dataset name of the TSSB series a result row refers to.

    ``row.orig_TS_ID`` is used as a positional index into the global ``tssb`` frame.
    """
    source_series = tssb.iloc[row.orig_TS_ID]
    return source_series.dataset

def add_dataset_name_hasc(row):
    """Return the dataset name of the HASC series a result row refers to.

    ``row.orig_TS_ID`` is used as a positional index into the global ``hasc`` frame.
    """
    source_series = hasc.iloc[row.orig_TS_ID]
    return source_series.dataset

In [None]:
def add_types_tssb(row):
    """Return the ``Type`` of the TSSB series a result row refers to.

    ``row.orig_TS_ID`` is used as a positional index into the global ``tssb`` frame,
    which must already carry a ``Type`` column (added by merging the reference summary).
    """
    source_series = tssb.iloc[row.orig_TS_ID]
    return source_series.Type

def add_types_hasc(row):
    """Derive the type label of a HASC result row from its ``dataset`` name.

    Every ``_routine<digits> (id<digits>)`` part is removed from ``row.dataset``;
    what remains (indoor/outdoor + subject) is returned.
    """
    routine_and_id = re.compile(r'_routine\d+ \(id\d+\)')
    return routine_and_id.sub('', row.dataset)

In [None]:
def get_best_algo_for_type(df):
    """Map every row label of ``df`` to the column label(s) holding that row's maximum.

    Ties are kept, so each value is a list containing one or more column labels.
    A tqdm progress bar is shown while the rows are processed.
    """
    rows = tqdm(df.iterrows(), total=len(df), desc="Processing rows")
    return {
        label: scores[scores == scores.max()].index.tolist()
        for label, scores in rows
    }

In [None]:
def score_per_type_matrix(df):
    """Build a table of mean scores with one row per type and one column per algorithm.

    ``df`` must provide the columns ``type``, ``Algorithm`` and ``score``.
    (type, algorithm) combinations missing from the data are filled with 0.
    Columns follow the global ``algo_order``; because the reindex happens after the
    fill, an algorithm from ``algo_order`` that never occurs in ``df`` yields a NaN column.
    """
    scores = df[['type','Algorithm','score']].copy()
    mean_scores = scores.groupby(["Algorithm","type"])["score"].mean().reset_index()
    by_algorithm = mean_scores.pivot(index='Algorithm', columns='type')['score'].fillna(0)
    return by_algorithm.reindex(algo_order).T

In [None]:
def add_best_as_per_type_tssb(row):
    """Return 1 if the row's algorithm is among the best ones for its type (TSSB), else 0.

    The lookup uses the global ``algo_matcher_tssb`` mapping (type -> list of algorithms).
    """
    best_algorithms = algo_matcher_tssb[row.type]
    return int(row.Algorithm in best_algorithms)

def add_best_as_per_type_hasc(row):
    """Return 1 if the row's algorithm is among the best ones for its type (HASC), else 0.

    The lookup uses the global ``algo_matcher_hasc`` mapping (type -> list of algorithms).
    """
    best_algorithms = algo_matcher_hasc[row.type]
    return int(row.Algorithm in best_algorithms)

In [None]:
# summary statistics
def score_comparison_matrix(df, savepath=None):
    """Summarise the ``score`` column per algorithm.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Algorithm`` and ``score``. Any other columns are ignored.
    savepath : str or path-like, optional
        If given, the resulting table is additionally written there with ``DataFrame.to_excel``.

    Returns
    -------
    pandas.DataFrame
        Rows ``mean``, ``median`` and ``std``; one column per algorithm plus a trailing
        column named ``mean`` that holds the row-wise average across all algorithms.
    """
    # Aggregate only the score column in a single pass. Aggregating the whole frame
    # raises on non-numeric columns in pandas >= 2.0 and, once merged, produces
    # suffixed duplicate columns for every extra numeric column.
    per_algorithm = df.groupby('Algorithm')['score'].agg(['mean', 'median', 'std'])

    out = per_algorithm.transpose()
    # Average of each statistic over the algorithms (column name kept for compatibility).
    out['mean'] = out.mean(axis=1)
    if savepath:
        out.to_excel(savepath)
    return out

In [None]:
# summary statistics
def score_comparison_matrix_all_algos(tssb_df, hasc_df, savepath=None):
    """Compare the overall score distribution of the two benchmarks.

    Returns a frame with the rows ``mean``, ``median`` and ``std`` of the ``score``
    column and the columns ``TSSB`` and ``HASC``. If ``savepath`` is given, the table
    is also written there with ``DataFrame.to_excel``.
    """
    def _summarise(frame, label):
        # One named Series per benchmark so the label becomes the column header.
        scores = frame.score
        stats = {'mean': scores.mean(), 'median': scores.median(), 'std': scores.std()}
        return pd.Series(stats, name=label)

    summary = pd.DataFrame([_summarise(tssb_df, 'TSSB'), _summarise(hasc_df, 'HASC')]).transpose()

    if savepath:
        summary.to_excel(savepath)
    return summary

# Feature Selection and Grouping

## Features

In [None]:
# For each feature domain, add one column to main_exp_tssb: the mean over axis 1 of
# whatever ut.get_significant_features returns for the frame's time series.
# NOTE(review): assumes the helper returns one row of features per input series — confirm in mt_utils.
# NOTE(review): only main_exp_tssb receives these columns here; main_exp_hasc is left untouched.
domains = ['statistical','temporal','spectral']
for domain in domains:
    features = ut.get_significant_features(list(main_exp_tssb.Time_Series),domain)
    mean_features = np.mean(features, axis=1)
    main_exp_tssb[domain]=mean_features

## Grouping

### TSSB

In [None]:
# Attach the dataset type from the reference summary to every TSSB series.
tssb_ref = pd.read_csv('datasets/DataSummary.csv', index_col=0)
# Dropping an already merged 'Type' column keeps this cell idempotent: re-running it would
# otherwise produce 'Type_x'/'Type_y' and break the `.Type` lookup in add_types_tssb.
# validate='many_to_one' guards the positional `.iloc` lookups: a duplicated 'Name' in the
# reference file would silently duplicate rows and shift every later position.
tssb = pd.merge(
    tssb.drop(columns='Type', errors='ignore'),
    tssb_ref[['Type','Name']],
    left_on="dataset",
    right_on='Name',
    how="left",
    sort=False,
    validate='many_to_one',
).drop('Name', axis=1)

In [None]:
# Annotate every TSSB result row with the dataset name and type of its source series
# (row-wise positional lookup via orig_TS_ID).
main_exp_tssb['dataset']=main_exp_tssb.apply(add_dataset_name_tssb, axis=1)
main_exp_tssb['type']=main_exp_tssb.apply(add_types_tssb, axis=1)

### HASC

In [None]:
# Annotate every HASC result row. 'type' is derived from the 'dataset' column,
# so the order of these two statements matters.
main_exp_hasc['dataset']=main_exp_hasc.apply(add_dataset_name_hasc, axis=1)
main_exp_hasc['type']=main_exp_hasc.apply(add_types_hasc, axis=1)

# Results

## TSSB

In [None]:
# Mean score per (type, algorithm) for TSSB: rows are types, columns follow algo_order.
score_matrix_tssb=score_per_type_matrix(main_exp_tssb[['type','Algorithm','score']])
score_matrix_tssb

In [None]:
# Map each TSSB type to the algorithm(s) with the highest mean score (ties are kept).
algo_matcher_tssb=get_best_algo_for_type(score_matrix_tssb)
algo_matcher_tssb

In [None]:
# Flag the result rows whose algorithm is among the best for their type,
# then keep only those rows.
main_exp_tssb['is_best'] = main_exp_tssb.apply(add_best_as_per_type_tssb, axis=1)
best_for_type_tssb=main_exp_tssb[main_exp_tssb.is_best==1]

In [None]:
# Mean/median/std of the score per algorithm, restricted to the best-for-type TSSB rows.
score_comparison_matrix(best_for_type_tssb[['Algorithm','score']])

## HASC

In [None]:
# Mean score per (type, algorithm) for HASC: rows are types, columns follow algo_order.
score_matrix_hasc=score_per_type_matrix(main_exp_hasc[['type','Algorithm','score']])
score_matrix_hasc

In [None]:
# Map each HASC type to the algorithm(s) with the highest mean score (ties are kept).
algo_matcher_hasc=get_best_algo_for_type(score_matrix_hasc)
algo_matcher_hasc

In [None]:
# Flag the result rows whose algorithm is among the best for their type,
# then keep only those rows.
main_exp_hasc['is_best'] = main_exp_hasc.apply(add_best_as_per_type_hasc, axis=1)
best_for_type_hasc=main_exp_hasc[main_exp_hasc.is_best==1]

In [None]:
# Mean/median/std of the score per algorithm, restricted to the best-for-type HASC rows.
score_comparison_matrix(best_for_type_hasc[['Algorithm','score']])

## Summary

In [None]:
# Overall mean/median/std of the score per benchmark over the best-for-type rows;
# the table is also written to the Excel file given as savepath.
score_comparison_matrix_all_algos(best_for_type_tssb,best_for_type_hasc,savepath='results/tables/best_algo_per_type_summary.xlsx')