# TimeEval analysis of algorithm failures

In [1]:
# Automatically reload packages:
# `%autoreload 2` lets edits to imported modules be picked up live, i.e. without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# imports
import warnings
import pandas as pd
import numpy as np
import scipy as sp
import plotly.offline as py
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
from plotly.subplots import make_subplots
from pathlib import Path
from timeeval import Datasets

In [3]:
# constants and configuration
data_path = Path("../data") / "test-cases"
result_root_path = Path("../results")
# iterdir() yields the entries in arbitrary order; sort them so that
# `result_paths[-1]` (see the commented-out alternative below) reproducibly
# selects the last result folder by name.
result_paths = sorted(d for d in result_root_path.iterdir() if d.is_dir())
result_path = result_root_path / "2021-09-22_default-params-1&2&3-merged"
#result_path = result_paths[-1]

# load results
# the result files live in the "results" subfolder of the selected run
result_path = result_path / "results"
print(f"Reading results from {result_path.resolve()}")

df = pd.read_csv(result_path / "results.csv")
# dataset name without the suffix after the first dot (e.g. the ".unsupervised" part)
df["dataset_name"] = df["dataset"].str.split(".").str[0]

def load_scores_df(algorithm_name, dataset_id, repetition=1):
    """Load the anomaly scores of an algorithm execution from the result folder.

    Parameters
    ----------
    algorithm_name : str
        Value of the ``algorithm`` column in the results frame ``df``.
    dataset_id : tuple
        ``(collection, dataset)`` pair as stored in the ``collection`` and
        ``dataset`` columns of ``df``.
    repetition : int, default 1
        Repetition whose ``anomaly_scores.ts`` file is read.

    Returns
    -------
    pandas.DataFrame
        Content of the ``anomaly_scores.ts`` file, read without a header row.

    Raises
    ------
    ValueError
        If ``df`` contains no or more than one distinct ``hyper_params_id``
        for the algorithm-dataset combination.
    FileNotFoundError
        If the score file does not exist.
    """
    selection = (
        (df["algorithm"] == algorithm_name)
        & (df["collection"] == dataset_id[0])
        & (df["dataset"] == dataset_id[1])
    )
    # The results frame has a `repetition` column, so the same configuration
    # may occur in several rows. Deduplicate before `.item()`, otherwise every
    # repeated execution would raise a ValueError although the id is unambiguous.
    params_id = df.loc[selection, "hyper_params_id"].drop_duplicates().item()
    path = (
        result_path /
        algorithm_name /
        params_id /
        dataset_id[0] /
        dataset_id[1] /
        str(repetition) /
        "anomaly_scores.ts"
    )
    return pd.read_csv(path, header=None)

# load dataset metadata
# The plotting helpers use this manager to fetch dataset frames via `get_dataset_df`.
# NOTE(review): presumably `Datasets` indexes the datasets found below `data_path` — confirm.
dmgr = Datasets(data_path)

def plot_scores(algorithm_name, dataset_name):
    """Plot a GutenTAG dataset together with the anomaly scores of algorithms.

    The upper subplot shows the time series (all channels for multivariate
    datasets), the lower subplot the ``is_anomaly`` label and one score trace
    per algorithm. The ROC_AUC of each algorithm is part of its trace name.

    Parameters
    ----------
    algorithm_name : str or list of str
        A single algorithm name or several of them.
    dataset_name : str
        Dataset name without the training-type suffix.

    Returns
    -------
    The result of ``py.iplot`` for the created figure.

    Notes
    -----
    Algorithms without a unique ROC_AUC entry or without a readable score file
    are reported with a warning and left out of the plot.
    """
    # a plain string denotes a single algorithm
    algorithms = [algorithm_name] if isinstance(algorithm_name, str) else algorithm_name

    # the unsupervised variant of the dataset supplies the series and the labels
    df_dataset = dmgr.get_dataset_df(("GutenTAG", f"{dataset_name}.unsupervised"))

    # dimensionality of the dataset as recorded in the results
    dimensionality = df.loc[df["dataset_name"] == dataset_name, "dataset_input_dimensionality"]
    is_multivariate = dimensionality.unique().item().lower() == "multivariate"

    auroc = {}
    df_scores = pd.DataFrame(index=df_dataset.index)
    failed = []
    for algo in algorithms:
        is_algo = df["algorithm"] == algo

        # metric of the algorithm on this dataset
        try:
            auroc[algo] = df.loc[is_algo & (df["dataset_name"] == dataset_name), "ROC_AUC"].item()
        except ValueError:
            warnings.warn(f"No scores found! Probably {algo} was not executed on {dataset_name}.")
            auroc[algo] = -1
            failed.append(algo)
            continue

        # the score file is stored below the dataset variant matching the training type
        training_type = df.loc[is_algo, "algo_training_type"].values[0].lower().replace("_", "-")
        try:
            scores = load_scores_df(algo, ("GutenTAG", f"{dataset_name}.{training_type}"))
            df_scores[algo] = scores.iloc[:, 0]
        except (ValueError, FileNotFoundError):
            warnings.warn(f"No scores found! Probably {algo} was not executed on {dataset_name}.")
            df_scores[algo] = np.nan
            failed.append(algo)
    algorithms = [name for name in algorithms if name not in failed]

    # Create plot
    fig = make_subplots(rows=2, cols=1)
    if is_multivariate:
        # column 0 and the last column are skipped
        # (assumed to be the timestamp and the label column — TODO confirm)
        for channel in range(1, df_dataset.shape[1]-1):
            trace = go.Scatter(x=df_dataset.index, y=df_dataset.iloc[:, channel], name=f"channel-{channel}")
            fig.add_trace(trace, row=1, col=1)
    else:
        trace = go.Scatter(x=df_dataset.index, y=df_dataset.iloc[:, 1], name="timeseries")
        fig.add_trace(trace, row=1, col=1)

    label_trace = go.Scatter(x=df_dataset.index, y=df_dataset["is_anomaly"], name="label")
    fig.add_trace(label_trace, row=2, col=1)
    for algo in algorithms:
        score_trace = go.Scatter(x=df_scores.index, y=df_scores[algo], name=f"{algo}={auroc[algo]:.4f}")
        fig.add_trace(score_trace, row=2, col=1)

    # link the x-axes of both subplots
    fig.update_xaxes(matches="x")
    fig.update_layout(
        title=f"Results of {','.join(algorithms)} on {dataset_name}",
        height=400
    )
    return py.iplot(fig)

def _anomaly_regions(labels):
    """Return ``(start, end)`` positions of the consecutive anomalous runs in ``labels``.

    ``start`` is the position of the first anomalous point of a run, ``end`` the
    position directly after its last anomalous point (``len(labels)`` for a run
    that reaches the end of the sequence).
    """
    flags = np.asarray(labels, dtype=bool).astype(np.int8)
    # Pad with 0 on both sides, so that runs touching the first or the last
    # point also produce a rising and a falling edge.
    edges = np.diff(flags, prepend=0, append=0)
    starts = np.flatnonzero(edges == 1)
    ends = np.flatnonzero(edges == -1)
    return list(zip(starts.tolist(), ends.tolist()))


def plot_datasets(datasets, max_channels = 20):
    """Plot GutenTAG datasets below each other and highlight their anomalies.

    Each dataset gets its own subplot containing up to ``max_channels``
    channels; the median ROC_AUC over all result rows of the dataset is part of
    the trace names. Anomalous regions are marked with red rectangles.

    Parameters
    ----------
    datasets : str or list of str
        A single dataset name or several of them (without training-type suffix).
    max_channels : int, default 20
        Maximum number of channels drawn per dataset.

    Returns
    -------
    The result of ``py.iplot`` for the created figure.

    Notes
    -----
    Datasets that cannot be loaded are reported with a warning and skipped
    (their subplot stays empty). Datasets without any score are still plotted,
    but a warning is issued and the trace names contain ``nan``.
    """
    if isinstance(datasets, str):
        datasets = [datasets]
    n_datasets = len(datasets)

    # Create plot
    fig = make_subplots(n_datasets, 1)
    for i, d in enumerate(datasets):
        # construct dataset ID
        dataset_id = ("GutenTAG", f"{d}.unsupervised")

        # load dataset details
        try:
            df_dataset = dmgr.get_dataset_df(dataset_id)
        except Exception as ex:
            warnings.warn(f"Could not load dataset {d}, because {repr(ex)}")
            continue

        # get algorithm metric results
        # median() does not raise for an empty selection, it returns NaN;
        # so the missing-score case must be detected explicitly.
        auroc = df.loc[df["dataset_name"] == d, "ROC_AUC"].median()
        if pd.isna(auroc):
            warnings.warn(f"No scores found for dataset {d}!")

        # column 0 and the last column are skipped
        # (assumed to be the timestamp and the label column — TODO confirm)
        for j in range(1, min(df_dataset.shape[1]-1, max_channels+1)):
            fig.add_trace(go.Scatter(
                x=df_dataset.index,
                y=df_dataset.iloc[:, j],
                name=f"{d} channel {j} ({auroc:.4f})",
            ), i+1, 1)

        # mark anomaly regions
        index = df_dataset.index
        last_pos = len(index) - 1
        for start, end in _anomaly_regions(df_dataset["is_anomaly"]):
            # The rectangle spans from the last normal point before the region
            # to the first normal point after it; both are clamped to the
            # series boundaries for regions touching the start or the end.
            fig.add_vrect(x0=index[max(start - 1, 0)], x1=index[min(end, last_pos)],
                          exclude_empty_subplots=True,
                          line_width=0,
                          fillcolor="red",
                          opacity=0.3,
                          annotation_text="anomaly",
                          annotation_position="top left",
                          row=i+1,
                          col=1)

#     fig.update_xaxes(matches="x")
    fig.update_layout(
        title=f"Datasets and ground truth of {','.join(datasets)} datasets",
        height=200*n_datasets if n_datasets > 1 else 400
    )
    return py.iplot(fig)

Reading results from /c/Users/sebastian.schmidl/Documents/Projects/timeeval/results/2021-07-21_default-params/results


## Algorithm status

In [None]:
# Count the result rows per status for every algorithm, grouped by training type.
df_error_counts = df.pivot_table(index=["algo_training_type", "algorithm"], columns=["status"], values="repetition", aggfunc="count")
# status values that never occurred for an algorithm show up as NaN --> 0
df_error_counts = df_error_counts.fillna(value=0).astype(np.int64)
available_types = df_error_counts.index.get_level_values("algo_training_type")
for tpe in ["SEMI_SUPERVISED", "SUPERVISED", "UNSUPERVISED"]:
    if tpe not in available_types:
        # `.loc[tpe]` would raise a KeyError for a training type without any result
        print(f"{tpe}: no results")
        continue
    print(tpe)
    py.iplot(ff.create_table(df_error_counts.loc[tpe], index=True))

### Overview and status tracking

- `DeepAnT`: Most errors due to missing post-processing function
  - --> added post-processing function
- `DeepNAP`: Likely the training of a single epoch took longer than 2 hrs, so no `model.pkl`-file was stored during training. Wrong post-processing function (5).
  - --> fixed post-processing function
  - --> changed docker adapter to (hopefully) record the logs of docker container even in the case of a timeout
  - --> changed docker adapter to check for existing `model.pkl`-file; if not: raise timeout error anyway
- `EncDec-AD`: Checkpoint did not include scoring component state and wrong post-processing function
  - --> fixed post-processing function
  - --> fixed model checkpointing
- `Hybrid-KNN`: wrong parameter in post-processing function (`view()`-error already fixed)
  - --> fixed post-processing function
- `ImageEmbeddingCAE`: ?
  ```
  Traceback (most recent call last):
    File "/app/algorithm.py", line 85, in <module>
      train(config)
    File "/app/algorithm.py", line 51, in train
      model = CAE(**parameters)
    File "/app/img_embedding_cae/cae.py", line 122, in __init__
      self.encoder = Encoder(self.conv_spec, self.activation)
    File "/app/img_embedding_cae/cae.py", line 32, in __init__
      stride=size),
    File "/usr/local/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 412, in __init__
      False, _pair(0), groups, bias, padding_mode)
    File "/usr/local/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 52, in __init__
      if out_channels % groups != 0:
  TypeError: not all arguments converted during string formatting
  ```
- `LSTM-AD`: Checkpoint did not include scoring component state and wrong post-processing function
  - --> fixed post-processing function
  - --> fixed model checkpointing
- `LaserDBN`: most (124) errors are Inf or NaN errors during metric calculation, some exit code 139 (segfault) during train (no error logs)
- `MSCRED`: most errors due to MSCRED not finishing a single epoch during training (missing model file); when training successful (69), exit code 139 (segfault) during execute
- `MTAD-GAT`: wrong post-processing function (54), some (121) failures due to missing model (unfinished training), some (18) failures due to exit code 137 (killed by linux OOM killer) during execute
  - --> fixed post-processing function
- `OmniAnomaly`: no model checkpointing and wrong parameter in post-processing function
  - --> added model checkpointing
  - --> fixed post-processing function
- `TARZAN`:
  - bug for datasets i.a. `cbf-combined-diff-1`, `cbf-position-end`
  ```
  Traceback (most recent call last):
    File "/app/algorithm.py", line 68, in <module>
      execute(args)
    File "/app/algorithm.py", line 56, in execute
      scores = TARZAN.TARZAN(train_data, data, args.customParameters.anomaly_window_size, args.customParameters.alphabet_size)
    File "/app/tarzan/TARZAN.py", line 25, in TARZAN
      Rtree.root._annotate_nodes()
    File "/app/tarzan/TARZAN.py", line 177, in _annotate_nodes
      self.frequency = len(self._get_leaves())
    [...]
    File "/app/tarzan/TARZAN.py", line 159, in _get_leaves
      return [x for (n,_) in self.child for x in n._get_leaves()]
    File "/app/tarzan/TARZAN.py", line 159, in <listcomp>
      return [x for (n,_) in self.child for x in n._get_leaves()]
    File "/app/tarzan/TARZAN.py", line 156, in _get_leaves
      if self.is_leaf():
    File "/app/tarzan/TARZAN.py", line 150, in is_leaf
      return self.child == []
  RecursionError: maximum recursion depth exceeded in comparison
  ```
  - bug for datasets i.a. `sinus-same-count-1`, `ecg-combined-diff-2`
  ```
  Traceback (most recent call last):
    File "/app/algorithm.py", line 68, in <module>
      execute(args)
    File "/app/algorithm.py", line 56, in execute
      scores = TARZAN.TARZAN(train_data, data, args.customParameters.anomaly_window_size, args.customParameters.alphabet_size)
    File "/app/tarzan/TARZAN.py", line 36, in TARZAN
      E = compute_expectation(w, R, X, Rtree)
    File "/app/tarzan/TARZAN.py", line 82, in compute_expectation
      E = alpha(m) * ((num+1e-10)/(den+1e-10)) # may divide by zero?
  OverflowError: int too large to convert to float
  ```
- `TAnoGan`: wrong post-processing function (79), else: no model-file, because not a single epoch finished during train
  - --> fixed post-processing function
- `Telemanom`: wrong post-processing function
  - --> fixed post-processing function
- `HIF`: [akita/timeeval-algorithms#34](https://gitlab.hpi.de/akita/timeeval-algorithms/-/issues/34)
- `MultiHMM`: deserialization (of model) error, when distribution could not be fit to data correctly (_do not fix_)
- `Normalizing Flows`: most (111) due to not finishing a single epoch during training (missing model file), some (11) failures due to exit code 137 (killed by linux OOM killer) during train, rest (8) due to a `ZeroDivisionError` caused by a validation loss of zero
- `Isolation Forest - Local Outier Factor`: typo in name! errors due to non-absolute outlier coefficient calculation
  - --> fixed coefficient calculation
- `TripleES`: some (4) errors are Inf or NaN errors during metric calculation (already fixed), rest due to [akita/timeeval-algorithms#29](https://gitlab.hpi.de/akita/timeeval-algorithms/-/issues/29) (ValueError: Data must be positive.)
- `VALMOD`: some (3) errors are Inf or NaN errors during metric calculation (already fixed), most (44) due to error `window_size` must be at least 4

## Show error message of algorithm

In [13]:
algo = "Series2Graph"

# Number of result rows per distinct (error message, hyper-parameters) pair of
# the algorithm, most frequent pair first.
errors = (
    df[df["algorithm"] == algo]
    .groupby(by=["error_message", "hyper_params"])["repetition"]
    .count()
    .sort_values(ascending=False)
    .reset_index(drop=False)
)

print(f"Errors of {algo}!\n")
for row in errors.itertuples(index=False):
    print(row.error_message)
    print(f"\t{row.repetition}\t{row.hyper_params}")

Errors of Series2Graph!

DockerAlgorithmFailedError('Please consider log files in /home/sebastian.schmidl/projects/timeeval/timeeval_experiments/results/2021_07_21_12_59_17/Series2Graph/f5eaed5902d16ec32c4dc0d884947b8e/GutenTAG/poly-same-count-1.unsupervised/1!')
	1	{"query_window_size": 225, "random_state": 42, "window_size": 150}
DockerAlgorithmFailedError('Please consider log files in /home/sebastian.schmidl/projects/timeeval/timeeval_experiments/results/2021_07_21_12_59_17/Series2Graph/f5eaed5902d16ec32c4dc0d884947b8e/GutenTAG/poly-same-count-2.unsupervised/1!')
	1	{"query_window_size": 225, "random_state": 42, "window_size": 150}
DockerAlgorithmFailedError('Please consider log files in /home/sebastian.schmidl/projects/timeeval/timeeval_experiments/results/2021_07_21_12_59_17/Series2Graph/f5eaed5902d16ec32c4dc0d884947b8e/GutenTAG/poly-same-count-5.unsupervised/1!')
	1	{"query_window_size": 225, "random_state": 42, "window_size": 150}
DockerAlgorithmFailedError('Please consider log 

In [19]:
# Print the complete execution log of a single run.
execution_path = result_path / "Series2Graph/f5eaed5902d16ec32c4dc0d884947b8e/GutenTAG/poly-trend-quadratic.unsupervised/1"
print((execution_path / "execution.log").read_text())

Performing execution for UNSUPERVISED algorithm Series2Graph
Running container 'mut:5000/akita/series2graph:latest' with uid=1009 and gid=1024 privileges in ExecutionType.EXECUTE mode.
Restricting container to 1.0 CPUs and 6.182 GB RAM

#### Docker container logs ####
Matplotlib created a temporary config/cache directory at /tmp/matplotlib-ocwcqysh because the default path (/root/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.
Config(dataInput=/data/test.csv, dataOutput=/results/docker-algorithm-scores.csv, executionType=execute,l=150, ql=225, latent=50, rate=30)
Read input time series from /data/test.csv: [TS([ 1.99744101  1.95287653  2.1094688  ... -1.62112511 -1.85080624
 -1.64702529])]
Traceback (most recent call last):
  File "/app/algorithm.py", line 94, in <module>
    main(config)
  File "/app/a

In [17]:
# Logs of all runs of `algo` that did not produce an anomaly score file.
executions = [
    log_file
    for log_file in (result_path / algo).glob("**/execution.log")
    if not (log_file.parent / "anomaly_scores.ts").is_file()
]

# Count the runs whose log mentions a ValueError and list their datasets.
c = 0
for log_file in executions:
    if "ValueError" in log_file.read_text():
        c += 1
        # folder layout: .../<dataset>/<repetition>/execution.log
        print(log_file.parent.parent.name)
print(c)

poly-same-count-1.unsupervised
poly-same-count-2.unsupervised
poly-same-count-5.unsupervised
poly-trend-quadratic.unsupervised
4


In [30]:
# Compare the anomaly scores of three algorithms on the dataset "rw-diff-count-3".
plot_scores(["Series2Graph", "STOMP", "Subsequence LOF"], "rw-diff-count-3")