# Genesis Demonstrator

In [1]:
import pandas as pd
import numpy as np
from typing import Final
from config import data_raw_folder, data_processed_folder
from timeeval import Datasets
from timeeval.datasets import DatasetAnalyzer, DatasetRecord
from pathlib import Path

In [2]:
# Name of the dataset collection handled by this notebook.
dataset_collection_name = "Genesis"

# Raw input lives below the configured raw-data folder; processed output goes
# directly into the configured processed-data folder.
source_folder = Path(data_raw_folder, "genesis-demonstrator/data")
target_folder = Path(data_processed_folder)

# Show the absolute locations so a misconfigured path is easy to spot.
resolved_source = source_folder.resolve()
resolved_target = target_folder.resolve()
print(f"Looking for source datasets in {resolved_source} and\nsaving processed datasets in {resolved_target}")

Looking for source datasets in /home/projects/akita/data/benchmark-data/data-raw/genesis-demonstrator/data and
saving processed datasets in /home/projects/akita/data/benchmark-data/data-processed


## Dataset transformation and pre-processing

In [3]:
# Properties of this collection; they are passed unchanged to the
# DatasetRecord when the dataset is registered further down in the notebook.
train_type = "unsupervised"
train_is_normal = False
input_type = "multivariate"
datetime_index = True
dataset_type = "real"

# create target directory
# dataset_subfolder is relative (<input_type>/<collection>), target_subfolder
# is the same location below target_folder.
dataset_subfolder = Path(input_type) / dataset_collection_name
target_subfolder = target_folder / dataset_subfolder

# Remember whether the directory was there before mkdir so the message below
# reports what actually happened.
target_existed = target_subfolder.is_dir()
# exist_ok=True tolerates an existing directory. If the path is occupied by
# something that is not a directory, mkdir raises FileExistsError; that error
# is intentionally not caught, because the following cells cannot write into
# target_subfolder in that situation anyway.
target_subfolder.mkdir(parents=True, exist_ok=True)
if target_existed:
    print(f"Directories {target_subfolder} already exist")
else:
    print(f"Created directories {target_subfolder}")

# Dataset manager working on target_folder (create_if_missing=False as before).
dm = Datasets(target_folder, create_if_missing=False)

Created directories /home/projects/akita/data/benchmark-data/data-processed/multivariate/Genesis


In [4]:
# get target filenames
dataset_name = "genesis-anomalies"
filename = f"{dataset_name}.test.csv"

source_file = source_folder / "Genesis_AnomalyLabels.csv"
path = dataset_subfolder / filename
target_filepath = target_subfolder / filename
target_meta_filepath = target_filepath.parent / f"{dataset_name}.{Datasets.METADATA_FILENAME_PREFIX}"

# transform file
print("Preparing dataset")
df = pd.read_csv(source_file)
#df = df.rename(columns={"Timestamp": "timestamp"})
df.insert(len(df.columns), "is_anomaly", df.loc[:, "Label"])
df["is_anomaly"] = (df["is_anomaly"] != 0).astype(np.int_)
df.insert(1, "timestamp", pd.to_datetime(df["Timestamp"], unit='s'))
df = df.drop(columns=["Timestamp", "Label"])
df.to_csv(target_filepath, index=False)

print("Analyzing metadata")
da = DatasetAnalyzer((dataset_collection_name, dataset_name), is_train=False, df=df)
da.save_to_json(target_meta_filepath, overwrite=True)
meta = da.metadata

# save metadata
dm.add_dataset(DatasetRecord(
    collection_name=dataset_collection_name,
    dataset_name=dataset_name,
    train_path=None,
    test_path=path,
    dataset_type=dataset_type,
    datetime_index=datetime_index,
    split_at=None,
    train_type=train_type,
    train_is_normal=train_is_normal,
    input_type=input_type,
    length=meta.length,
    dimensions=meta.dimensions,
    contamination=meta.contamination,
    num_anomalies=meta.num_anomalies,
    min_anomaly_length=meta.anomaly_length.min,
    median_anomaly_length=meta.anomaly_length.median,
    max_anomaly_length=meta.anomaly_length.max,
    mean=meta.mean,
    stddev=meta.stddev,
    trend=meta.trend,
    stationarity=meta.get_stationarity_name(),
    period_size=np.nan
))
print(f"Processed source dataset {source_file} -> {target_filepath}")

dm.save()

Preparing dataset
Analyzing metadata


[('Genesis', 'genesis-anomalies') (test)] /home/projects/akita/data/benchmark-data/data-processed/multivariate/Genesis/genesis-anomalies.metadata.json already exists, but 'overwrite' was specified! Ignoring existing contents.


Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/genesis-demonstrator/data/Genesis_AnomalyLabels.csv -> /home/projects/akita/data/benchmark-data/data-processed/multivariate/Genesis/genesis-anomalies.test.csv


In [5]:
# Refresh the dataset manager, then display the index rows of the "Genesis"
# collection as the cell output.
dm.refresh()
genesis_records = dm._df.loc["Genesis"]
genesis_records

Unnamed: 0_level_0,train_path,test_path,dataset_type,datetime_index,split_at,train_type,train_is_normal,input_type,length,dimensions,contamination,num_anomalies,min_anomaly_length,median_anomaly_length,max_anomaly_length,mean,stddev,trend,stationarity,period_size
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
genesis-anomalies,,multivariate/Genesis/genesis-anomalies.test.csv,real,True,,unsupervised,False,multivariate,16220,18,0.003083,3,2,22,26,11525.074236,9261.502003,no trend,difference_stationary,
