In [1]:
import sys
sys.path.append("/home/stachu/Projects/Anomaly_detection/TSAD")

from pytorch_lightning.loggers import TensorBoardLogger
import pickle
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# from tsai.models import TCN, ResNet, TST, RNN, TransformerModel, FCN
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from torch import nn

from predpy.dataset import MultiTimeSeriesDataset
from predpy.data_module import MultiTimeSeriesModule
from predpy.wrapper import Autoencoder, Predictor, VAE
from predpy.experimentator import (
    DatasetParams, ModelParams,
    Experimentator, load_experimentator)
from predpy.plotter import (
    plot_exp_predictions
)
from predpy.preprocessing import set_index
from predpy.preprocessing import moving_average
from predpy.preprocessing import (
    load_and_preprocess, set_index, moving_average, drop_if_is_in,
    use_dataframe_func, loc, iloc, get_isoforest_filter, get_variance_filter)
from predpy.trainer import (
    CheckpointParams, TrainerParams, EarlyStoppingParams, LoggerParams)
from tsad.noiser import apply_noise_on_dataframes, white_noise
from tsad.anomaly_detector import PredictionAnomalyDetector, ReconstructionAnomalyDetector
from models import LSTMAE, LSTMVAE
from literature.anom_trans import AnomalyTransformer, ATWrapper
from literature.velc import VELC, VELCWrapper
from literature.dagmm import DAGMM, DAGMMWrapper
from literature.tadgan import TADGAN, TADGANWrapper

# ServerMachineDataset

## TADGAN

### Dataset

In [2]:
# Dataset configuration for the ServerMachineDataset file machine-1-1.csv.
window_size = 100

# Column labels "0" .. "37"; used both as loader `names` and as targets.
feature_names = list(map(str, range(38)))

# header=None together with explicit names is forwarded to the loader.
load_params = dict(header=None, names=list(feature_names))

# No drop/refill steps and no anomaly filters are configured; the only
# preprocessing step is (use_dataframe_func, "astype", "float").
drop_refill_pipeline = list()
preprocessing_pipeline = [(use_dataframe_func, "astype", "float")]
detect_anomalies_pipeline = list()

datasets_params = [
    DatasetParams(
        path="/home/stachu/Projects/Anomaly_detection/TSAD/data/Industry/ServerMachineDataset/train/machine-1-1.csv",
        load_params=load_params,
        target=list(feature_names),
        split_proportions=[0.8, 0.1, 0.1],
        window_size=window_size,
        batch_size=64,
        drop_refill_pipeline=drop_refill_pipeline,
        preprocessing_pipeline=preprocessing_pipeline,
        detect_anomalies_pipeline=detect_anomalies_pipeline,
        scaler=StandardScaler(),
    ),
]

### Models

In [3]:
# Channel counts; 38 matches the number of target columns in the dataset cell.
c_in = 38
c_out = 38

# Constructor arguments for the single TadGAN configuration.
tadgan_init = dict(c_in=c_in, h_size=200, n_layers=2, z_size=100)

models_params = [
    ModelParams(
        name_="TadGAN",
        cls_=TADGAN,
        init_params=tadgan_init,
        WrapperCls=TADGANWrapper,
    ),
]

### Experiments

In [4]:
# Checkpointing and early stopping monitor the same logged metric.
monitored_metric = 'val_loss'

chp_p = CheckpointParams(
    dirpath="../checkpoints",
    monitor=monitored_metric,
    verbose=True,
    save_top_k=1,
)
tr_p = TrainerParams(max_epochs=5, gpus=1, auto_lr_find=True)
es_p = EarlyStoppingParams(monitor=monitored_metric, patience=2, verbose=True)

# Bundle models, datasets and trainer settings into one experiment object.
exp = Experimentator(
    models_params=models_params,
    datasets_params=datasets_params,
    trainer_params=tr_p,
    checkpoint_params=chp_p,
    early_stopping_params=es_p,
    LoggersClasses=[TensorBoardLogger],
    loggers_params=[LoggerParams(save_dir="../lightning_logs")],
)

### Run / Load

In [5]:
# Run the configured experiments, passing ../saved_experiments as experiments_path.
# NOTE: the output recorded with this notebook ends in
#   TypeError: optimizer can only optimize Tensors, but one of the params is
#   Module.parameters
# reported for model 0 (TadGAN) on dataset 0 (machine-1-1).
exp.run_experiments(
    experiments_path="../saved_experiments",
    safe=False,
)
# Alternative kept from the original: load a saved experimentator instead.
# exp = load_experimentator(
#     "./saved_experiments/2021-12-29_01:31:42.pkl"
# )


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.

Global seed set to 42
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

Setting `Trainer(flush_logs_every_n_steps=50)` is deprecated in v1.5 and will be removed in v1.7. Please configure flushing in the logger instead.


DataModule.setup has already been called, so it will not be called again. In v1.6 this behavior will change to always call DataModule.setup.

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


TypeError: ('optimizer can only optimize Tensors, but one of the params is Module.parameters', 'Problem with training 0 model named TadGAN on 0 dataset named machine-1-1')

### Plot

In [None]:
# Plot predictions for dataset index 0 and model id 0 of the current experiment.
plot_exp_predictions(
    exp,
    dataset_idx=0,
    models_ids=[0],
)

In [None]:
# Deliberate stop marker: an error in this cell halts "Run All" at the end of
# the TADGAN section. The previous `None = 1` achieved this through a
# SyntaxError, which made the notebook unparseable for linters and script
# export; an explicit raise keeps the same stop with a readable message.
raise RuntimeError(
    "Intentional stop after the TADGAN section; run the next section manually."
)

## DAGMM

### Dataset

In [2]:
# Dataset configuration for the ServerMachineDataset file machine-1-1.csv.
window_size = 100

# Column labels "0" .. "37"; used both as loader `names` and as targets.
feature_names = list(map(str, range(38)))

# header=None together with explicit names is forwarded to the loader.
load_params = dict(header=None, names=list(feature_names))

# No drop/refill steps and no anomaly filters are configured; the only
# preprocessing step is (use_dataframe_func, "astype", "float").
drop_refill_pipeline = list()
preprocessing_pipeline = [(use_dataframe_func, "astype", "float")]
detect_anomalies_pipeline = list()

datasets_params = [
    DatasetParams(
        path="/home/stachu/Projects/Anomaly_detection/TSAD/data/Industry/ServerMachineDataset/train/machine-1-1.csv",
        load_params=load_params,
        target=list(feature_names),
        split_proportions=[0.8, 0.1, 0.1],
        window_size=window_size,
        batch_size=64,
        drop_refill_pipeline=drop_refill_pipeline,
        preprocessing_pipeline=preprocessing_pipeline,
        detect_anomalies_pipeline=detect_anomalies_pipeline,
        scaler=StandardScaler(),
    ),
]

### Models

In [3]:
# Channel counts; 38 matches the number of target columns in the dataset cell.
c_in = 38
c_out = 38

# Estimation network handed to DAGMM: Linear(100, 10) -> Tanh -> Dropout(0.5)
# -> Linear(10, 10) -> Softmax over dim 1.
# NOTE(review): the 100 / 10 sizes coincide with z_size=100 and n_gmm=10 below;
# presumably they must stay in sync — confirm against the DAGMM implementation.
estimation_net = nn.Sequential(
    nn.Linear(100, 10),
    nn.Tanh(),
    nn.Dropout(p=0.5),
    nn.Linear(10, 10),
    nn.Softmax(dim=1),
)

dagmm_init = dict(
    c_in=c_in,
    z_size=100,
    n_layers=2,
    n_gmm=10,
    estimation_net=estimation_net,
)
dagmm_wrapper_kwargs = dict(lambda_energy=0.1, lambda_cov_diag=0.005)

models_params = [
    ModelParams(
        name_="DAGMM",
        cls_=DAGMM,
        init_params=dagmm_init,
        WrapperCls=DAGMMWrapper,
        wrapper_kwargs=dagmm_wrapper_kwargs,
    ),
]

### Experiments

In [4]:
# Checkpointing and early stopping monitor the same logged metric.
monitored_metric = 'val_loss'

chp_p = CheckpointParams(
    dirpath="../checkpoints",
    monitor=monitored_metric,
    verbose=True,
    save_top_k=1,
)
tr_p = TrainerParams(max_epochs=5, gpus=1, auto_lr_find=True)
es_p = EarlyStoppingParams(monitor=monitored_metric, patience=2, verbose=True)

# Bundle models, datasets and trainer settings into one experiment object.
exp = Experimentator(
    models_params=models_params,
    datasets_params=datasets_params,
    trainer_params=tr_p,
    checkpoint_params=chp_p,
    early_stopping_params=es_p,
    LoggersClasses=[TensorBoardLogger],
    loggers_params=[LoggerParams(save_dir="../lightning_logs")],
)

### Run / Load

In [5]:
# Run the configured experiments, passing ../saved_experiments as experiments_path.
# NOTE: the output recorded with this notebook ends in
#   AttributeError: 'DAGMM' object has no attribute 'to_var'
# reported for model 0 (DAGMM) on dataset 0 (machine-1-1).
exp.run_experiments(
    experiments_path="../saved_experiments",
    safe=False,
)
# Alternative kept from the original: load a saved experimentator instead.
# exp = load_experimentator(
#     "./saved_experiments/2021-12-29_01:31:42.pkl"
# )


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.

Global seed set to 42


AttributeError: ("'DAGMM' object has no attribute 'to_var'", 'Problem with training 0 model named DAGMM on 0 dataset named machine-1-1')

### Plot

In [None]:
# Plot predictions for dataset index 0 and model id 0 of the current experiment.
plot_exp_predictions(
    exp,
    dataset_idx=0,
    models_ids=[0],
)

In [None]:
# Deliberate stop marker: an error in this cell halts "Run All" at the end of
# the DAGMM section. The previous `None = 1` achieved this through a
# SyntaxError, which made the notebook unparseable for linters and script
# export; an explicit raise keeps the same stop with a readable message.
raise RuntimeError(
    "Intentional stop after the DAGMM section; run the next section manually."
)

## VELC

### Dataset

In [2]:
# Dataset configuration for the ServerMachineDataset file machine-1-1.csv.
window_size = 100

# Column labels "0" .. "37"; used both as loader `names` and as targets.
feature_names = list(map(str, range(38)))

# header=None together with explicit names is forwarded to the loader.
load_params = dict(header=None, names=list(feature_names))

# No drop/refill steps and no anomaly filters are configured; the only
# preprocessing step is (use_dataframe_func, "astype", "float").
drop_refill_pipeline = list()
preprocessing_pipeline = [(use_dataframe_func, "astype", "float")]
detect_anomalies_pipeline = list()

datasets_params = [
    DatasetParams(
        path="/home/stachu/Projects/Anomaly_detection/TSAD/data/Industry/ServerMachineDataset/train/machine-1-1.csv",
        load_params=load_params,
        target=list(feature_names),
        split_proportions=[0.8, 0.1, 0.1],
        window_size=window_size,
        batch_size=64,
        drop_refill_pipeline=drop_refill_pipeline,
        preprocessing_pipeline=preprocessing_pipeline,
        detect_anomalies_pipeline=detect_anomalies_pipeline,
        scaler=StandardScaler(),
    ),
]

### Models

In [3]:
# Channel counts; 38 matches the number of target columns in the dataset cell.
c_in = 38
c_out = 38

# Constructor arguments for the single VELC configuration.
velc_init = dict(
    c_in=c_in,
    h_size=200,
    n_layers=2,
    z_size=100,
    N_constraint=10,
    threshold=0.5,
)

models_params = [
    ModelParams(
        name_="VELC",
        cls_=VELC,
        init_params=velc_init,
        WrapperCls=VELCWrapper,
    ),
]

### Experiments

In [4]:
# Checkpointing and early stopping monitor the same logged metric.
monitored_metric = 'val_loss'

chp_p = CheckpointParams(
    dirpath="../checkpoints",
    monitor=monitored_metric,
    verbose=True,
    save_top_k=1,
)
tr_p = TrainerParams(max_epochs=5, gpus=1, auto_lr_find=True)
es_p = EarlyStoppingParams(monitor=monitored_metric, patience=2, verbose=True)

# Bundle models, datasets and trainer settings into one experiment object.
exp = Experimentator(
    models_params=models_params,
    datasets_params=datasets_params,
    trainer_params=tr_p,
    checkpoint_params=chp_p,
    early_stopping_params=es_p,
    LoggersClasses=[TensorBoardLogger],
    loggers_params=[LoggerParams(save_dir="../lightning_logs")],
)

### Run / Load

In [5]:
# Run the configured experiments, passing ../saved_experiments as experiments_path.
# NOTE: the output recorded with this notebook ends, during the validation
# sanity check, in
#   TypeError: forward() missing 1 required positional argument: 'z'
# reported for model 0 (VELC) on dataset 0 (machine-1-1).
exp.run_experiments(
    experiments_path="../saved_experiments",
    safe=False,
)
# Alternative kept from the original: load a saved experimentator instead.
# exp = load_experimentator(
#     "./saved_experiments/2021-12-29_01:31:42.pkl"
# )


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.

Global seed set to 42
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

Setting `Trainer(flush_logs_every_n_steps=50)` is deprecated in v1.5 and will be removed in v1.7. Please configure flushing in the logger instead.


DataModule.setup has already been called, so it will not be called again. In v1.6 this behavior will change to always call DataModule.setup.

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type    | Params
--------------------------------------
0 | model     | VELC    | 1.7 M 
1 | criterion | MSELoss | 0     
--------------------------------------
1.7 M     Trainable params
0         Non-trainable params
1.7 M     Total params
6.853     Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

TypeError: ("forward() missing 1 required positional argument: 'z'", 'Problem with training 0 model named VELC on 0 dataset named machine-1-1')

### Plot

In [None]:
# Plot predictions for dataset index 0 and model id 0 of the current experiment.
plot_exp_predictions(
    exp,
    dataset_idx=0,
    models_ids=[0],
)

In [None]:
# Deliberate stop marker: an error in this cell halts "Run All" at the end of
# the VELC section. The previous `None = 1` achieved this through a
# SyntaxError, which made the notebook unparseable for linters and script
# export; an explicit raise keeps the same stop with a readable message.
raise RuntimeError(
    "Intentional stop after the VELC section; run the next section manually."
)

## AnomTrans

### Dataset

In [None]:
# Dataset configuration for the ServerMachineDataset file machine-1-1.csv.
window_size = 100

# Column labels "0" .. "37"; used both as loader `names` and as targets.
feature_names = list(map(str, range(38)))

# header=None together with explicit names is forwarded to the loader.
load_params = dict(header=None, names=list(feature_names))

# No drop/refill steps and no anomaly filters are configured; the only
# preprocessing step is (use_dataframe_func, "astype", "float").
drop_refill_pipeline = list()
preprocessing_pipeline = [(use_dataframe_func, "astype", "float")]
detect_anomalies_pipeline = list()

datasets_params = [
    DatasetParams(
        path="/home/stachu/Projects/Anomaly_detection/TSAD/data/Industry/ServerMachineDataset/train/machine-1-1.csv",
        load_params=load_params,
        target=list(feature_names),
        split_proportions=[0.8, 0.1, 0.1],
        window_size=window_size,
        batch_size=64,
        drop_refill_pipeline=drop_refill_pipeline,
        preprocessing_pipeline=preprocessing_pipeline,
        detect_anomalies_pipeline=detect_anomalies_pipeline,
        scaler=StandardScaler(),
    ),
]

### Models

In [None]:
# Channel counts; 38 matches the number of target columns in the dataset cell.
c_in = 38
c_out = 38

# Constructor arguments for the Anomaly Transformer: N is set to the window
# length and d_model to the number of input channels.
anom_trans_init = dict(N=window_size, d_model=c_in, layers=2, lambda_=0.5)

models_params = [
    ModelParams(
        name_="AnomTrans",
        cls_=AnomalyTransformer,
        init_params=anom_trans_init,
        WrapperCls=ATWrapper,
    ),
]

### Experiments

In [None]:
# Checkpointing and early stopping monitor the same logged metric; this
# section uses 'val_min_loss' instead of the 'val_loss' used elsewhere.
monitored_metric = 'val_min_loss'

chp_p = CheckpointParams(
    dirpath="../checkpoints",
    monitor=monitored_metric,
    verbose=True,
    save_top_k=1,
)
tr_p = TrainerParams(max_epochs=5, gpus=1, auto_lr_find=True)
es_p = EarlyStoppingParams(monitor=monitored_metric, patience=2, verbose=True)

# Bundle models, datasets and trainer settings into one experiment object.
exp = Experimentator(
    models_params=models_params,
    datasets_params=datasets_params,
    trainer_params=tr_p,
    checkpoint_params=chp_p,
    early_stopping_params=es_p,
    LoggersClasses=[TensorBoardLogger],
    loggers_params=[LoggerParams(save_dir="../lightning_logs")],
)

### Run / Load

In [None]:
# Run the configured experiments, passing ../saved_experiments as experiments_path.
exp.run_experiments(
    experiments_path="../saved_experiments",
    safe=False,
)
# Alternative kept from the original: load a saved experimentator instead.
# exp = load_experimentator(
#     "./saved_experiments/2021-12-29_01:31:42.pkl"
# )

In [None]:
# Replace `exp` with an experimentator saved by an earlier run.
# NOTE(review): the .pkl extension suggests this unpickles the file — only
# load files from a trusted source.
saved_experiment_path = '../saved_experiments/2022-05-12_15:46:15.pkl'
exp = load_experimentator(saved_experiment_path)

In [None]:
# Display the predictions returned by the experimentator for (0, [0]).
# NOTE(review): the positional arguments are presumably (dataset index,
# model ids), mirroring plot_exp_predictions(dataset_idx=0, models_ids=[0])
# — confirm against the Experimentator API.
exp.get_models_predictions(0, [0])

### Plot

In [None]:
# Plot predictions for dataset index 0 and model id 0 of the current experiment.
plot_exp_predictions(
    exp,
    dataset_idx=0,
    models_ids=[0],
)

In [None]:
# Deliberate stop marker: an error in this cell halts "Run All" at the end of
# the AnomTrans section. The previous `None = 1` achieved this through a
# SyntaxError, which made the notebook unparseable for linters and script
# export; an explicit raise keeps the same stop with a readable message.
raise RuntimeError(
    "Intentional stop after the AnomTrans section; run the next section manually."
)

## LSTMVAE

### Dataset

In [None]:
# Dataset configuration for the ServerMachineDataset file machine-1-1.csv.
window_size = 100

# Unlike the other sections, no column names are supplied: header=None is the
# only loader option, and the targets below are the integers 0..37.
load_params = dict(header=None)

# No drop/refill steps and no anomaly filters are active; the only
# preprocessing step is (use_dataframe_func, "astype", "float").
# (The commented-out household_power_consumption examples that used to sit in
# these lists are still available in the Template cell at the end.)
drop_refill_pipeline = list()
preprocessing_pipeline = [(use_dataframe_func, "astype", "float")]
detect_anomalies_pipeline = list()

datasets_params = [
    DatasetParams(
        path="/home/stachu/Projects/Anomaly_detection/TSAD/data/Industry/ServerMachineDataset/train/machine-1-1.csv",
        load_params=load_params,
        target=[*range(38)],
        split_proportions=[0.8, 0.1, 0.1],
        window_size=window_size,
        batch_size=64,
        drop_refill_pipeline=drop_refill_pipeline,
        preprocessing_pipeline=preprocessing_pipeline,
        detect_anomalies_pipeline=detect_anomalies_pipeline,
        scaler=StandardScaler(),
    ),
]

### Models

In [None]:
# Channel counts; 38 matches the number of target columns in the dataset cell.
c_in = 38
c_out = 38

# NOTE(review): the model is built with c_in=window_size (100), not with the
# c_in (38) defined above — confirm which one LSTMVAE expects for this
# 38-column dataset.
#
# The list previously held two byte-identical entries that also shared the
# same name_, so the same configuration was trained twice under one name.
# Only one is kept; to compare variants, add further entries with distinct
# name_ values and differing parameters.
models_params = [
    ModelParams(
        name_="LSTMVAE_h200_l1", cls_=LSTMVAE,
        init_params=dict(
            c_in=window_size, h_size=200, n_layers=1),
        WrapperCls=VAE, wrapper_kwargs=dict(kld_weight=0.005)),
]

### Experiments

In [None]:
# Checkpointing and early stopping monitor the same logged metric.
monitored_metric = 'val_loss'

chp_p = CheckpointParams(
    dirpath="../checkpoints",
    monitor=monitored_metric,
    verbose=True,
    save_top_k=1,
)
tr_p = TrainerParams(max_epochs=5, gpus=1, auto_lr_find=True)
es_p = EarlyStoppingParams(monitor=monitored_metric, patience=2, verbose=True)

# Bundle models, datasets and trainer settings into one experiment object.
exp = Experimentator(
    models_params=models_params,
    datasets_params=datasets_params,
    trainer_params=tr_p,
    checkpoint_params=chp_p,
    early_stopping_params=es_p,
    LoggersClasses=[TensorBoardLogger],
    loggers_params=[LoggerParams(save_dir="../lightning_logs")],
)

### Run / Load

In [None]:
# Run the configured experiments, passing ../saved_experiments as experiments_path.
exp.run_experiments(
    experiments_path="../saved_experiments",
    safe=False,
)
# Alternative kept from the original: load a saved experimentator instead.
# exp = load_experimentator(
#     "./saved_experiments/2021-12-29_01:31:42.pkl"
# )

### Plot

In [None]:
# Plot predictions for dataset index 0 and model id 0 of the current experiment.
plot_exp_predictions(
    exp,
    dataset_idx=0,
    models_ids=[0],
)
# Disabled in the original (plot_anomalies is not among this notebook's imports):
# plot_anomalies(exp.datasets_params.iloc[0])

# household_power_consumption

# Template

In [None]:
# window_size = 366

# load_params = {
#     "sep": ';', "header": 0, "low_memory": False,
#     "infer_datetime_format": True, "parse_dates": {'datetime': [0, 1]},
#     "index_col": ['datetime']
# }

# columns = ["Global_active_power"]  # , "Voltage"]
# drop_refill_pipeline = [
#     (loc, {"columns": columns}),
#     (drop_if_is_in, (["?", np.nan]), {"columns": columns}),
#     (iloc, {"rows_end": 5000}),
#     # (iloc, {"rows_start": -20000}),
# ]
# preprocessing_pipeline = [
#     (use_dataframe_func, "astype", "float"),
# ]
# detect_anomalies_pipeline = [
#     # (get_isoforest_filter, dict(
#     #     scores_threshold=-0.36, window_size=500, target="Global_active_power"))
#     (get_variance_filter, dict(
#         window_size=3000, log_variance_limits=(-7, 0),
#         target="Global_active_power"))
# ]


# datasets_params = [
#     DatasetParams(
#         path="/home/stachu/Projects/Anomaly_detection/TSAD/data/Energy/household_power_consumption/household_power_consumption.csv",
#         load_params=load_params,
#         target="Global_active_power",
#         split_proportions=[0.8, 0.1, 0.1],
#         window_size=window_size,
#         batch_size=64,
#         drop_refill_pipeline=drop_refill_pipeline,
#         preprocessing_pipeline=preprocessing_pipeline,
#         detect_anomalies_pipeline=detect_anomalies_pipeline,
#         scaler=StandardScaler(),
#         resample_params=dict(
#             resampler_method_str="fillna", rule="1min", resample_kwargs={},
#             resampler_method_kwargs=dict(method="backfill"))),
# ]

# c_in = 1
# c_out = 1

# models_params = [
#     # # ModelParams(
#     # #     name_="TST_l3_fcDrop0.1", cls_=TST.TST,
#     # #     init_params={
#     # #         "c_in": c_in, "c_out": c_out, "seq_len": window_size,
#     # #         "max_seq_len": window_size, "n_layers": 3, "fc_dropout": 0.1}),
#     # # ModelParams(
#     # #     name_="TST_l2_fcDrop0.1", cls_=TST.TST,
#     # #     init_params={
#     # #         "c_in": c_in, "c_out": c_out, "seq_len": window_size,
#     # #         "max_seq_len": window_size, "n_layers": 2, "fc_dropout": 0.1}),
#     # ModelParams(
#     #     name_="TST_l2_fcDrop0.0", cls_=TST.TST,
#     #     init_params={
#     #         "c_in": c_in, "c_out": c_out, "seq_len": window_size,
#     #         "max_seq_len": window_size, "n_layers": 2, "fc_dropout": 0.0}),
#     # ModelParams(
#     #     name_="ResNet", cls_=ResNet.ResNet,
#     #     init_params={"c_in": c_in, "c_out": c_out}),
#     # # ModelParams(
#     # #     name_="LSTM_h200_l1", cls_=RNN.LSTM,
#     # #     init_params={
#     # #         "c_in": c_in, "c_out": c_out, "hidden_size": 200, "n_layers": 1}),
#     # # ModelParams(
#     # #     name_="LSTM_h200_l2", cls_=RNN.LSTM,
#     # #     init_params={
#     # #         "c_in": c_in, "c_out": c_out, "hidden_size": 200, "n_layers": 2}),
#     # ModelParams(
#     #     name_="LSTM_h400_l2", cls_=RNN.LSTM,
#     #     init_params={
#     #         "c_in": c_in, "c_out": c_out, "hidden_size": 400, "n_layers": 2}),
#     # ModelParams(
#     #     name_="LSTMAutoencoder_h200_l1", cls_=LSTMAE,
#     #     init_params=dict(
#     #         c_in=window_size, h_size=200, n_layers=1),
#     #     WrapperCls=Autoencoder),
#     ModelParams(
#         name_="LSTMVAE_h200_l1", cls_=LSTMVAE,
#         init_params=dict(
#             c_in=window_size, h_size=200, n_layers=1),
#         WrapperCls=VAE, wrapper_kwargs=dict(kld_weight=0.005)),
#     # ModelParams(
#     #     name_="LSTMVAE_h200_l1", cls_=LSTMVAE,
#     #     init_params=dict(
#     #         c_in=window_size, h_size=200, n_layers=1),
#     #     WrapperCls=VAE, wrapper_kwargs=dict(kld_weight=0.002)),
# ]

# chp_p = CheckpointParams(
#     dirpath="../checkpoints", monitor='val_loss', verbose=True,
#     save_top_k=1)
# tr_p = TrainerParams(
#     max_epochs=1, gpus=1, auto_lr_find=True)
# es_p = EarlyStoppingParams(
#     monitor='val_loss', patience=2, verbose=True)

# # import pytorch_lightning as pl
# # from pytorch_lightning.loggers import TensorBoardLogger

# # tmp = pl.Trainer(logger=TensorBoardLogger("./"))

# exp = Experimentator(
#     models_params=models_params,
#     datasets_params=datasets_params,
#     trainer_params=tr_p,
#     checkpoint_params=chp_p,
#     early_stopping_params=es_p,
#     LoggersClasses=[TensorBoardLogger],
#     loggers_params=[LoggerParams(save_dir="../lightning_logs")]
# )

# exp.run_experiments(experiments_path="../saved_experiments", safe=False)
# # exp = load_experimentator(
# #     "./saved_experiments/2021-12-29_01:31:42.pkl"
# # )

# plot_exp_predictions(exp, dataset_idx=0, models_ids=[0])
