In [None]:
import os
import sys
sys.path.append(os.path.join(os.path.abspath(''), "..",".."))

from src.utils import setup_device, setup_seed

# Candidate data locations, both resolved relative to the current working
# directory (os.path.abspath('') returns the cwd).
local_path = os.path.join(
    os.path.abspath(''), os.pardir, os.pardir, "data", "raw", "Neu", "anomaly"
)
remote_path = os.path.join(
    os.path.abspath(''), os.pardir, os.pardir, os.pardir, "anomaly"
)

# setup_device receives both candidates and returns the device together with
# the data path used by the rest of the notebook.
device, data_path = setup_device(local_path, remote_path)

# Seeding is switched off: setup_seed() is intentionally not called here.
_init_fn = None #setup_seed()

In [None]:
import torch
import numpy as np

In [None]:
# Model identifier and the dataset names to evaluate; both are forwarded to
# pipeline.run(...) and store_results(...) further down.
model_name, dataset_names = "GCN", ["cassandra"]

In [None]:
from src.preparation.transforms import *
import torch_geometric.transforms as T

# Transform configuration handed to ClassificationPipeline as
# `data_transform_steps`.
data_transform_steps = dict(
    transforms=[MinMaxTransform()],
    clear_after_use=True,
)

In [None]:
# Training configuration handed to ClassificationPipeline as `training_config`.
# `device` and `_init_fn` come from the setup cell at the top of the notebook.
training_config = dict(
    extraction_target="window",
    epochs=15,
    device=device,
    early_stopping=None,
    checkpoint_saving=True,
    graph_dataset_config=dict(
        window_width=20,
        sliding_window=1,
        flatten_slice=False,
        # Only the training split is shuffled.
        shuffle_settings=dict(train=True, valid=False, test=False),
        complete_batches=True,
        use_custom_sampler=None,
        batch_size=128,
        init_fn=_init_fn,
    ),
    # NOTE(review): proba=0.0 presumably disables augmentation entirely - confirm
    # against the pipeline implementation.
    augmentate=dict(
        proba=0.0,
        mu=[0, 0],
        sigma=[0.05, 0.1],
        offline=True,
        cat=True,
    ),
    # Keyword arguments for the optimizer class named in `model_settings`.
    optimizer_config=dict(
        lr=0.01,
        weight_decay=1e-5,
        betas=(0.9, 0.999),
    ),
)

In [None]:
from src.modeling.models import GCN

In [None]:
# Classes (not instances) for the optimizer, loss and model; handed to
# ClassificationPipeline as `model_settings`.
model_settings = dict(
    optimizer_class=torch.optim.Adam,
    loss_class=torch.nn.NLLLoss,
    model_class=GCN,
)

In [None]:
# Model hyperparameters handed to ClassificationPipeline as `model_config`.
model_config = dict(
    num_node_features=20,
    num_hidden=64,
    num_flex=512,
    dropout=0.5,
    nb_polynomial_order=3,
    graclus_pooling=True,
)

In [None]:
def score_function(engine):
    """Return the negated ``'nll'`` metric of ``engine``.

    Args:
        engine: object exposing ``state.metrics`` as a mapping that contains
            the key ``'nll'``.

    Returns:
        ``-engine.state.metrics['nll']``.

    Raises:
        KeyError: if ``'nll'`` is missing from the metrics mapping.

    NOTE(review): the sign flip presumably lets a score-maximising consumer
    prefer a lower loss - confirm against ClassificationPipeline.
    """
    return -engine.state.metrics['nll']

In [None]:
from src.preparation.utils import load_datasets
from src.modeling.classification_pipeline import ClassificationPipeline
from src.utils import store_results

In [None]:
# Number of independent repetitions of the full evaluation.
runs = 10

# Anomaly types passed to load_datasets as `exclude_anomalies`.
exclude_anomalies = [
    "packet_duplication",
    "packet_loss",
    "bandwidth",
]

# Per-run accumulators, filled by the evaluation loop.
test_acc, test_loss = [], []

In [None]:
# Start from empty accumulators so this cell can be re-executed safely. A later
# cell rebinds `test_acc` / `test_loss` to their averages; without this reset a
# re-run would fail on `.append` or mix in results from an earlier execution.
test_acc = []
test_loss = []

for i in range(runs):
    print("Run: {}".format(i + 1))

    # Datasets are loaded anew on every run - presumably because the pipeline
    # consumes/clears them ("clear_after_use": True in data_transform_steps);
    # TODO confirm before hoisting this out of the loop.
    datasets = load_datasets(dataset_names, data_path, exclude_anomalies=exclude_anomalies)

    # A fresh pipeline per run, built from the configuration cells above.
    pipeline = ClassificationPipeline(
        datasets,
        data_transform_steps=data_transform_steps,
        model_settings=model_settings,
        model_config=model_config,
        training_config=training_config,
        score_function=score_function,
    )

    # evaluate model & hyperparameters
    acc, loss = pipeline.run(
        dataset_names,
        model_name,
        folds=5,
        test=True,
        validation=True,
        verbose=False,
    )

    # Collect this run's test metrics for the averaging step.
    test_acc.append(acc)
    test_loss.append(loss)

In [None]:
# Replace each per-run list with its arithmetic mean (sum / count). Both names
# are rebound from a list to a single averaged value, so this cell cannot be
# executed twice without re-running the evaluation loop first.
test_acc, test_loss = (
    sum(per_run) / len(per_run) for per_run in (test_acc, test_loss)
)

In [None]:
# Hand the test metrics (averaged in the preceding cell) to store_results,
# together with the dataset/model names and the three configuration dicts.
# The `suffix` argument encodes the number of runs.
store_results(
    test_acc,
    test_loss,
    dataset_names,
    model_name,
    [training_config, model_config, model_settings],
    suffix="_{}runs".format(runs),
)