In [None]:
# Standard library
import argparse
import math
import os
import uuid
from datetime import datetime
from pathlib import Path

# Third-party
import matplotlib.pyplot as plt
import numpy
import pandas as pd
import seaborn as sns
import torch
import yaml
from niapy.algorithms.basic import ParticleSwarmAlgorithm, DifferentialEvolution, FireflyAlgorithm, GeneticAlgorithm
from niapy.algorithms.modified import SelfAdaptiveDifferentialEvolution
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.utilities.seed import seed_everything
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score, mean_squared_error
from tabulate import tabulate

# Project-local
from dataloaders.tabular import TabularDataset
from experiments.dnn_ae_experiment import DNNAEExperiment
from models.dnn_ae import Autoencoder
# NOTE(review): wildcard import; `os` and `datetime` are imported explicitly
# above so the notebook does not depend on whatever this module re-exports.
from niapy_extension.wrapper import *
from storage.database import SQLiteConnector

### Setup configuration

In [None]:
# Unique identifier for this run; used to give the run its own log directory.
RUN_UUID = uuid.uuid4().hex

# Load the experiment configuration. A malformed file must stop the notebook
# here: printing and continuing would only surface later as an unrelated
# NameError on `config`.
with open("configs/dnn_ae.yaml", 'r') as file:
    try:
        config = yaml.safe_load(file)
    except yaml.YAMLError as exc:
        print(exc)
        raise

# Redirect all logging output into a per-run sub-directory and make sure it exists.
config['logging_params']['save_dir'] += RUN_UUID + '/'
Path(config['logging_params']['save_dir']).mkdir(parents=True, exist_ok=True)

# Seed the random number generators before any data handling or model creation.
seed_everything(config['exp_params']['manual_seed'], True)

# Build the data module from the `data_params` section of the config.
datamodule = TabularDataset(**config["data_params"], pin_memory=True)
datamodule.setup()

### Construct model and experiment

In [None]:
# Hand-picked solution vector passed to the Autoencoder constructor.
solution = numpy.array([
    0.0,
    1.0,
    0.3238545402477898,
    0.0,
    0.2770718202737109,
    0.6741137338475078,
    1.0,
])
model = Autoencoder(solution, **config)

# Artifacts of this manually configured model live in their own sub-directory
# of the run's log directory, keyed by the model's hash id.
run_name = "manual_alg_" + model.hash_id
saving_path = config['logging_params']['save_dir'] + run_name
Path(saving_path).mkdir(parents=True, exist_ok=True)

In [None]:
# Early-stopping settings are read from the `early_stop` section of the config.
# NOTE(review): mode="max" treats larger values of the monitored metric as
# better; if config['early_stop']['monitor'] is a loss this should probably be
# "min" - confirm against configs/dnn_ae.yaml.
early_stop_cfg = config['early_stop']
early_stop_callback = EarlyStopping(
    monitor=early_stop_cfg['monitor'],
    min_delta=early_stop_cfg['min_delta'],
    patience=early_stop_cfg['patience'],
    verbose=False,
    check_finite=True,
    mode="max",
)

experiment = DNNAEExperiment(model, config['exp_params'], config['model_params']['n_features'])

# The epoch budget comes from the model and reaches the Trainer through
# the `trainer_params` that are unpacked into it below.
config['trainer_params']['max_epochs'] = model.num_epochs

tb_logger = TensorBoardLogger(
    save_dir=config['logging_params']['save_dir'],
    name="manual_alg_" + model.hash_id,
)

# Keep the single best checkpoint by validation loss, plus the last one.
checkpoint_callback = ModelCheckpoint(
    save_top_k=1,
    dirpath=os.path.join(tb_logger.log_dir, "checkpoints"),
    monitor="val_loss",
    save_last=True,
)
callbacks = [LearningRateMonitor(), checkpoint_callback, early_stop_callback]

runner = Trainer(
    logger=tb_logger,
    enable_progress_bar=False,
    # accelerator="gpu",
    # devices=1,
    # auto_select_gpus=True,
    callbacks=callbacks,
    # strategy=DDPPlugin(find_unused_parameters=False),
    **config['trainer_params'],
)

### Train and save model to file

In [None]:
# Fit the experiment, reporting wall-clock start/end times, then persist the
# trained weights next to the run's other artifacts.
timestamp_format = "%H:%M:%S-%d/%m/%Y"
model_name = config['model_params']['name']

print(f"======= Training {model_name} =======")
print(f'\nTraining start: {datetime.now().strftime(timestamp_format)}')
runner.fit(experiment, datamodule=datamodule)
print(f'\nTraining end: {datetime.now().strftime(timestamp_format)}')

weights_file = saving_path + "/manual_model.pt"
torch.save(model.state_dict(), weights_file)

### Load model from file

In [None]:
# Re-create the architecture from the same solution vector, restore the
# weights written by the training cell, and switch to evaluation mode.
weights_file = saving_path + "/manual_model.pt"
# Alternative: weights of a previously stored run.
#weights_file = "logs/add5e7b369a2493e9ec39f428d4c05fd/ParticleSwarmAlgorithm_e1b93f7da8ebc4a6cb5b2084e8c4f7d272b923c4.pt"
model = Autoencoder(solution, **config)
restored_state = torch.load(weights_file)
model.load_state_dict(restored_state)
model.eval()

### Predict with loaded model

In [None]:
# Run the loaded model over the test set and collect one reconstruction
# error per batch.
#
# NOTE: despite the `rmse` naming, the stored values are *mean squared* errors
# (the original call passed squared=True, which is also the default). The
# notebook-level name `rmse_list` is kept so other cells keep working; the
# deprecated `squared` argument is simply no longer passed.
rmse_list = []
counter = 0

with torch.no_grad():  # inference only - no autograd graph needed
    for data, target in datamodule.test_dataloader():
        data = data.to('cpu')
        # The model returns (reconstruction, input); avoid shadowing the
        # builtin `input` with the second element.
        reconstructed, original = model.forward(data)
        batch_mse = mean_squared_error(original.detach().cpu().numpy(),
                                       reconstructed.detach().cpu().numpy())
        rmse_list.append(batch_mse)
        counter += 1

print(counter)
print(f"Number of elements: {len(rmse_list)}")
if rmse_list:
    print(sum(rmse_list) / len(rmse_list))
else:
    # An empty test loader would otherwise raise ZeroDivisionError.
    print("Test dataloader produced no batches; nothing to average.")

### Calculate AUC value based on anomaly detection

In [None]:
from experiments.anomalyDetection import AnomalyDetection

# Collect per-instance inputs, reconstructions, labels and reconstruction
# errors over the test set, then let AnomalyDetection derive the AUC.
anomaly_detection = AnomalyDetection([0], [1])

inputs = []                 # one input row per test instance
reconstructs = []           # the matching reconstructed row
reconstrcution_errors = []  # per-instance MSE (name kept: read by the boxplot cell)
instance_number = []        # running index of the instance
instance_target = []        # label as a plain Python number
targets = []                # label as returned by the dataloader

index = 0
with torch.no_grad():  # inference only - no autograd graph needed
    for data, target in datamodule.test_dataloader():
        data = data.to('cpu')
        # One forward pass per batch; the model returns (reconstruction, input).
        batch_reconstructed, batch_input = model.forward(data)

        # Mean squared error of every instance on its own (averaged over all
        # non-batch dimensions) instead of one batch-wide value repeated for
        # each row.
        squared_diff = (batch_input - batch_reconstructed) ** 2
        batch_errors = squared_diff.reshape(squared_diff.shape[0], -1).mean(dim=1)

        for input_row, reconstructed_row, label, error in zip(
                batch_input, batch_reconstructed, target, batch_errors):
            # Input rows go to `inputs`, reconstructed rows to `reconstructs`
            # (the two were previously swapped).
            inputs.append(input_row)
            reconstructs.append(reconstructed_row)
            targets.append(label)
            reconstrcution_errors.append(error.item())
            instance_number.append(index)
            instance_target.append(label.item())
            index += 1

anomaly_detection.find(inputs, reconstructs, targets)

AUC = anomaly_detection.AUC
print(f"Model AUC score: {anomaly_detection.AUC}")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import roc_curve

def plot_roc_curve(y_test, FPR_array, TPR_array, view=False, filename='./logs/roc_curve.png'):
    """Plot the model's ROC curve against a constant-score baseline and save it.

    Args:
        y_test: True labels; only used to build the baseline curve
            (positive label is 1).
        FPR_array: False-positive rates of the model, one per threshold.
        TPR_array: True-positive rates of the model, aligned with FPR_array.
        view: When True, also display the figure after saving it.
        filename: Destination of the saved figure. For string/Path values the
            parent directory is created if it does not exist.

    Note:
        The seaborn Matplotlib style is applied globally (as before), so it
        also affects figures drawn after this call.
    """
    # https://www.analyticsvidhya.com/blog/2020/06/auc-roc-curve-machine-learning/
    # Baseline: every sample gets the same score.
    random_probs = [0] * len(y_test)
    p_fpr, p_tpr, _ = roc_curve(y_test, random_probs, pos_label=1)

    # The 'seaborn' style was renamed 'seaborn-v0_8' in Matplotlib 3.6 and the
    # old name was removed in 3.8; use whichever this installation provides.
    style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
    plt.style.use(style_name)

    # NumPy 2.0 renamed trapz to trapezoid; fall back for older versions.
    integrate = getattr(np, 'trapezoid', None) or np.trapz
    auc_value = round(integrate(TPR_array, FPR_array), 3)

    # Draw on a dedicated figure so nothing left open elsewhere is touched.
    fig, ax = plt.subplots()
    ax.plot(FPR_array, TPR_array, linestyle='-', color='green', label='Autoencoder')
    ax.plot(p_fpr, p_tpr, linestyle='--', color='blue', label='Random')

    ax.set_title(f'ROC curve - AUC: {auc_value}')
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive rate')
    ax.legend(loc='best')

    # Make sure the target directory exists before writing the file.
    if isinstance(filename, (str, Path)):
        Path(filename).parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(filename)

    if view:
        plt.show()

    plt.close(fig)

# Draw, save and display the ROC curve of the anomaly detector.
plot_roc_curve(
    targets,
    anomaly_detection.FPR_array,
    anomaly_detection.TPR_array,
    view=True,
)

### 2D Scatter plot (anomalies vs. normal)

In [None]:
# Rebuild the data module and expose the test split as NumPy arrays:
# `x` holds the features, `y` the labels. The shape of `x` is the cell output.
datamodule = TabularDataset(**config["data_params"], pin_memory=True)
datamodule.setup()

test_split = datamodule.test_dataset
x = test_split.x_test.cpu().detach().numpy()
y = test_split.y_test.cpu().detach().numpy()
x.shape

In [None]:
# 2-D t-SNE embedding of the test features (perplexity 60), coloured by label.
# Reference: https://towardsdatascience.com/dimension-reduction-techniques-with-python-f36ca7009e5c
tsne_2d = TSNE(n_components=2, learning_rate=1000, n_iter=1000, perplexity=60)
X_tsne = tsne_2d.fit_transform(x)
X_pca = PCA().fit_transform(x)

fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y[:x.shape[0]], cmap='cool')
fig.savefig('scatter2D.png', dpi=300)

In [None]:
# 2-D t-SNE embedding of the test features at a higher perplexity, coloured
# by label.
# Reference: https://towardsdatascience.com/dimension-reduction-techniques-with-python-f36ca7009e5c
perplexity = 200
# NOTE(review): newer scikit-learn releases rename TSNE's `n_iter` to
# `max_iter` - confirm against the pinned scikit-learn version.
X_tsne = TSNE(n_components=2, learning_rate=1000, n_iter=1000, perplexity=perplexity).fit_transform(x)
# NOTE(review): X_pca is computed but not used by any plot in this notebook.
X_pca = PCA().fit_transform(x)

plt.figure(figsize=(10, 5))
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y[:x.shape[0]], cmap='cool')
plt.title(f't-SNE (perplexity={perplexity})')
# Written to its own file: saving to 'scatter2D.png' again would silently
# overwrite the figure produced by the perplexity-60 cell.
plt.savefig(f'scatter2D_perplexity{perplexity}.png', dpi=300)

### 3D Scatter plot (anomalies vs. normal)

In [None]:
# 3-D t-SNE embedding of the test features, coloured by label.
tsne_3d = TSNE(n_components=3, learning_rate=250, n_iter=1000, perplexity=10)
X_tsne = tsne_3d.fit_transform(x)
X_pca = PCA().fit_transform(x)

ax = plt.axes(projection='3d')
ax.set_proj_type('ortho')

# Embedding columns 0, 1, 2 are drawn on the z, x and y axes respectively.
zdata, xdata, ydata = X_tsne.T
ax.scatter3D(xdata, ydata, zdata, c=y[:x.shape[0]], cmap='cool')
plt.savefig('scatter3D.png', dpi=300)

In [None]:
# Distribution of the collected reconstruction errors, grouped by class label.
error_records = list(zip(reconstrcution_errors, instance_target))
df = pd.DataFrame(error_records, columns=['Error', 'Class'])
sns.boxplot(x=df["Class"], y=df["Error"]);
plt.show()