# Debugging autoreload

In [None]:
%load_ext autoreload
%autoreload 2

# Load packages

In [None]:
import pandas as pd
import numpy as np
from src.utils.outliers.iqr import add_iqr_outs_to_df, plot_iqr_outs, plot_iqr_outs_regression_error
from src.utils.outliers.pyod import add_pyod_outs_to_df, plot_pyod_outs, plot_pyod_outs_regression_error
from scripts.python.dataset_specific.GSEUNN.tasks.routines_046 import plot_regression_error_distributions
from plotly.subplots import make_subplots
from scipy import stats
import plotly.express as px
from scripts.python.routines.plot.scatter import add_scatter_trace
import plotly.graph_objects as go
from scripts.python.routines.plot.save import save_figure
from scripts.python.routines.plot.layout import add_layout, get_axis
from statsmodels.stats.multitest import multipletests
import plotly.io as pio
import importlib
pio.kaleido.scope.mathjax = None
from plotly.offline import init_notebook_mode, iplot
from scipy.interpolate import interp1d
from src.utils.verbose import NoStdStreams
init_notebook_mode(connected=False)
import matplotlib.pyplot as plt
from matplotlib import colors
from omegaconf import OmegaConf
from tqdm import tqdm
import seaborn as sns
from glob import glob
import pathlib
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.decomposition import MiniBatchDictionaryLearning, FastICA
from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection
from sklearn.manifold import MDS, Isomap
from openTSNE import TSNE
from sklearn.metrics import mean_absolute_error
from scipy import stats
import patchworklib as pw
import os
import functools
from scipy.stats import iqr
from statannotations.Annotator import Annotator
from scipy.stats import mannwhitneyu
import shap
from slugify import slugify
from src.models.tabular.widedeep.ft_transformer import WDFTTransformerModel
from src.models.tabular.widedeep.tab_net import WDTabNetModel
from art.estimators.regression.pytorch import PyTorchRegressor
from art.estimators.classification import PyTorchClassifier
from art.estimators.regression.blackbox import BlackBoxRegressor
from art.attacks.evasion import ProjectedGradientDescentNumpy, FastGradientMethod, BasicIterativeMethod, MomentumIterativeMethod
from art.attacks.evasion import ZooAttack, CarliniL2Method, ElasticNet, NewtonFool
import torch
from src.tasks.metrics import get_cls_pred_metrics, get_cls_prob_metrics, get_reg_metrics
import matplotlib.lines as mlines

from sdv.metadata import SingleTableMetadata
from sdv.lite import SingleTablePreset
from sdv.single_table import GaussianCopulaSynthesizer, CTGANSynthesizer, TVAESynthesizer, CopulaGANSynthesizer
from sdv.evaluation.single_table import evaluate_quality
from sdv.evaluation.single_table import get_column_plot
from sdv.evaluation.single_table import get_column_pair_plot
from sklearn.preprocessing import StandardScaler
from scripts.python.routines.mvals import expit2

import missingno as msno

import joblib
import pickle

from pyod.models.ecod import ECOD
from pyod.models.abod import ABOD
from pyod.models.copod import COPOD
from pyod.models.sos import SOS
from pyod.models.kde import KDE
from pyod.models.sampling import Sampling
from pyod.models.gmm import GMM

from pyod.models.kpca import KPCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lmdd import LMDD

from pyod.models.lof import LOF
from pyod.models.cof import COF
from pyod.models.cblof import CBLOF
from pyod.models.hbos import HBOS
from pyod.models.knn import KNN
from pyod.models.sod import SOD

from pyod.models.iforest import IForest
from pyod.models.inne import INNE
from pyod.models.loda import LODA
from pyod.models.suod import SUOD

from pyod.models.auto_encoder_torch import AutoEncoder
from pyod.models.vae import VAE
from pyod.models.deep_svdd import DeepSVDD

from pyod.models.lunar import LUNAR

from torchmetrics import BootStrapper


from pytorch_tabular.utils import make_mixed_dataset, print_metrics
from pytorch_tabular import available_models
from pytorch_tabular import TabularModel
from pytorch_tabular.models import CategoryEmbeddingModelConfig, GANDALFConfig, TabNetModelConfig, FTTransformerConfig, DANetConfig, GatedAdditiveTreeEnsembleConfig
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig
from pytorch_tabular.tabular_model_tuner import TabularModelTuner
from torchmetrics.functional.classification import (
    multiclass_accuracy,
    multiclass_f1_score,
    multiclass_precision,
    multiclass_recall,
    multiclass_specificity,
    multiclass_cohen_kappa,
    multiclass_auroc
)
from pytorch_tabular import MODEL_SWEEP_PRESETS
from pytorch_tabular import model_sweep
import warnings


def conjunction(conditions):
    """Fold ``conditions`` into one result with element-wise logical AND.

    ``conditions`` is an iterable of boolean-like values or arrays. A single
    item is returned unchanged; an empty iterable raises ``TypeError``
    because the reduction has no initial value.
    """
    def _both(left, right):
        return np.logical_and(left, right)

    return functools.reduce(_both, conditions)


def disjunction(conditions):
    """Fold ``conditions`` into one result with element-wise logical OR.

    ``conditions`` is an iterable of boolean-like values or arrays. A single
    item is returned unchanged; an empty iterable raises ``TypeError``
    because the reduction has no initial value.
    """
    def _either(left, right):
        return np.logical_or(left, right)

    return functools.reduce(_either, conditions)



# Load data and model, define PyTorchRegressor, setup colors, dimensionality reduction models

In [None]:
# Machine-specific root folder of the dataset, plus the folders derived from it.
path = "D:/YandexDisk/Work/pydnameth/datasets/GPL21145/GSEUNN"
path_model = f"{path}/data/immuno/models/SImAge"
path_save = f"{path}/special/064_tai_report_4/immuno"
pathlib.Path(f"{path_save}").mkdir(parents=True, exist_ok=True)

# Everything belonging to the trained model is read from `path_model`, so the
# location is defined in exactly one place.
df = pd.read_excel(f"{path_model}/data.xlsx", index_col='sample_id')
# Feature names are the index of the feature table.
feats = pd.read_excel(f"{path_model}/feats_con_top10.xlsx", index_col=0).index.values
# Positions of the features inside a feature matrix (0 .. len(feats) - 1).
ids_feat = list(range(len(feats)))
col_trgt = 'Age'
col_pred = 'SImAge'

# Sample ids per data part, taken from the 'fold_0002' column of the predictions table.
df_preds = pd.read_excel(f"{path_model}/results/predictions.xlsx", index_col=0)
fold_part = df_preds['fold_0002']
ids_trn = df_preds.index[fold_part == 'trn'].values
ids_val = df_preds.index[fold_part == 'val'].values
ids_tst = df_preds.index[fold_part == 'tst_ctrl_central'].values
ids_all = df_preds.index[fold_part.isin(['trn', 'val', 'tst_ctrl_central'])].values
ids_trn_val = df_preds.index[fold_part.isin(['trn', 'val'])].values
ids_dict = {
    'all': ids_all,
    'trn_val': ids_trn_val,
    'tst': ids_tst
}

# Keep only samples that belong to one of the parts above. The explicit copy
# makes the column assignments below act on an independent frame instead of
# on a selection of the table read from disk.
df = df.loc[ids_all, :].copy()
df["SImAge Error"] = df["SImAge"] - df["Age"]
df["|SImAge Error|"] = df["SImAge Error"].abs()
df['Data'] = 'Real'
df['Eps'] = 'Origin'

# Load the checkpoint, then switch the model to eval mode and freeze it.
model = WDFTTransformerModel.load_from_checkpoint(checkpoint_path=f"{path_model}/best_fold_0002.ckpt")
model.eval()
model.freeze()

def predict_func_regression(X):
    """Run the loaded model on a 2-D feature array and return a NumPy array.

    The columns listed in ``ids_feat`` are cast to float32 and passed under
    both the 'all' and 'continuous' keys; 'categorical' receives an empty
    int32 column selection. ``model.produce_probabilities`` is set to True
    before the call.
    """
    model.produce_probabilities = True
    all_part = torch.from_numpy(np.float32(X[:, ids_feat]))
    continuous_part = torch.from_numpy(np.float32(X[:, ids_feat]))
    categorical_part = torch.from_numpy(np.int32(X[:, []]))
    output = model({
        'all': all_part,
        'continuous': continuous_part,
        'categorical': categorical_part,
    })
    return output.cpu().detach().numpy()

# Adam optimizer built from the hyper-parameters stored with the loaded model.
art_optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=model.hparams.optimizer_lr,
    weight_decay=model.hparams.optimizer_weight_decay
)

# ART wrapper around the model; it is the estimator used by the evasion attacks.
art_regressor = PyTorchRegressor(
    model=model,
    loss=model.loss_fn,
    input_shape=[len(feats)],
    optimizer=art_optimizer,
    use_amp=False,
    opt_level="O1",
    loss_scale="dynamic",
    channels_first=True,
    clip_values=None,
    preprocessing_defences=None,
    postprocessing_defences=None,
    preprocessing=(0.0, 1.0),
    device_type="cpu",
)

# One colour per attack; the key order also fixes the attack order in later loops.
d3_palette = px.colors.qualitative.D3
colors_atks = {
    "MomentumIterative": d3_palette[0],
    "BasicIterative": d3_palette[1],
    "FastGradient": d3_palette[3],
}

# Names of the two output columns for every dimensionality reduction method.
dim_red_labels = {
    'PCA': ['PC 1', 'PC 2'],
    'SVD': ['SVD 1', 'SVD 2'],
    't-SNE': ['t-SNE 1', 't-SNE 2'],
    'GRP': ['GRP 1', 'GRP 2'],
    'SRP': ['SRP 1', 'SRP 2'],
    'IsoMap': ['IsoMap 1', 'IsoMap 2'],
    'MBDL': ['MBDL 1', 'MBDL 2'],
}

# Reducers are fitted on train + validation samples only, in this fixed order.
X_dim_red = df.loc[ids_trn_val, feats].values
dim_red_models = {}
dim_red_models['PCA'] = PCA(n_components=2, whiten=False).fit(X_dim_red)
dim_red_models['SVD'] = TruncatedSVD(n_components=2, algorithm='randomized', n_iter=5).fit(X_dim_red)
dim_red_models['t-SNE'] = TSNE(n_components=2).fit(X_dim_red)
dim_red_models['GRP'] = GaussianRandomProjection(n_components=2, eps=0.5).fit(X_dim_red)
dim_red_models['SRP'] = SparseRandomProjection(n_components=2, density='auto', eps=0.5, dense_output=False).fit(X_dim_red)
dim_red_models['IsoMap'] = Isomap(n_components=2, n_neighbors=5).fit(X_dim_red)
dim_red_models['MBDL'] = MiniBatchDictionaryLearning(n_components=2, batch_size=100, alpha=1, n_iter=25).fit(X_dim_red)

# Project every sample of `df` and store the two coordinates as new columns.
for method_key, reducer in dim_red_models.items():
    embedding = reducer.transform(df.loc[:, feats].values)
    for axis_id, axis_label in enumerate(dim_red_labels[method_key]):
        df.loc[:, axis_label] = embedding[:, axis_id]

# Persist the table so later cells can reload it.
df.to_excel(f"{path_save}/df_origin.xlsx", index_label='sample_id')

### Create PyOD models, trained on trn_val samples

In [None]:
contamination = 0.1

# Contamination level shared by every detector that receives it (LUNAR is
# created with its defaults).
shared_kwargs = {'contamination': contamination}

pyod_methods = {
    'ECOD': ECOD(**shared_kwargs),
    'LUNAR': LUNAR(),
    'DeepSVDD': DeepSVDD(verbose=0, **shared_kwargs),
    'VAE': VAE(encoder_neurons=[32, 16, 8], decoder_neurons=[8, 16, 32], **shared_kwargs),
    'LODA': LODA(**shared_kwargs),
    'INNE': INNE(**shared_kwargs),
    'IForest': IForest(**shared_kwargs),
    'SOD': SOD(**shared_kwargs),
    'KNN': KNN(**shared_kwargs),
    'CBLOF': CBLOF(**shared_kwargs),
    'COF': COF(**shared_kwargs),
    'LOF': LOF(**shared_kwargs),
    'LMDD': LMDD(**shared_kwargs),
    'MCD': MCD(**shared_kwargs),
    'GMM': GMM(**shared_kwargs),
    'Sampling': Sampling(**shared_kwargs),
    'SOS': SOS(**shared_kwargs),
    'COPOD': COPOD(**shared_kwargs),
}

# Fit each detector on the train + validation feature matrix, showing the
# name of the detector being processed in the progress bar.
pbar = tqdm(pyod_methods.items())
for method_name, method in pbar:
    pbar.set_description(f"Processing {method_name}")
    method.fit(df.loc[ids_trn_val, feats].values)

### Apply PyOD models and save the result, or load previously processed data

In [None]:
# Load data with dim_red columns ==============================================
# Re-reads the table saved as df_origin.xlsx (first column becomes the index)
# and replaces the in-memory `df` with it.
df = pd.read_excel(f"{path_save}/df_origin.xlsx", index_col=0)

In [None]:
# Add to df_origin.xlsx PyOD outliers columns =================================
# The return value of add_pyod_outs_to_df is not used, so it presumably adds
# its columns to `df` in place - verify in src.utils.outliers.pyod.
add_pyod_outs_to_df(df, pyod_methods, feats)
# Overwrite the saved table with the current contents of `df`.
df.to_excel(f"{path_save}/df_origin.xlsx", index_label='sample_id')

### Outliers analysis

In [None]:
# IQR plots
# Outputs for the original data go to <path_save>/Origin/outliers_iqr.
pathlib.Path(f"{path_save}/Origin/outliers_iqr").mkdir(parents=True, exist_ok=True)
plot_iqr_outs(df, feats, 'grey', 'Origin', f"{path_save}/Origin/outliers_iqr")
# NOTE(review): `thld_outs_iqr` is not assigned in any visible cell of this
# notebook; it has to be defined beforehand or this call raises NameError.
plot_iqr_outs_regression_error(df, feats, 'Origin', f"{path_save}/Origin/outliers_iqr", thld_outs_iqr, 'Age', 'SImAge', 'SImAge Error')

# PyOD plots
# Outputs for the original data go to <path_save>/Origin/outliers_pyod.
pathlib.Path(f"{path_save}/Origin/outliers_pyod").mkdir(parents=True, exist_ok=True)
plot_pyod_outs(df, pyod_methods, 'grey', 'Origin', f"{path_save}/Origin/outliers_pyod")
# NOTE(review): `thld_outs_pyod` is likewise not assigned in any visible cell.
plot_pyod_outs_regression_error(df, pyod_methods, 'Origin', f"{path_save}/Origin/outliers_pyod", thld_outs_pyod, 'Age', 'SImAge', 'SImAge Error')

# Adversarial attacks

## Generate

In [None]:
# Attack strengths: ten evenly spaced values in [0.1, 1.0] plus ten in
# [0.01, 0.1]; the set union removes exact duplicates before sorting.
epsilons = sorted(set(np.linspace(0.1, 1.0, 10)) | set(np.linspace(0.01, 0.1, 10)))
# One row per attack strength; the MAE columns are filled in below.
df_eps = pd.DataFrame(index=epsilons)

# Per-feature interquartile range of the data in `df`. It does not depend on
# the attack strength, so it is computed once instead of twice per epsilon.
feats_iqr = np.array([iqr(df.loc[:, feat].values) for feat in feats])

for eps_raw in epsilons:

    # Per-feature perturbation budget and step size, both scaled by the IQR.
    eps = eps_raw * feats_iqr
    eps_step = 0.2 * eps_raw * feats_iqr

    attacks = {
        'MomentumIterative': MomentumIterativeMethod(
            estimator=art_regressor,
            norm=np.inf,
            eps=eps,
            eps_step=eps_step,
            decay=0.1,
            max_iter=100,
            targeted=False,
            batch_size=512,
            verbose=True
        ),
        'BasicIterative': BasicIterativeMethod(
            estimator=art_regressor,
            eps=eps,
            eps_step=eps_step,
            max_iter=100,
            targeted=False,
            batch_size=512,
            verbose=True
        ),
        'FastGradient': FastGradientMethod(
            estimator=art_regressor,
            norm=np.inf,
            eps=eps,
            eps_step=eps_step,
            targeted=False,
            num_random_init=0,
            batch_size=512,
            minimal=False,
            summary_writer=False,
        ),
    }

    for attack_name, attack in attacks.items():
        # Every (attack, epsilon) pair gets its own output folder.
        path_curr = f"{path_save}/Evasion/{attack_name}/eps_{eps_raw:0.4f}"
        pathlib.Path(f"{path_curr}").mkdir(parents=True, exist_ok=True)

        X_adv = attack.generate(np.float32(df.loc[:, feats].values))

        # Adversarial table: original Age, attacked features, model prediction
        # on the attacked features and the resulting errors.
        df_adv = df.loc[:, ['Age']].copy()
        df_adv.loc[:, feats] = X_adv
        df_adv["SImAge"] = model(torch.from_numpy(np.float32(df_adv.loc[:, feats].values))).cpu().detach().numpy().ravel()
        df_adv["SImAge Error"] = df_adv["SImAge"] - df_adv["Age"]
        df_adv["|SImAge Error|"] = df_adv["SImAge Error"].abs()
        df_adv.loc[:, "Error Origin"] = df.loc[:, "SImAge"] - df.loc[:, "Age"]
        df_adv.loc[:, "Error Attack"] = df_adv.loc[:, "SImAge"] - df_adv.loc[:, "Age"]
        df_adv['Error Diff'] = df_adv['Error Attack'] - df_adv['Error Origin']
        df_adv['|Error Diff|'] = df_adv['Error Diff'].abs()
        # Project the attacked samples with the already fitted reducers.
        for m, drm in dim_red_models.items():
            dim_red_res = drm.transform(df_adv.loc[:, feats].values)
            df_adv.loc[:, dim_red_labels[m][0]] = dim_red_res[:, 0]
            df_adv.loc[:, dim_red_labels[m][1]] = dim_red_res[:, 1]

        df_adv.to_excel(f"{path_curr}/df.xlsx", index_label='sample_id')

        # Regression metrics per data part, for original ('Origin') and attacked
        # ('Attack') predictions. The target is always the Age column of `df`.
        metrics = get_reg_metrics()
        metrics_cols = [f"{m}_{p}" for m in metrics for p in ids_dict]
        df_metrics = pd.DataFrame(index=metrics_cols)
        for p, ids_part in ids_dict.items():
            for m in metrics:
                metric = metrics[m][0]
                for col, df_src in (('Origin', df), ('Attack', df_adv)):
                    preds = torch.from_numpy(np.float32(df_src.loc[ids_part, "SImAge"].values))
                    target = torch.from_numpy(np.float32(df.loc[ids_part, "Age"].values))
                    df_metrics.at[f"{m}_{p}", col] = float(metric(preds, target).numpy())
                    # Reset after every call, before the metric object is reused.
                    metric.reset()
        df_metrics.to_excel(f"{path_curr}/metrics.xlsx", index_label='Metrics')

        for p in ids_dict:
            # The Origin MAE is written once per epsilon, under the first attack.
            if attack_name == 'MomentumIterative':
                df_eps.loc[eps_raw, f"Origin_MAE_{p}"] = df_metrics.at[f'mean_absolute_error_{p}', 'Origin']
            df_eps.loc[eps_raw, f"{attack_name}_MAE_{p}"] = df_metrics.at[f'mean_absolute_error_{p}', 'Attack']

df_eps.to_excel(f"{path_save}/Evasion/df_eps.xlsx", index_label='eps')

## Plot Error from Eps

In [None]:
for p in ids_dict:
    # Map every "<attack>_MAE_<part>" column of df_eps to its bare attack name,
    # then reshape to long form: one row per (Eps, Method) pair.
    atk_by_col = {f"{x}_MAE_{p}": x for x in colors_atks}
    df_fig = df_eps.loc[:, list(atk_by_col)].copy()
    df_fig = df_fig.rename(columns=atk_by_col)
    df_fig['Eps'] = df_fig.index.values
    df_fig = df_fig.melt(id_vars="Eps", var_name='Method', value_name="MAE")

    sns.set_theme(style='ticks', font_scale=1)
    fig = plt.figure()
    line_kwargs = dict(
        data=df_fig,
        x='Eps',
        y="MAE",
        hue="Method",
        style="Method",
        palette=colors_atks,
        hue_order=list(colors_atks.keys()),
        markers=True,
        dashes=False,
    )
    lines = sns.lineplot(**line_kwargs)
    plt.xscale('log')
    lines.set_xlabel(r'$\epsilon$')

    # Dashed horizontal reference line: MAE of the unattacked predictions,
    # read from the row of the smallest epsilon (0.01).
    x_min, x_max = 0.009, 1.05
    mae_basic = df_eps.at[0.01, f"Origin_MAE_{p}"]
    lines.set_xlim(x_min, x_max)
    plt.gca().plot(
        [x_min, x_max],
        [mae_basic] * 2,
        color='k',
        linestyle='dashed',
        linewidth=1
    )

    plt.savefig(f"{path_save}/Evasion/line_mae_vs_eps_{p}.png", bbox_inches='tight', dpi=200)
    plt.savefig(f"{path_save}/Evasion/line_mae_vs_eps_{p}.pdf", bbox_inches='tight')
    plt.close(fig)

## Plot in reduced dimension

In [None]:
# Attack strengths highlighted in the figures; each one must already have a
# saved df.xlsx under <path_save>/Evasion/<attack>/eps_<eps>.
epsilons_hglt = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5]
# 'Origin' and every highlighted epsilon get a G10 colour by position.
colors_epsilons = {x: px.colors.qualitative.G10[x_id] for x_id, x in enumerate(['Origin'] + epsilons_hglt)}

for atk in colors_atks:
    # Only the t-SNE embedding is plotted here.
    for m in ['t-SNE']:
        # Original samples: error plus the two embedding coordinates.
        df_fig_ori = df.loc[:, ['SImAge Error', dim_red_labels[m][0], dim_red_labels[m][1]]].copy()
        df_fig_ori['Symbol'] = 'o'
        df_fig_ori['index_origin'] = df_fig_ori.index
        df_fig_ori['Eps'] = 'Origin'

        # Collects the original frame and one adversarial frame per epsilon
        # for the KDE plot drawn after the inner loop.
        dfs_fig_adv = [df_fig_ori]
        for eps in epsilons_hglt:
            path_curr = f"{path_save}/Evasion/{atk}/eps_{eps:0.4f}"
            pathlib.Path(f"{path_curr}/SImAgeError").mkdir(parents=True, exist_ok=True)

            df_adv = pd.read_excel(f"{path_curr}/df.xlsx", index_col='sample_id')
            df_fig_adv = df_adv.loc[:, ['SImAge Error', '|Error Diff|', dim_red_labels[m][0], dim_red_labels[m][1]]].copy()
            df_fig_adv['Eps'] = eps
            df_fig_adv['index_origin'] = df_fig_adv.index
            # Suffix the ids so they stay distinct from the original ids after
            # concatenation; assumes sample ids are strings - TODO confirm.
            df_fig_adv.set_index(df_fig_adv.index.values + f'_adv_eps_{eps:0.4f}', inplace=True)
            df_fig_adv['Symbol'] = 'X'
            dfs_fig_adv.append(df_fig_adv)
            # Original and attacked samples of this epsilon in one frame.
            df_fig_all = pd.concat([df_fig_ori, df_fig_adv])

            # Colour scale spanning the error range of both sample sets; it
            # feeds the colorbar that replaces the seaborn legend below.
            norm = plt.Normalize(df_fig_all['SImAge Error'].min(), df_fig_all['SImAge Error'].max())
            sm = plt.cm.ScalarMappable(cmap="spring", norm=norm)
            sm.set_array([])

            sns.set_theme(style='ticks')
            fig, ax = plt.subplots(figsize=(5, 4))
            scatter = sns.scatterplot(
                data=df_fig_all,
                x=dim_red_labels[m][0],
                y=dim_red_labels[m][1],
                palette='spring',
                hue='SImAge Error',
                linewidth=1,
                alpha=0.75,
                edgecolor="k",
                style=df_fig_all.loc[:, 'Symbol'].values,
                s=40,
                ax=ax
            )
            scatter.get_legend().remove()
            scatter.figure.colorbar(sm, label='SImAge Error')
            scatter.set_title(fr'$\epsilon={eps:0.2f}$', loc='left', fontdict={'fontsize': 20})

            # Hand-made legend explaining the two marker shapes.
            legend_handles = [
                mlines.Line2D([], [], marker='o', linestyle='None', markeredgecolor='k', markerfacecolor='lightgrey', markersize=10, label='Real'),
                mlines.Line2D([], [], marker='X', linestyle='None', markeredgecolor='k', markerfacecolor='lightgrey', markersize=10, label='Attack')
            ]
            plt.legend(handles=legend_handles, title="Samples", bbox_to_anchor=(0.4, 1.02, 1, 0.2), loc="lower left", borderaxespad=0, ncol=2, frameon=False)

            plt.savefig(f"{path_curr}/SImAgeError/{m}.png", bbox_inches='tight', dpi=200)
            plt.savefig(f"{path_curr}/SImAgeError/{m}.pdf", bbox_inches='tight')
            plt.close(fig)

        # Error distributions of the original data and of every highlighted
        # epsilon, one KDE curve per 'Eps' value.
        df_fig_adv_eps = pd.concat(dfs_fig_adv)
        sns.set_theme(style='ticks')
        fig, ax = plt.subplots(figsize=(6, 4))
        kdeplot = sns.kdeplot(
            data=df_fig_adv_eps,
            x='SImAge Error',
            palette=colors_epsilons,
            hue='Eps',
            linewidth=2,
            fill=False,
            ax=ax
        )
        plt.savefig(f"{path_save}/Evasion/{atk}/SImAgeError_{m}.png", bbox_inches='tight', dpi=200)
        plt.savefig(f"{path_save}/Evasion/{atk}/SImAgeError_{m}.pdf", bbox_inches='tight')
        plt.close(fig)

## Plot distributions

In [None]:
# Attack strengths highlighted in the figures, and one G10 colour for
# 'Origin' and for each of them (assigned by position).
epsilons_hglt = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5]
colors_epsilons = {x: px.colors.qualitative.G10[x_id] for x_id, x in enumerate(['Origin'] + epsilons_hglt)}

df['Eps'] = 'Origin'
# NOTE(review): 'MarkerSize' is written here and on df_adv below but is not
# read anywhere in this cell (the scatter plots use a fixed s=30).
df['MarkerSize'] = 40

for atk in colors_atks:

    for eps in epsilons_hglt:
        path_curr = f"{path_save}/Evasion/{atk}/eps_{eps:0.4f}"
        # NOTE(review): this folder is created but nothing in this cell is saved into it.
        pathlib.Path(f"{path_curr}/SImAgeError").mkdir(parents=True, exist_ok=True)
        df_adv = pd.read_excel(f"{path_curr}/df.xlsx", index_col='sample_id')
        # Suffix the ids so they differ from the original ids after concatenation.
        df_adv.index += f'_eps_{eps:0.4f}'
        df_adv['Eps'] = eps
        df_adv['MarkerSize'] = 30
        # Original and attacked samples stacked; the 'Eps' column tells them apart.
        df_ori_adv = pd.concat([df, df_adv])

        # One patchworklib Brick per feature for each of the two plot types.
        pw_brick_kdes = {}
        pw_brick_scatters = {}
        for f in feats:

            # Feature distribution, original vs attacked (each normalised separately).
            pw_brick_kdes[f] = pw.Brick(figsize=(3, 2))
            sns.set_theme(style='whitegrid')
            kdeplot = sns.kdeplot(
                data=df_ori_adv,
                x=f,
                hue='Eps',
                palette={'Origin': 'grey', eps: colors_epsilons[eps]},
                hue_order=['Origin', eps],
                fill=True,
                common_norm=False,
                ax=pw_brick_kdes[f]
            )

            # Feature value against Age, original vs attacked.
            pw_brick_scatters[f] = pw.Brick(figsize=(3, 2))
            sns.set_theme(style='whitegrid')
            scatterplot = sns.scatterplot(
                data=df_ori_adv,
                x=f,
                y='Age',
                hue='Eps',
                palette={'Origin': 'grey', eps: colors_epsilons[eps]},
                hue_order=['Origin', eps],
                linewidth=0.85,
                alpha=0.75,
                edgecolor="k",
                marker='o',
                s=30,
                ax=pw_brick_scatters[f]
            )

        # Arrange the bricks row by row in a grid with n_cols columns.
        n_cols = 5
        n_rows = int(np.ceil(len(feats)/ n_cols))
        pw_rows_kdes = []
        pw_rows_scatters = []
        for r_id in range(n_rows):
            pw_cols_kdes = []
            pw_cols_scatters = []
            for c_id in range(n_cols):
                rc_id = r_id * n_cols + c_id
                if rc_id < len(feats):
                    f = feats[rc_id]
                    pw_cols_kdes.append(pw_brick_kdes[f])
                    pw_cols_scatters.append(pw_brick_scatters[f])
                else:
                    # Pad an incomplete last row with a blank brick.
                    # NOTE(review): the same Brick object goes into both grids and
                    # its figsize differs from the (3, 2) feature bricks - confirm
                    # that patchworklib supports this. Only reached when len(feats)
                    # is not a multiple of n_cols.
                    empty_fig = pw.Brick(figsize=(4.67, 3))
                    empty_fig.axis('off')
                    pw_cols_kdes.append(empty_fig)
                    pw_cols_scatters.append(empty_fig)
            # "|" places the bricks of one row side by side.
            pw_rows_kdes.append(pw.stack(pw_cols_kdes, operator="|"))
            pw_rows_scatters.append(pw.stack(pw_cols_scatters, operator="|"))
        # "/" stacks the rows vertically into the final figures.
        pw_fig_kde = pw.stack(pw_rows_kdes, operator="/")
        pw_fig_kde.savefig(f"{path_curr}/feats_kde.png", bbox_inches='tight', dpi=200)
        pw_fig_kde.savefig(f"{path_curr}/feats_kde.pdf", bbox_inches='tight')
        pw_fig_scatter = pw.stack(pw_rows_scatters, operator="/")
        pw_fig_scatter.savefig(f"{path_curr}/feats_scatter.png", bbox_inches='tight', dpi=200)
        pw_fig_scatter.savefig(f"{path_curr}/feats_scatter.pdf", bbox_inches='tight')
        # Reset patchworklib before the next (attack, epsilon) figure is built.
        pw.clear()

# Adversarial defences from attacks

## Generate detectors

In [None]:
# Feature-only view of the unattacked samples, labelled as class 'Original'.
df_ori = df[feats].copy()
df_ori['Class'] = 'Original'

for atk in colors_atks:

    # Rows: epsilon the detector was trained on. Columns: name of the selected
    # model and the accuracy on every other epsilon (the diagonal is not filled).
    df_def_acc = pd.DataFrame(index=epsilons, columns=['Model'] + list(epsilons))

    for eps in tqdm(epsilons):

        path_curr = f"{path_save}/Evasion/{atk}/eps_{eps:0.4f}"
        df_adv = pd.read_excel(f"{path_curr}/df.xlsx", index_col='sample_id')
        # Copy the column subset before adding 'Class', so the assignment
        # targets an independent frame (no SettingWithCopyWarning), exactly
        # as done for `df_ori` above.
        df_adv = df_adv[feats].copy()
        df_adv['Class'] = 'Attack'
        # Binary task: original vs attacked samples, split by data part.
        df_def_trn_val = pd.concat([df_ori.loc[ids_trn_val, :], df_adv.loc[ids_trn_val, :]])
        df_def_tst = pd.concat([df_ori.loc[ids_tst, :], df_adv.loc[ids_tst, :]])

        # The configs are rebuilt for every epsilon so that no config object
        # is shared between sweeps.
        data_config = DataConfig(
            target=['Class'],
            continuous_cols=list(feats),
            continuous_feature_transform='yeo-johnson',
            normalize_continuous_features=True,
        )

        trainer_config = TrainerConfig(
            batch_size=1024,
            max_epochs=100,
            min_epochs=1,
            auto_lr_find=True,
            early_stopping='valid_loss',
            early_stopping_min_delta=0.0001,
            early_stopping_mode='min',
            early_stopping_patience=100,
            checkpoints='valid_loss',
            checkpoints_path=f"{path_curr}/detector",
            load_best=True,
            progress_bar='none',
            seed=42
        )

        optimizer_config = OptimizerConfig(
            optimizer='Adam',
            lr_scheduler='CosineAnnealingWarmRestarts',
            lr_scheduler_params={
                'T_0': 10,
                'T_mult': 1,
                'eta_min': 0.00001,
            },
            lr_scheduler_monitor_metric='valid_loss'
        )

        head_config = LinearHeadConfig(
            layers='',
            activation='ReLU',
            dropout=0.1,
            use_batch_norm=False,
            initialization='xavier',
        ).__dict__

        # Warnings raised during the sweep are silenced; the best model is
        # chosen by accuracy.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            sweep_df, best_model = model_sweep(
                task="classification",
                train=df_def_trn_val,
                test=df_def_tst,
                data_config=data_config,
                optimizer_config=optimizer_config,
                trainer_config=trainer_config,
                model_list="standard",
                common_model_args=dict(head="LinearHead", head_config=head_config),
                metrics=[
                    'accuracy',
                    'f1_score',
                    'precision',
                    'recall',
                    'specificity',
                    'cohen_kappa',
                    'auroc'
                ],
                metrics_prob_input=[True, True, True, True, True, True, True],
                metrics_params=[
                    {'task': 'multiclass', 'num_classes': 2, 'average': 'weighted'},
                    {'task': 'multiclass', 'num_classes': 2, 'average': 'weighted'},
                    {'task': 'multiclass', 'num_classes': 2, 'average': 'weighted'},
                    {'task': 'multiclass', 'num_classes': 2, 'average': 'weighted'},
                    {'task': 'multiclass', 'num_classes': 2, 'average': 'weighted'},
                    {'task': 'multiclass', 'num_classes': 2},
                    {'task': 'multiclass', 'num_classes': 2, 'average': 'weighted'},
                ],
                rank_metric=("accuracy", "higher_is_better"),
                progress_bar=False,
                verbose=False,
                suppress_lightning_logger=True,
            )
        # Delete everything the sweep left in the checkpoint folder.
        ckpts = glob(f"{path_curr}/detector/*")
        for ckpt in ckpts:
            os.remove(ckpt)
        # best_model.save_model(f"{path_curr}/detector")
        df_def_acc.at[eps, 'Model'] = best_model.config['_model_name']

        # Evaluate the detector trained on `eps` against every other attack
        # strength, using all original samples plus all attacked samples.
        for tst_eps in epsilons:
            if tst_eps != eps:
                path_tst = f"{path_save}/Evasion/{atk}/eps_{tst_eps:0.4f}"
                df_adv_tst = pd.read_excel(f"{path_tst}/df.xlsx", index_col='sample_id')
                # Same explicit copy as for the training frame above.
                df_adv_tst = df_adv_tst[feats].copy()
                df_adv_tst['Class'] = 'Attack'
                df_def_tst_eps = pd.concat([df_ori, df_adv_tst])
                metrics = best_model.evaluate(test=df_def_tst_eps, verbose=False)[0]
                df_def_acc.at[eps, tst_eps] = metrics['test_accuracy']
    df_def_acc.to_excel(f"{path_save}/Evasion/{atk}/detectors_accuracy.xlsx")
    

## Plot detectors accuracy

In [None]:
for atk in colors_atks:
    # Rows: epsilon the detector was trained on (plus the 'Model' name column).
    # Columns: epsilon the detector was evaluated on.
    df_def_acc = pd.read_excel(f"{path_save}/Evasion/{atk}/detectors_accuracy.xlsx", index_col=0)

    # Row labels become "<model name>\n<training eps>"; column labels become
    # two-decimal epsilon strings.
    df_def_acc['Eps'] = [f"{x:.2f}" for x in df_def_acc.index.values]
    df_def_acc['index'] = df_def_acc['Model'] + '\n' + df_def_acc['Eps']
    df_def_acc = df_def_acc.set_index('index').drop(['Model', 'Eps'], axis=1)
    df_def_acc = df_def_acc.rename(columns={x: f"{x:.2f}" for x in df_def_acc.columns})

    df_fig = df_def_acc.astype(float)
    sns.set_theme(style='ticks', font_scale=1.0)
    fig, ax = plt.subplots(figsize=(13, 12))
    colorbar_kwargs = {
        'orientation': 'horizontal',
        'location': 'top',
        'pad': 0.025,
        'aspect': 30
    }
    heatmap = sns.heatmap(
        df_fig,
        annot=True,
        fmt=".2f",
        cmap='hot',
        linewidth=0.1,
        linecolor='black',
        cbar_kws=colorbar_kwargs,
        annot_kws={"size": 12},
        ax=ax
    )
    ax.set_xlabel('Test Attack Strength')
    ax.set_ylabel('Training Model and Data')
    heatmap_pos = heatmap.get_position()

    # The last axes of the figure - presumably the colorbar added by
    # sns.heatmap - gets a title and visible spines.
    cbar_ax = ax.figure.axes[-1]
    cbar_ax.set_title("Accuracy")
    cbar_ax.tick_params()
    for spine in cbar_ax.spines.values():
        spine.set_linewidth(1)

    plt.savefig(f"{path_save}/Evasion/{atk}/detectors_accuracy.png", bbox_inches='tight', dpi=200)
    plt.savefig(f"{path_save}/Evasion/{atk}/detectors_accuracy.pdf", bbox_inches='tight')
    plt.close(fig)

# Outliers analysis for attacks

In [None]:
# Attack strengths to analyse; 'Origin' and each of them are paired with the
# leading G10 colours in order.
epsilons_hglt = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5]
colors_epsilons = dict(zip(['Origin'] + epsilons_hglt, px.colors.qualitative.G10))

for atk in colors_atks:

    for eps in epsilons_hglt:
        path_curr = f"{path_save}/Evasion/{atk}/eps_{eps:0.4f}"
        pathlib.Path(f"{path_curr}/SImAgeError").mkdir(parents=True, exist_ok=True)
        df_adv = pd.read_excel(f"{path_curr}/df.xlsx", index_col='sample_id')

        # Colour and title shared by both outlier plots of this (attack, eps) pair.
        color_curr = colors_epsilons[eps]
        title_curr = f"{atk} Eps({eps})"

        # IQR outliers
        path_iqr = f"{path_curr}/outliers_iqr"
        pathlib.Path(path_iqr).mkdir(parents=True, exist_ok=True)
        plot_iqr_outs(df_adv, feats, color_curr, title_curr, path_iqr)

        # PyOD plots
        path_pyod = f"{path_curr}/outliers_pyod"
        pathlib.Path(path_pyod).mkdir(parents=True, exist_ok=True)
        plot_pyod_outs(df_adv, pyod_methods, color_curr, title_curr, path_pyod)