In [None]:
import numpy as np
import os
import pandas as pd
import sys
import torch


from matplotlib import pyplot as plt


# Make the parent of the current working directory importable so that the
# project-level imports that follow (`config`, `experiments.MNIST`) can be resolved.
project_dir = os.path.join(os.getcwd(), '..')
if sys.path.count(project_dir) == 0:
    # Only append once, so re-running the cell does not grow sys.path.
    sys.path.append(project_dir)
import config

from experiments.MNIST import ExperimentADeLEn, ExperimentSupervised, ExperimentSVM

# Pollution experiments using MNIST


In [None]:
# Pollution levels swept by the cells below; each value is passed as the second
# constructor argument of the experiment classes and is also interpolated into
# the saved figure file names, so the first entry stays the integer 0.
pollution_exp = [0, 0.05, 0.1, 0.2]

## Table

In [None]:
from itertools import chain
def generate_multi_df(data:list, parent_index:list, child_index:list) -> pd.DataFrame:
    """Build a single-column frame indexed by (parent, child) label pairs.

    Parameters
    ----------
    data : list
        Iterable of iterables; it is flattened one level, so it should hold one
        inner sequence per parent label with one value per child label.
    parent_index : list
        Outer-level labels.
    child_index : list
        Inner-level labels, repeated under every parent label.

    Returns
    -------
    pd.DataFrame
        Frame whose rows follow parent-major order: every child label of the
        first parent, then every child label of the second parent, and so on.
    """
    # Parent-major cartesian product of the two label lists.
    label_pairs = [(parent, child) for parent in parent_index for child in child_index]
    multi_index = pd.MultiIndex.from_tuples(label_pairs)

    # Flatten one level so each value lines up with one (parent, child) row.
    flat_values = list(chain.from_iterable(data))
    return pd.DataFrame(flat_values, index=multi_index)

In [None]:
# Summarise the stored metrics of ExperimentADeLEn(.1, .2, 2) as a one-row table
# whose columns are (metric, Mean/Std) pairs.
exp = ExperimentADeLEn(0.1, 0.2, 2)
result_dir = exp.config()['save_result_dir']
df = pd.read_csv(os.path.join(result_dir, 'metrics.csv'), index_col=0)

# One (mean, std) pair per metric column, in the order used for the table below.
acc, prec, rec, f1, auc = (
    (df[column].mean(), df[column].std())
    for column in ('Accuracy', 'Precision', 'Recall', 'F1', 'AUC')
)

data = [acc, prec, rec, f1, auc]
parent_index = ['Acc', 'Prec', 'Recall', 'F1', 'AUC']
child_index = ['Mean', 'Std']

# generate_multi_df yields one row per (metric, statistic); transpose so that the
# pairs become columns. Note that `df` is rebound from the raw metrics to the summary.
df = generate_multi_df(data, parent_index, child_index).T
df

In [None]:
# Reload the raw metrics of ExperimentADeLEn(.1, .2, 2) into `df` and display the
# (mean, std) of its accuracy column.
exp = ExperimentADeLEn(0.1, 0.2, 2)
result_dir = exp.config()['save_result_dir']
df = pd.read_csv(os.path.join(result_dir, 'metrics.csv'), index_col=0)

accuracy = df['Accuracy']
(accuracy.mean(), accuracy.std())

In [None]:
# (mean, std) of every reported metric for the frame currently bound to `df`.
tuple(
    (df[metric].mean(), df[metric].std())
    for metric in ('Accuracy', 'Precision', 'Recall', 'F1', 'AUC')
)

In [None]:
# Load the stored metrics of ExperimentSupervised(.1, .2) into `df`.
exp = ExperimentSupervised(0.1, 0.2)
result_dir = exp.config()['save_result_dir']
metrics_csv = os.path.join(result_dir, 'metrics.csv')
df = pd.read_csv(metrics_csv, index_col=0)

In [None]:
# (mean, std) of every reported metric for the frame currently bound to `df`.
tuple(
    (df[metric].mean(), df[metric].std())
    for metric in ('Accuracy', 'Precision', 'Recall', 'F1', 'AUC')
)

## Images

In [None]:
def generate_score_histogram(score_df:pd.DataFrame, normal_bins=10, anomaly_bins=10):
    """Plot overlaid histograms of the 'Normal' and 'Anomaly' score columns.

    Parameters
    ----------
    score_df : pd.DataFrame
        Must provide a 'Normal' and an 'Anomaly' column holding the scores.
    normal_bins, anomaly_bins
        Forwarded as ``bins`` to ``Axes.hist`` for the respective column.

    Returns
    -------
    matplotlib.figure.Figure
        A new 5x4 inch figure. It is not shown, saved or closed here; the caller
        owns it.
    """
    from matplotlib.ticker import MaxNLocator

    # Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and the
    # old names were removed afterwards, so prefer the new name when it is installed.
    # If neither name is listed, fall back to the original one so that the failure
    # mode (an error naming the missing style) stays the same as before.
    style_candidates = ('seaborn-v0_8-colorblind', 'seaborn-colorblind')
    style_name = next(
        (name for name in style_candidates if name in plt.style.available),
        'seaborn-colorblind',
    )

    with plt.style.context(style_name):
        fig, ax = plt.subplots(figsize=(5, 4))
        ax.hist(score_df['Normal'], bins=normal_bins, alpha=.7, label='Normal')
        ax.hist(score_df['Anomaly'], bins=anomaly_bins, alpha=.7, label='Anomaly')
        ax.legend(fontsize='x-large')
        # Limit both axes to a handful of major ticks to keep the small figure readable.
        ax.xaxis.set_major_locator(MaxNLocator(nbins=5))
        ax.yaxis.set_major_locator(MaxNLocator(nbins=5))
        ax.set_ylabel('Samples', fontsize='xx-large')
        ax.set_xlabel('Score', fontsize='xx-large')
        ax.tick_params(axis='both', which='major', labelsize='x-large')
        ax.grid()

    return fig
    
def plot_reconstructed(model, r0=(-6, 6), r1=(-6, 6), n=12):
    """Decode a regular n-by-n grid of 2-D points and show the results as one mosaic.

    Parameters
    ----------
    model
        Object exposing ``eval()`` and ``decode_path(z)``. ``decode_path`` is
        called with a (1, 2) tensor and its output, after ``tanh``, must be
        reshapeable to 28x28.
    r0 : tuple
        (min, max) of the first coordinate, drawn along the x axis.
    r1 : tuple
        (min, max) of the second coordinate, drawn along the y axis.
    n : int
        Number of grid points per axis.

    Returns
    -------
    matplotlib.figure.Figure
        A new 5x5 inch figure containing the mosaic; the caller owns it.
    """
    model.eval()
    w = 28  # side length of one decoded tile
    img = np.zeros((n*w, n*w))

    # Build the decoder input on the same device as the model. Objects without
    # parameters (or without a `parameters` method) keep the original CPU behaviour.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device('cpu')

    fig, ax = plt.subplots(figsize=(5, 5))

    # Pure inference: no autograd bookkeeping is needed for the decoded tiles.
    with torch.no_grad():
        for i, y in enumerate(np.linspace(*r1, n)):
            # Row 0 of the image is drawn at the top, so larger y goes to smaller rows.
            row = n - 1 - i
            for j, x in enumerate(np.linspace(*r0, n)):
                z = torch.Tensor([[x, y]]).to(device)
                x_hat = torch.tanh(model.decode_path(z)) # ADeLEn
                x_hat = x_hat.reshape(w, w).to('cpu').detach().numpy()
                img[row*w:(row+1)*w, j*w:(j+1)*w] = x_hat

    # Raw strings keep the LaTeX backslashes literal without invalid-escape warnings.
    ax.set_xlabel(r'$\mathcal{N}(0, \sigma_1)$', fontsize='xx-large')
    ax.set_ylabel(r'$\mathcal{N}(0, \sigma_2)$', fontsize='xx-large')
    ax.tick_params(axis='both', which='major', labelsize='x-large')
    ax.imshow(img, extent=[*r0, *r1], cmap='viridis')

    return fig

### ADeLEn

In [None]:
# Save one score histogram per (d, pollution) combination of ExperimentADeLEn.
# `d` is the third constructor argument (presumably the latent dimension - confirm).
for d in [2, 5, 10]:
    for pollution in pollution_exp:
        exp = ExperimentADeLEn(.1, pollution, d)
        result_dir = exp.config()['save_result_dir']
        img_dir = exp.config()['save_imgs_dir']

        df = pd.read_csv(os.path.join(result_dir, 'metrics.csv'), index_col=0)
        # Index label of the row whose AUC is closest to the mean AUC. The mean is
        # computed once here instead of once per row inside a lambda.
        idx = (df['AUC'] - df['AUC'].mean()).abs().idxmin()

        # The pickled scores are keyed 'It <k>' with k = idx + 1, i.e. the metrics
        # index is taken to be 0-based while the score keys are 1-based.
        score_df = pd.read_pickle(os.path.join(result_dir, 'sample_score.pkl'))[f'It {idx+1}']
        fig = generate_score_histogram(score_df)
        # NOTE(review): the file name only encodes `pollution`; unless
        # 'save_imgs_dir' differs per `d`, later values of `d` overwrite earlier
        # figures - confirm against ExperimentADeLEn.config().
        fig.savefig(os.path.join(img_dir, f'score_histogram_{pollution}.pdf'), bbox_inches='tight')
        plt.close(fig)

#### Reconstructions

In [None]:
# For every pollution level: call run() on a d=2 ADeLEn experiment, then save the
# decoder mosaic produced from `exp.model`.
for pollution in pollution_exp:
    exp = ExperimentADeLEn(0.1, pollution, d=2)
    # The output directory is read before run(), matching the original statement order.
    img_dir = exp.config()['save_imgs_dir']
    exp.run()

    fig = plot_reconstructed(exp.model, n=10, r0=(-6, 6), r1=(-6, 6))
    out_path = os.path.join(img_dir, f'reconstruction_{pollution}.pdf')
    fig.savefig(out_path, bbox_inches='tight')
    # Close explicitly so figures do not accumulate across iterations.
    plt.close(fig)

### Supervised

In [None]:
# Save one score histogram per pollution level for ExperimentSupervised.
for pollution in pollution_exp:
    exp = ExperimentSupervised(.1, pollution)
    result_dir = exp.config()['save_result_dir']
    img_dir = exp.config()['save_imgs_dir']

    df = pd.read_csv(os.path.join(result_dir, 'metrics.csv'), index_col=0)
    # Index label of the row whose AUC is closest to the mean AUC. The mean is
    # computed once here instead of once per row inside a lambda.
    idx = (df['AUC'] - df['AUC'].mean()).abs().idxmin()

    # Work on an explicit copy: the columns are reassigned below, and doing that on
    # a selection of the unpickled object is ambiguous (view vs. copy) and can
    # trigger pandas' SettingWithCopyWarning.
    score_df = pd.read_pickle(os.path.join(result_dir, 'sample_score.pkl'))[f'It {idx+1}'].copy()
    # Unwrap each stored score with .item(); None entries are passed through untouched.
    score_df['Normal'] = score_df['Normal'].apply(lambda x: x.item() if x is not None else x)
    score_df['Anomaly'] = score_df['Anomaly'].apply(lambda x: x.item() if x is not None else x)

    # NOTE(review): the 'Normal' histogram uses a single bin only for pollution .2;
    # the reason is not visible in this notebook - confirm it is still wanted.
    fig = generate_score_histogram(score_df, normal_bins=(1 if pollution==.2 else 10))
    fig.savefig(os.path.join(img_dir, f'score_histogram_{pollution}.pdf'), bbox_inches='tight')
    plt.close(fig)

### SVM

In [None]:
# Save one score histogram per pollution level for ExperimentSVM.
for pollution in pollution_exp:
    exp = ExperimentSVM(.1, pollution)
    result_dir = exp.config()['save_result_dir']
    img_dir = exp.config()['save_imgs_dir']

    df = pd.read_csv(os.path.join(result_dir, 'metrics.csv'), index_col=0)
    # Index label of the row whose AUC is closest to the mean AUC. The mean is
    # computed once here instead of once per row inside a lambda.
    idx = (df['AUC'] - df['AUC'].mean()).abs().idxmin()

    # The pickled scores are keyed 'It <k>' with k = idx + 1, i.e. the metrics
    # index is taken to be 0-based while the score keys are 1-based.
    score_df = pd.read_pickle(os.path.join(result_dir, 'sample_score.pkl'))[f'It {idx+1}']

    fig = generate_score_histogram(score_df)
    fig.savefig(os.path.join(img_dir, f'score_histogram_{pollution}.pdf'), bbox_inches='tight')
    plt.close(fig)