In [2]:
% load_ext autoreload
% autoreload 2
% matplotlib inline

import pickle
import pandas as pd
from matplotlib import pyplot as plt

import os
import sys
sys.path.append('..')

from src.evaluation import Plotter, Evaluator

### After adding to Plotter

In [16]:
# Create the Plotter used by the cells below. It is given two paths: an output
# directory and the directory holding the pickles of the pollution experiment.
# NOTE(review): output_dir points at './reports' while the pickles are read from
# '../reports' — confirm that writing next to the notebook is intended.
output_dir = './reports'
pickles_dir = '../reports/experiment_pollution/shift_1'
plotter = Plotter(output_dir, pickles_dir)
# NOTE(review): the recorded output of this call is 0.4, so the index -2
# presumably selects the second-to-last anomaly percentage — confirm in Plotter.
plotter.fix_anomaly_percentage(-2)

0.4

In [18]:
# Print the LaTeX line plot for the pollution experiment so that it can be
# copied into the report.
# The caption already carries raw LaTeX markup (\textbf), so it appears to be
# inserted verbatim — TODO confirm in Plotter.latex_lineplot. Under that
# assumption the percent sign has to be escaped: a bare % starts a LaTeX comment
# and would swallow the rest of the caption line.
print(plotter.latex_lineplot(
    title='Pollution',
    x_label='Pollution In Training Data',
    caption='Comparison of the presented approaches on \\textbf{shift} anomalies with varying '
    'pollution levels for the training dataset. The anomaly percentage for the test dataset is '
    'fixed to 40~\\%.',
    latex_label='pollution_lineplot',
))


\begin{figure}[!h]
    \centering
    \begin{subfigure}{.475\linewidth}
    \centering
    \begin{tikzpicture}[yscale=0.9]
    \begin{axis}[
        % title={{Pollution}},
        xlabel={Pollution In Training Data},
        ylabel={AUROC},
        xmin=0, xmax={1.10},
        ymin=0, ymax=1,
        xtick={0.00, 0.25, 0.50, 0.75, 1.00},
        ytick={0.0, 0.2, 0.4, 0.6, 0.8, 1.0},
        legend entries={\lstmdagmm, \lstmed, \lstmad, \ebm, OLD-DAGMM-NN, \donut, \dagmm},
        legend pos = outer north east,
        ymajorgrids=true,
        grid style=dashed,
        width=\linewidth
    ]

	\addplot coordinates{(0.00, 0.72) (0.25, 0.75) (0.50, 0.64) (0.75, 0.67) (1.00, 0.62)}
	\addplot coordinates{(0.00, 0.83) (0.25, 0.82) (0.50, 0.85) (0.75, 0.83) (1.00, 0.83)}
	\addplot coordinates{(0.00, 0.92) (0.25, 0.82) (0.50, 0.95) (0.75, 0.89) (1.00, 0.93)}
	\addplot coordinates{(0.00, 0.99) (0.25, 0.84) (0.50, 0.95) (0.75, 0.86) (1.00, 0.86)}
	\addplot coordinates{(0.00, 0.89) (0.25, 0.83

### Code by Willi

In [None]:
# Group-size switch read by the following cells: with None the datasets whose
# name contains "with 20 " are dropped, with any other value only those are kept.
GROUPS = None
#GROUPS = 20

# Result files of the individual experiment runs, resolved relative to the
# current working directory.
# NOTE: unpickling executes code stored in the file — only load trusted results.
RESULT_PICKLES = [
    "experiment-multi_dim_multivariate-2018-07-23-013012.pkl",
    "experiment-run-1-1043199673-2018-08-01-194308.pkl",
    "experiment-run-0-906869411-2018-08-01-131414.pkl",
]

# DataFrame.append was removed in pandas 2.0, so the per-run frames are stacked
# with pd.concat instead.
benchmark_frames = [pd.read_pickle(path)['benchmark_results'] for path in RESULT_PICKLES]

# Average the AUROC over all runs of the same (dataset, algorithm) pair. The
# column is selected before aggregating so that non-numeric columns of the
# benchmark results cannot make mean() fail.
df = (
    pd.concat(benchmark_frames)
    .groupby(["dataset", "algorithm"], as_index=False)[["auroc"]]
    .mean()
)
df = df[["dataset", "algorithm", "auroc"]]
df.head()


In [None]:
# Split the results on whether the dataset name contains "with 20 ": keep the
# matching rows when GROUPS is set, and every other row when GROUPS is None.
mentions_20 = df['dataset'].str.contains("with 20 ")
df = df[mentions_20] if GROUPS is not None else df[~mentions_20]

In [None]:
import collections

def recursively_default_dict():
    """Return a defaultdict whose missing keys are again such defaultdicts."""
    return collections.defaultdict(recursively_default_dict)

results = recursively_default_dict() # function - algorithm - dimension
# Maps the algorithm names stored in the benchmark results to the short labels
# used in the plots; an algorithm missing here raises a KeyError below.
algorithm_name = {'DAGMM_LSTMAutoEncoder_withWindow': 'DAGMM-LW', 'DAGMM_NNAutoEncoder_withWindow': 'DAGMM-NW',
                  'DAGMM_NNAutoEncoder_withoutWindow': 'DAGMM-NN', 'LSTM-AD': 'LSTM-AD', 'Recurrent EBM': 'REBM', 'LSTMED': 'LSTMED'}
dimensions = set()
functions = ['Doubled', 'Shrinked', 'Inversed', 'XOR', 'Delayed', 'Delayed Missing']
algorithms = set()

# Columns are addressed by name: integer positions on a label-indexed row
# (row[0], row[1], ...) are deprecated in pandas 2.x.
score_rows = df[["dataset", "algorithm", "auroc"]].itertuples(index=False, name=None)
for dataset, algorithm_key, auroc in score_rows:
    # The function name is the text between "(f=" and the next ")".
    function = dataset.split('(f=')[1].split(')')[0]
    # The dimension is the third space-separated token of the text that
    # precedes "-dimensional".
    dimension = int(dataset.split('-dimensional')[0].split(' ')[2])
    dimensions.add(dimension)
    algorithm = algorithm_name[algorithm_key]
    algorithms.add(algorithm)
    results[function][algorithm][dimension] = float(auroc)
algorithms = sorted(algorithms)
# Show one function's scores as the cell output for a quick sanity check.
results['delayed']

In [None]:
# Print the LaTeX source of one figure that holds a pgfplots subfigure per
# anomaly function: AUROC on the y axis, the number of dimensions on the x axis
# and one \addplot per algorithm.
x_label = "Dimensions"
# Sort once so that tick labels and plot coordinates share the same ascending
# order; iterating the raw set gives no ordering guarantee and can make the
# plotted lines jump back and forth.
sorted_dimensions = sorted(dimensions)
x_ticks = ",".join(str(dimension) for dimension in sorted_dimensions)
# Extend the x axis a little beyond the largest dimension.
x_max = str(max(dimensions) + 100)
last_function_index = len(functions) - 1

for function_index, function in enumerate(functions):
    # Scores are looked up under the lower-cased name with spaces as underscores.
    function_lower = function.lower().replace(' ', '_')
    title = f"({function_index}) {function}"

    # Add figure block
    header = ""
    if function_index == 0:
        header += "\\begin{figure}[!h]\n\\centering"
    header += """\\begin{subfigure}{.475\\linewidth}
    \\centering
    """

    # The placeholders are substituted one after another, so none of the
    # inserted values may itself contain a placeholder that is replaced later.
    header += """
    \\begin{tikzpicture}[yscale=0.9]
    \\begin{axis}[
        title={{the_title}},
        xlabel={x_label},
        ylabel={AUROC},
        xmin=0, xmax={x_max},
        ymin=0, ymax=1,
        xtick={dimensions},
        ytick={0,0.2, 0.4, 0.6, 0.8, 1.0},
        legend pos=north west,
        ymajorgrids=true,
        grid style=dashed,
        legend style ={ at={(0,0)}, 
            anchor=south west, draw=black, 
            fill=white,align=left},
        %cycle list name=black white,
        width=\\linewidth
    ]
    """.replace("the_title", title).replace("x_label", x_label).replace("dimensions", x_ticks).replace("x_max", x_max)
    print(header)

    for algorithm in algorithms:
        add_plot = """
        \\addplot
            coordinates {
            """
        scores = results[function_lower][algorithm]
        for dimension in sorted_dimensions:
            # A combination without a recorded score is left out; reading it
            # from the nested defaultdict would write the repr of an empty
            # dict into the LaTeX source instead of a number.
            if dimension in scores:
                add_plot += f"({dimension},{scores[dimension]})"
        # The \addplot is printed even when it has no coordinates so that the
        # plots stay aligned with the legend entries.
        print(add_plot + "};")
    if function_index == 0:
        # The legend is only attached to the first subfigure.
        print("\\legend{algorithms}".replace("algorithms", ",".join(algorithms)))
    print("""
    \\end{axis}
    \\end{tikzpicture}
    \\end{subfigure}""")

    if function_index == last_function_index:
        # Always close the figure after the last subfigure, regardless of
        # whether the number of functions is even or odd.
        group_text = "The group size is set to "
        if GROUPS is None:
            group_text += "the number of dimensions."
        else:
            group_text += "20."

        print("\\caption{Comparison of the presented approaches on different anomalies with varying dimensions. " + group_text + "}")
        if GROUPS is None:
            print("\\label{fig:multid_groups_none}")
        else:
            print("\\label{fig:multid_groups_20}")
        print("\\end{figure}")
    elif function_index % 2 == 0:
        # Subfigures are laid out in pairs; \hfill separates the left one from
        # the right one.
        print("\\hfill")