In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../../')
import pandas as pd
import plotly.graph_objects as go
from sklearn.metrics import roc_auc_score
import numpy as np
from utils.util import cfg
from src.ad2s import AD2SDetector
import time

Here, we plot the AUC score and the execution time for different numbers of chains.


In [2]:
def ad2s_detect(df, chains_candidate, seed=0):
    """Evaluate AD2SDetector AUC and run time for several chain counts.

    A detector with ``max(chains_candidate)`` base chains is run over
    ``df['data']`` once per detector seed; five seeds in ``[0, 100)`` are
    drawn from a generator seeded with ``seed``. After every sample, the
    score for a candidate count ``k`` is the mean of the first ``k`` entries
    of ``clf.score_list`` and the time is ``clf.time_list[k - 1]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``data`` column (fed to the detector one value at a
        time) and a ``label`` column (ground truth for the AUC). Rows whose
        ``data`` equals 0.0 are excluded from the AUC computation. The frame
        is not modified.
    chains_candidate : sequence of int
        Chain counts to evaluate. It is iterated repeatedly, so it must be
        re-iterable.
    seed : int, default 0
        Seed of the generator that draws the per-run detector seeds.

    Returns
    -------
    dict
        Maps each chain count to ``{'auc': mean AUC over runs,
        'err': sample standard deviation of the AUC (ddof=1),
        'time': mean over runs of the summed per-sample times}``.
    """
    # The evaluation mask and the ground truth are the same for every run and
    # every chain count, so compute them once instead of inside the loops.
    nonzero = (df['data'] != 0.0).to_numpy()
    truth = df['label'].to_numpy()[nonzero]
    n_base = max(chains_candidate)

    auc_dict = {}
    time_dict = {}

    # Use a distinct loop name so the `seed` argument is not shadowed.
    for run_seed in np.random.default_rng(seed).integers(0, 100, 5):
        clf = AD2SDetector(n_base=n_base, seed=run_seed)
        predict = {}
        time_cnt = {}

        for item in df['data']:
            # The return value is not needed; `score_list` / `time_list` are
            # read right after the call (presumably refreshed by it — TODO
            # confirm against AD2SDetector).
            clf.predict(item)
            for k in chains_candidate:
                predict.setdefault(k, []).append(np.mean(clf.score_list[:k]))
                time_cnt.setdefault(k, []).append(clf.time_list[k - 1])

        for k, scores in predict.items():
            # Mask a local array rather than writing a 'predict' column into
            # the caller's DataFrame.
            auc = roc_auc_score(truth, np.asarray(scores)[nonzero])
            auc_dict.setdefault(k, []).append(auc)
            time_dict.setdefault(k, []).append(np.sum(time_cnt[k]))

    return {
        k: {
            'auc': np.mean(aucs),
            'err': np.std(aucs, ddof=1),
            'time': np.mean(time_dict[k]),
        }
        for k, aucs in auc_dict.items()
    }

In [3]:
def plot_res(df):
    """Build the dual-axis figure of AUC and run time versus chain count.

    ``df`` must provide the columns ``num`` (x values), ``auc``, ``err``
    (error-bar values for the AUC trace) and ``time``. The x-axis title and
    the x tick positions are read from the module-level ``cfg``.

    Returns the resulting ``go.Figure``.
    """
    # Frame styling shared by the x axis and both y axes.
    boxed_axis = dict(
        showline=True,
        showgrid=False,
        linecolor="black",
        linewidth=2,
        mirror=True,
    )

    # AUC on the primary (left) y axis, with error bars taken from `err`.
    auc_trace = go.Scatter(
        x=df["num"],
        y=df["auc"],
        mode="lines+markers",
        error_y=dict(type="data", array=df["err"], visible=True),
        name="AUC",
        marker_color="red",
    )

    # Time on the secondary (right) y axis.
    time_trace = go.Scatter(
        x=df["num"],
        y=df["time"],
        mode="lines+markers",
        name="Time (s)",
        yaxis="y2",
        marker_color="#4292c6",
        marker_symbol="triangle-up-dot",
        marker_size=17,
    )

    base_layout = go.Layout(
        plot_bgcolor="white",
        xaxis=dict(
            title=f"Number of chains (#{cfg.experiments.chains.ds})",
            **boxed_axis,
        ),
        yaxis=dict(title="AUC", **boxed_axis),
        yaxis2=dict(
            title="Time (s)",
            overlaying="y",
            side="right",
            **boxed_axis,
        ),
        legend=dict(x=0, y=0.9, font=dict(size=35, color="black")),
    )

    fig = go.Figure(data=[auc_trace, time_trace], layout=base_layout)

    # Applied on top of `base_layout`: the legend position given here is the
    # one that ends up in the figure.
    fig.update_layout(
        font_family="Times New Roman",
        font_color="black",
        height=400,
        width=700,
        margin=dict(l=0, r=0, b=0, t=0),
        legend=dict(x=0.6, y=0, bgcolor="rgba(0,0,0,0)"),
        font=dict(size=35),
        yaxis=dict(showgrid=False, tickmode="auto"),
        yaxis_range=[0.5, 1.0],
        xaxis=dict(tickvals=list(cfg.experiments.chains.candidate)),
    )
    return fig


In [8]:
# Point the shared config at synthetic dataset 1 (data selection and
# chain-experiment settings).
cfg.data.synthetic_ds = cfg.experiments.chains.ds = 1

# Load the dataset and evaluate every candidate chain count.
df = pd.read_csv(cfg.data.save_path)
res = ad2s_detect(df, chains_candidate=cfg.experiments.chains.candidate)

# One row per chain count; expose the count as an explicit `num` column and
# persist the table.
df = pd.DataFrame.from_dict(res, orient='index').assign(num=lambda frame: frame.index)
df.to_csv(cfg.experiments.chains.save_path)

# Reload the saved table, plot it, export the figure, and display it.
df = pd.read_csv(cfg.experiments.chains.save_path)
fig = plot_res(df)
fig.write_image(cfg.experiments.chains.pdf_path)
fig

In [9]:
# Point the shared config at synthetic dataset 2 (data selection and
# chain-experiment settings).
cfg.data.synthetic_ds = cfg.experiments.chains.ds = 2

# Load the dataset and evaluate every candidate chain count.
df = pd.read_csv(cfg.data.save_path)
res = ad2s_detect(df, chains_candidate=cfg.experiments.chains.candidate)

# One row per chain count; expose the count as an explicit `num` column and
# persist the table.
df = pd.DataFrame.from_dict(res, orient='index').assign(num=lambda frame: frame.index)
df.to_csv(cfg.experiments.chains.save_path)

# Reload the saved table, plot it, export the figure, and display it.
df = pd.read_csv(cfg.experiments.chains.save_path)
fig = plot_res(df)
fig.write_image(cfg.experiments.chains.pdf_path)
fig

In [6]:
# Point the shared config at synthetic dataset 3 (data selection and
# chain-experiment settings).
cfg.data.synthetic_ds = cfg.experiments.chains.ds = 3

# Load the dataset and evaluate every candidate chain count.
df = pd.read_csv(cfg.data.save_path)
res = ad2s_detect(df, chains_candidate=cfg.experiments.chains.candidate)

# One row per chain count; expose the count as an explicit `num` column and
# persist the table.
df = pd.DataFrame.from_dict(res, orient='index').assign(num=lambda frame: frame.index)
df.to_csv(cfg.experiments.chains.save_path)

# Reload the saved table, plot it, export the figure, and display it.
df = pd.read_csv(cfg.experiments.chains.save_path)
fig = plot_res(df)
fig.write_image(cfg.experiments.chains.pdf_path)
fig

In [7]:
# Point the shared config at synthetic dataset 4 (data selection and
# chain-experiment settings).
cfg.data.synthetic_ds = cfg.experiments.chains.ds = 4

# Load the dataset and evaluate every candidate chain count.
df = pd.read_csv(cfg.data.save_path)
res = ad2s_detect(df, chains_candidate=cfg.experiments.chains.candidate)

# One row per chain count; expose the count as an explicit `num` column and
# persist the table.
df = pd.DataFrame.from_dict(res, orient='index').assign(num=lambda frame: frame.index)
df.to_csv(cfg.experiments.chains.save_path)

# Reload the saved table, plot it, export the figure, and display it.
df = pd.read_csv(cfg.experiments.chains.save_path)
fig = plot_res(df)
fig.write_image(cfg.experiments.chains.pdf_path)
fig