Imports

In [None]:
from evaluation_metric import range_lift_with_delay
from sklearn.metrics import precision_recall_curve
from preprocessing import complete_timestamp
from evaluation import label_evaluation
from kpi_series import KPISeries
from model import DonutX
import pandas as pd
import numpy as np
import json

Performance Metric function

In [None]:
def evaluate(preds, labels, kpi_id, missing, timestamp):
    """Score predictions against ground truth via ``label_evaluation``.

    Two frames are built from the arguments, each restricted to the rows
    where ``missing == 0``. The ground truth is written to the module-level
    ``truth_file`` as HDF (key ``'df'``) and the predictions to the
    module-level ``result_file`` as CSV. Both paths, together with the
    module-level ``delay``, are then handed to ``label_evaluation``.

    Args:
        preds: Predicted labels, stored in the ``predict`` column.
        labels: Ground-truth labels, stored in the ``label`` column.
        kpi_id: Value(s) for the ``KPI ID`` column.
        missing: Missing-point indicator; rows with a non-zero value are
            dropped before the files are written.
        timestamp: Value(s) for the ``timestamp`` column.

    Returns:
        The ``'data'`` entry of the JSON document returned by
        ``label_evaluation``.
    """
    # Columns common to both files, kept in the original column order.
    shared_columns = {'KPI ID': kpi_id, 'timestamp': timestamp}

    truth_frame = pd.DataFrame({'missing': missing, 'label': labels, **shared_columns})
    observed_truth = truth_frame.loc[truth_frame['missing'] == 0]
    observed_truth.to_hdf(truth_file, key='df')

    prediction_frame = pd.DataFrame({'missing': missing, 'predict': preds, **shared_columns})
    observed_prediction = prediction_frame.loc[prediction_frame['missing'] == 0]
    observed_prediction.to_csv(result_file)

    raw_report = label_evaluation(truth_file, result_file, delay)
    report = json.loads(raw_report)
    return report['data']

In [None]:
# Input data sets (paths are relative to the working directory).
train_file = '../train.csv'
test_file = '../test.csv'

# Scratch files that evaluate() writes and then passes to label_evaluation.
truth_file = 'ground_truth.hdf'
result_file = 'predict.csv'

# Evaluation / detection settings.
THRESHOLD = 1.1
delay = 7  # forwarded to label_evaluation as its third argument

# Load both CSVs, renaming the 'KPI ID' column to the attribute-friendly
# 'kpi_id' so it can be accessed as ``df.kpi_id``.
train_df, test_df = (
    pd.read_csv(csv_path).rename(columns={'KPI ID': 'kpi_id'})
    for csv_path in (train_file, test_file)
)

Bagel inference

In [None]:
# Per-KPI inference: for every KPI found in the training set, fit a fresh
# DonutX model on that KPI's training series, score the matching test series,
# threshold the scores, and write the non-missing predictions both to
# ``inference/<kpi_id>.csv`` and into the combined ``predictions`` frame.
# NOTE(review): the ``inference/`` directory is not created here; to_csv will
# fail if it does not already exist.
predictions = pd.DataFrame()
kpi_frames = []  # one frame per KPI, concatenated once after the loop

for kpi_id in train_df.kpi_id.unique():
    kpi = train_df[train_df['kpi_id'] == kpi_id]
    # assign() returns a new frame, so the placeholder label column is not
    # written into a slice of test_df (avoids SettingWithCopyWarning).
    t_kpi = test_df[test_df['kpi_id'] == kpi_id].assign(label=0)

    # complete_timestamp returns the timestamps, a missing-point indicator
    # and the value/label arrays.
    # presumably it fills gaps in the series so that arrays are evenly
    # spaced — confirm against preprocessing.complete_timestamp.
    train_timestamp, train_missing, (train_values, train_labels) = complete_timestamp(
        kpi['timestamp'], (kpi['value'], kpi['label'])
    )
    test_timestamp, test_missing, (test_values, test_labels) = complete_timestamp(
        t_kpi['timestamp'], (t_kpi['value'], t_kpi['label'])
    )

    train_kpi = KPISeries(
        value=train_values,
        timestamp=train_timestamp,
        label=train_labels,
        name='train_data',
    )

    test_kpi = KPISeries(
        value=test_values,
        timestamp=test_timestamp,
        label=test_labels,
        name='test_data',
    )

    # Normalize the test series with the statistics of the training series
    # so both are on the same scale.
    train_kpi, train_kpi_mean, train_kpi_std = train_kpi.normalize(return_statistic=True)
    test_kpi = test_kpi.normalize(mean=train_kpi_mean, std=train_kpi_std)

    model = DonutX(cuda=False, max_epoch=50, latent_dims=8, network_size=[100, 100])
    model.fit(train_kpi.label_sampling(0.))

    y_prob = model.predict(test_kpi.label_sampling(0.))

    # Flag only scores strictly above the value ranked at position n_elems
    # (0-based, descending), i.e. at most n_elems points are flagged.
    # n_elems is the training anomaly rate scaled to the test series length.
    n_elems = int(train_kpi.anormaly_rate * test_kpi.length)
    cutoff = np.sort(y_prob)[::-1][n_elems]
    test_pred = np.where(y_prob > cutoff, 1, 0)

    kpi_pred = pd.DataFrame({
        'missing': test_missing,
        'predict': test_pred,
        'KPI ID': kpi_id,
        'timestamp': test_timestamp,
    })
    # Keep only rows where missing == 0; computed once and reused for both
    # the combined frame and the per-KPI file.
    observed_pred = kpi_pred[kpi_pred['missing'] == 0].drop(['missing'], axis=1)
    kpi_frames.append(observed_pred)
    observed_pred.to_csv(f'inference/{kpi_id}.csv')

# DataFrame.append was removed in pandas 2.0 and recopied the accumulated
# frame on every iteration; a single concat gives the same combined,
# re-indexed result.
if kpi_frames:
    predictions = pd.concat(kpi_frames, ignore_index=True)