In [1]:
import asyncio
import httpx
import sys
sys.path.append('../')
sys.path.append('/app/python-modules')

!{sys.executable} -m pip install tqdm
from tqdm import tqdm

import os
import json
import pandas as pd
from pathlib import Path

from utils.time_function import time_function as timeit

#custom imports
import config
import nab_utils

from time import perf_counter

from utils.TimeSeriesClient import TimeSeriesClient
# Shared API client used by the cells below; it targets a service on localhost, port 8000.
# NOTE(review): timeout=30 is presumably in seconds — confirm against TimeSeriesClient.
client = TimeSeriesClient(base_url='http://127.0.0.1:8000', timeout=30)

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Load the thresholds file from the NAB assets config directory.
# It is later indexed as thresholds_file[<detector name>]["standard"]["threshold"].
with open(config.NAB_ASSETS / "config" / "thresholds.json") as json_file:
    thresholds_file=json.load(json_file)

# Load the detector metadata (path is relative to the working directory).
# Its "detectors" list is iterated when detectors are registered.
with open("detector_metadata.json") as json_file:
    detector_metadata=json.load(json_file)

# Fetch all dataset records from the API; each record is later read for its
# 'id', 'path', 'folder' and 'filename' keys.
datasets = await client.request("get","/dataset/get_all")

In [3]:
def build_prediction_object(dataset_obj,detector_obj):
    """Assemble the prediction payload for one dataset/detector pair.

    Args:
        dataset_obj: mapping read for its 'id', 'path', 'folder' and 'filename' keys.
        detector_obj: mapping read for its 'name' and 'id' keys.

    Returns:
        dict with the keys dataset_id, dataset_name, detector_name, detector_id
        and url, where url is the string form of
        config.NAB_ASSETS / "predictions" / <detector name> / <folder> /
        "<detector name>_<filename>".
    """
    record = {
        "dataset_id": dataset_obj['id'],
        "dataset_name": dataset_obj['path'],
        "detector_name": detector_obj['name'],
        "detector_id": detector_obj['id'],
    }

    # The predictions file lives in a per-detector directory and carries the
    # detector name as a filename prefix.
    name = record["detector_name"]
    detector_dir = Path(config.NAB_ASSETS / "predictions" / name)
    prefixed_file = Path(name + "_" + dataset_obj["filename"])
    record["url"] = str(detector_dir / dataset_obj["folder"] / prefixed_file)

    return record

In [4]:
def build_anomaly_objects(results,detector_obj, prediction_obj):
    """Build one anomaly record per row whose score exceeds the detector threshold.

    Reads three notebook globals: ``MODEL`` (name of the detector currently
    being processed), ``thresholds_file`` and ``detector_metadata``.

    Args:
        results: DataFrame with ``anomaly_score``, ``value`` and ``timestamp``
            columns; ``timestamp`` values must support ``strftime``.
            Side effect: a boolean ``is_anomaly`` column is added in place.
        detector_obj: mapping read for its ``id`` key.
        prediction_obj: mapping read for its ``dataset_name`` key.

    Returns:
        list of dicts, one per flagged row, in row order. Empty when no score
        is strictly greater than the threshold.

    Raises:
        IndexError: if rows are flagged but ``detector_metadata["detectors"]``
            has no entry named ``MODEL``.
    """
    threshold = thresholds_file[MODEL]["standard"]["threshold"]
    results["is_anomaly"] = results["anomaly_score"] > threshold

    flagged_positions = [pos for pos, flagged in enumerate(results["is_anomaly"]) if flagged]
    if not flagged_positions:
        return []

    # Severity depends only on the detector, so resolve it once instead of
    # re-scanning the metadata for every anomaly.
    matching = [entry for entry in detector_metadata["detectors"] if entry['name'] == MODEL]
    if not matching:
        raise IndexError("no detector metadata entry named %r" % (MODEL,))
    severity = matching[0]["severity"]

    anomaly_records = []
    for pos in flagged_positions:
        # Positional lookup: going through the index label breaks when labels
        # repeat, because .loc then returns several rows instead of one.
        row = results.iloc[pos]
        anomaly_records.append({
            "detector_id": detector_obj["id"],
            "dataset_name": prediction_obj["dataset_name"],
            "detector_name": MODEL,
            "value": float(row["value"]),
            "anomaly_score": float(row["anomaly_score"]),
            "threshold": float(threshold),
            "time": row["timestamp"].strftime('%Y-%m-%dT%H:%M:%S.%f%z'),
            "status": 'open',
            "severity": severity,
            "tags": ['nab'],
        })
    return anomaly_records

# Build predictions, get anomalies and add as children to each prediction call

In [5]:
async def register_predictions():
    """Create one prediction, together with its anomalies, for every dataset.

    Reads the notebook globals ``datasets``, ``detector_obj`` and ``client``.
    For each dataset it builds the prediction payload, reads the CSV at the
    payload's ``url``, parses its ``timestamp`` column, derives the anomaly
    records and posts both to ``/prediction/create``.

    Returns:
        A fixed status string once every dataset has been posted.
    """
    for dataset_entry in datasets:
        payload = build_prediction_object(dataset_entry, detector_obj)

        # The payload's url is the path of this detector's results file.
        scores = pd.read_csv(payload["url"])
        scores['timestamp'] = pd.to_datetime(scores['timestamp'])

        body = {
            "payload": payload,
            "anomalies": build_anomaly_objects(scores, detector_obj, payload),
        }
        await client.request("post", "/prediction/create", json=body)

    return "Succesfully added Predictions to all Datasets"

#predictions= await register_predictions()

## Alternative Method: Build All predictions, then build all anomalies for each prediction

In [6]:
# Module-level placeholder for prediction results.
# NOTE(review): generate_predictions assigns a local variable with this same name
# (there is no `global` statement), so calling it never updates this dict.
predictions={}
async def register_anomalies(prediction):
    """Post the anomalies found in one prediction's results file.

    Reads the notebook globals ``detector_obj`` and ``client``.

    Args:
        prediction: mapping read for ``url`` (path of the CSV to load) and
            ``id`` (appended to the endpoint path); it is also passed on to
            ``build_anomaly_objects``.

    Returns:
        None. Nothing is sent when the file yields no anomalies.
    """
    scores = pd.read_csv(prediction["url"])
    scores['timestamp'] = pd.to_datetime(scores['timestamp'])

    anomalies = build_anomaly_objects(scores, detector_obj, prediction)

    # Send whenever there is at least one anomaly: the former `len(...) > 1`
    # check silently dropped files containing exactly one.
    if anomalies:
        # Formatting the id keeps the path valid for non-string ids as well.
        await client.request("post", f"/anomaly/create_many/{prediction['id']}", json=anomalies)

async def generate_predictions():
    """Create all predictions in one request, then register each one's anomalies.

    Reads the notebook globals ``datasets``, ``detector_obj`` and ``client``.
    One payload is built per dataset and the whole list is posted to
    ``/prediction/create_many``. ``register_anomalies`` is then run
    concurrently for every item of the response.

    Returns:
        None. The created predictions are not returned or stored globally.
    """
    payloads = [build_prediction_object(entry, detector_obj) for entry in datasets]

    created = await client.request("post", "/prediction/create_many", json=payloads)

    await asyncio.gather(*(register_anomalies(item) for item in created))

In [7]:
# Register each detector from the metadata file, then upload its predictions.
for detector in detector_metadata["detectors"]:

    detector_payload=detector
    # MODEL is read as a global by build_anomaly_objects, so it must be set
    # before register_predictions runs for this detector.
    MODEL=detector_payload['name']

    # detector_obj is likewise read as a global by register_predictions.
    detector_obj=await client.request("post","/detector/create", json={"payload":detector_payload})
    # register_predictions returns a status string, so `predictions` ends up holding that message.
    predictions= await register_predictions()