In [37]:
import pandas as pd
import numpy as np

import json
import wandb
import matplotlib.pyplot as plt

from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA
from pyod.models.lscp import LSCP
from pyod.models.inne import INNE
from pyod.models.gmm import GMM
from pyod.models.kde import KDE
from pyod.models.lmdd import LMDD

from utils.ml_utils import metrics
from utils.utils import make_table
from utils.wandb_logging import init_exp, log_params, finish_exp

import warnings
warnings.filterwarnings("ignore")

In [45]:
cols = ["account_id", "name", "point", "call_count", "total_call_time", "total_exclusive_time", "min_call_time", "max_call_time", "sum_of_squares", "instances", "language", "app_name", "app_id", "scope", "host", "display_host", "pid", "agent_version", "labels"]
data_raw = pd.read_csv("Data/metrics_collector.tsv", names = cols, sep="\t")

In [46]:
data = make_table(data_raw)

In [47]:
data.head()

Unnamed: 0,time,time_numeric,web_response,throughput,apdex,error
0,2024-04-15 23:32:00,28553732.0,0.015006,3898.0,0.999679,0.000128
1,2024-04-15 23:33:00,28553733.0,0.015006,3917.5,0.999745,0.000128
2,2024-04-15 23:34:00,28553734.0,0.015006,3993.0,0.999687,0.0
3,2024-04-15 23:35:00,28553735.0,0.015006,3991.5,0.999812,0.0
4,2024-04-15 23:36:00,28553736.0,0.015006,3915.5,0.999489,0.0


In [None]:
def calculate_weight():
    

In [43]:
config_model_name = "LOF"
metrics_dict = {}
timeseries_cols = ["web_response", "throughput", "apdex", "error"]

with open('config.json', 'r') as file:
    config = json.load(file)


for timeseries_col in timeseries_cols:
    model = LOF(**config[config_model_name][timeseries_col])

    # fit predict model 
    X = data_raw[["time", timeseries_col]]
    model.fit(X)
    predictions = model.predict(X) 
    print(predictions)
    metrics_dict[timeseries_col] = metrics(X, predictions, timeseries_col)


    with open('secrets/wandb_secret.txt', 'r') as file:
        api_key = file.read()

    wandb.login(key=api_key)

    wandb.init(project="redlab-hack", tags=[config_model_name, timeseries_col])
    wandb.log(config[config_model_name][timeseries_col])
    wandb.log(metrics_dict)
    wandb.finish()

[1 1 1 ... 1 1 1]


CommError: Run initialization has timed out after 90.0 sec. 
Please refer to the documentation for additional information: https://docs.wandb.ai/guides/track/tracking-faq#initstarterror-error-communicating-with-wandb-process-

In [None]:
def train_model(data: pd.DataFrame, model, params: dict, start: pd.datetime, end: pd.datetime, recalculate = 0) -> dict:

    # fit predict model 
    model.fit(data)
    predictions = model.predict() 

    metrics = metrics(predictions)

    # wandb logging 
    log_params(metrics)
    

In [None]:
# Define the number of inliers and outliers
n_samples = 200
outliers_fraction = 0.25
clusters_separation = [0]

# Compare given detectors under given settings
# Initialize the data
xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
n_inliers = int((1. - outliers_fraction) * n_samples)
n_outliers = int(outliers_fraction * n_samples)
ground_truth = np.zeros(n_samples, dtype=int)
ground_truth[-n_outliers:] = 1

# initialize a set of detectors for LSCP
detector_list = [LOF(n_neighbors=5), LOF(n_neighbors=10), LOF(n_neighbors=15),
                 LOF(n_neighbors=20), LOF(n_neighbors=25), LOF(n_neighbors=30),
                 LOF(n_neighbors=35), LOF(n_neighbors=40), LOF(n_neighbors=45),
                 LOF(n_neighbors=50)]

In [None]:
type(LOF(n_neighbors=5))

pyod.models.lof.LOF

In [None]:
# Show the statics of the data
print('Number of inliers: %i' % n_inliers)
print('Number of outliers: %i' % n_outliers)
print('Ground truth shape is {shape}. Outlier are 1 and inliers are 0.\n'.format(shape=ground_truth.shape))
print(ground_truth)

Number of inliers: 150
Number of outliers: 50
Ground truth shape is (200,). Outlier are 1 and inliers are 0.

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
