In [1]:
cd ../..

/home/msi/projects/diplomka


In [2]:
from orbit.models import ETS

import pandas as pd
import pathlib
from datetime import datetime
import glob
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Label of the benchmarked model; used below as the name of the result column
# and as part of the stats file name.
model_name = 'ETS'

In [4]:
# Root directory of the benchmark data.
benchmark_dir = pathlib.Path('results/benchmark/')
# CSV files to process. The list is sorted because glob returns matches in
# arbitrary order, and the processing order determines the row order of the
# timing statistics. `recursive=True` was dropped: it only affects patterns
# that contain "**", which this one does not.
dataset = sorted(glob.glob("results/benchmark/RTT/*.csv"))

In [5]:
def load_df(path):
    """Read a CSV file into a frame indexed by a sorted, de-duplicated timestamp.

    The timestamp is parsed from the ``timestamp`` column when the file has
    one, otherwise from the ``index`` column. Rows are ordered by timestamp
    and, for repeated timestamps, only the first row (in sorted order) is kept.

    Parameters
    ----------
    path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Frame whose index is named ``timestamp``.
    """
    frame = pd.read_csv(path)
    source_col = 'timestamp' if 'timestamp' in frame.columns else 'index'
    frame['timestamp'] = pd.to_datetime(frame[source_col])
    frame = frame.set_index('timestamp').sort_index()
    is_repeat = frame.index.duplicated(keep='first')
    return frame[~is_repeat]

In [6]:
# Sanity check: load the first file of the dataset and display the resulting frame.
load_df(dataset[0])

Unnamed: 0_level_0,rtt,normalmodel_anomalies
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-01 00:03:31,26.5,False
2023-01-01 00:08:24,28.5,False
2023-01-01 00:08:45,28.6,False
2023-01-01 00:09:02,27.6,False
2023-01-01 00:10:23,27.5,False
...,...,...
2023-03-31 23:39:28,26.6,False
2023-03-31 23:40:16,33.3,False
2023-03-31 23:45:45,27.5,False
2023-03-31 23:50:00,51.3,False


In [7]:
# Column holding the measured series; fit_predict passes it to ETS as response_col.
value_col = 'rtt'

In [8]:
def fit_predict(df):
    """Fit an ETS model on ``df`` and flag points outside its prediction band.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by ``timestamp`` that contains the ``value_col`` column.

    Returns
    -------
    pandas.Series
        Boolean series indexed by ``timestamp`` and named after ``model_name``.
        It is True where the observed value lies above ``prediction_95`` or
        below ``prediction_5``.
    """
    model = ETS(response_col=value_col, date_col='timestamp', estimator='stan-mcmc')
    # The model is given `date_col='timestamp'`, so the index is moved back
    # into a regular column before fitting.
    observed = df.reset_index()
    model.fit(observed)
    # NOTE(review): the percentiles requested here (2.5 / 97.5) do not match the
    # `prediction_5` / `prediction_95` columns read below — confirm which band
    # is actually intended and which one the library returns.
    predicted = model.predict(df=observed, prediction_percentiles=[2.5, 97.5])
    # Assignment aligns on the index; assumes `predicted` keeps the positional
    # index of `observed` — TODO confirm.
    predicted['value'] = observed[value_col]
    above_band = predicted['prediction_95'] < predicted['value']
    below_band = predicted['prediction_5'] > predicted['value']
    predicted['anomalies'] = above_band | below_band
    # Name the result after the model (previously the literal string 'model_name').
    return predicted.set_index('timestamp')['anomalies'].rename(model_name)

In [9]:
# Timing accumulators filled by the benchmark loop, one entry per dataset file:
# wall-clock seconds of fit + predict, and the same duration divided by the row count.
total_times = []
per_sample_times = []

In [10]:
from IPython.display import clear_output

In [11]:
import numpy as np

In [12]:
# Paths of the files for which fit_predict raised a RuntimeError.
unprocessed = []

In [13]:
# Run the model on every dataset file, record how long it took, and store the
# resulting anomaly column in the matching CSV below benchmark_dir.
for path in dataset:
    df = load_df(path)

    # Time only fitting + prediction, not the CSV loading or writing.
    start = datetime.now()
    try:
        current = fit_predict(df).rename(model_name)
    except RuntimeError:
        # Best effort: remember the file and emit an all-NaN column instead.
        # The column is built from the index because the former `df['value']`
        # lookup fails for frames whose series is stored under `value_col`.
        unprocessed.append(path)
        current = pd.Series(np.nan, index=df.index, name=model_name)
    end = datetime.now()

    duration = end - start
    total_times.append(duration.total_seconds())
    per_sample_times.append(duration.total_seconds() / len(df))

    # Location of the file relative to benchmark_dir (replaces the hard-coded
    # `path[18:]` slice, which silently depended on the prefix length).
    file_path = pathlib.Path(path).relative_to(benchmark_dir)
    file_dir = benchmark_dir / file_path.parent
    file_dir.mkdir(parents=True, exist_ok=True)

    out_path = file_dir / file_path.name

    current.index = current.index.rename('timestamp')
    if out_path.exists():
        existing = load_df(out_path)
        # Drop the column written by an earlier run of this model so that
        # re-running the cell replaces it instead of duplicating it.
        existing = existing.drop(columns=[model_name], errors='ignore')
        current = pd.concat([existing, current], axis=1)

    current.to_csv(out_path)
    # Keep the notebook output short: replace the sampler log on each iteration.
    clear_output(wait=True)


2023-06-04 20:06:06 - orbit - INFO - Sampling (PyStan) with chains: 4, cores: 8, temperature: 1.000, warmups (per chain): 225 and samples(per chain): 25.
chain 1 |[33m          [0m| 00:00 Status
[A

[A[A
[A

chain 1 |[33m█████     [0m| 00:07 Iteration:   1 / 250 [  0%]  (Warmup)

[A[A
chain 1 |[33m███████▌  [0m| 00:13 Iteration: 100 / 250 [ 40%]  (Warmup)

chain 1 |[34m██████████[0m| 00:15 Iteration: 200 / 250 [ 80%]  (Warmup)
[A

[A[A
chain 1 |[34m          [0m| 00:18 Iteration: 226 / 250 [ 90%]  (Sampling)

[A[A
chain 1 |[34m██████████[0m| 00:19 Sampling completed                     
chain 2 |[34m██████████[0m| 00:19 Sampling completed                     
chain 3 |[34m██████████[0m| 00:19 Sampling completed                     
chain 4 |[34m██████████[0m| 00:19 Sampling completed                     

                                                                                                                                                                                                                                                                                                                                









In [14]:
# File name (directory part stripped) of every dataset path, in dataset order.
names = [pathlib.PurePath(csv_path).name for csv_path in dataset]

In [15]:
# Timing table with one row per dataset file, indexed by file name.
# NOTE(review): only the per-sample column carries the model name; confirm
# whether the plain 'time' column name is intentional.
stats_df = pd.DataFrame(
    {
        'time': total_times,
        f'per_sample_{model_name}': per_sample_times,
    },
    index=names,
)

In [16]:
# Write the timing table into benchmark_dir as stats_<model_name>.csv.
stats_df.to_csv(benchmark_dir / f'stats_{model_name}.csv')