In [2]:
import sys
import os
sys.path.insert(0, os.path.realpath(".."))
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import copy
import numpy as np

DATASET = 'E'
WORK_DIR = Path(f"/SSF/data/{DATASET}")
drift_ranges = [1709449200, 1709488800, 1709571600, 1711548600, 1711574700, 1711599900, 1711611600, 1711634400, 1711682400, 1712052000, 1712080800, 1712116800, 1712145600, 1712167200, 1712188800, 1712491200, 1712530800, 1712595600, 1712682000, 1712854800, 1712867400]
fault_df = pd.read_csv(WORK_DIR / "faults.csv")
with open(WORK_DIR / "drift.txt", 'r') as f:
    drift_ts = int(f.read())
print(f"{drift_ts=}")
metric_df = pd.read_pickle(WORK_DIR / "metrics.norm.pkl")
failure_timestamps = []
for _, failure in fault_df.iterrows():
    timestamp = failure['timestamp']
    failure_timestamps.extend([timestamp + i*60 for i in range(0, 20)])

metric_index_dict = metric_df.groupby(['name']).groups
metric_index_after_dict = metric_df[metric_df.timestamp>=drift_ts & ~(metric_df.timestamp.isin(failure_timestamps))].groupby(['name']).groups
metric_index_before_dict = metric_df[(metric_df.timestamp<drift_ts) & ~(metric_df.timestamp.isin(failure_timestamps))].groupby(['name']).groups
metrics_before = set(metric_index_before_dict.keys())
metrics_after = set(metric_index_after_dict.keys())
metrics = list(metrics_before & metrics_after)
metrics = [metric for metric in metrics if 'loadgenerator' not in metric]
print(f"metrics_before: {len(metrics_before)}, metrics_after: {len(metrics_after)}, metrics: {len(metrics)}")

drift_ts=1709442000
metrics_before: 4437, metrics_after: 4737, metrics: 4258


## DejaVu-Omni (Ours)

In [None]:
metric_df_ours = copy.deepcopy(metric_df)
metric_index_dict = metric_df_ours.groupby(['name']).groups
metric_index_before_dict = metric_df_ours[(metric_df_ours.timestamp<drift_ts) & ~(metric_df_ours.timestamp.isin(failure_timestamps))].groupby(['name']).groups
metrics_before = set(metric_index_before_dict.keys())
for i in range(len(drift_ranges)-1):
    drift_num = 0
    drift_metrics = []
    drift_start, drift_end = drift_ranges[i], drift_ranges[i+1]
    print(f"{drift_start=} {drift_end=}")
    metric_index_after_dict = metric_df_ours[(metric_df_ours.timestamp>=drift_start) & (metric_df_ours.timestamp<drift_end) & ~(metric_df_ours.timestamp.isin(failure_timestamps))].groupby(['name']).groups
    metric_index_after_all_dict = metric_df_ours[(metric_df_ours.timestamp>=drift_start) & (metric_df_ours.timestamp<drift_end)].groupby(['name']).groups
    metrics_after = set(metric_index_after_dict.keys())
    metrics = list(metrics_before & metrics_after)
    metrics = [metric for metric in metrics if 'loadgenerator' not in metric]
    print(f"metrics_before: {len(metrics_before)}, metrics_after: {len(metrics_after)}, metrics: {len(metrics)}")
    
    for metric in tqdm(metrics, desc="drift metrics: "):
        if 'fake' in metric:
            continue
        metric_index = metric_index_dict[metric]
        metric_index_after = metric_index_after_dict[metric]
        metric_index_before = metric_index_before_dict[metric]
        metrics_data = metric_df_ours.loc[metric_index]
        before_metrics = metrics_data.loc[metric_index_before].sort_values(by=['timestamp']).drop_duplicates()[-120:]
        metrics_data_after = metrics_data.loc[metric_index_after]
        after_metrics = metrics_data_after.sort_values(by=['timestamp']).drop_duplicates()[:90]
        if before_metrics.empty or after_metrics.empty or len(after_metrics) < 10 or len(after_metrics.value.unique()) == 1:
            continue
        before_metrics_value = before_metrics.value
        before_median = before_metrics_value.median()
        before_IQR = before_metrics_value.quantile(0.75) - before_metrics_value.quantile(0.25)
        after_metrics_value = after_metrics.value
        after_median = after_metrics_value.median()
        after_IQR = after_metrics_value.quantile(0.75) - after_metrics_value.quantile(0.25)
        drift_num += 1
        drift_metrics.append(metric)
        scale = before_IQR / after_IQR if after_IQR != 0 else 1
        bias = before_median - after_median * scale

        metric_index_after_all = metric_index_after_all_dict[metric]
        metrics_data_after_all = metrics_data.loc[metric_index_after_all]
        after_metrics_value_all = metrics_data_after_all['value'].values * scale + bias
        metric_df_ours.loc[metric_index_after_all, 'value'] = after_metrics_value_all
    print(f"successfully drift {drift_num} metrics using method [ours] in range ({drift_start}, {drift_end})")
metric_df_ours.to_pickle(WORK_DIR / f'metrics.norm.drift.ours.pkl')

## DejaVu-ICPP‘19

In [None]:
metrics_df_icpp = copy.deepcopy(metric_df)
metric_index_dict = metrics_df_icpp.groupby(['name']).groups
metric_index_before_dict = metrics_df_icpp[(metrics_df_icpp.timestamp<drift_ts) & ~(metrics_df_icpp.timestamp.isin(failure_timestamps))].groupby(['name']).groups
metric_index_before_all_dict = metrics_df_icpp[(metrics_df_icpp.timestamp<drift_ts)].groupby(['name']).groups
metrics_before = set(metric_index_before_dict.keys())
for i in range(len(drift_ranges)-1):
    drift_num = 0
    drift_metrics = []
    drift_start, drift_end = drift_ranges[i], drift_ranges[i+1]
    print(f"{drift_start=} {drift_end=}")
    metric_index_after_dict = metrics_df_icpp[(metrics_df_icpp.timestamp>=drift_start) & (metrics_df_icpp.timestamp<drift_end) & ~(metrics_df_icpp.timestamp.isin(failure_timestamps))].groupby(['name']).groups
    metrics_after = set(metric_index_after_dict.keys())
    metrics = list(metrics_before & metrics_after)
    metrics = [metric for metric in metrics if 'loadgenerator' not in metric]
    print(f"metrics_before: {len(metrics_before)}, metrics_after: {len(metrics_after)}, metrics: {len(metrics)}")
    
    for metric in tqdm(metrics, desc="drift metrics: "):
        if 'fake' in metric:
            continue
        metric_index = metric_index_dict[metric]
        metric_index_after = metric_index_after_dict[metric]
        metric_index_before = metric_index_before_dict[metric]
        metrics_data = metrics_df_icpp.loc[metric_index]
        before_metrics = metrics_data.loc[metric_index_before].sort_values(by=['timestamp']).drop_duplicates()[-120:]
        metrics_data_after = metrics_data.loc[metric_index_after]
        after_metrics = metrics_data_after.sort_values(by=['timestamp']).drop_duplicates()[:30]
        if before_metrics.empty or after_metrics.empty or len(after_metrics) < 10 or len(after_metrics.value.unique()) == 1:
            continue
        before_metrics_value = before_metrics.value
        before_mean = before_metrics_value.mean()
        before_std = before_metrics_value.std()
        after_metrics_value = after_metrics.value
        after_mean = after_metrics_value.mean()
        after_std = after_metrics_value.std()
        if after_std == 0:
            continue
        drift_num += 1
        drift_metrics.append(metric)
        scale = after_std / before_std if before_std != 0 else 1
        bias = after_mean - before_mean * scale

        metric_index_before_all = metric_index_before_all_dict[metric]
        metrics_data_before_all = metrics_data.loc[metric_index_before_all]
        before_metrics_value_all = metrics_data_before_all['value'].values * scale + bias
        metrics_df_icpp.loc[metric_index_before_all, 'value'] = before_metrics_value_all
    print(f"successfully drift {drift_num} metrics using method [icpp] in range ({drift_start}, {drift_end})")
metrics_df_icpp.to_pickle(WORK_DIR / f'metrics.norm.drift.icpp.pkl')

## DejaVu-StepWise

In [None]:
from sklearn.linear_model import RANSACRegressor

metric_df_stepwise = copy.deepcopy(metric_df)
metric_index_dict = metric_df_stepwise.groupby(['name']).groups
metric_index_before_dict = metric_df_stepwise[(metric_df_stepwise.timestamp<drift_ts) & ~(metric_df_stepwise.timestamp.isin(failure_timestamps))].groupby(['name']).groups
metrics_before = set(metric_index_before_dict.keys())
for i in range(len(drift_ranges)-1):
    drift_num = 0
    drift_metrics = []
    drift_start, drift_end = drift_ranges[i], drift_ranges[i+1]
    print(f"{drift_start=} {drift_end=}")
    metric_index_after_all_dict = metric_df_stepwise[(metric_df_stepwise.timestamp>=drift_start) & (metric_df_stepwise.timestamp<drift_end)].groupby(['name']).groups
    metric_index_after_dict = metric_df_stepwise[(metric_df_stepwise.timestamp>=drift_start) & (metric_df_stepwise.timestamp<drift_end) & ~(metric_df_stepwise.timestamp.isin(failure_timestamps))].groupby(['name']).groups
    metrics_after = set(metric_index_after_dict.keys())
    metrics = list(metrics_before & metrics_after)
    metrics = [metric for metric in metrics if 'loadgenerator' not in metric]
    print(f"metrics_before: {len(metrics_before)}, metrics_after: {len(metrics_after)}, metrics: {len(metrics)}")
    
    for metric in tqdm(metrics, desc="drift metrics: "):
        if 'fake' in metric:
            continue
        metric_index = metric_index_dict[metric]
        metric_index_after = metric_index_after_dict[metric]
        metric_index_before = metric_index_before_dict[metric]
        metrics_data = metric_df_stepwise.loc[metric_index]
        before_metrics = metrics_data.loc[metric_index_before].sort_values(by=['timestamp']).drop_duplicates()
        metrics_data_after = metrics_data.loc[metric_index_after]
        after_metrics = metrics_data_after.sort_values(by=['timestamp']).drop_duplicates()[:30]
        if before_metrics.empty or after_metrics.empty or len(after_metrics) < 10 or len(after_metrics.value.unique()) == 1:
            continue
        before_metrics_value = before_metrics.value
        
        ts_medians = np.zeros(24*60//17, dtype=np.float32)
        mod = 5040      # 24*60*60//17
        for i in range(24*60//17):
            tmp = before_metrics.loc[before_metrics.timestamp % mod == i*60].sort_values(by=['timestamp']).drop_duplicates()[-10:]['value']
            ts_medians[i] = tmp.median(skipna=True)
        ts_medians[np.isnan(ts_medians)] = 0
        target_series = []
        for i in range(after_metrics.shape[0]):
            tmp = after_metrics.iloc[i]
            timestamp = tmp['timestamp']
            target_series.append(ts_medians[(timestamp%mod)//60])
        target_series = np.array(target_series)
        rlr = RANSACRegressor()
        rlr.fit(np.array(after_metrics.value).reshape((-1, 1)), target_series)

        metric_index_after_all = metric_index_after_all_dict[metric]
        metrics_data_after_all = metrics_data.loc[metric_index_after_all]
        after_metrics_value_all = rlr.predict(metrics_data_after_all['value'].values.reshape((-1, 1)))
        metric_df_stepwise.loc[metric_index_after_all, 'value'] = after_metrics_value_all
    print(f"successfully drift {drift_num} metrics using method [stepwise] in range ({drift_start}, {drift_end})")
metric_df_stepwise.to_pickle(WORK_DIR / f'metrics.norm.drift.stepwise.pkl')