# Imports

In [1]:
import logging
import numpy as np
import pandas as pd
import stumpy
from src import common, pipeline
from src.outlier_model import OutlierModel
import matplotlib.pyplot as plt

# Notebook-wide logger; INFO and above are emitted.
logger = logging.getLogger("TimeSeries")
logger.setLevel(logging.INFO)

# Root handler format: timestamp (12-hour clock with AM/PM) followed by the message.
logging.basicConfig(
    datefmt='%m/%d/%Y %I:%M:%S %p',
    format='%(asctime)s: %(message)s',
)

Device: cpu


# Dataset preprocessing

In [2]:
# Load the column-role description (target, identifier, categorical columns,
# measurement label) and the pickled dataset it describes.
column_types_loc = "#datasets/Colab_PowerConverter/column_types.json"
column_types = common.json_load(column_types_loc)

target = column_types["target"]
identifier = column_types["identifier"]
cat_cols = column_types["categorical"]
measurement_label = column_types["measurement_label"]

# NOTE(review): unpickling can execute arbitrary code - only load dataset files from a trusted source.
data = pd.read_pickle("#datasets/Colab_PowerConverter/dataset.pkl")

# this measurement did not have a fault (?)
data = data[data[measurement_label] != "Single-Phase_Sensor_Fault"].reset_index(drop=True)

# Assign a unique fault id (1, 2, ...) to each measurement label, in order of
# first appearance, and keep the label -> id mapping for easier analysis.
fault_dict = {
    label: fault_id
    for fault_id, label in enumerate(data[measurement_label].unique(), start=1)
}
# Only rows already marked as faulty (target == 1) receive their measurement's
# fault id; every other row keeps its current target value.
for label, fault_id in fault_dict.items():
    is_faulty_row = (data[measurement_label] == label) & (data[target] == 1)
    data.loc[is_faulty_row, target] = fault_id

data = data.drop(columns=[measurement_label])

# Fill NA - 0 for numerical and 'NA' for categorical
# categorical
data[cat_cols] = data[cat_cols].fillna("NA").astype(str)
# non-categorical: every column that is neither categorical nor the identifier
non_cat_cols = data.drop(columns=cat_cols + [identifier]).columns.tolist()
data[non_cat_cols] = data[non_cat_cols].fillna(0)


# Train model

In [3]:
# Training configuration: task type, input/output locations and split/scaling options.
task = "multiclass"

column_types_loc = "#datasets/Colab_PowerConverter/column_types.json"
save_loc = "models.dill"

# Options handed to pipeline.train as its `parameters` argument.
parameters = dict(
    random_state=1,
    test_size_train=0.2,
    test_size_valid=0.5,
    scaler="Standard",
)


In [None]:
# Train on the preprocessed data. With datasets=True the call is unpacked into
# the three scaled splits plus the `models` object used by later cells.
data_train_scaled, data_valid_scaled, data_test_scaled, models = pipeline.train(
    task,
    data,
    column_types_loc,
    parameters,
    save_loc=save_loc,
    verbose=True,
    datasets=True,
)


Size of dataset classes:
0     597599
5      40014
3      40001
6      40001
7      40001
8      40001
9      40001
10     40001
11     40001
13     40001
1      38971
2      38971
4       3166
12      1335
Name: fault, dtype: int64


epoch 1:   0%|          | 0/748 [00:25<?, ?it/s]

In [None]:
# Run prediction over the whole preprocessed dataset, passing the column-type
# description and the same save_loc that was given to pipeline.train.
# NOTE(review): presumably the trained models are reloaded from save_loc - confirm in pipeline.predict.
predicted = pipeline.predict(data, column_types_loc, save_loc)

# Outlier_model
* `outlier_model` code that implements the `outlier_model.predict()` function

In [None]:
# Row window of the preprocessed dataset used for the outlier experiment.
start_index, end_index = 0, 1000000
data_test = data.iloc[start_index:end_index]

outlier_key = "f_c"   # column monitored by the outlier model
m = 250               # passed to OutlierModel as `m`
preload_size = 5000   # number of leading rows handed to the model up front

# Seed the outlier model with the first `preload_size` values of the monitored column.
preload_series = data_test[outlier_key].iloc[:preload_size]
outlier_model = OutlierModel(
    m=m,
    std_dev=5,
    time_series=preload_series,
    egress=True,
)


# Simulate stream of data
## Inverse transform test dataset for evaluation

In [None]:

# Undo the scaling on the feature columns of the test split
# (everything except the categorical, identifier and target columns).
scaled_features = data_test_scaled.drop(columns=cat_cols + [identifier, target])
data_test = pd.DataFrame(
    models["scaler"].inverse_transform(scaled_features),
    columns=scaled_features.columns.values,
)
# The new frame has a fresh 0..n-1 index, so copy identifier/target by position.
# Assigning the DataFrame slice directly would align on index labels and yield
# NaN or mismatched rows whenever data_test_scaled keeps a non-default index.
for col in (identifier, target):
    data_test[col] = data_test_scaled[col].to_numpy()

In [None]:
# Preview the first rows of the inverse-transformed test set as a sanity check.
data_test.head()

* Note: reloading the whole DL model together with its preprocessors on every iteration is inefficient. This is only initial code, shared with Alex as a starting point. TODO: load the model once, outside the loop.


In [None]:
# Replay the rows after the preload window one by one, as if they arrived as a stream.
# `fault` is the current state: False while the signal is considered healthy,
# True from the moment the outlier model flags a row until the classifier
# reports class 0 again.
fault = False
for index, row in data_test[preload_size:].iterrows():
    # The outlier model is updated and queried on every row so that its internal
    # statistics and anomaly list cover the whole stream.
    outlier_model.train_one(row[outlier_key])
    # NOTE(review): assumes predict_one returns a truthy value when the row is
    # flagged as an anomaly - confirm against OutlierModel.
    anomaly = outlier_model.predict_one(index)
    if index % 10000 == 0:
        print(f"Current Global index: {index}")
    if not fault:
        # Healthy state: only a flagged row opens a fault.
        if anomaly:
            fault = True
            print("fault start")
    else:
        # Fault state: ask the classifier on each row whether the fault persists.
        # NOTE(review): `row` is a single Series here, while pipeline.predict is
        # called with a DataFrame elsewhere - confirm it accepts both.
        predicted = pipeline.predict(row, column_types_loc, save_loc)
        # print("fault continues")
        if predicted == 0:
            fault = False
            print("fault ended")

In [None]:
# Plot the three series recorded by the outlier model on one set of axes:
# max_val (blue), max_mean (red) and max_std_dev (green).
fig = plt.figure(figsize=(16, 8))
ax = fig.gca()
ax.plot(
    outlier_model.max_val, 'b-',
    outlier_model.max_mean, 'r-',
    outlier_model.max_std_dev, 'g-',
)


In [None]:
# Plot the "fault" column of the test set and mark every position recorded in
# outlier_model.anomalies with a red vertical line.
fig = plt.figure(figsize=(16, 8))
ax = fig.gca()
ax.plot(data_test["fault"])
ax.set_title("Detection Method: Anomaly")
for anomaly_x in outlier_model.anomalies:
    ax.axvline(x=anomaly_x, color='r')