# Imports

In [1]:
import os
# Move the working directory to the parent folder.
# NOTE: the path is relative, so re-running this cell moves up one more level each time;
# restart the kernel before re-executing it.
os.chdir("..")

import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm

import matplotlib.pyplot as plt

from darts import TimeSeries, concatenate
from darts.dataprocessing.transformers import Scaler
from darts.models import TFTModel
from darts.metrics import mape
from darts.utils.statistics import check_seasonality, plot_acf
from darts.datasets import AirPassengersDataset, IceCreamHeaterDataset
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.utils.likelihood_models import QuantileRegression

import warnings

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
import logging

# Disable all log records up to and including CRITICAL.
logging.disable(logging.CRITICAL)

# Project-local helpers; imported after the chdir above.
# NOTE(review): `pipeline` is used further down (pipeline.train / pipeline.predict) but is
# not imported in this cell -- presumably it also lives in `src`; confirm and import it here.
from src import common

# Dataset preprocessing

In [2]:
# Location of the JSON file describing the role of each dataset column.
column_types_loc = "#datasets/Colab_PowerConverter/column_types.json"
column_types = common.json_load(column_types_loc)

# Column roles looked up from the description file.
target = column_types["target"]
identifier = column_types["identifier"]
cat_cols = column_types["categorical"]
measurement_label = column_types["measurement_label"]

# NOTE(security): unpickling can execute arbitrary code -- only load files from a trusted source.
data = pd.read_pickle("#datasets/Colab_PowerConverter/dataset.pkl")

In [3]:
# Constants shared by the forecasting and plotting cells below.

# Number of samples drawn per probabilistic forecast.
num_samples = 200

# Default figure size; read as a global by the plotting helpers further down.
figsize = (9, 6)

# Quantile pairs for the outer and inner prediction bands, plus their legend labels.
lowest_q, low_q, high_q, highest_q = 0.01, 0.1, 0.9, 0.99
label_q_outer = f"{int(lowest_q * 100)}-{int(highest_q * 100)}th percentiles"
label_q_inner = f"{int(low_q * 100)}-{int(high_q * 100)}th percentiles"

In [5]:
# Load the ice-cream / heater dataset and display it as the cell output.
# NOTE(review): the next cell loads the same dataset again into `series_ice_heater`;
# `series` is not referenced anywhere else in the visible cells.
series = IceCreamHeaterDataset().load()
series

In [7]:
# Load the raw dataset
series_ice_heater = IceCreamHeaterDataset().load()

# Turn monthly sales into average daily sales per month: every component is
# divided by the number of days in the corresponding month.
days_per_month = TimeSeries.from_series(series_ice_heater.time_index.days_in_month)
converted_series = concatenate(
    [series_ice_heater[name] / days_per_month for name in ("ice cream", "heater")],
    axis=1,
)
# keep only the observations from 2010-01-01 onwards
converted_series = converted_series[pd.Timestamp("20100101") :]

# Train/validation cutoff: the validation part spans two forecast horizons.
forecast_horizon_ice = 12
training_cutoff_ice = converted_series.time_index[-(2 * forecast_horizon_ice)]

# Target: ice cream sales. The scaler is fitted on the training part only and
# then applied to the validation part and to the full series.
series_ice = converted_series["ice cream"]
train_ice, val_ice = series_ice.split_before(training_cutoff_ice)
transformer_ice = Scaler()
train_ice_transformed = transformer_ice.fit_transform(train_ice)
val_ice_transformed = transformer_ice.transform(val_ice)
series_ice_transformed = transformer_ice.transform(series_ice)

# Past covariates: heater sales. Again the scaler only sees the training part,
# but the whole covariate series is transformed with it.
covariates_heat = converted_series["heater"]
cov_heat_train, cov_heat_val = covariates_heat.split_before(training_cutoff_ice)
transformer_heat = Scaler()
transformer_heat.fit(cov_heat_train)
covariates_heat_transformed = transformer_heat.transform(covariates_heat)

In [None]:
# The model looks back over the last 3 years (36 monthly steps) of input data.
input_chunk_length_ice = 36

# There are no future covariates, so `add_encoders` supplies a cyclic month
# encoding on the "future" side instead.
tft_kwargs_ice = {
    "input_chunk_length": input_chunk_length_ice,
    "output_chunk_length": forecast_horizon_ice,
    "hidden_size": 32,
    "lstm_layers": 1,
    "batch_size": 16,
    "n_epochs": 300,
    "dropout": 0.1,
    "add_encoders": {"cyclic": {"future": ["month"]}},
    "add_relative_index": False,
    "optimizer_kwargs": {"lr": 1e-3},
    "random_state": 42,
}
my_model_ice = TFTModel(**tft_kwargs_ice)

# Train on the scaled training target, with the scaled heater sales as past covariates.
my_model_ice.fit(
    train_ice_transformed,
    past_covariates=covariates_heat_transformed,
    verbose=True,
)

In [None]:
def eval_model(model, n, actual_series, val_series, figsize=(9, 6)):
    """Forecast ``n`` steps with ``model`` and plot the result against the actual series.

    The forecast is drawn with the outer and inner quantile bands defined in the
    constants cell, and the MAPE between ``val_series`` and the forecast is shown
    in the figure title.

    Parameters
    ----------
    model
        Fitted forecasting model exposing ``predict(n=..., num_samples=...)``.
    n
        Number of steps to forecast after the end of the training series.
    actual_series
        Series plotted as ground truth (cut at the end of the forecast).
    val_series
        Series the forecast is scored against.
    figsize
        Size of the created matplotlib figure.
    """
    pred_series = model.predict(n=n, num_samples=num_samples)

    # ground truth, limited to the forecast range
    plt.figure(figsize=figsize)
    actual_series[: pred_series.end_time()].plot(label="actual")

    # forecast with outer and inner quantile bands
    pred_series.plot(
        low_quantile=lowest_q, high_quantile=highest_q, label=label_q_outer
    )
    pred_series.plot(low_quantile=low_q, high_quantile=high_q, label=label_q_inner)

    plt.title("MAPE: {:.2f}%".format(mape(val_series, pred_series)))
    plt.legend()


# Forecast 24 steps ahead and compare with the validation series.
n = 24
eval_model(
    model=my_model_ice,
    n=n,
    actual_series=series_ice_transformed[
        train_ice.end_time() - (2 * n - 1) * train_ice.freq :
    ],
    val_series=val_ice_transformed,
)

In [None]:
# Compute the backtest (historical forecast) predictions of the ice cream model.
# With last_points_only=False every forecast keeps its full horizon, so the stride is
# set to the horizon to obtain non-overlapping forecasts.
last_points_only = False
backtest_series_ice = my_model_ice.historical_forecasts(
    series_ice_transformed,
    # pass the covariates the model was fitted with explicitly instead of relying
    # on the model falling back to the ones stored during training
    past_covariates=covariates_heat_transformed,
    num_samples=num_samples,
    start=training_cutoff_ice,
    forecast_horizon=forecast_horizon_ice,
    stride=1 if last_points_only else forecast_horizon_ice,
    retrain=False,
    last_points_only=last_points_only,
    overlap_end=True,
    verbose=True,
)

# When a list of forecasts is returned, join them into one series.
backtest_series_ice = (
    concatenate(backtest_series_ice)
    if isinstance(backtest_series_ice, list)
    else backtest_series_ice
)

In [None]:
def eval_backtest(
    backtest_series, actual_series, horizon, start, transformer, figsize=(9, 6)
):
    """Plot a backtest against the actual series and print its MAPE.

    Parameters
    ----------
    backtest_series
        Probabilistic backtest forecast (in the scaled space).
    actual_series
        Ground-truth series to plot and score against (in the scaled space).
    horizon
        Forecast horizon, only used in the figure title.
    start
        Start of the backtest, only used in the figure title.
    transformer
        Fitted transformer whose ``inverse_transform`` is applied to both series
        before the MAPE is computed.
    figsize
        Size of the created matplotlib figure. Taken as a parameter so the function
        does not depend on a global ``figsize`` being defined.
    """
    plt.figure(figsize=figsize)
    actual_series.plot(label="actual")
    # outer and inner quantile bands of the backtest forecast
    backtest_series.plot(
        low_quantile=lowest_q, high_quantile=highest_q, label=label_q_outer
    )
    backtest_series.plot(low_quantile=low_q, high_quantile=high_q, label=label_q_inner)
    plt.legend()
    plt.title(f"Backtest, starting {start}, {horizon}-months horizon")
    # the error is computed on inverse-transformed values
    print(
        "MAPE: {:.2f}%".format(
            mape(
                transformer.inverse_transform(actual_series),
                transformer.inverse_transform(backtest_series),
            )
        )
    )


eval_backtest(
    backtest_series=backtest_series_ice,
    actual_series=series_ice_transformed[
        train_ice.start_time() - 2 * forecast_horizon_ice * train_ice.freq :
    ],
    horizon=forecast_horizon_ice,
    start=training_cutoff_ice,
    transformer=transformer_ice,
)

# Train model

In [3]:
# Kind of learning task handed to the training pipeline.
task = "multiclass"

# Input (column description) and output (saved models) locations.
column_types_loc = "#datasets/Colab_PowerConverter/column_types.json"
save_loc = "models.dill"

# Settings forwarded to the training pipeline.
parameters = dict(
    random_state=1,
    test_size_train=0.2,
    test_size_valid=0.5,
    scaler="Standard",
)

In [4]:
# Run the training pipeline; it returns the three scaled data splits and the models.
# NOTE(review): `pipeline` is not imported in the visible cells -- confirm where it comes from.
(
    data_train_scaled,
    data_valid_scaled,
    data_test_scaled,
    models,
) = pipeline.train(
    task,
    data,
    column_types_loc,
    parameters,
    save_loc=save_loc,
    verbose=True,
    datasets=True,
)

Size of dataset classes:
0     597599
5      40014
3      40001
6      40001
7      40001
8      40001
9      40001
10     40001
11     40001
13     40001
1      38971
2      38971
4       3166
12      1335
Name: fault, dtype: int64


epoch 1: 100%|██████████| 748/748 [00:12<00:00, 59.65it/s, loss=0.0901, metrics={'Accuracy': [0.0709, 0.3708, 0.0576, 0.0845, 0.0206, 0.574, 0.0347, 0.1306, 0.086, 0.0327, 0.1312, 0.066, 0.5213, 0.1361], 'Precision': 0.1665, 'F1': [0.0846, 0.3402, 0.0585, 0.0863, 0.0343, 0.4997, 0.0449, 0.1097, 0.078, 0.0404, 0.1053, 0.0765, 0.5182, 0.1002], 'Recall': [0.0709, 0.3708, 0.0576, 0.0845, 0.0206, 0.574, 0.0347, 0.1306, 0.086, 0.0327, 0.1312, 0.066, 0.5213, 0.1361]}]
valid: 100%|██████████| 1041/1041 [00:14<00:00, 73.08it/s, loss=0.04, metrics={'Accuracy': [0.0082, 0.8353, 0.0, 0.0377, 0.1893, 1.0, 0.0, 0.2442, 0.026, 0.003, 0.0655, 0.0115, 0.9104, 0.544], 'Precision': 0.1121, 'F1': [0.0163, 0.4081, 0.0, 0.0517, 0.0304, 0.8036, 0.0, 0.1444, 0.0165, 0.0028, 0.0396, 0.0218, 0.4909, 0.0896], 'Recall': [0.0082, 0.8353, 0.0, 0.0377, 0.1893, 1.0, 0.0, 0.2442, 0.026, 0.003, 0.0655, 0.0115, 0.9104, 0.544]}]



Epoch 00001: val_loss improved from inf to 0.04004


epoch 2: 100%|██████████| 748/748 [00:12<00:00, 60.35it/s, loss=0.0342, metrics={'Accuracy': [0.0544, 0.5636, 0.0509, 0.0885, 0.0363, 0.7402, 0.0194, 0.1295, 0.0924, 0.0334, 0.146, 0.078, 0.7631, 0.1842], 'Precision': 0.2145, 'F1': [0.0747, 0.5727, 0.0601, 0.0905, 0.0557, 0.5727, 0.0306, 0.1088, 0.085, 0.0429, 0.1125, 0.0889, 0.7579, 0.1137], 'Recall': [0.0544, 0.5636, 0.0509, 0.0885, 0.0363, 0.7402, 0.0194, 0.1295, 0.0924, 0.0334, 0.146, 0.078, 0.7631, 0.1842]}]
valid: 100%|██████████| 1041/1041 [00:13<00:00, 79.96it/s, loss=0.0272, metrics={'Accuracy': [0.0024, 0.9364, 0.0, 0.0457, 0.2334, 1.0, 0.0, 0.1922, 0.015, 0.007, 0.1128, 0.044, 1.0, 0.5215], 'Precision': 0.113, 'F1': [0.0048, 0.3698, 0.0, 0.0637, 0.0216, 0.8366, 0.0, 0.125, 0.0111, 0.0061, 0.0726, 0.0708, 0.4504, 0.0934], 'Recall': [0.0024, 0.9364, 0.0, 0.0457, 0.2334, 1.0, 0.0, 0.1922, 0.015, 0.007, 0.1128, 0.044, 1.0, 0.5215]}]



Epoch 00002: val_loss improved from 0.04004 to 0.02722


epoch 3: 100%|██████████| 748/748 [00:12<00:00, 61.77it/s, loss=0.027, metrics={'Accuracy': [0.0791, 0.6588, 0.0598, 0.0924, 0.0569, 0.8017, 0.0207, 0.1098, 0.1057, 0.0589, 0.1376, 0.1063, 0.849, 0.1214], 'Precision': 0.2316, 'F1': [0.1013, 0.6285, 0.0669, 0.0927, 0.0804, 0.5838, 0.0317, 0.103, 0.0961, 0.0669, 0.1075, 0.1075, 0.8227, 0.1002], 'Recall': [0.0791, 0.6588, 0.0598, 0.0924, 0.0569, 0.8017, 0.0207, 0.1098, 0.1057, 0.0589, 0.1376, 0.1063, 0.849, 0.1214]}]
valid: 100%|██████████| 1041/1041 [00:14<00:00, 72.86it/s, loss=0.0246, metrics={'Accuracy': [0.0456, 1.0, 0.059, 0.0047, 0.224, 1.0, 0.0, 0.139, 0.05, 0.0795, 0.1643, 0.1107, 1.0, 0.3315], 'Precision': 0.1402, 'F1': [0.0873, 0.3314, 0.0351, 0.0093, 0.0189, 0.8196, 0.0, 0.0972, 0.0384, 0.0484, 0.0954, 0.1344, 0.4882, 0.0962], 'Recall': [0.0456, 1.0, 0.059, 0.0047, 0.224, 1.0, 0.0, 0.139, 0.05, 0.0795, 0.1643, 0.1107, 1.0, 0.3315]}]



Epoch 00003: val_loss improved from 0.02722 to 0.02458


epoch 4: 100%|██████████| 748/748 [00:13<00:00, 54.20it/s, loss=0.0248, metrics={'Accuracy': [0.0917, 0.7435, 0.078, 0.1007, 0.0858, 0.8367, 0.0423, 0.0905, 0.1033, 0.0728, 0.1381, 0.1088, 0.879, 0.0769], 'Precision': 0.2472, 'F1': [0.1115, 0.6877, 0.0815, 0.1009, 0.1126, 0.6177, 0.0576, 0.0912, 0.0948, 0.076, 0.1088, 0.1031, 0.8542, 0.0857], 'Recall': [0.0917, 0.7435, 0.078, 0.1007, 0.0858, 0.8367, 0.0423, 0.0905, 0.1033, 0.0728, 0.1381, 0.1088, 0.879, 0.0769]}]
valid: 100%|██████████| 1041/1041 [00:15<00:00, 65.74it/s, loss=0.0236, metrics={'Accuracy': [0.1012, 1.0, 0.0929, 0.015, 0.3438, 1.0, 0.0022, 0.0815, 0.0512, 0.1233, 0.1855, 0.153, 1.0, 0.2093], 'Precision': 0.1715, 'F1': [0.1837, 0.3962, 0.0566, 0.0253, 0.018, 0.8283, 0.0044, 0.0851, 0.0361, 0.0606, 0.0974, 0.1092, 0.4214, 0.0998], 'Recall': [0.1012, 1.0, 0.0929, 0.015, 0.3438, 1.0, 0.0022, 0.0815, 0.0512, 0.1233, 0.1855, 0.153, 1.0, 0.2093]}]



Epoch 00004: val_loss improved from 0.02458 to 0.02363


epoch 5: 100%|██████████| 748/748 [00:13<00:00, 53.66it/s, loss=0.0238, metrics={'Accuracy': [0.1076, 0.8108, 0.083, 0.1031, 0.1029, 0.863, 0.0589, 0.0871, 0.102, 0.0793, 0.1402, 0.1161, 0.8949, 0.0486], 'Precision': 0.2583, 'F1': [0.1279, 0.7308, 0.0829, 0.1019, 0.1253, 0.6538, 0.073, 0.0924, 0.0956, 0.0792, 0.1131, 0.1104, 0.8763, 0.063], 'Recall': [0.1076, 0.8108, 0.083, 0.1031, 0.1029, 0.863, 0.0589, 0.0871, 0.102, 0.0793, 0.1402, 0.1161, 0.8949, 0.0486]}]
valid: 100%|██████████| 1041/1041 [00:17<00:00, 58.36it/s, loss=0.0232, metrics={'Accuracy': [0.1325, 1.0, 0.1183, 0.0223, 0.3407, 1.0, 0.0457, 0.052, 0.07, 0.1395, 0.1618, 0.1595, 1.0, 0.1252], 'Precision': 0.1887, 'F1': [0.2336, 0.3629, 0.0577, 0.0369, 0.0151, 0.8704, 0.0649, 0.0666, 0.0568, 0.0635, 0.107, 0.1213, 0.4621, 0.084], 'Recall': [0.1325, 1.0, 0.1183, 0.0223, 0.3407, 1.0, 0.0457, 0.052, 0.07, 0.1395, 0.1618, 0.1595, 1.0, 0.1252]}]



Epoch 00005: val_loss improved from 0.02363 to 0.02323
Model weights restored to best epoch: 5


predict: 100%|██████████| 1041/1041 [00:05<00:00, 179.81it/s]


Classification report:
              precision    recall  f1-score   support

           0       0.98      0.13      0.23     59760
           1       0.22      1.00      0.37      3897
           2       0.03      0.11      0.05      3897
           3       0.11      0.02      0.04      4000
           4       0.01      0.29      0.01       316
           5       0.77      1.00      0.87      4002
           6       0.11      0.05      0.07      4000
           7       0.09      0.05      0.07      4000
           8       0.05      0.08      0.06      4001
           9       0.04      0.14      0.07      4001
          10       0.08      0.16      0.11      4000
          11       0.11      0.18      0.14      4000
          12       0.29      0.98      0.45       133
          13       0.06      0.12      0.08      4000

    accuracy                           0.19    104007
   macro avg       0.21      0.31      0.19    104007
weighted avg       0.63      0.19      0.21    104007



In [None]:
# Predict on the full dataset, passing the column description and the location the
# models were saved to. NOTE(review): `pipeline` is not imported in the visible cells.
predicted = pipeline.predict(data, column_types_loc, save_loc)

# Outlier_model
* Placeholder for the `outlier_model` code, which must implement the `outlier_model.predict()` function

# Simulate stream of data
## Inverse transform test dataset for evaluation

In [23]:
# Columns excluded from the inverse transform: the categorical columns, the
# identifier and the target.
non_feature_cols = cat_cols + [identifier, target]
features_scaled = data_test_scaled.drop(columns=non_feature_cols)

# Undo the scaling. The original index is kept so that the identifier/target
# assignment below, which aligns on index labels, matches rows correctly even
# when `data_test_scaled` does not have a default 0..n-1 index.
data_test = pd.DataFrame(
    models["scaler"].inverse_transform(features_scaled),
    columns=features_scaled.columns.values,
    index=features_scaled.index,
)
data_test[[identifier, target]] = data_test_scaled[[identifier, target]].copy()

In [25]:
data_test.head()

Unnamed: 0,f_c,P,m_d,m_q,theta,P_ref,V_DC,V_phaseA,V_phaseB,V_phaseC,I_phaseA,I_phaseB,I_phaseC,sample_id,fault
0,50.000254,2494.761546,311.0,0.0,777.396328,2500.0,800.0,2.869625,-270.757247,267.887622,-0.150869,-4.556102,4.70697,1069662,0
1,50.000006,2499.877565,311.0,0.0,1172.238561,2500.0,800.0,-259.87004,-18.022479,277.892519,-228.781908,407.314521,-178.532613,614815,7
2,50.0,2499.999997,311.0,0.0,1424.037213,2500.0,800.0,-153.983148,-157.011936,310.995084,-357.403522,349.748723,7.654799,630845,7
3,50.0,2499.999999,311.0,0.0,649.626528,2500.0,800.0,-268.948563,269.717592,-0.769029,-4.41566,4.864604,-0.448944,741526,0
4,50.12434,0.0,311.0,0.0,1131.328035,2500.0,800.0,0.0,0.0,0.0,290.215924,-60.868123,-229.347801,452148,5


* NOTE: reloading the whole DL model together with its preprocessors on every iteration is inefficient; this is only initial code intended to be shared with Alex.

In [None]:
# Simulate a stream by feeding the test set one row at a time.
# While no fault is active, the outlier model decides whether one starts; during
# a fault, the classifier is queried until it predicts class 0, which ends it.
# NOTE(review): `outlier_model` is not defined in the visible cells (see the
# "Outlier_model" section) and `pipeline` is not imported -- both must exist first.
fault = False
for _, row in data_test.iterrows():
    if not fault:
        fault = outlier_model.predict(row)
        # announce a fault only when the detector actually reported one
        if fault:
            print("fault start")
    else:
        predicted = pipeline.predict(row, column_types_loc, save_loc)
        if predicted == 0:
            fault = False
            print("fault ended")
        else:
            print("fault continues")