In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import pandas as pd
import os
import PIL.Image as Image
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import importlib
from plot_utils.MNIST_plot_utils import scale_to_unit_interval, save_ten_images, plot_ten_images, tile_raster_images
from plot_utils.ts_plot_utils import plot_ts, plot_ts_recon, save_ts, save_ts_recon, save_ts_recon_if
from plot_utils.heatmap import heatmap, annotate_heatmap

# One seed for both NumPy's and TensorFlow's global RNGs, so random draws
# repeat on a fresh-kernel run. `seed` is also reused as `random_state` below.
seed = 30
tf.random.set_seed(seed)
np.random.seed(seed)

In [2]:
def subsequences(ts, window):
    """Return every length-``window`` sliding window of a 1-D series.

    Row ``i`` of the result equals ``ts[i:i + window]``, so consecutive rows
    overlap by ``window - 1`` samples.

    Parameters
    ----------
    ts : array-like
        One-dimensional series.
    window : int
        Window length, between 1 and ``ts.size`` inclusive.

    Returns
    -------
    numpy.ndarray
        Read-only view of shape ``(ts.size - window + 1, window)`` that shares
        memory with ``ts``. Call ``.copy()`` on it if a writable array is
        needed.

    Raises
    ------
    ValueError
        If ``ts`` is not one-dimensional or ``window`` is out of range.
    """
    ts = np.asarray(ts)
    if ts.ndim != 1:
        raise ValueError("ts must be one-dimensional, got %d dimensions" % ts.ndim)
    if not 1 <= window <= ts.size:
        raise ValueError(
            "window must be between 1 and ts.size (%d), got %r" % (ts.size, window)
        )

    shape = (ts.size - window + 1, window)
    # Reusing the element stride for both axes makes each row start one sample
    # after the previous one.
    strides = ts.strides * 2
    # The rows alias overlapping memory, so a write through the view would
    # change several windows (and `ts`) at once; hand out a read-only view.
    return np.lib.stride_tricks.as_strided(
        ts, shape=shape, strides=strides, writeable=False
    )

# L21 Experiment on Time series
## Isolation Forest

In [3]:
# Read the machine-temperature series and keep only the first row seen for
# each timestamp.
csv_path = os.path.join('data', 'realKnownCause', 'machine_temperature_system_failure.csv')
df = pd.read_csv(csv_path, delimiter=',', decimal='.').drop_duplicates(subset='timestamp', keep='first')

# Drop the first 33 and the last 186 rows from both columns.
# NOTE(review): this leaves 22464 samples (= 78 * 288); presumably chosen to
# keep whole days at 12*24 samples per day -- confirm.
trim = slice(33, -186)
ts_timestamps = df.iloc[:, 0].values[trim]
ts_values = np.array(df.iloc[:, 1].values[trim])
print(ts_values.shape)
#ts_values_daily = ts_values.copy().reshape((int(ts_values.shape[0]/(12*24)), 12*24, 1))

# One row per overlapping window of `timesteps` consecutive readings.
timesteps = 144
ts_data = subsequences(ts_values, timesteps)

# MinMaxScaler rescales every column (i.e. every offset inside the window)
# to [0, 1] independently.
scaler = MinMaxScaler()
ts_data_scaled = scaler.fit_transform(ts_data)

# Work on a row-shuffled copy so the scaled windows keep their time order.
ts_train_dense = ts_data_scaled.copy()
np.random.shuffle(ts_train_dense)

(22464,)


In [4]:
# Expected share of anomalous windows: passed to IsolationForest as
# `contamination` and embedded in the names of the saved anomaly plots.
fraction = 0.01

In [5]:
# Grid of contamination levels and one zero-initialised score array per
# metric. None of these is read inside this cell.
contaminations = np.arange(start=0.01, stop=0.5, step=0.01)
contaminations_len = contaminations.shape[0]
if_accuracies, if_precisions, if_recalls, if_f1_scores = (
    np.zeros(contaminations_len) for _ in range(4)
)

# Fit the forest on the shuffled windows and score those same windows.
isol_forest = IsolationForest(
    n_estimators=500,
    max_features=5,
    contamination=fraction,
    random_state=seed,
)
if_anomaly_detected = isol_forest.fit(ts_train_dense).predict(ts_train_dense)
# predict() labels inliers +1 and outliers -1; recode to 0 / 1 flags.
if_anomaly_detected = (if_anomaly_detected == -1).astype(int)

# Number of windows flagged as anomalous.
print(if_anomaly_detected.sum())

# Row indices of the flagged windows as a (k, 1) array, in random order.
anomalies = np.argwhere(if_anomaly_detected)
np.random.shuffle(anomalies)
folder = os.path.join('.', 'l21_experiment_ts')

224


In [6]:
# Save a plot for at most ten of the detected anomalous windows.
# Create the output directory first so the cell also works on a fresh
# checkout where it does not exist yet.
os.makedirs(folder, exist_ok=True)

# `anomalies` has shape (k, 1); the slice yields up to ten row indices and is
# simply empty when nothing was flagged.
for ind in anomalies[:10, 0]:
    # NOTE(review): `ind` indexes the shuffled `ts_train_dense`, so the
    # position in the file name is not the window's offset in the original
    # series -- confirm this is intended.
    out_name = 'anom_if' + str(fraction) + 'pos' + str(ind + 1) + '.jpg'
    save_ts_recon_if(ts_train_dense[ind], ind + 1, filename=os.path.join(folder, out_name))