# Setup

In [23]:
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
import plotly.graph_objects as go
import plotly.express as px

# Load data
Numenta Anomaly Benchmark (NAB) dataset
- artificial time series data
- with anomalous periods labelled
- ordered, timestamped, single-valued
- one value every 5 minutes for 14 days

Dataframes
- df_small_noise - for training
- df_daily_jumpsup - for testing

In [59]:
# Base location of the raw NAB CSV files on GitHub.
master_url_root = "https://raw.githubusercontent.com/numenta/NAB/master/data/"

# Both files are read the same way: the "timestamp" column becomes a parsed
# datetime index.
nab_read_options = dict(index_col="timestamp", parse_dates=True)

# Series without anomalies (used for training).
df_small_noise_url_suffix = "artificialNoAnomaly/art_daily_small_noise.csv"
df_small_noise_url = f"{master_url_root}{df_small_noise_url_suffix}"
df_small_noise = pd.read_csv(df_small_noise_url, **nab_read_options)

# Series with anomalies (used for testing).
df_daily_jumpsup_url_suffix = "artificialWithAnomaly/art_daily_jumpsup.csv"
df_daily_jumpsup_url = f"{master_url_root}{df_daily_jumpsup_url_suffix}"
df_daily_jumpsup = pd.read_csv(df_daily_jumpsup_url, **nab_read_options)

In [60]:
# Bind `df` to the same DataFrame object as df_small_noise (an alias, not a copy),
# so changes made through either name are visible through both.
df = df_small_noise

In [3]:
# Preview the first rows of the training frame, then of the test frame.
display(
    df_small_noise.head(),
    df_daily_jumpsup.head(),
)

Unnamed: 0_level_0,value
timestamp,Unnamed: 1_level_1
2014-04-01 00:00:00,18.324919
2014-04-01 00:05:00,21.970327
2014-04-01 00:10:00,18.624806
2014-04-01 00:15:00,21.953684
2014-04-01 00:20:00,21.90912


Unnamed: 0_level_0,value
timestamp,Unnamed: 1_level_1
2014-04-01 00:00:00,19.761252
2014-04-01 00:05:00,20.500833
2014-04-01 00:10:00,19.961641
2014-04-01 00:15:00,21.490266
2014-04-01 00:20:00,20.187739


# Visualise data & rolling statistics

In [48]:
# Get rolling mean & standard deviation
# Window length: 24*60/5 = 288 five-minute samples, i.e. one day of data.
ROLLING_WINDOW = 24 * 60 // 5

# Apply the same statistics to both frames. The columns are added in place
# because the plotting cell reads 'rolmean' / 'rolstd' from these frames.
# With an integer window, pandas requires a full window by default, so the
# first ROLLING_WINDOW - 1 rows of each new column are NaN.
for frame in (df_small_noise, df_daily_jumpsup):
    rolling_values = frame['value'].rolling(window=ROLLING_WINDOW)
    frame['rolmean'] = rolling_values.mean()
    frame['rolstd'] = rolling_values.std()

In [49]:
# Columns drawn on each chart: the raw series plus its rolling statistics.
plotted_columns = ("value", "rolmean", "rolstd")

# Anomaly-free series.
fig1 = px.line(
    df_small_noise,
    y=list(plotted_columns),
    title="Timeseries without Anomalies",
)
fig1.show()

# Series containing anomalies.
fig2 = px.line(
    df_daily_jumpsup,
    y=list(plotted_columns),
    title="Timeseries with Anomalies",
)
fig2.show()


# Prepare train data
- 24*60/5 = 288 timesteps per day
- 288*14 = 4032 data points in total

In [73]:
# Standardise the training series (z-score): subtract its mean, divide by its
# standard deviation. The two statistics are kept under their own names.
raw_train_values = df_small_noise['value']
training_mean = raw_train_values.mean()
training_std = raw_train_values.std()
df_train = raw_train_values.sub(training_mean).div(training_std).to_frame()
print(len(df_train))

4032


In [79]:
# Create sequences - windows of size TIME_STEPS
TIME_STEPS = 288  # 24*60/5 five-minute samples = one day per window

def create_sequences(values, time_steps=TIME_STEPS):
    """Return every contiguous window of ``time_steps`` rows from ``values``.

    Window ``i`` is ``values[i : i + time_steps]``; consecutive windows are
    shifted by one row, so ``len(values) - time_steps + 1`` windows are
    produced.

    Parameters
    ----------
    values : array-like
        Sequence to window along its first axis.
    time_steps : int, optional
        Number of rows per window. Defaults to ``TIME_STEPS``.

    Returns
    -------
    numpy.ndarray
        Stacked windows with shape
        ``(len(values) - time_steps + 1, time_steps, *values.shape[1:])``.

    Raises
    ------
    ValueError
        If ``time_steps`` is less than 1, or ``values`` has fewer than
        ``time_steps`` rows.
    """
    values = np.asarray(values)
    if time_steps < 1:
        raise ValueError(f"time_steps must be >= 1, got {time_steps}")

    n_windows = len(values) - time_steps + 1
    if n_windows < 1:
        # Without this guard np.stack([]) fails with a message that says
        # nothing about the window size being the problem.
        raise ValueError(
            f"need at least time_steps={time_steps} rows to build a window, "
            f"got {len(values)}"
        )

    return np.stack(
        [values[start : start + time_steps] for start in range(n_windows)]
    )

# Window the normalised training values; time_steps is left at its default
# (TIME_STEPS), so each window holds TIME_STEPS consecutive rows of df_train.
x_train = create_sequences(df_train.values)

In [83]:
# Show the windowed array, then the normalised frame it was built from.
display(x_train, df_train)

array([[[-0.85882857],
        [-0.72899302],
        [-0.84814772],
        ...,
        [-0.86453747],
        [-0.81250829],
        [-0.79671155]],

       [[-0.72899302],
        [-0.84814772],
        [-0.72958579],
        ...,
        [-0.81250829],
        [-0.79671155],
        [-0.78767946]],

       [[-0.84814772],
        [-0.72958579],
        [-0.731173  ],
        ...,
        [-0.79671155],
        [-0.78767946],
        [-0.73706287]],

       ...,

       [[-0.8226548 ],
        [-0.86660975],
        [-0.82181363],
        ...,
        [-0.79164661],
        [-0.78025492],
        [-0.83364049]],

       [[-0.86660975],
        [-0.82181363],
        [-0.75595835],
        ...,
        [-0.78025492],
        [-0.83364049],
        [-0.73510213]],

       [[-0.82181363],
        [-0.75595835],
        [-0.74256753],
        ...,
        [-0.83364049],
        [-0.73510213],
        [-0.8460886 ]]])

Unnamed: 0_level_0,value
timestamp,Unnamed: 1_level_1
2014-04-01 00:00:00,-0.858829
2014-04-01 00:05:00,-0.728993
2014-04-01 00:10:00,-0.848148
2014-04-01 00:15:00,-0.729586
2014-04-01 00:20:00,-0.731173
...,...
2014-04-14 23:35:00,-0.791647
2014-04-14 23:40:00,-0.780255
2014-04-14 23:45:00,-0.833640
2014-04-14 23:50:00,-0.735102
