# Setup

In [23]:
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
import plotly.graph_objects as go
import plotly.express as px

# Load data
Numenta Anomaly Benchmark (NAB) dataset
- artificial timeseries data
- with anomalous periods labelled
- ordered, timestamped, single-valued
- one value every 5 minutes for 14 days

Dataframes
- df_small_noise - for training
- df_daily_jumpsup - for testing

In [2]:
# Root of the raw CSV files in the NAB GitHub repository (master branch).
master_url_root = "https://raw.githubusercontent.com/numenta/NAB/master/data/"

# Series without anomalies (used for training).
df_small_noise_url_suffix = "artificialNoAnomaly/art_daily_small_noise.csv"
df_small_noise_url = f"{master_url_root}{df_small_noise_url_suffix}"

# Series with anomalies (used for testing).
df_daily_jumpsup_url_suffix = "artificialWithAnomaly/art_daily_jumpsup.csv"
df_daily_jumpsup_url = f"{master_url_root}{df_daily_jumpsup_url_suffix}"

# Both files are read straight from the URL, indexed by their `timestamp`
# column; parse_dates=True asks pandas to parse that index as dates.
df_small_noise = pd.read_csv(
    df_small_noise_url, index_col="timestamp", parse_dates=True
)
df_daily_jumpsup = pd.read_csv(
    df_daily_jumpsup_url, index_col="timestamp", parse_dates=True
)

In [3]:
# Rich-display the first rows of the training frame, then of the test frame.
display(df_small_noise.head(), df_daily_jumpsup.head())

Unnamed: 0_level_0,value
timestamp,Unnamed: 1_level_1
2014-04-01 00:00:00,18.324919
2014-04-01 00:05:00,21.970327
2014-04-01 00:10:00,18.624806
2014-04-01 00:15:00,21.953684
2014-04-01 00:20:00,21.90912


Unnamed: 0_level_0,value
timestamp,Unnamed: 1_level_1
2014-04-01 00:00:00,19.761252
2014-04-01 00:05:00,20.500833
2014-04-01 00:10:00,19.961641
2014-04-01 00:15:00,21.490266
2014-04-01 00:20:00,20.187739


# Visualise data & rolling statistics

In [19]:
# Attach the rolling mean and rolling standard deviation of `value` to each
# frame as new `rolmean` / `rolstd` columns (the frames are modified in place).
# Window sizes differ per frame: 18 samples for the anomaly-free series and
# 36 samples for the series with anomalies (one sample every 5 minutes).
for frame, window in ((df_small_noise, 18), (df_daily_jumpsup, 36)):
    rolling_values = frame['value'].rolling(window=window)
    frame['rolmean'] = rolling_values.mean()
    frame['rolstd'] = rolling_values.std()

In [38]:
# Inspect the training frame after the rolling columns were added. The first
# rows of rolmean/rolstd show as NaN in the output below — presumably because
# the 18-sample rolling window has not filled yet.
df_small_noise

Unnamed: 0_level_0,value,rolmean,rolstd
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-04-01 00:00:00,18.324919,,
2014-04-01 00:05:00,21.970327,,
2014-04-01 00:10:00,18.624806,,
2014-04-01 00:15:00,21.953684,,
2014-04-01 00:20:00,21.909120,,
...,...,...,...
2014-04-14 23:35:00,20.211195,19.746269,19.746269
2014-04-14 23:40:00,20.531041,19.876807,19.876807
2014-04-14 23:45:00,19.032127,19.776356,19.776356
2014-04-14 23:50:00,21.798801,19.886125,19.886125


In [37]:
# Training series: raw values overlaid with their rolling mean and rolling std.
fig1 = px.line(
    data_frame=df_small_noise,
    y=["value", "rolmean", "rolstd"],
    title="Timeseries without Anomalies",
)
fig1.show()

# Test series: raw values only.
fig2 = px.line(
    data_frame=df_daily_jumpsup,
    y="value",
    title="Timeseries with Anomalies",
)
fig2.show()


# Prepare train data
- 24*60/5 = 288 timesteps per day
- 288*14 = 4032 data points in total

In [12]:
# Normalise the training signal with its own mean & standard deviation.
#
# Only the raw `value` column is selected. By the time this cell runs in
# top-to-bottom order, df_small_noise also carries the `rolmean` / `rolstd`
# columns added for visualisation; normalising the whole frame would turn the
# single-valued training series into a three-column frame whose leading rows
# are NaN. Selecting with [["value"]] keeps df_train a one-column DataFrame and
# keeps training_mean / training_std as one-element Series indexed by "value".
train_values = df_small_noise[["value"]]
training_mean = train_values.mean()
training_std = train_values.std()
df_train = (train_values - training_mean) / training_std
print(len(df_train))

4032
