# Neural Networks

In [1]:
import os
# Configure TensorFlow's C++ log level before TensorFlow is imported further down.
# setdefault leaves any value already present in the environment untouched.
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")  # Report only TF errors by default

'2'

In [2]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn import metrics
import seaborn as sns
from scipy import stats

## Data

In [3]:

# Load the raw dataset from the CSV file; the relative path is resolved
# against the notebook's current working directory.
data = pd.read_csv("BC-Data-Set.csv")

In [4]:
# Clean the raw frame in a single pipeline:
#   1. parse the 'date' column into datetime values,
#   2. discard every row that contains a missing value,
#   3. promote 'date' to the index of the DataFrame.
data = (
    data
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .dropna()
    .set_index('date')
)

In [5]:
# Seed passed as `random_state` to the train/validation/test splits below so
# that the same rows end up in each split on every run.
seed = 42

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
# Hold out 15% of the rows as the test set. The target column 'BC' is removed
# from the feature matrix and used as the regression target.
x_train, x_test, y_train, y_test = train_test_split(
    data.drop(columns=['BC']), data['BC'],
    test_size=0.15, random_state=seed, shuffle=True,
)

# Carve a validation set (15% of the remaining rows) out of the training data.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train,
    test_size=0.15, random_state=seed, shuffle=True,
)

# Fit the standardization on the training features only, then apply that same
# transformation to all three splits.
scaler = StandardScaler().fit(x_train)
x_train, x_val, x_test = (
    scaler.transform(split) for split in (x_train, x_val, x_test)
)

In [8]:
# TODO: split, preprocess, make sequences

In [9]:
# Inspect the standardized training features (the array returned by the scaler)
x_train

array([[-0.73291434,  1.55477006,  3.60894021, ...,  1.95560752,
        -0.89521109,  0.69813568],
       [-0.85503739, -0.08410553, -0.35847915, ..., -0.37050872,
        -0.48279646,  0.2517087 ],
       [-0.38903519, -0.40824082, -0.54120237, ...,  0.15400768,
        -1.71157723, -1.90387708],
       ...,
       [-0.89772   , -0.72621189, -0.91761219, ..., -0.39331378,
         0.94389125, -0.81840302],
       [-0.48238073,  0.10302261, -0.19784883, ...,  1.11182025,
         1.35890992, -0.49238002],
       [-0.7281987 , -0.953701  , -1.27475302, ..., -0.48453403,
        -1.55972922, -2.74749276]])

In [10]:
# (number of training rows, number of features)
x_train.shape

(3050, 12)

In [11]:
import tensorflow as tf
from tensorflow import keras

## Neural Network (non-LSTM)

In [21]:
# Re-seed Python, NumPy and TensorFlow right before the model is created so
# that the initial weights are repeatable every time this cell is run.
# `seed` is the same value used for the train/validation/test splits.
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

HIDDEN_DIM = 8  # number of units in each hidden Dense layer

# Fully connected regressor: one feature vector per row -> two ReLU hidden
# layers -> a single linear output unit.
inputs = tf.keras.layers.Input(shape=[x_train.shape[-1]])

layer = keras.layers.Dense(HIDDEN_DIM, activation='relu')(inputs)
layer = keras.layers.Dense(HIDDEN_DIM, activation='relu')(layer)

predictions = keras.layers.Dense(1, activation=None)(layer)

model = tf.keras.Model(inputs=inputs, outputs=predictions)
# Mean squared error is both the optimized loss and the reported metric.
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=1e-3),
    loss=tf.losses.mean_squared_error,
    metrics=[tf.metrics.mean_squared_error],
)


In [22]:
# Training hyperparameters
batch_size = 32
epochs = 20

# Train on the training split and evaluate on the validation split after
# every epoch; the object returned by fit() is kept in `logs`.
logs = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/20

## LSTM

In [14]:
# TODO
def build_sequences(df, window=200, stride=200,
                    feature_cols=('x_axis', 'y_axis', 'z_axis'), verbose=True):
    """Cut every recording in ``df`` into fixed-length, labelled windows.

    Rows are grouped by the ``'id'`` column. For each group the feature
    columns are zero-padded at the end up to the next multiple of ``window``
    and then sliced into windows of ``window`` rows, advancing ``stride`` rows
    between consecutive windows. Every window receives the first value of the
    group's ``'activity'`` column as its label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'id'``, ``'activity'`` and every column
        named in ``feature_cols``.
    window : int, default 200
        Number of rows in each window.
    stride : int, default 200
        Step between the starts of consecutive windows; ``window`` must be a
        multiple of it.
    feature_cols : sequence of str, default ('x_axis', 'y_axis', 'z_axis')
        Columns copied into the windows.
    verbose : bool, default True
        When true, print each id together with its label while processing.

    Returns
    -------
    dataset : numpy.ndarray
        Stacked windows; shape ``(n_windows, window, len(feature_cols))``
        when at least one window was produced.
    labels : numpy.ndarray
        One label per window, in the same order as ``dataset``.

    Raises
    ------
    AssertionError
        If ``window`` is not a multiple of ``stride``.
    """
    # Sanity check to avoid runtime errors
    assert window % stride == 0, "window must be a multiple of stride"

    feature_cols = list(feature_cols)
    dataset = []
    labels = []
    for seq_id in df['id'].unique():
        # Filter the rows of this recording once and reuse the result
        group = df[df['id'] == seq_id]
        # Take only meaningful features
        values = group[feature_cols].values
        # Save the label
        label = group['activity'].values[0]
        if verbose:
            print(str(seq_id), "  ", str(label))
        # Pad only up to the next multiple of `window`. The modulo of the
        # negated length is 0 when the recording is already aligned, so no
        # spurious all-zero window is appended in that case.
        padding_len = (-len(values)) % window
        padding = np.zeros((padding_len, values.shape[1]), dtype='float64')
        values = np.concatenate((values, padding))
        # Build feature windows with their corresponding labels
        for start in range(0, len(values) - window + 1, stride):
            dataset.append(values[start:start + window])
            labels.append(label)
    return np.array(dataset), np.array(labels)

In [15]:
# Re-seed Python, NumPy and TensorFlow right before the model is created so
# that the initial weights are repeatable every time this cell is run.
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

SEQUENCE_LENGTH = 8               # timesteps per input sequence
SEQUENCE_DIM = x_train.shape[-1]  # features per timestep (width of x_train)
RNN_CELL_DIM = 8                  # units in the LSTM layer
HIDDEN_DIM = 8                    # units in the hidden Dense layer

sequences = tf.keras.layers.Input(shape=[SEQUENCE_LENGTH, SEQUENCE_DIM])

# NOTE(review): return_sequences=True keeps the LSTM output for every timestep,
# so the Dense layers below yield one prediction per timestep rather than one
# per sequence -- confirm that the targets are shaped accordingly.
layer = keras.layers.LSTM(RNN_CELL_DIM, return_sequences=True)(sequences)

layer = keras.layers.Dense(HIDDEN_DIM, activation='relu')(layer)

predictions = keras.layers.Dense(1, activation=None)(layer)

model = tf.keras.Model(inputs=sequences, outputs=predictions)
# Mean squared error is both the optimized loss and the reported metric.
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=1e-3),
    loss=tf.losses.mean_squared_error,
    metrics=[tf.metrics.mean_squared_error],
)
