# Neural Networks

In [None]:
import os

# Give TF_CPP_MIN_LOG_LEVEL a default of "2"; setdefault only writes the value
# when the variable is not already present, so an externally chosen level wins.
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")  # Report only TF errors by default

In [None]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn import metrics
import seaborn as sns
from scipy import stats

In [None]:
import tensorflow as tf
from tensorflow import keras
# Expose an empty list of GPU devices to TensorFlow, i.e. disable GPU use.
tf.config.set_visible_devices(devices=[], device_type='GPU')

## Data

In [None]:

# Read the dataset from "BC-Data-Set.csv" (path relative to the working directory).
data = pd.read_csv(filepath_or_buffer="BC-Data-Set.csv")

In [None]:
# One pipeline: parse 'date' into datetimes, discard every row that contains a
# missing value, then promote the parsed dates to the DataFrame index.
data = (
    data
    .assign(date=pd.to_datetime(data['date']))
    .dropna()
    .set_index('date')
)

In [None]:
# Single seed shared by every source of randomness in the notebook.
seed = 42

# Seed the Python, NumPy and TensorFlow generators as well (not only the
# scikit-learn split that receives `seed` explicitly), so that weight
# initialisation, dropout and random sampling repeat across fresh runs.
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Shuffled 85/15 hold-out. The 'BC' target is removed from the feature frame
# up front, so both feature partitions come out without it.
x_train, x_test, y_train, y_test = train_test_split(
    data.drop(columns=['BC']),
    data['BC'],
    test_size=0.15,
    random_state=seed,
    shuffle=True,
)

# Standardise the features; the scaler is fitted on the training rows only and
# then applied unchanged to both partitions.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# Shapes of the four partitions, in the order x_train, x_test, y_train, y_test.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

In [None]:
# TODO: split, preprocess, make sequences

In [None]:
# Show the DataFrame as this cell's output.
data

In [None]:
# Report the container type and the per-column summary.
# The type is printed explicitly: a bare expression that is not the last
# statement of a cell is evaluated and then silently discarded.
print(type(data))
data.info()

In [None]:
# Number of missing entries in each column.
data.isna().sum(axis=0)

In [None]:
# Positional hold-out: the final `test_size` rows become the test partition.
# NOTE(review): this is only a chronological split if the rows are already in
# date order - confirm, or sort the index beforehand.
test_size = 300
X_train_raw = data.iloc[:-test_size]
X_test_raw = data.iloc[-test_size:]
print(X_train_raw.shape, X_test_raw.shape)

# Min-max normalise every column (features and the BC label alike) with
# statistics taken from the training rows only, so the test rows do not
# influence the scaling.
X_min = X_train_raw.min()
X_max = X_train_raw.max()
# A column that is constant over the training rows has a zero range; divide by
# 1 instead so it maps to 0 rather than producing NaN/inf.
X_range = (X_max - X_min).replace(0, 1)

X_train_raw = (X_train_raw - X_min) / X_range
X_test_raw = (X_test_raw - X_min) / X_range

# Show where the split falls on the normalised BC series.
plt.figure(figsize=(17,5))
plt.plot(X_train_raw.BC, label='Train (BC)')
plt.plot(X_test_raw.BC, label='Test (BC)')
plt.title('Train-Test Split')
plt.xlabel('date')
plt.ylabel('BC (min-max normalised)')
plt.legend()
plt.show()

In [None]:
# Shapes of the training inputs and training targets.
tuple(part.shape for part in (x_train, y_train))

## Neural Network (non-LSTM)

In [None]:
# Fully connected regressor: three ReLU blocks of shrinking width, each
# followed by 40% dropout, ending in a single linear output unit.
HIDDEN_DIM = 512
inputs = tf.keras.layers.Input(shape=[x_train.shape[-1]])

layer = inputs
for units in (HIDDEN_DIM, HIDDEN_DIM // 2, HIDDEN_DIM // 3):
    layer = keras.layers.Dense(units, activation='relu')(layer)
    layer = keras.layers.Dropout(0.4)(layer)
predictions = keras.layers.Dense(1, activation='linear')(layer)

# Mean squared error is used both as the training loss and as the reported metric.
model = tf.keras.Model(inputs=inputs, outputs=predictions)
model.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=1e-3),
    loss=tf.losses.mean_squared_error,
    metrics=[tf.metrics.mean_squared_error],
)



In [None]:
# Fit the dense regressor. 10% of the training rows are held out for
# validation; training stops once val_loss has gone 50 epochs without
# improving, and the best weights seen are restored.
batch_size = 64
epochs = 2000
logs = model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            mode='min',
            patience=50,
            restore_best_weights=True,
        ),
    ],
)

## LSTM

In [None]:
# Sequence settings: windows of 12 rows, advanced 12 rows at a time (so
# consecutive windows do not overlap), with 'BC' as the label column.
window = stride = 12
target_labels = 'BC'

In [None]:
import numpy as np

def build_sequences(df, target_labels=['BC'], window=200, stride=200):
    """Cut a DataFrame into fixed-length windows of inputs and labels.

    When ``len(df)`` is not a multiple of ``window``, rows of zeros are
    prepended (to both inputs and labels) until it is.

    Args:
        df: DataFrame whose columns (all of them) form the input channels.
        target_labels: column name, or list of column names, used as labels.
            A single name yields labels without a channel axis; a list yields
            one trailing channel per listed column.
        window: number of rows in every sequence.
        stride: step, in rows, between the starts of consecutive sequences.
            Must divide ``window``.

    Returns:
        ``(dataset, labels)`` as NumPy arrays. ``dataset`` has shape
        ``(n_sequences, window, n_columns)``; ``labels`` has shape
        ``(n_sequences, window)`` or ``(n_sequences, window, n_targets)``.

    Raises:
        AssertionError: if ``window`` is not a multiple of ``stride``.
    """
    # Sanity check to avoid runtime errors
    assert window % stride == 0

    features = df.values
    targets = df[target_labels].values

    remainder = len(df) % window
    if remainder != 0:
        # Left-pad with zeros so the row count becomes a multiple of `window`.
        padding_len = window - remainder
        feature_pad = np.zeros((padding_len,) + features.shape[1:], dtype='float32')
        features = np.concatenate((feature_pad, features))
        # The label padding takes its trailing dimensions from the labels
        # themselves, so it matches both the 1-D (single name) and the 2-D
        # (list of names) case; a fixed 1-D pad breaks the list case.
        target_pad = np.zeros((padding_len,) + targets.shape[1:], dtype='float32')
        targets = np.concatenate((target_pad, targets))
        assert len(features) % window == 0

    # Build sequences and labels
    starts = range(0, len(features) - window + 1, stride)
    dataset = [features[start:start + window] for start in starts]
    labels = [targets[start:start + window] for start in starts]

    return np.array(dataset), np.array(labels)


In [None]:
# Window the positional, min-max-normalised partitions. Training and test
# sequences must come from different rows; building both from the full `data`
# frame makes the two sets identical.
# NOTE: this rebinds y_train / y_test, which until here held the targets of the
# shuffled tabular split.
X_train, y_train = build_sequences(X_train_raw, target_labels, window, stride)
X_test, y_test = build_sequences(X_test_raw, target_labels, window, stride)
# Printed explicitly because more statements follow in this cell, so a bare
# expression here would not be displayed.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

def inspect_multivariate(X, y, columns, telescope=None, idx=None):
    """Plot one input window, channel by channel, together with its labels.

    For every entry ``i`` of ``columns`` a subplot shows channel ``i`` of the
    selected window of ``X`` as a line, followed on the x-axis by channel ``i``
    of the matching labels as orange markers.

    Args:
        X: array indexed as ``X[sample, step, channel]``.
        y: labels indexed as ``y[sample, step, channel]``; a 2-D array is
            treated as having a single channel.
        columns: subplot titles, one per plotted channel; a single string is
            treated as a one-element list.
        telescope: number of label steps drawn after the window. Defaults to
            the number of steps in ``y``.
        idx: sample to plot; a random one is drawn when omitted.

    NOTE(review): channel ``i`` of ``X`` is paired with channel ``i`` of ``y``
    purely by position - the caller must make sure they describe the same
    variable. The y-axis is fixed to [0, 1], which presumably expects
    min-max-normalised data.
    """
    if isinstance(columns, str):
        columns = [columns]
    y = np.asarray(y)
    if y.ndim == 2:
        y = y[:, :, np.newaxis]
    if telescope is None:
        telescope = y.shape[1]
    if idx is None:
        idx = np.random.randint(0, len(X))

    # Window length comes from the array passed in, not from a notebook global.
    window_len = len(X[0])

    # squeeze=False keeps `axs` two-dimensional even when there is one subplot.
    figs, axs = plt.subplots(len(columns), 1, sharex=True, squeeze=False, figsize=(17,17))
    for i, col in enumerate(columns):
        ax = axs[i, 0]
        ax.plot(np.arange(window_len), X[idx,:,i])
        ax.scatter(np.arange(window_len, window_len+telescope), y[idx,:,i], color='orange')
        ax.set_title(col)
        ax.set_ylim(0,1)
    plt.show()

# `telescope` is a required argument of inspect_multivariate; the labels
# produced by build_sequences span `window` steps per sample.
inspect_multivariate(X_train, y_train, target_labels, telescope=window)

In [None]:
# Per-sample shapes (leading sample axis dropped) and the training
# hyper-parameters for the sequence model.
input_shape, output_shape = X_train.shape[1:], y_train.shape[1:]
batch_size, epochs = 64, 200

input_shape, output_shape

In [None]:
def build_CONV_LSTM_model(input_shape, output_shape):
    """Build and compile a bidirectional-LSTM + Conv1D sequence regressor.

    Args:
        input_shape: per-sample input shape ``(steps, channels)``.
        output_shape: per-sample label shape, either ``(steps,)`` or
            ``(steps, channels)``. A missing channel axis means one channel.

    Returns:
        A compiled ``tf.keras.Model`` (MSE loss, Adam optimiser, MAE metric).
        Its output has ``output_shape[0]`` steps: a single-step output is
        obtained by averaging over time, a shorter multi-step output by
        cropping the surplus steps (split between both ends).

    Raises:
        ValueError: if more output steps are requested than input steps.

    NOTE(review): the output activation is a sigmoid, so predictions lie in
    (0, 1); the labels are presumably expected to be scaled to that range.
    """
    output_steps = output_shape[0]
    # Labels built from a single column carry no channel axis.
    output_channels = output_shape[1] if len(output_shape) > 1 else 1

    # Build the neural network layer by layer
    input_layer = tf.keras.layers.Input(shape=input_shape, name='input_layer')
    x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True, name='lstm'), name='bidirectional_lstm')(input_layer)
    x = tf.keras.layers.Conv1D(128, 3, padding='same', activation='relu', name='conv')(x)
    output_layer = tf.keras.layers.Conv1D(output_channels, 3, padding='same', activation='sigmoid', name='output_layer')(x)

    if output_steps == 1:
        output_layer = tf.keras.layers.GlobalAveragePooling1D(keepdims=True, name='gap')(output_layer)
    else:
        # 'same' padding keeps input_shape[0] steps; crop whatever exceeds the
        # requested length. The amount cropped can never be negative.
        surplus = input_shape[0] - output_steps
        if surplus < 0:
            raise ValueError(
                f"output has {output_steps} steps but the input only has {input_shape[0]}"
            )
        if surplus > 0:
            crop_left = surplus // 2
            crop_right = surplus - crop_left
            output_layer = tf.keras.layers.Cropping1D((crop_left, crop_right), name='cropping')(output_layer)

    # Connect input and output through the Model class
    model = tf.keras.Model(inputs=input_layer, outputs=output_layer, name='model')

    # Compile the model
    model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer=tf.keras.optimizers.legacy.Adam(), metrics=['mae'])

    # Return the model
    return model

In [None]:
# Instantiate the Conv-LSTM network, print its layer table and render its graph.
model = build_CONV_LSTM_model(
    input_shape=input_shape,
    output_shape=output_shape,
)
model.summary()
tf.keras.utils.plot_model(model, expand_nested=True)

In [None]:
# Fit on the windowed data and keep only the per-epoch metrics dictionary.
# val_loss (on a 10% validation split) drives both callbacks: the learning rate
# is multiplied by 0.1 after 10 stagnant epochs (never below 1e-5), and training
# stops after 50 stagnant epochs with the best weights restored.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            mode='min',
            patience=50,
            restore_best_weights=True,
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            mode='min',
            patience=10,
            factor=0.1,
            min_lr=1e-5,
        ),
    ],
).history

In [None]:
# Shape of the lowercase `x_train` array (distinct from the uppercase `X_train`).
x_train.shape

In [None]:
# NOTE(review): this repeats the previous fit call but passes the lowercase
# `x_train` (the 2-D StandardScaler output of the tabular split) instead of the
# windowed `X_train`. At this point `model` appears to be the Conv-LSTM built
# for 3-D windows and `y_train` has been rebound to windowed labels, so the
# shapes presumably do not match - confirm whether this cell is a leftover
# experiment. It also overwrites `history` from the previous cell.
history = model.fit(
    x = x_train,
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_split=.1,
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=50, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=10, factor=0.1, min_lr=1e-5)
    ]
).history

## LSTM1

In [None]:
# Small sequence-to-sequence regressor: an LSTM emitting one state per step,
# a ReLU dense layer and a linear single-unit head applied at every step.
# NOTE(review): the channel count is read from the lowercase tabular `x_train`,
# not from the windowed `X_train` - confirm this is the intended input.
SEQUENCE_LENGTH = 12
SEQUENCE_DIM = x_train.shape[-1]
RNN_CELL_DIM = 8
HIDDEN_DIM = 8

sequences = tf.keras.layers.Input(shape=(SEQUENCE_LENGTH, SEQUENCE_DIM))
layer = keras.layers.LSTM(units=RNN_CELL_DIM, return_sequences=True)(sequences)
layer = keras.layers.Dense(units=HIDDEN_DIM, activation='relu')(layer)
predictions = keras.layers.Dense(units=1, activation=None)(layer)

# Mean squared error serves as both the loss and the reported metric.
model = tf.keras.Model(inputs=sequences, outputs=predictions)
model.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=1e-3),
    loss=tf.losses.mean_squared_error,
    metrics=[tf.metrics.mean_squared_error],
)
