In [1]:
# Mount Google Drive at /gdrive (this import is only available inside the Colab runtime).
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [2]:
# Make the Drive root the working directory so relative paths resolve against it,
# then list its contents.
%cd /gdrive/MyDrive/
%ls

/gdrive/MyDrive
 [0m[01;34mAuroregressiveForecasting[0m/            [01;34mIncpetionFtAugWeighModel1_more_layer[0m/
 [01;34mAuroregressiveForecasting48[0m/          [01;34mIncpetionFtModel1[0m/
 [01;34mAuroregressiveLOUGATTForecasting48[0m/   [01;34mIncpetionModel1[0m/
 [01;34mAuroregressivePREATTForecasting48[0m/    model_plot.png
[01;34m'Colab Notebooks'[0m/                     model.png
 [01;34mDataset[0m/                              [01;34mS3CRET[0m/
 [01;34mFinalModel[0m/                           [01;34mSimpleLSTMForecasting[0m/
 [01;34mGabriele[0m/                             [01;34mSimplerModel[0m/
 [01;34mIncpetionFtAugModel1[0m/                 [01;34mStandardModel[0m/
 [01;34mIncpetionFtAugWeighModel1[0m/            [01;34mtraining[0m/


In [3]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
# Use a larger default font for every matplotlib figure drawn afterwards.
plt.rc('font', size=16)
from sklearn.preprocessing import MinMaxScaler
import warnings
import math
# NOTE(review): this silences *all* Python warnings (deprecation, numerical, ...);
# consider narrowing the filter while debugging.
warnings.filterwarnings('ignore')
# Only let TensorFlow log records at ERROR level or above through.
tf.get_logger().setLevel('ERROR')

# Short aliases for the Keras namespaces.
tfk = tf.keras
tfkl = tf.keras.layers
# Print the TensorFlow version so it is recorded next to the results.
print(tf.__version__)

2.7.0


In [4]:
# Random seed for reproducibility: the same value is pushed to Python's `random`,
# NumPy's legacy global RNG and TensorFlow.
seed = 42

random.seed(seed)
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts;
# confirm that setting it here has the intended effect on the running kernel.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
# NOTE(review): presumably redundant with tf.random.set_seed above under TF 2.x -- confirm before removing.
tf.compat.v1.set_random_seed(seed)

In [5]:
# Load the training CSV; the path is relative to the current working directory.
dataset = pd.read_csv('Dataset/Training.csv')
print(dataset.shape)
# Preview the first rows.
dataset.head()

(68528, 7)


Unnamed: 0,Sponginess,Wonder level,Crunchiness,Loudness on impact,Meme creativity,Soap slipperiness,Hype root
0,7.97698,4.33494,10.67282,1.76692,3.2244,51.68146,3.65434
1,8.07824,4.44616,10.5616,1.70716,3.32566,51.563598,3.47672
2,8.02844,4.22372,10.5616,1.64906,3.1746,50.86308,3.47672
3,8.02844,4.22372,10.5616,1.70716,3.1746,45.841581,3.47672
4,7.87572,4.44616,10.45038,1.70716,3.27586,47.126421,3.47672


In [22]:
# Keep only the 'Crunchiness' series, as a single-column DataFrame.
crunch_data = dataset['Crunchiness'].to_frame()

In [6]:
def inspect_dataframe(df, columns, zoom):
    """Plot the leading rows of each selected column in vertically stacked subplots.

    Args:
        df: DataFrame holding the series to plot.
        columns: Column labels to plot; one subplot is drawn per label.
        zoom: Exclusive end position of the plotted slice ``[1:zoom]``.
    """
    # squeeze=False always yields a 2-D array of Axes, so indexing also works when
    # a single column is requested (plain subplots(1, 1) returns a bare Axes object).
    figs, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for i, col in enumerate(columns):
        ax = axs[i, 0]
        # NOTE(review): the slice starts at position 1, so the first row is never
        # plotted -- confirm this is intended.
        ax.plot(df[col][1:zoom])
        ax.set_title(col)
    plt.show()


In [7]:
# Fraction of the rows held out as the validation split.
VALIDATION_SPLIT = 0.1

In [23]:
# Chronological hold-out: the last `val_size` rows form the validation split.
val_size = int(VALIDATION_SPLIT * len(crunch_data))
X_train_raw, X_val_raw = crunch_data.iloc[:-val_size], crunch_data.iloc[-val_size:]
print(X_train_raw.shape, X_val_raw.shape)

# Min-max statistics come from the training rows only and are reused for validation.
X_min, X_max = X_train_raw.min(), X_train_raw.max()
print(X_min)
print(X_max)

# From here on X_train_raw holds the *normalized* training data, and the
# normalized validation data is stored under the name X_test_raw.
X_test_raw = (X_val_raw - X_min) / (X_max - X_min)
X_train_raw = (X_train_raw - X_min) / (X_max - X_min)

(61676, 1) (6852, 1)
Crunchiness   -34.802881
dtype: float64
Crunchiness    41.138801
dtype: float64


In [9]:
def build_sequences(df, target_labels, window=2000, stride=20, telescope=864):
    """Cut a time-ordered DataFrame into (input window, future target) pairs.

    When ``len(df)`` is not a multiple of ``window``, rows of zeros are prepended
    so that the padded length is. A window of ``window`` rows is then taken every
    ``stride`` rows, paired with the ``telescope`` rows that immediately follow it.

    Args:
        df: DataFrame whose rows are consecutive time steps; all columns are inputs.
        target_labels: Column labels selecting the targets. Must select a 2-D
            frame (e.g. a list or Index of labels), not a single Series.
        window: Number of time steps in each input sequence.
        stride: Step between the starts of consecutive windows; must divide ``window``.
        telescope: Number of future time steps in each target sequence.

    Returns:
        ``(dataset, labels)`` as NumPy arrays: ``dataset`` stacks the input windows
        and ``labels`` stacks the matching target sequences.

    Raises:
        AssertionError: If ``window`` is not a multiple of ``stride``.
    """
    # Sanity check to avoid runtime errors
    assert window % stride == 0
    dataset = []
    labels = []
    temp_df = df.copy().values
    temp_label = df[target_labels].copy().values
    padding_len = len(df) % window

    if padding_len != 0:
        # Compute padding length
        padding_len = window - len(df) % window
        padding = np.zeros((padding_len, temp_df.shape[1]), dtype='float32')
        temp_df = np.concatenate((padding, temp_df))
        padding = np.zeros((padding_len, temp_label.shape[1]), dtype='float32')
        temp_label = np.concatenate((padding, temp_label))
        assert len(temp_df) % window == 0

    # The last admissible start is len - window - telescope (its target ends exactly
    # at the final row), so the exclusive bound needs the +1; without it that final
    # window is dropped whenever the remainder is a multiple of `stride`.
    last_start = len(temp_df) - window - telescope
    for idx in range(0, last_start + 1, stride):
        dataset.append(temp_df[idx:idx + window])
        labels.append(temp_label[idx + window:idx + window + telescope])

    dataset = np.array(dataset)
    labels = np.array(labels)
    return dataset, labels

In [10]:
def inspect_multivariate(X, y, columns, telescope, idx=None):
    """Plot one input window and its target sequence, one subplot per column.

    Args:
        X: Array of input windows indexed as ``X[sample, time, feature]``.
        y: Array of targets indexed as ``y[sample, time, feature]``.
        columns: Column names; entry ``i`` titles the subplot for feature ``i``.
        telescope: Number of future time steps held by each target sequence.
        idx: Sample to plot; a random sample is drawn when ``None``.
    """
    if idx is None:
        idx = np.random.randint(0, len(X))

    # Take the window length from the X that was passed in rather than from a
    # notebook-level variable, so the function works for any array.
    window = X.shape[1]
    past_steps = np.arange(window)
    future_steps = np.arange(window, window + telescope)

    # squeeze=False keeps `axs` a 2-D array even for a single column
    # (plain subplots(1, 1) returns a bare Axes that cannot be indexed).
    figs, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for i, col in enumerate(columns):
        ax = axs[i, 0]
        ax.plot(past_steps, X[idx,:,i])
        ax.scatter(future_steps, y[idx,:,i], color='orange')
        ax.set_title(col)
        # Fixed y-range; assumes the data were scaled to [0, 1] -- TODO confirm.
        ax.set_ylim(0,1)
    plt.show()

In [11]:
def inspect_multivariate_prediction(X, y, pred, columns, telescope, idx=None):
    """Plot one input window with its true and predicted continuation, one subplot per column.

    Args:
        X: Array of input windows indexed as ``X[sample, time, feature]``.
        y: Array of true targets indexed as ``y[sample, time, feature]``.
        pred: Array of predictions indexed like ``y``.
        columns: Column names; entry ``i`` titles the subplot for feature ``i``.
        telescope: Number of future time steps held by each target sequence.
        idx: Sample to plot; a random sample is drawn when ``None``.
    """
    if idx is None:
        idx = np.random.randint(0, len(X))

    # Take the window length from the X that was passed in rather than from a
    # notebook-level variable, so the function works for any array.
    window = X.shape[1]
    past_steps = np.arange(window)
    future_steps = np.arange(window, window + telescope)

    # squeeze=False keeps `axs` a 2-D array even for a single column
    # (plain subplots(1, 1) returns a bare Axes that cannot be indexed).
    figs, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for i, col in enumerate(columns):
        ax = axs[i, 0]
        ax.plot(past_steps, X[idx,:,i])
        # Ground truth in orange, model output in green.
        ax.plot(future_steps, y[idx,:,i], color='orange')
        ax.plot(future_steps, pred[idx,:,i], color='green')
        ax.set_title(col)
        # Fixed y-range; assumes the data were scaled to [0, 1] -- TODO confirm.
        ax.set_ylim(0,1)
    plt.show()

In [24]:
# Forecast targets: every column of crunch_data.
target_labels = crunch_data.columns

In [33]:
# Windowing parameters passed to build_sequences below.
TELESCOPE = 864  # future time steps forecast per window
WINDOWS_LENGTH = 2000  # past time steps in each input window
STRIDE = 10  # step between the starts of consecutive windows
#REG_TELESCOPE = 864/TELESCOPE

In [34]:
# Slice the training and validation series into (input window, target sequence) pairs.
X_train, y_train = build_sequences(X_train_raw, target_labels, WINDOWS_LENGTH, STRIDE, TELESCOPE)
X_test, y_test = build_sequences(X_test_raw, target_labels, WINDOWS_LENGTH, STRIDE, TELESCOPE)
# Show the resulting array shapes.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((5914, 2000, 1), (5914, 864, 1), (514, 2000, 1), (514, 864, 1))

In [35]:
# Single-shot forecaster: one LSTM summarizes the input window and a dense layer
# emits all TELESCOPE future steps at once.
single_lstm_model = tf.keras.Sequential([
    # [batch, time, features] -> [batch, lstm_units]; only the final LSTM output is kept.
    # Adding more `lstm_units` just overfits more quickly.
    tf.keras.layers.LSTM(units=32, return_sequences=False),
    # [batch, lstm_units] -> [batch, TELESCOPE]; the kernel starts at zero.
    tf.keras.layers.Dense(
        units=TELESCOPE,
        kernel_initializer=tf.initializers.zeros(),
    ),
    # Output stays [batch, TELESCOPE]; no trailing feature axis is added here.
    tf.keras.layers.Reshape(target_shape=[TELESCOPE]),
])

# No optimizer is passed, so compile() falls back to its default.
single_lstm_model.compile(
    loss=tf.losses.MeanSquaredError(),
    metrics=[
        tf.metrics.MeanAbsoluteError(),
        tf.keras.metrics.RootMeanSquaredError(),
    ],
)

In [36]:
# Upper bound on training epochs; early stopping usually ends the run sooner.
epochs = 200

# Only the `.history` dict (per-epoch metric values) is kept from fit().
history_6 = single_lstm_model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=64,
    shuffle=False,  # batches are fed in their original order
    callbacks=[
        # Stop after 10 epochs without a better val_loss and roll back to the best weights.
        tfk.callbacks.EarlyStopping(
            monitor='val_loss', mode='min', patience=10, restore_best_weights=True
        ),
        # Halve the learning rate after 5 stagnant epochs, never going below 1e-5.
        tfk.callbacks.ReduceLROnPlateau(
            monitor='val_loss', mode='min', patience=5, factor=0.5, min_lr=1e-5
        ),
    ],
).history

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200


In [38]:
# Persist the trained model under the relative path 'Cruchorecasting'.
# NOTE(review): the name looks like a typo; it is left as-is because other code may load it by this path.
single_lstm_model.save('Cruchorecasting')



In [40]:
# See prediction on the validation set
predictions = single_lstm_model.predict(X_test)
print(predictions.shape)

# Flatten labels and predictions so they line up element-wise; each metric is then
# a single scalar over every window and forecast step.
# NOTE(review): these errors are in the units of X_test/y_test as passed in --
# presumably the normalized scale; confirm before reporting them.
mean_squared_error = tfk.metrics.mse(y_test.flatten(), predictions.flatten())
mean_absolute_error = tfk.metrics.mae(y_test.flatten(), predictions.flatten())
# sqrt(MSE) is the root mean *squared* error, so it gets the correct name.
root_mean_squared_error = math.sqrt(mean_squared_error)
# Backward-compatible alias for the original (misleading) variable name.
root_mean_absolute_error = root_mean_squared_error
mean_squared_error, mean_absolute_error, root_mean_squared_error

(514, 864)


(<tf.Tensor: shape=(), dtype=float32, numpy=0.015233438>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.09318805>,
 0.12342381642948401)