# Import stuffs

In [None]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorboard
import datetime
from sklearn.model_selection import train_test_split

def generate_time_series(batch_size, n_steps):
    """Generate a batch of noisy synthetic univariate time series.

    Each series is the sum of two sine waves with random frequency and
    phase offset, plus uniform noise in [-0.05, 0.05).

    Args:
        batch_size: number of series to generate.
        n_steps: number of time steps per series.

    Returns:
        A float32 array of shape (batch_size, n_steps, 1).
    """
    # A single draw supplies all four per-series parameters, each of shape
    # (batch_size, 1) so they broadcast against the time axis.
    params = np.random.rand(4, batch_size, 1)
    freq1, freq2, offsets1, offsets2 = params
    time = np.linspace(0, 1, n_steps)

    wave1 = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10))
    wave2 = 0.2 * np.sin((time - offsets2) * (freq2 * 20 + 20))
    noise = 0.1 * (np.random.rand(batch_size, n_steps) - 0.5)

    series = wave1 + wave2 + noise
    # Append a trailing feature axis: one value per time step.
    return series[..., np.newaxis].astype(np.float32)

# Dataset for one-step-ahead forecasting: every series has n_steps input
# values followed by one extra value that serves as the target.
n_steps = 50
series = generate_time_series(10000, n_steps + 1)
# Split along the batch axis at 7000 and 9000: train / validation / test.
x_train, x_valid, x_test = np.split(series[:, :n_steps], [7000, 9000])
y_train, y_valid, y_test = np.split(series[:, -1], [7000, 9000])

# Creating a baseline model

In [None]:
# Linear baseline: flatten the 50-step window and predict the next value with
# a single Dense unit.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=[50, 1]))
model.add(keras.layers.Dense(1))

model.compile(
    loss="mse",
    optimizer=keras.optimizers.Adam(),
    metrics=["mse"],
)
model.fit(x_train, y_train, epochs=100, validation_data=(x_valid, y_valid))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100

KeyboardInterrupt: ignored

# Creating a simple RNN

In [None]:
# The simplest possible RNN: one layer with a single recurrent unit.
# The time dimension of input_shape is None because the RNN is unrolled over
# however many time steps the input has.
model = keras.Sequential()
model.add(keras.layers.SimpleRNN(1, input_shape=[None, 1]))

model.compile(
    loss="mse",
    optimizer=keras.optimizers.Adam(),
    metrics=["mse"],
)
model.fit(x_train, y_train, epochs=100, validation_data=(x_valid, y_valid))

# Deep RNNs
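- Stack several recurrent layers to make the network deep; every recurrent layer that feeds another recurrent layer needs `return_sequences=True`.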

In [None]:
# Deep RNN: two stacked recurrent layers and a Dense output layer.
# input_shape is [time steps, dimensionality].
model = keras.Sequential()
model.add(keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]))
model.add(keras.layers.SimpleRNN(20))
# A SimpleRNN(1) output layer is deliberately avoided: a single recurrent unit
# is of little use here. Author's note: the Dense head converges faster too.
model.add(keras.layers.Dense(1))

model.compile(
    loss="mse",
    optimizer=keras.optimizers.Adam(),
    metrics=["mse"],
)
model.fit(x_train, y_train, epochs=50, validation_data=(x_valid, y_valid))

## Forecasting multiple steps ahead

In [None]:
# Dataset for 10-step-ahead forecasting: every series has n_steps input values
# followed by 10 future values that serve as the target vector.
n_steps = 50
series = generate_time_series(10000, n_steps + 10)
# Split along the batch axis at 7000 and 9000: train / validation / test.
x_train, x_valid, x_test = np.split(series[:, :n_steps], [7000, 9000])
# Targets drop the trailing feature axis, giving one row of 10 values per series.
y_train, y_valid, y_test = np.split(series[:, -10:, 0], [7000, 9000])

In [None]:
# Two stacked recurrent layers followed by a Dense head that emits all 10
# forecasts at once. input_shape is [time steps, dimensionality]; the batch
# dimension is not part of it.
model = keras.Sequential()
model.add(keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]))
# No return_sequences=True on this layer: it is only needed when the next
# layer is itself recurrent. The layer above sets it because it feeds this
# one; this layer feeds a Dense layer, so it passes on only its final output.
model.add(keras.layers.SimpleRNN(20))
# A SimpleRNN(1) output layer is deliberately avoided: a single recurrent unit
# is of little use here. Author's note: the Dense head converges faster too.
model.add(keras.layers.Dense(10))

model.compile(
    loss="mse",
    optimizer=keras.optimizers.Adam(),
    metrics=["mse"],
)
model.fit(x_train, y_train, epochs=20, validation_data=(x_valid, y_valid))

## A better approach: predict the next ten steps at every time step
- At time step 0 the model predicts steps 1-10, at time step 1 it predicts steps 2-11, and so on.

In [None]:
# Build sequence-to-sequence targets: for every input time step t the target
# is the following 10 values of the series (t+1 .. t+10), so Y has shape
# (number of series, n_steps, 10). This requires `series` to hold at least
# n_steps + 10 time steps per series.
Y = np.empty((series.shape[0], n_steps, 10))
for step_ahead in range(1, 10 + 1):
  # Read from `series`, not from Y: Y is uninitialised memory, and slicing it
  # past n_steps yields a shorter window that cannot be assigned back.
  Y[:, :, step_ahead - 1] = series[:, step_ahead:step_ahead + n_steps, 0]
y_train = Y[:7000]
y_valid = Y[7000:9000]
y_test = Y[9000:]

In [None]:
# Sequence-to-sequence model: both recurrent layers return their full output
# sequence so that a forecast can be produced at every time step.
model = keras.Sequential()
model.add(keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]))
model.add(keras.layers.SimpleRNN(20, return_sequences=True))
# TimeDistributed wraps the Dense layer so it is applied at every time step.
model.add(keras.layers.TimeDistributed(keras.layers.Dense(10)))

In [None]:
# The model emits a forecast at every time step, but for evaluation only the
# forecast made at the last time step matters, so the metric looks at that
# step alone.
def last_time_step_mse(y_true, y_pred):
  """Return the mean squared error computed on the final time step only.

  Args:
    y_true: target tensor, indexed as [batch, time step, ...].
    y_pred: prediction tensor with the same layout as `y_true`.

  Returns:
    The MSE between `y_true[:, -1]` and `y_pred[:, -1]`.
  """
  # keras.metrics.MeanSquaredError is a metric *class*; calling it with the
  # tensors would construct a metric object instead of computing a value.
  # The functional form computes the error directly.
  return keras.metrics.mean_squared_error(y_true[:, -1], y_pred[:, -1])

# `lr` is deprecated in favour of `learning_rate`.
optimizer = keras.optimizers.Adam(learning_rate=0.01)
# Train on the MSE over all time steps, but report the last-step MSE.
# `metrics` is passed as a list, the documented form.
model.compile(loss="mse", optimizer=optimizer, metrics=[last_time_step_mse])

  "The `lr` argument is deprecated, use `learning_rate` instead.")


# Building a Custom Simple RNN Cell with Layer Normalization(pg 358)
- Never use BN in RNNs. ***Always use Layer Normalization***
- There already is a thing in TF called the SimpleRNNCell, but it doesn't have layer Normalization.
- Also helps to alleviate exploding gradients

In [None]:
class LNSimpleRNNCell(keras.layers.Layer):
  """Simple RNN cell that applies layer normalization before its activation.

  The wrapped SimpleRNNCell is created with activation=None so that it only
  performs the linear step; normalization and the activation are applied
  afterwards in `call`.

  Args:
    units: number of recurrent units; also used as state and output size.
    activation: activation applied after normalization. The default is tanh;
      a saturating activation is preferred over relu here.
    **kwargs: forwarded to keras.layers.Layer.
  """

  def __init__(self, units, activation="tanh", **kwargs):
    super().__init__(**kwargs)
    # State and output both have `units` entries.
    self.state_size = self.output_size = units
    self.simple_rnn_cell = keras.layers.SimpleRNNCell(units, activation=None)
    self.layer_norm = keras.layers.LayerNormalization()
    self.activation = keras.activations.get(activation)

  def call(self, inputs, states):
    """Run one time step and return (output, [new_state])."""
    # The inner cell's own new state is discarded: the normalized, activated
    # output is used as the state instead.
    linear_outputs, _ = self.simple_rnn_cell(inputs, states)
    normalized = self.layer_norm(linear_outputs)
    activated = self.activation(normalized)
    # In a simple RNN cell the output is the same as the hidden state.
    return activated, [activated]

In [None]:
# Sequence-to-sequence model built from the custom layer-normalized cell.
# keras.layers.RNN wraps a cell into a full recurrent layer.
model = keras.Sequential()
model.add(keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True,
                           input_shape=[None, 1]))
model.add(keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True))
# Apply the Dense output layer at every time step.
model.add(keras.layers.TimeDistributed(keras.layers.Dense(10)))

# LSTM Cells
- Very well explained on pg in picture

In [None]:
# Same sequence-to-sequence architecture, with LSTM layers as the recurrent
# layers.
model = keras.Sequential()
model.add(keras.layers.LSTM(20, return_sequences=True, input_shape=[None, 1]))
model.add(keras.layers.LSTM(20, return_sequences=True))
# Apply the Dense output layer at every time step.
model.add(keras.layers.TimeDistributed(keras.layers.Dense(10)))

# WaveNet

In [None]:
# Simplified WaveNet: a stack of causal 1D convolutions whose dilation rate
# doubles from layer to layer (1, 2, 4, 8), with the whole group repeated
# twice, followed by a 1x1 convolution that emits 10 values per time step.
model = keras.Sequential()
# keras.layers.Input takes `shape`; it has no `input_shape` keyword.
model.add(keras.layers.Input(shape=[None, 1]))
for rate in (1, 2, 4, 8) * 2:
  # padding="causal" keeps each output from depending on future time steps.
  # NOTE(review): kernel_size=20 is unusually large for a dilated stack like
  # this; it may be a typo for 2. Left unchanged; confirm the intended value.
  model.add(keras.layers.Conv1D(filters=20, kernel_size=20, padding="causal",
                                activation="relu", dilation_rate=rate))
model.add(keras.layers.Conv1D(filters=10, kernel_size=1))
  

# RNN Notes
- https://towardsdatascience.com/all-you-need-to-know-about-rnns-e514f0b00c7c
- ***It is often good to use MC Dropout, so add dropout to each memory cell***
- By default, a Keras recurrent layer returns only the output of its last time step. To get sequence-to-sequence behaviour, set `return_sequences=True` and wrap the output layer in `TimeDistributed` so that every time step produces an output.