## <center>RNN forecasting a Sine Wave</center>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Sample 501 evenly spaced points on [0, 50] and evaluate the sine at each one.
x = np.linspace(start=0, stop=50, num=501)
y = np.sin(x)

In [None]:
# Quick visual check of the raw sine wave before any preprocessing.
plt.plot(x,y)

Let's turn this into a DataFrame

In [None]:
# Single-column frame of sine values, indexed by the x positions.
df = pd.DataFrame({'Sine': y}, index=x)
df.head()

### Train Test Split

Note! This is very different from our usual test/train split methodology!

In [None]:
# Total number of samples; the train/test split below is derived from it.
len(df)

In [None]:
# Fraction of the series (taken from the end) that is held out for testing.
test_percent = 0.1
# np.round returns a float (e.g. 50.0); cast once so the row count is an int.
test_size = int(np.round(len(df) * test_percent))
test_size

In [None]:
# Position of the first test row: everything before it is training data.
test_ind = int(len(df) - test_size)

# Chronological split (no shuffling): the test set is the tail of the series.
# Explicit copies make `train` and `test` independent of `df`, so assigning
# new columns to `test` later neither raises SettingWithCopyWarning nor can
# write through to the original frame.
train = df.iloc[:test_ind].copy()
test = df.iloc[test_ind:].copy()

#### Scale Data

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Learn the scaling parameters from the training split only, so nothing from
# the test period influences them, then apply the same scaling to both splits.
scaler = MinMaxScaler()
scaler.fit(train)

scaled_train = scaler.transform(train)
scaled_test = scaler.transform(test)

Because the RNN's output is fed back into the network as input for the next prediction, it is important to scale the data.

### Time Series Generator

This class takes in a sequence of data-points gathered at
equal intervals, along with time series parameters such as
stride, length of history, etc., to produce batches for
training/validation.

In [None]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

#### Arguments
    data: Indexable generator (such as list or Numpy array)
        containing consecutive data points (timesteps).
        The data should be 2D, and axis 0 is expected
        to be the time dimension.
    targets: Targets corresponding to timesteps in `data`.
        It should have same length as `data`.
    length: Length of the output sequences (in number of timesteps).
    sampling_rate: Period between successive individual timesteps
        within sequences. For rate `r`, timesteps
        `data[i]`, `data[i-r]`, ... `data[i - length]`
        are used to create a sample sequence.
    stride: Period between successive output sequences.
        For stride `s`, consecutive output samples would
        be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc.
    start_index: Data points earlier than `start_index` will not be used
        in the output sequences. This is useful to reserve part of the
        data for test or validation.
    end_index: Data points later than `end_index` will not be used
        in the output sequences. This is useful to reserve part of the
        data for test or validation.
    shuffle: Whether to shuffle output samples,
        or instead draw them in chronological order.
    reverse: Boolean: if `true`, timesteps in each output sample will be
        in reverse chronological order.
    batch_size: Number of timeseries samples in each batch
        (except maybe the last one).

In [None]:
# define generator

length = 2  # Number of consecutive past points used as input to predict the next value
batch_size = 1  # Number of samples (input window + target) per batch

# Inputs and targets are drawn from the same series: each target is the value
# that immediately follows its input window.
generator = TimeseriesGenerator(scaled_train, scaled_train,
                                length=length,
                                batch_size=batch_size)

In [None]:
print(len(scaled_train))
print(len(generator))  # number of batches available with length = 2

# With batch_size = 1 this is len(scaled_train) - length,
# i.e. [451 - 2 (length)] // [1 (batch_size)] for the 501-point series and 10% split above

In [None]:
# What does the first batch look like?
# NOTE: this rebinds `y`, which until now held the raw sine values.
X,y = generator[0]

In [None]:
# First length + 1 scaled values: the input window followed by its target.
scaled_train[:length + 1]

In [None]:
# Show the flattened input window and the target it is paired with.
print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n {y.flatten()}')

In [None]:
# Widen the window: use the previous 10 points to predict the next one.
length = 10
generator = TimeseriesGenerator(data=scaled_train, targets=scaled_train,
                                length=length, batch_size=1)

# First (input window, target) pair produced by the new generator.
X, y = generator[0]

In [None]:
# First length + 1 scaled values: the 10-point input window plus its target.
scaled_train[:length + 1]

In [None]:
# Show the flattened 10-point input window and its target.
print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n {y.flatten()}')

In [None]:
# Final window size for the first model: 50 past points per input sequence.
length = 50
generator = TimeseriesGenerator(data=scaled_train, targets=scaled_train,
                                length=length, batch_size=1)

# First (input window, target) pair produced by the new generator.
X, y = generator[0]

In [None]:
# First length + 1 scaled values: the 50-point input window plus its target.
scaled_train[:length + 1]

In [None]:
# Show the flattened 50-point input window and its target.
print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n {y.flatten()}')

### Create the Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,LSTM,SimpleRNN

In [None]:
# We're only using one feature in our time series
n_features = 1 # Univariate series: the single feature is the (scaled) sine value itself

In [None]:
# Define the model: one recurrent layer with 50 units reading windows of shape
# (length, n_features), followed by a single-unit dense layer that emits the
# next-step prediction.
model = Sequential([
    SimpleRNN(50, input_shape=(length, n_features)),
    Dense(1),
])

# Regression on a continuous value, so the loss is mean squared error.
model.compile(loss='mse', optimizer='adam')

In [None]:
# Print the layer-by-layer architecture of the model defined above.
model.summary()

In [None]:
# fit model
# Model.fit accepts generators / Sequence objects directly. fit_generator is
# deprecated in TF 2.x and absent from newer Keras releases, so use fit.
model.fit(generator, epochs=5)

In [None]:
# Per-epoch metrics recorded by the most recent training run, one column per metric.
losses = pd.DataFrame(data=model.history.history)
losses.plot()

#### Evaluate on Test Data

To evaluate on the test data, the last `length` points of the training data must be used as input to forecast the first point of the test data.

----
Step by step, first iteration

In [None]:
# Seed window: the last `length` scaled training points, reshaped to
# (1, length, n_features) -- a batch of one sequence -- to match the input
# shape the model was built with.
first_eval_batch = scaled_train[-length:].reshape(1, length, n_features)

In [None]:
# One-step-ahead prediction (still in scaled units) for the first test point,
# unwrapped to a plain scalar.
model.predict(first_eval_batch)[0].item()

In [None]:
# Actual (scaled) first test value, for comparison with the prediction above.
scaled_test[0]

----

In [None]:
test_predictions = []

# Seed the rolling window with the last `length` scaled training points.
first_eval_batch = scaled_train[-length:]
current_batch = first_eval_batch.reshape((1, length, n_features))

# Forecast one step per test row, feeding each prediction back in as input.
for step in range(len(test)):
    # predict returns one row per input sequence; [0] takes the row for our
    # single sequence (a 1-element array, not a bare number)
    current_pred = model.predict(current_batch)[0]
    test_predictions.append(current_pred)

    # Slide the window: drop the oldest timestep, append the newest prediction.
    newest_step = current_pred.reshape(1, 1, n_features)
    current_batch = np.concatenate((current_batch[:, 1:, :], newest_step), axis=1)

In [None]:
# Raw (scaled) predictions collected by the loop above.
test_predictions

In [None]:
# Actual scaled test values, for side-by-side comparison with the predictions.
scaled_test

#### Inverse Transformations and Compare

In [None]:
# Undo the scaling (fitted on the training split) so the predictions are in
# the same units as the original series.
true_predictions = scaler.inverse_transform(test_predictions)

In [None]:
# IGNORE WARNINGS
# NOTE(review): pandas may emit SettingWithCopyWarning here when `test` is a
# slice of `df` rather than an independent copy -- confirm how `test` was built.
test['Predictions'] = true_predictions
test.head()

In [None]:
# Plot every column of `test` (actual values and predictions) on one figure.
test.plot(figsize=(12,8))

#### Adding in Early Stopping and Validation Generator

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Stop training once val_loss has gone 2 epochs without improving.
early_stop = EarlyStopping(monitor='val_loss',patience=2)

In [None]:
# The validation generator is built from scaled_test, so `length` must be
# strictly smaller than the number of test points. With length = 50 Keras raises:
#
# `start_index+length=50 > end_index=49` is disallowed, as no part of the sequence would be left to be used as current step.
#
# The window is therefore shortened by one value, to 49.
# NOTE(review): with so few test points and a 49-point window, the validation
# generator appears to yield very few samples -- confirm this is acceptable
# for monitoring val_loss.
length = 49

generator = TimeseriesGenerator(data=scaled_train, targets=scaled_train,
                                length=length, batch_size=1)

validation_generator = TimeseriesGenerator(data=scaled_test, targets=scaled_test,
                                           length=length, batch_size=1)



### LSTMs

In [None]:
# Define the model: same layout as the SimpleRNN version, with the recurrent
# layer swapped for an LSTM.
model = Sequential([
    # LSTM layer with 50 units over windows of shape (length, n_features)
    LSTM(50, input_shape=(length, n_features)),
    # Final Prediction
    Dense(1),
])

model.compile(loss='mse', optimizer='adam')

In [None]:
# Train for up to 20 epochs, evaluating on the validation generator after each
# epoch; early_stop ends training early based on the validation loss.
# Model.fit accepts generators directly (fit_generator is deprecated in TF 2.x
# and absent from newer Keras releases).
model.fit(generator, epochs=20,
          validation_data=validation_generator,
          callbacks=[early_stop])

In [None]:
test_predictions = []

# Seed the rolling window with the last `length` scaled training points.
first_eval_batch = scaled_train[-length:]
current_batch = first_eval_batch.reshape((1, length, n_features))

# Forecast one step per test row, feeding each prediction back in as input.
for step in range(len(test)):
    # predict returns one row per input sequence; [0] takes the row for our
    # single sequence (a 1-element array, not a bare number)
    current_pred = model.predict(current_batch)[0]

    # keep the prediction for later comparison
    test_predictions.append(current_pred)

    # Slide the window: drop the oldest timestep, append the newest prediction.
    newest_step = current_pred.reshape(1, 1, n_features)
    current_batch = np.concatenate((current_batch[:, 1:, :], newest_step), axis=1)

In [None]:
# IGNORE WARNINGS
# NOTE(review): pandas may emit SettingWithCopyWarning on the column
# assignment when `test` is a slice of `df` rather than an independent copy.
# Undo the scaling, store the LSTM predictions next to the existing columns,
# and plot every column of `test` together.
true_predictions = scaler.inverse_transform(test_predictions)
test['LSTM Predictions'] = true_predictions
test.plot(figsize=(12,8))

### Forecasting

Forecast into unknown range. We should first utilize all our data, since we are now forecasting!

In [None]:
# No hold-out set when forecasting: fit a fresh scaler on the entire series
# and scale all of it.
full_scaler = MinMaxScaler()
full_scaler.fit(df)
scaled_full_data = full_scaler.transform(df)

In [None]:
length = 100  # Number of past timesteps in each input window
generator = TimeseriesGenerator(data=scaled_full_data, targets=scaled_full_data,
                                length=length, batch_size=1)

In [None]:
# Rebuild and retrain the LSTM on the full scaled series.
# Note that the LSTM layer is given `length` units, so the layer width is tied
# to the window size chosen above.
model = Sequential()
model.add(LSTM(length, input_shape=(length, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# Model.fit accepts generators directly; fit_generator is deprecated in TF 2.x
# and absent from newer Keras releases.
model.fit(generator, epochs=6)

In [None]:
forecast = []

# Seed the rolling window with the last `length` points of the full scaled series.
first_eval_batch = scaled_full_data[-length:]
current_batch = first_eval_batch.reshape((1, length, n_features))

# Roll forward `length` steps past the end of the data; every prediction is
# fed back in as the newest input.
for step in range(length):
    current_pred = model.predict(current_batch)[0]
    forecast.append(current_pred)

    # Slide the window: drop the oldest timestep, append the newest prediction.
    newest_step = current_pred.reshape(1, 1, n_features)
    current_batch = np.concatenate((current_batch[:, 1:, :], newest_step), axis=1)

In [None]:
# The forecasting model was trained on data scaled with `full_scaler`, so its
# predictions must be inverted with that same scaler, not with the train-only
# `scaler` used earlier.
forecast = full_scaler.inverse_transform(forecast)

# x positions for the forecast: continue the 0.1 spacing of the original grid
# past its last value (50), giving 50.1, 50.2, ... Building the grid from
# integer steps guarantees exactly `length` points, whereas np.arange with a
# float step can come out one element long or short.
forecast_index = 50 + 0.1 * np.arange(1, length + 1)

In [None]:
# Draw the original series, then the forecast at the x positions beyond it.
plt.plot(df.index,df['Sine'])
plt.plot(forecast_index,forecast)