# LSTM, One day at a time!
In this notebook we'll see how the PyTorch LSTM block allows us to pass it the whole data sequence at once. The LSTM block will very quickly process the sequence for us, without having to use a slow Python for loop. Instead of providing a sequence of data at each time-step (like previous notebooks), we'll simply provide the LSTM a single day's worth of data at a time.

[<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/LSTM_Cell.svg/2880px-LSTM_Cell.svg.png">](LSTM)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import torch.nn.functional as F

from tqdm.notebook import trange, tqdm
from Dataset import WeatherDataset

In [None]:
# Location of the raw weather data
dataset_file = "../data/weather.csv"

# Date used to split the data into train and test portions
split_date = pd.to_datetime('2023-01-01')

# Number of days in the input sequence
day_range = 30

# Optimisation hyperparameters
learning_rate = 1e-4
nepochs = 500
batch_size = 32

# Both splits are built from the same file and differ only in `train_test`
dataset_train = WeatherDataset(
    dataset_file,
    day_range=day_range,
    split_date=split_date,
    train_test="train",
)
dataset_test = WeatherDataset(
    dataset_file,
    day_range=day_range,
    split_date=split_date,
    train_test="test",
)

In [None]:
# Report how many examples each split provides (as given by len() on the dataset)
print(f'Number of training examples: {len(dataset_train)}')
print(f'Number of testing examples: {len(dataset_test)}')

In [None]:
# Training batches are reshuffled every epoch; the trailing partial batch is dropped
data_loader_train = DataLoader(
    dataset=dataset_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

# Test batches keep their original order; the trailing partial batch is dropped here too
data_loader_test = DataLoader(
    dataset=dataset_test,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
)

In [None]:
# Plot column 1 of the train and test data against their index on one figure
fig = plt.figure(figsize=(10, 5))
_ = plt.title("Melbourne Max Daily Temperature (C)")

_ = plt.plot(dataset_train.dataset.index, dataset_train.dataset.values[:, 1])
_ = plt.plot(dataset_test.dataset.index, dataset_test.dataset.values[:, 1])

_ = plt.legend(["Train", "Test"])
# Note: the underlying data can be read directly off the dataset object via its `.dataset` attribute

In [None]:
# Define our network classes by subclassing nn.Module
class ResBlockMLP(nn.Module):
    """Pre-normalised residual MLP block with a linear skip path.

    The input is layer-normalised and passed through ELU, then sent down two
    paths: a two-layer bottleneck MLP (``input_size -> input_size // 2 ->
    output_size``) and a single linear projection (``input_size ->
    output_size``). The two results are summed.

    Args:
        input_size: Size of the last dimension of the input.
        output_size: Size of the last dimension of the output.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        bottleneck = input_size // 2

        self.norm1 = nn.LayerNorm(input_size)
        self.fc1 = nn.Linear(input_size, bottleneck)

        self.norm2 = nn.LayerNorm(bottleneck)
        self.fc2 = nn.Linear(bottleneck, output_size)

        # Linear projection used on the skip path so it matches output_size
        self.fc3 = nn.Linear(input_size, output_size)

        self.act = nn.ELU()

    def forward(self, x):
        """Return ``mlp(act(norm(x))) + skip(act(norm(x)))``."""
        normed = self.act(self.norm1(x))

        # Skip path: one linear layer applied to the normalised input
        shortcut = self.fc3(normed)

        # Main path: bottleneck MLP with a second norm + activation in between
        hidden = self.fc1(normed)
        hidden = self.act(self.norm2(hidden))

        return self.fc2(hidden) + shortcut


class RNN(nn.Module):
    """LSTM sequence model with an MLP encoder and residual MLP head.

    Each time-step is embedded by a small MLP, the embedded sequence is run
    through a single-layer ``nn.LSTM`` (``batch_first=True``), and every LSTM
    output is passed through residual MLP blocks and a final linear layer.

    Args:
        seq_len: Size of the last dimension of the input, i.e. the number of
            values per time-step (despite the name, it is the width fed to
            the first ``nn.Linear``).
        output_size: Number of values predicted per time-step.
        num_blocks: Number of ``ResBlockMLP`` blocks applied after the LSTM.
        hidden_size: Width of the embedding, LSTM state and residual blocks.
    """

    def __init__(self, seq_len, output_size, num_blocks=1, hidden_size=128):
        super().__init__()

        # Per-time-step encoder: seq_len -> 4 * seq_len -> hidden_size
        expanded = 4 * seq_len
        self.input_mlp = nn.Sequential(
            nn.Linear(seq_len, expanded),
            nn.ELU(),
            nn.Linear(expanded, hidden_size),
        )

        self.lstm = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )

        # Stack of residual blocks that keep the feature width unchanged
        self.res_blocks = nn.Sequential(
            *(ResBlockMLP(hidden_size, hidden_size) for _ in range(num_blocks))
        )

        self.fc_out = nn.Linear(hidden_size, output_size)
        self.act = nn.ELU()

    def forward(self, input_seq, hidden_in, mem_in):
        """Run the whole sequence through the model in one call.

        Args:
            input_seq: Input of shape ``(batch, time, seq_len)``.
            hidden_in: Initial LSTM hidden state, ``(1, batch, hidden_size)``.
            mem_in: Initial LSTM cell state, ``(1, batch, hidden_size)``.

        Returns:
            A tuple ``(prediction, hidden_out, mem_out)`` holding the
            per-time-step predictions and the final LSTM hidden/cell states.
        """
        embedded = self.input_mlp(input_seq)

        # The LSTM consumes every time-step and hands back its final state
        lstm_out, (hidden_out, mem_out) = self.lstm(embedded, (hidden_in, mem_in))

        features = self.act(self.res_blocks(lstm_out))
        prediction = self.fc_out(features)

        return prediction, hidden_out, mem_out

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Width of the LSTM state; also used later to build the initial hidden/cell tensors
hidden_size = 128

# Create model
# NOTE(review): `seq_len` is the width the first nn.Linear of RNN expects, i.e. the
# number of values per day (presumably 2: rainfall and max temperature) - confirm
# against WeatherDataset
weather_rnn = RNN(seq_len=2, output_size=2, hidden_size=hidden_size).to(device)

# Initialize the optimizer with above parameters
optimizer = optim.Adam(weather_rnn.parameters(), lr=learning_rate)

# Define the loss function
loss_fn = nn.MSELoss()  # mean squared error

In [None]:
# Let's see how many parameters our model has!
# numel() is the number of elements in each parameter tensor
num_model_params = sum(param.numel() for param in weather_rnn.parameters())

print("-This Model Has %d (Approximately %d Million) Parameters!" % (num_model_params, num_model_params//1e6))

In [None]:
# Running log of the loss value of every training batch (appended to by the training loop)
training_loss_logger = []

In [None]:
for epoch in trange(nepochs, desc="Epochs", leave=False):
    weather_rnn.train()
    for day, month, data_seq in tqdm(data_loader_train, desc="Training", leave=False):

        # PyTorch lets us hand over the whole sequence in one go:
        # the nn.Linear layers treat every time-step independently (like a batch),
        # while the LSTM block walks through the steps sequentially.
        # Inputs are every step but the last; targets are the same steps shifted by one.
        seq_block = data_seq[:, :-1].to(device)
        target_seq_block = data_seq[:, 1:].to(device)

        # Every batch starts from a zeroed hidden and cell state
        hidden = torch.zeros(1, data_seq.shape[0], hidden_size, device=device)
        memory = torch.zeros_like(hidden)

        optimizer.zero_grad()

        # Letting PyTorch's LSTM roll out the sequence is a lot faster,
        # but we can't feed previous predictions back in at the next time-step
        data_pred, hidden, memory = weather_rnn(seq_block, hidden, memory)
        loss = loss_fn(data_pred, target_seq_block)

        loss.backward()
        optimizer.step()

        training_loss_logger.append(loss.item())

In [None]:
# Plot the logged per-batch training loss (one point per optimizer step)
_ = plt.figure(figsize=(10, 5))
_ = plt.plot(training_loss_logger)
_ = plt.title("Training Loss")

In [None]:
# Full test series as a float tensor
# (presumably already normalised, since it is un-normalised below - confirm against WeatherDataset)
data_tensor = torch.FloatTensor(dataset_test.dataset.values)

log_predictions = []
weather_rnn.eval()

# Number of real days used to warm up the LSTM state before rolling out
len_input = 30
with torch.no_grad():
    # Warm-up window with a batch dimension of 1 added at the front
    seq_block = data_tensor[:len_input].unsqueeze(0).to(device)

    hidden = torch.zeros(1, seq_block.shape[0], hidden_size, device=device)
    memory = torch.zeros_like(hidden)

    for i in range(data_tensor.shape[0] - len_input):
        data_pred, hidden, memory = weather_rnn(seq_block, hidden, memory)

        # Only the final step is a new prediction; it becomes the next input,
        # while the carried hidden/cell state remembers everything before it
        seq_block = data_pred[:, -1:, :]
        log_predictions.append(seq_block[:, 0, :].cpu())

predictions_cat = torch.cat(log_predictions)

# Undo the normalisation using the test dataset's statistics
un_norm_predictions = predictions_cat * dataset_test.std + dataset_test.mean
# Drop the warm-up days so ground truth lines up with the predictions
un_norm_data = (data_tensor * dataset_test.std + dataset_test.mean)[len_input:]

In [None]:
# Mean squared error between the un-normalised ground truth and the rolled-out predictions
test_mse = ((un_norm_data - un_norm_predictions) ** 2).mean().item()
print("Test MSE value %.2f" % test_mse)

In [None]:
# Compare ground truth and rolled-out predictions for column 0 (plotted as rainfall)
_ = plt.figure(figsize=(10, 5))
_ = plt.plot(un_norm_data[:, 0])
_ = plt.plot(un_norm_predictions[:, 0])
_ = plt.title("Rainfall (mm)")

_ = plt.legend(["Ground Truth", "Prediction"])

In [None]:
# Compare ground truth and rolled-out predictions for column 1 (plotted as max temperature)
_ = plt.figure(figsize=(10, 5))
_ = plt.plot(un_norm_data[:, 1])
_ = plt.plot(un_norm_predictions[:, 1])
_ = plt.title("Max Daily Temperature (C)")

_ = plt.legend(["Ground Truth", "Prediction"])