# Homework 7 - Experiments on RNN and LSTM

Please implement the following two functions:
- MnistRNN() - Design a RNN
- MnistLSTM() - Design a LSTM 

Please train two models on the Mnist dataset and print the training results for each epoch.

In [1]:
from torchvision.datasets import MNIST
from torchvision.transforms import Compose,ToTensor,Normalize
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import os
import torch
import numpy as np

BATCH_SIZE = 128
TEST_BATCH_SIZE = 1000
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.backends.cudnn.enabled = False

# dataloader for the dataset
def get_dataloader(train,batch_size=BATCH_SIZE):
    transform_fn = Compose([
        ToTensor(),
        Normalize(mean = (0.1307,), std = (0.3081,))
        ]) 
    dataset = MNIST(root = './data',train = train,transform = transform_fn, download = True)
    data_loader = DataLoader(dataset,batch_size = batch_size,shuffle = True)
    return data_loader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
import torch.nn as nn

class MnistRNN(nn.Module):
    def __init__(self, input_dim=1, hidden_dim=128, output_dim=10):
        super(MnistRNN, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.stem = nn.Sequential(
            nn.Conv2d(input_dim, hidden_dim, kernel_size=4, stride=4),
            nn.ReLU())

        # Define the RNN layer
        self.rnn = nn.RNN(input_size=hidden_dim, hidden_size=hidden_dim, num_layers=1, batch_first=True)

        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs):
        # Stem network (convolutional + activation)
        inputs = self.stem(inputs)
        inputs = inputs.view(inputs.size(0), -1, inputs.size(1)) # B x seq_len x hidden_dim
        h0 = torch.zeros(1, inputs.size(0), self.hidden_dim).to(inputs.device) # Initialize hidden state
        out, _ = self.rnn(inputs, h0) # Pass through RNN layer
        out = out[:, -1, :] # Take the last time step's output
        output = self.softmax(out) # Apply softmax activation
        return output

        return output

In [3]:
import torch
import torch.nn as nn

class MnistLSTM(nn.Module):
    def __init__(self, input_dim=1, hidden_dim=128, output_dim=10):
        super(MnistLSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.stem = nn.Sequential(
            nn.Conv2d(input_dim, hidden_dim, kernel_size=4, stride=4),
            nn.ReLU())

        # Define the LSTM layer
        self.lstm = nn.LSTM(input_size=hidden_dim, hidden_size=hidden_dim, num_layers=1, batch_first=True)

        self.fc = nn.Linear(hidden_dim, output_dim)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs):
        inputs = self.stem(inputs)
        inputs = inputs.view(inputs.size(0), -1, self.hidden_dim)  # B x (7*7) x D

        # Forward through LSTM
        lstm_output, (hidden, cell) = self.lstm(inputs)

        # We can use the last hidden state as the output for classification
        output = self.fc(hidden[-1])  # Get the last hidden state
        output = self.softmax(output)
        
        return output


## Train the RNN model

In [4]:
model = MnistRNN().to(device)
optimizer = Adam(model.parameters(), lr=0.001)

In [5]:
def train(epoch, num_epochs):
    data_loader = get_dataloader(True)
    total_step = len(data_loader)
    for idx, (input, target) in enumerate(data_loader):
        optimizer.zero_grad()
        output = model(input.to(device))
        loss = F.nll_loss(output, target.to(device))
        loss.backward()
        optimizer.step()
        if (idx+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, idx+1, total_step, loss.item()))

In [6]:
def test():
    loss_list = []
    acc_list = []
    test_dataloader = get_dataloader(train = False, batch_size=TEST_BATCH_SIZE)
    for idx,(input,target) in enumerate(test_dataloader):
        with torch.no_grad():
            output = model(input.to(device))
            target = target.to(device)
            cur_loss = F.nll_loss(output, target)
            loss_list.append(cur_loss.cpu())
            pred = output.max(dim = -1)[-1]
            cur_acc = pred.eq(target).float().mean()
            acc_list.append(cur_acc.cpu())
    print("Mean accuracy: ", np.mean(acc_list), "Mean loss: ", np.mean(loss_list))

In [7]:
test()
num_epochs = 3
for i in range(num_epochs):
    train(i, num_epochs)
test()

Mean accuracy:  0.0033999998 Mean loss:  4.8232107
Epoch [1/3], Step [100/469], Loss: 3.2151
Epoch [1/3], Step [200/469], Loss: 3.1464
Epoch [1/3], Step [300/469], Loss: 3.1051
Epoch [1/3], Step [400/469], Loss: 3.0577
Epoch [2/3], Step [100/469], Loss: 3.0076
Epoch [2/3], Step [200/469], Loss: 3.0137
Epoch [2/3], Step [300/469], Loss: 2.9919
Epoch [2/3], Step [400/469], Loss: 2.9549
Epoch [3/3], Step [100/469], Loss: 2.9459
Epoch [3/3], Step [200/469], Loss: 2.9602
Epoch [3/3], Step [300/469], Loss: 2.9329
Epoch [3/3], Step [400/469], Loss: 2.9428
Mean accuracy:  0.93149996 Mean loss:  2.9467125


## Train the LSTM model

In [8]:
model = MnistLSTM().to(device)
optimizer = Adam(model.parameters(), lr=0.001)

In [9]:
test()
num_epochs = 3
for i in range(num_epochs):
    train(i, num_epochs)
test()

Mean accuracy:  0.0759 Mean loss:  2.3054051
Epoch [1/3], Step [100/469], Loss: 0.3155
Epoch [1/3], Step [200/469], Loss: 0.3201
Epoch [1/3], Step [300/469], Loss: 0.2750
Epoch [1/3], Step [400/469], Loss: 0.1922
Epoch [2/3], Step [100/469], Loss: 0.1175
Epoch [2/3], Step [200/469], Loss: 0.1012
Epoch [2/3], Step [300/469], Loss: 0.0957
Epoch [2/3], Step [400/469], Loss: 0.1270
Epoch [3/3], Step [100/469], Loss: 0.0656
Epoch [3/3], Step [200/469], Loss: 0.0670
Epoch [3/3], Step [300/469], Loss: 0.0827
Epoch [3/3], Step [400/469], Loss: 0.0380
Mean accuracy:  0.9739 Mean loss:  0.07997291
