## Recurrent Neural Networks (RNN)

In [22]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [23]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [24]:
# MNIST datasets rooted at ./data; every image is converted with ToTensor().

# Training split (download requested for this split).
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Test split, read from the same root directory.
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

In [25]:
# Hyper-parameters shared by every model in this notebook.

# Input layout: images are reshaped to (batch, sequence_len, input_size)
# before being fed to the recurrent layers.
sequence_len = 28       # number of time steps per image
input_size = 28         # features (pixels) per time step

# Network architecture
num_layers = 2          # stacked recurrent layers
hidden_size = 128       # width of each recurrent layer
num_classes = 10        # size of the output layer

# Optimisation
batch_size = 100
num_epochs = 2
learning_rate = 0.001

In [26]:
# Mini-batch iterators over the two dataset splits.

# Training batches are reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Test batches keep the dataset order.
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [27]:
# Neural Network Model

class RNN(nn.Module):
    """Many-to-one sequence classifier: stacked ``nn.RNN`` + linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden state in every recurrent layer.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # batch_first=True: tensors are laid out as (batch, seq_len, features).
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # Fully connected layer mapping hidden features to class logits.
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits of shape (batch, num_classes).

        Args:
            x: Input of shape (batch, seq_len, input_size).
        """
        # Zero initial hidden state, allocated directly on the input's device
        # and with its dtype so the module never depends on a global `device`
        # and keeps working after `.to(...)` / `.double()`.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            dtype=x.dtype, device=x.device,
        )
        out, _ = self.rnn(x, h0)
        # Only the last time step of the top layer feeds the classifier.
        out = out[:, -1, :]
        return self.fc(out)
        
    
 # Defining RNN model
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Objective and optimiser: cross-entropy on the logits, Adam parameter updates.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Optimisation loop: num_epochs passes over train_loader.
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # View each batch as (batch, sequence_len, input_size) on the device.
        images = images.reshape(-1, sequence_len, input_size).to(device)
        labels = labels.to(device)

        # Clear old gradients, then forward -> loss -> backward -> update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss on every 100th step.
        if i % 100 == 99:
            print(f'Epoch[{epoch+1}/{num_epochs}], Step[{i+1}]/{n_total_steps}, Loss: {loss.item():.4f}')
                

# Testing the Model
# Put the network in evaluation mode before measuring accuracy; this matters
# for mode-dependent layers (e.g. dropout) and is harmless otherwise.
model.eval()
# Gradients are not needed for evaluation.
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_len, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)

        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    accuracy = 100.0 * (n_correct/n_samples)
    print(f'Accuracy for test images: {accuracy}%')

Epoch[1/2], Step[100]/600, Loss: 1.0102
Epoch[1/2], Step[200]/600, Loss: 0.7046
Epoch[1/2], Step[300]/600, Loss: 0.5072
Epoch[1/2], Step[400]/600, Loss: 0.4395
Epoch[1/2], Step[500]/600, Loss: 0.3302
Epoch[1/2], Step[600]/600, Loss: 0.2540
Epoch[2/2], Step[100]/600, Loss: 0.1650
Epoch[2/2], Step[200]/600, Loss: 0.3770
Epoch[2/2], Step[300]/600, Loss: 0.2809
Epoch[2/2], Step[400]/600, Loss: 0.1516
Epoch[2/2], Step[500]/600, Loss: 0.1841
Epoch[2/2], Step[600]/600, Loss: 0.2009
Accuracy for test images: 94.1%


## Gated Recurrent Units (GRU)

In [28]:
# Neural Network Model

class RNN(nn.Module):
    """Many-to-one sequence classifier: stacked ``nn.GRU`` + linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden state in every recurrent layer.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # batch_first=True: tensors are laid out as (batch, seq_len, features).
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        # Fully connected layer mapping hidden features to class logits.
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits of shape (batch, num_classes).

        Args:
            x: Input of shape (batch, seq_len, input_size).
        """
        # Zero initial hidden state, allocated directly on the input's device
        # and with its dtype so the module never depends on a global `device`
        # and keeps working after `.to(...)` / `.double()`.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            dtype=x.dtype, device=x.device,
        )
        out, _ = self.gru(x, h0)
        # Only the last time step of the top layer feeds the classifier.
        out = out[:, -1, :]
        return self.fc(out)
        
    
 # Defining RNN model
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Objective and optimiser: cross-entropy on the logits, Adam parameter updates.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Optimisation loop: num_epochs passes over train_loader.
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # View each batch as (batch, sequence_len, input_size) on the device.
        images = images.reshape(-1, sequence_len, input_size).to(device)
        labels = labels.to(device)

        # Clear old gradients, then forward -> loss -> backward -> update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss on every 100th step.
        if i % 100 == 99:
            print(f'Epoch[{epoch+1}/{num_epochs}], Step[{i+1}]/{n_total_steps}, Loss: {loss.item():.4f}')
                

# Testing the Model
# Put the network in evaluation mode before measuring accuracy; this matters
# for mode-dependent layers (e.g. dropout) and is harmless otherwise.
model.eval()
# Gradients are not needed for evaluation.
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_len, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)

        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    accuracy = 100.0 * (n_correct/n_samples)
    print(f'Accuracy for test images: {accuracy}%')

Epoch[1/2], Step[100]/600, Loss: 0.5916
Epoch[1/2], Step[200]/600, Loss: 0.4258
Epoch[1/2], Step[300]/600, Loss: 0.3235
Epoch[1/2], Step[400]/600, Loss: 0.1508
Epoch[1/2], Step[500]/600, Loss: 0.2344
Epoch[1/2], Step[600]/600, Loss: 0.0703
Epoch[2/2], Step[100]/600, Loss: 0.1287
Epoch[2/2], Step[200]/600, Loss: 0.1575
Epoch[2/2], Step[300]/600, Loss: 0.1640
Epoch[2/2], Step[400]/600, Loss: 0.0903
Epoch[2/2], Step[500]/600, Loss: 0.0766
Epoch[2/2], Step[600]/600, Loss: 0.0653
Accuracy for test images: 97.39999999999999%


## Long Short-Term Memory (LSTM)

In [29]:
# Neural Network Model

class RNN(nn.Module):
    """Many-to-one sequence classifier: stacked ``nn.LSTM`` + linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden/cell state in every recurrent layer.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # batch_first=True: tensors are laid out as (batch, seq_len, features).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # Fully connected layer mapping hidden features to class logits.
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits of shape (batch, num_classes).

        Args:
            x: Input of shape (batch, seq_len, input_size).
        """
        # Zero initial hidden and cell states, allocated directly on the
        # input's device and with its dtype so the module never depends on a
        # global `device` and keeps working after `.to(...)` / `.double()`.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        # Only the last time step of the top layer feeds the classifier.
        out = out[:, -1, :]
        return self.fc(out)
        
    
 # Defining RNN model
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Objective and optimiser: cross-entropy on the logits, Adam parameter updates.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Optimisation loop: num_epochs passes over train_loader.
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # View each batch as (batch, sequence_len, input_size) on the device.
        images = images.reshape(-1, sequence_len, input_size).to(device)
        labels = labels.to(device)

        # Clear old gradients, then forward -> loss -> backward -> update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss on every 100th step.
        if i % 100 == 99:
            print(f'Epoch[{epoch+1}/{num_epochs}], Step[{i+1}]/{n_total_steps}, Loss: {loss.item():.4f}')
                

# Testing the Model
# Put the network in evaluation mode before measuring accuracy; this matters
# for mode-dependent layers (e.g. dropout) and is harmless otherwise.
model.eval()
# Gradients are not needed for evaluation.
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_len, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)

        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    accuracy = 100.0 * (n_correct/n_samples)
    print(f'Accuracy for test images: {accuracy}%')

Epoch[1/2], Step[100]/600, Loss: 0.6894
Epoch[1/2], Step[200]/600, Loss: 0.3493
Epoch[1/2], Step[300]/600, Loss: 0.3302
Epoch[1/2], Step[400]/600, Loss: 0.2759
Epoch[1/2], Step[500]/600, Loss: 0.2876
Epoch[1/2], Step[600]/600, Loss: 0.0972
Epoch[2/2], Step[100]/600, Loss: 0.0696
Epoch[2/2], Step[200]/600, Loss: 0.2279
Epoch[2/2], Step[300]/600, Loss: 0.0785
Epoch[2/2], Step[400]/600, Loss: 0.0541
Epoch[2/2], Step[500]/600, Loss: 0.1899
Epoch[2/2], Step[600]/600, Loss: 0.2445
Accuracy for test images: 96.86%
