# Recurrent Neural Network with Pytorch 

### Model A: 1 Hidden Layer (ReLU)

* Unroll 28 time steps
    * Each step input size: 28 x 1
    * Total per unroll: 28 x 28
        * Feedforward Neural Network input size: 28 x 28
* 1 Hidden Layer
* ReLU Activation Function

#### Steps: 

<li> Step 1: Load Dataset
<li> Step 2: Make Dataset Iterable
<li> Step 3: Create Model Class
<li> Step 4: Instantiate Model Class
<li> Step 5: Instantiate Loss Class
<li> Step 6: Instantiate Optimizer Class
<li> Step 7: Train Model

#### Step 1: Load Dataset


In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

In [2]:
train_dataset = dsets.MNIST(root = './data',
                            train = True,
                            transform = transforms.ToTensor(),
                            download = True)

test_dataset = dsets.MNIST(root = './data',
                           train = False,
                           transform = transforms.ToTensor())

In [3]:
print(train_dataset.data.size())

torch.Size([60000, 28, 28])


In [4]:
print(train_dataset.targets.size())

torch.Size([60000])


In [5]:
print(test_dataset.data.size())

torch.Size([10000, 28, 28])


In [6]:
print(test_dataset.targets.size())

torch.Size([10000])


#### Step 2: Make Dataset Iterable

In [7]:
batch_size = 100
n_iters = 3000

num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                        batch_size=batch_size,
                                        shuffle=False)

#### Step 3: Create Model Class

In [8]:
class RNNModel(nn.Module):
    
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        # Hidden Dimensions
        self.hidden_dim = hidden_dim
        
        # Number of hidden layers
        self.layer_dim = layer_dim
        
        # Building your RNN
        # batch_first = True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, input_dim)
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity = 'relu')
        
        # Readout layer
        self.fn = nn.Linear(hidden_dim, output_dim)

        
    def forward(self, x):
        # Initialize hidden state with zeroes
        # (layer_dim, batch_size, hidden_dim)
        h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        
        # One time-step
        out, hn = self.rnn(x, h0)
        
        # Input hidden state of the last time step
        # out.size() ---> 100, 28, 100
        # out[:, -1, :] ---> 100, 100 ---> just want last time step hidden states!
        out = self.fn(out[:, -1, :])
        # out.size() ---> 100, 10 
        return out

#### Step 4: Instantiate Model Class

* 28 time steps
    * Each time step: input dimension = 28
* 1 Hidden Layer
* MNIST 1-9 digits ----> output dimension = 10

In [9]:
input_dim = 28
hidden_dim = 100
layer_dim = 1
output_dim = 10

model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)

#### Step 5: Instantiate Loss Class

In [10]:
criterion = nn.CrossEntropyLoss()

#### Step 6: Instantiate Optimizer Class

In [11]:
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

**Parameters In-Depth**



In [12]:
len(list(model.parameters()))

6

In [13]:
# Input --> Hidden (A1)
print(list(model.parameters())[0].size())
# Input --> Hidden Bias (B1)
print(list(model.parameters())[2].size())

# Hidden --> Hidden (A3)
print(list(model.parameters())[1].size())
# Hidden --> Hidden Bias (B3)
print(list(model.parameters())[3].size())

# Hidden -> Output (A2)
print(list(model.parameters())[4].size())
# Hidden -> Output Bias (B2)
print(list(model.parameters())[5].size())

torch.Size([100, 28])
torch.Size([100])
torch.Size([100, 100])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])


#### Step 7: Train Model

Process

1. <b>Convert input/labels to variables</b>
    * RNN input: (1, 28)
    * CNN input: (1, 28, 28)
    * Feedforward NN input: (1, 28*28)
2. Clear gradient buffers
3. Get output given inputs
4. Get loss
5. Get gradients w.r.t. parameters
6. Update parameters using gradients
    * parameters = parameters - leaarning_rate * parameter_gradients
7. Repeat

In [14]:
# Number of steps to unroll\
seq_dim = 28
iter = 0

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        images = Variable(images.view(-1, seq_dim, input_dim))
        labels = Variable(labels)
        
        optimizer.zero_grad()
        
        outputs = model(images)
        
        loss = criterion(outputs, labels)
        
        loss.backward()
        
        optimizer.step()
        
        iter += 1
        
        if iter % 500 == 0:
            correct = 0
            total = 0
            
            for images, labels in test_loader:
                
                images = Variable(images.view(-1, seq_dim, input_dim))
                outputs = model(images)
                
                _, predicted = torch.max(outputs.data, 1)
                
                total += labels.size(0)
                
                correct += (predicted == labels).sum()
                
            accuracy = 100 * correct / total
            
            print('Iterations: {}, Loss: {}, Accuracy: {}'.format(iter, loss.data, accuracy.float()))

Iterations: 500, Loss: 1.1717265844345093, Accuracy: 59.0
Iterations: 1000, Loss: 1.3896496295928955, Accuracy: 58.0
Iterations: 1500, Loss: 0.6873026490211487, Accuracy: 70.0
Iterations: 2000, Loss: 1.009582281112671, Accuracy: 71.0
Iterations: 2500, Loss: 0.4830789268016815, Accuracy: 86.0
Iterations: 3000, Loss: 0.32755807042121887, Accuracy: 86.0


### Model B: 2 Hidden Layer (ReLU)

* Unroll 28 time steps
    * Each step input size: 28 x 1
    * Total per unroll: 28 x 28
        * Feedforward Neural Network input size: 28 x 28
* <b>2 Hidden Layer</b>
* ReLU Activation Function

#### Steps: 

<li> Step 1: Load Dataset
<li> Step 2: Make Dataset Iterable
<li> Step 3: Create Model Class
<li> <b>Step 4: Instantiate Model Class </b>
<li> Step 5: Instantiate Loss Class
<li> Step 6: Instantiate Optimizer Class
<li> Step 7: Train Model

In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable


'''
STEP 1: LOAD DATASET
'''
train_dataset = dsets.MNIST(root = './data',
                            train = True,
                            transform = transforms.ToTensor(),
                            download = True)

test_dataset = dsets.MNIST(root = './data',
                           train = False,
                           transform = transforms.ToTensor())


'''
STEP 2: MAKE SATASET ITERABLE
'''
batch_size = 100
n_iters = 3000

num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                        batch_size=batch_size,
                                        shuffle=False)



'''
STEP 3: CREATE MODEL CLASS
'''
class RNNModel(nn.Module):
    
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        # Hidden Dimensions
        self.hidden_dim = hidden_dim
        
        # Number of hidden layers
        self.layer_dim = layer_dim
        
        # Building your RNN
        # batch_first = True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, input_dim)
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity = 'relu')
        
        # Readout layer
        self.fn = nn.Linear(hidden_dim, output_dim)

        
    def forward(self, x):
        # Initialize hidden state with zeroes
        h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        
        # One time-step
        out, hn = self.rnn(x, h0)
        
        # Input hidden state of the last time step
        # out.size() ---> 100, 28, 100
        # out[:, -1, :] ---> 100, 100 ---> just want last time step hidden states!
        out = self.fn(out[:, -1, :])
        # out.size() ---> 100, 10 
        return out
   


'''
STEP 4: INSTANTIATE MODEL CLASS
'''
input_dim = 28
hidden_dim = 100
layer_dim = 2
output_dim = 10

model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)

# PRINTING MODEL AND PARAMETERS
print(model)
print('-'*60)
print(len(list(model.parameters())))
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())
print('-'*60)


'''
STEP 5: INSTANTIATE LOSS CLASS
'''
criterion = nn.CrossEntropyLoss()


'''
STEP 6: INSTANTIATE OPTIMIZER CLASS
'''
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)



'''
STEP 7: TRAIN THE MODEL
'''
# Number of steps to unroll\
seq_dim = 28
iter = 0

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        images = Variable(images.view(-1, seq_dim, input_dim))
        labels = Variable(labels)
        
        optimizer.zero_grad()
        
        outputs = model(images)
        
        loss = criterion(outputs, labels)
        
        loss.backward()
        
        optimizer.step()
        
        iter += 1
        
        if iter % 500 == 0:
            correct = 0
            total = 0
            
            for images, labels in test_loader:
                
                images = Variable(images.view(-1, seq_dim, input_dim))
                outputs = model(images)
                
                _, predicted = torch.max(outputs.data, 1)
                
                total += labels.size(0)
                
                correct += (predicted == labels).sum()
                
            accuracy = 100 * correct / total
            print('Iterations: {}, Loss: {}, Accuracy: {}'.format(iter, loss.data, accuracy.float()))

RNNModel(
  (rnn): RNN(28, 100, num_layers=2, batch_first=True)
  (fn): Linear(in_features=100, out_features=10, bias=True)
)
------------------------------------------------------------
10
torch.Size([100, 28])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([100, 100])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])
------------------------------------------------------------
Iterations: 500, Loss: 1.169751763343811, Accuracy: 62.0
Iterations: 1000, Loss: 0.6962988376617432, Accuracy: 64.0
Iterations: 1500, Loss: 0.2189740240573883, Accuracy: 92.0
Iterations: 2000, Loss: 0.15934990346431732, Accuracy: 94.0
Iterations: 2500, Loss: 0.1623079627752304, Accuracy: 95.0
Iterations: 3000, Loss: 0.13616520166397095, Accuracy: 95.0


In [2]:
import gc
gc.collect()

4

### Model C: 2 Hidden Layer (TanH)

* Unroll 28 time steps
    * Each step input size: 28 x 1
    * Total per unroll: 28 x 28
        * Feedforward Neural Network input size: 28 x 28
* <b>2 Hidden Layer</b>
* ReLU Activation Function

#### Steps: 

<li> Step 1: Load Dataset
<li> Step 2: Make Dataset Iterable
<li> <b>Step 3: Create Model Class </b>
<li> Step 4: Instantiate Model Class
<li> Step 5: Instantiate Loss Class
<li> Step 6: Instantiate Optimizer Class
<li> Step 7: Train Model

In [3]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable


'''
STEP 1: LOAD DATASET
'''
train_dataset = dsets.MNIST(root = './data',
                            train = True,
                            transform = transforms.ToTensor(),
                            download = True)

test_dataset = dsets.MNIST(root = './data',
                           train = False,
                           transform = transforms.ToTensor())


'''
STEP 2: MAKE SATASET ITERABLE
'''
batch_size = 100
n_iters = 3000

num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                        batch_size=batch_size,
                                        shuffle=False)



'''
STEP 3: CREATE MODEL CLASS
'''
class RNNModel(nn.Module):
    
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        # Hidden Dimensions
        self.hidden_dim = hidden_dim
        
        # Number of hidden layers
        self.layer_dim = layer_dim
        
        # Building your RNN
        # batch_first = True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, input_dim)
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity = 'tanh')
        
        # Readout layer
        self.fn = nn.Linear(hidden_dim, output_dim)

        
    def forward(self, x):
        # Initialize hidden state with zeroes
        h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        
        # One time-step
        out, hn = self.rnn(x, h0)
        
        # Input hidden state of the last time step
        # out.size() ---> 100, 28, 100
        # out[:, -1, :] ---> 100, 100 ---> just want last time step hidden states!
        out = self.fn(out[:, -1, :])
        # out.size() ---> 100, 10 
        return out
   


'''
STEP 4: INSTANTIATE MODEL CLASS
'''
input_dim = 28
hidden_dim = 100
layer_dim = 2
output_dim = 10

model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)

# PRINTING MODEL AND PARAMETERS
print(model)
print('-'*60)
print(len(list(model.parameters())))
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())
print('-'*60)


'''
STEP 5: INSTANTIATE LOSS CLASS
'''
criterion = nn.CrossEntropyLoss()


'''
STEP 6: INSTANTIATE OPTIMIZER CLASS
'''
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)



'''
STEP 7: TRAIN THE MODEL
'''
# Number of steps to unroll\
seq_dim = 28
iter = 0

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        images = Variable(images.view(-1, seq_dim, input_dim))
        labels = Variable(labels)
        
        optimizer.zero_grad()
        
        outputs = model(images)
        
        loss = criterion(outputs, labels)
        
        loss.backward()
        
        optimizer.step()
        
        iter += 1
        
        if iter % 500 == 0:
            correct = 0
            total = 0
            
            for images, labels in test_loader:
                
                images = Variable(images.view(-1, seq_dim, input_dim))
                outputs = model(images)
                
                _, predicted = torch.max(outputs.data, 1)
                
                total += labels.size(0)
                
                correct += (predicted == labels).sum()
                
            accuracy = 100 * correct / total
            
            print('Iterations: {}, Loss: {}, Accuracy: {}'.format(iter, loss.data, accuracy.float()))

RNNModel(
  (rnn): RNN(28, 100, num_layers=2, batch_first=True)
  (fn): Linear(in_features=100, out_features=10, bias=True)
)
------------------------------------------------------------
10
torch.Size([100, 28])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([100, 100])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])
------------------------------------------------------------
Iterations: 500, Loss: 0.5385576486587524, Accuracy: 83.0
Iterations: 1000, Loss: 0.3958626985549927, Accuracy: 91.0
Iterations: 1500, Loss: 0.1738455891609192, Accuracy: 93.0
Iterations: 2000, Loss: 0.13413047790527344, Accuracy: 95.0
Iterations: 2500, Loss: 0.3107783794403076, Accuracy: 95.0
Iterations: 3000, Loss: 0.16378650069236755, Accuracy: 94.0


### Summary of Results

|     Model A        |     Model B        |   Model C          |
|--------------------|--------------------|--------------------|
|   ReLU             |  ReLU              |  TanH              |
|  1 Hidden Layer    |  2 Hidden Layers   |  2 Hidden Layers   |
|  100 Hidden Units  |  100 Hidden Units  |  100 Hidden Units  |
|     > 86 %         |      > 95 %        |       > 95 %       |

#### Deep Learning 

* 2 ways to expand a recurrent neural network
    * More non-linear activation units (neurons)
    * More Hidden Layers
* Cons
    * Need a Larger Dataset
        * Curse of Dimensionality
    * Does not necessarily mean higher accuracy
        

## Recurrent Neural Network with Pytorch (GPU)

#### Steps: 

<li> Step 1: Load Dataset
<li> Step 2: Make Dataset Iterable
<li> <b>Step 3: Create Model Class </b>
<li> Step 4: Instantiate Model Class
<li> Step 5: Instantiate Loss Class
<li> Step 6: Instantiate Optimizer Class
<li> Step 7: Train Model

In [4]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable


'''
STEP 1: LOAD DATASET
'''
train_dataset = dsets.MNIST(root = './data',
                            train = True,
                            transform = transforms.ToTensor(),
                            download = True)

test_dataset = dsets.MNIST(root = './data',
                           train = False,
                           transform = transforms.ToTensor())


'''
STEP 2: MAKE SATASET ITERABLE
'''
batch_size = 100
n_iters = 3000

num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                        batch_size=batch_size,
                                        shuffle=False)



'''
STEP 3: CREATE MODEL CLASS
'''
class RNNModel(nn.Module):
    
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        # Hidden Dimensions
        self.hidden_dim = hidden_dim
        
        # Number of hidden layers
        self.layer_dim = layer_dim
        
        # Building your RNN
        # batch_first = True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, input_dim)
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity = 'tanh')
        
        # Readout layer
        self.fn = nn.Linear(hidden_dim, output_dim)

        
    def forward(self, x):
        # Initialize hidden state with zeroes
        if torch.cuda.is_available():
            h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda())
        else:
            h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda())
        
        # One time-step
        out, hn = self.rnn(x, h0)
        
        # Input hidden state of the last time step
        # out.size() ---> 100, 28, 100
        # out[:, -1, :] ---> 100, 100 ---> just want last time step hidden states!
        out = self.fn(out[:, -1, :])
        # out.size() ---> 100, 10 
        return out
   


'''
STEP 4: INSTANTIATE MODEL CLASS
'''
input_dim = 28
hidden_dim = 100
layer_dim = 2
output_dim = 10

model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)

if torch.cuda.is_available():
    model.cuda()

# PRINTING MODEL AND PARAMETERS
print(model)
print('-'*60)
print(len(list(model.parameters())))
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())
print('-'*60)


'''
STEP 5: INSTANTIATE LOSS CLASS
'''
criterion = nn.CrossEntropyLoss()


'''
STEP 6: INSTANTIATE OPTIMIZER CLASS
'''
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)



'''
STEP 7: TRAIN THE MODEL
'''
# Number of steps to unroll\
seq_dim = 28
iter = 0

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = Variable(images.view(-1, seq_dim, input_dim).cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images.view(-1, seq_dim, input_dim))
            labels = Variable(labels)
        
        optimizer.zero_grad()
        
        outputs = model(images)
        
        loss = criterion(outputs, labels)
        
        loss.backward()
        
        optimizer.step()
        
        iter += 1
        
        if iter % 500 == 0:
            correct = 0
            total = 0
            
            for images, labels in test_loader:
                if torch.cuda.is_available():
                    images = Variable(images.view(-1, seq_dim, input_dim).cuda())
                else:
                    images = Variable(images.view(-1, seq_dim, input_dim))
                outputs = model(images)
                
                _, predicted = torch.max(outputs.data, 1)
                
                total += labels.size(0)
                if torch.cuda.is_available():
                    correct += (predicted.cpu() == labels.cpu()).sum()
                else:
                    correct += (predicted == labels).sum()
                
            accuracy = 100 * correct / total
            
            print('Iterations: {}, Loss: {}, Accuracy: {}'.format(iter, loss.data, accuracy.float()))

RNNModel(
  (rnn): RNN(28, 100, num_layers=2, batch_first=True)
  (fn): Linear(in_features=100, out_features=10, bias=True)
)
------------------------------------------------------------
10
torch.Size([100, 28])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([100, 100])
torch.Size([100, 100])
torch.Size([100])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])
------------------------------------------------------------
Iterations: 500, Loss: 0.4747331142425537, Accuracy: 82.0
Iterations: 1000, Loss: 0.8292064666748047, Accuracy: 81.0
Iterations: 1500, Loss: 0.27577075362205505, Accuracy: 93.0
Iterations: 2000, Loss: 0.2377377301454544, Accuracy: 94.0
Iterations: 2500, Loss: 0.21858814358711243, Accuracy: 95.0
Iterations: 3000, Loss: 0.22211824357509613, Accuracy: 96.0


In [5]:
save_model = True
if save_model is True:
    # saving only params
    torch.save(model.state_dict(), 'Models/RNNPytorch.pkl')

-----