In [17]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

batchSize = 100
trainSet = torchvision.datasets.MNIST(root='./data', train = True, transform=transforms.ToTensor(), download=True)
trainLoader = torch.utils.data.DataLoader(dataset=trainSet, batch_size=batchSize, shuffle = True)
testSet = torchvision.datasets.MNIST(root='./data', train = False, transform=transforms.ToTensor(), download=True)
testLoader = torch.utils.data.DataLoader(dataset=testSet, batch_size=batchSize, shuffle = True)

In [19]:
class Model5_1(nn.Module):
    
    def __init__(self):
        self.inSize=28
        self.hiddenSize=100
        self.numLayers=2
        self.outSize = 10
        super(Model5_1, self).__init__()
        self.rnn = nn.RNN(self.inSize, self.hiddenSize, self.numLayers, batch_first=True)
        self.fc = nn.Linear(self.hiddenSize, self.outSize)
    
    def forward(self, x):
        h0 = torch.zeros(self.numLayers, x.size(0), self.hiddenSize)
        out, hn = self.rnn(x, h0)
        out = self.fc(out[:, -1, :]) 
        return out

model5_1 = Model5_1()

In [20]:
class Model5_1b(nn.Module):
    def __init__(self):
        super(Model5_1b, self).__init__()
        self.rnn = nn.RNN(28, 200, 2, batch_first=True)
        self.fc = nn.Linear(200, 10)
    
    def forward(self, x):
        h0 = torch.zeros(2, x.size(0), 200)
        out, hn = self.rnn(x, h0)
        out = self.fc(out[:, -1, :]) 
        return out

model5_1b = Model5_1b()

In [25]:
class Model5_1c(nn.Module):
    def __init__(self):
        super(Model5_1c, self).__init__()
        self.rnn = nn.RNN(28, 200, 3, batch_first=True)
        self.fc = nn.Linear(200, 10)
    
    def forward(self, x):
        h0 = torch.zeros(3, x.size(0), 200)
        out, hn = self.rnn(x, h0)
        out = self.fc(out[:, -1, :]) 
        return out

model5_1c = Model5_1c()

In [22]:
import torch.optim as optim
import time

def benchmark(trainLoader,testLoader, model, epochs=1, lr=0.01):
    model.__init__()
    start=time.time()
    optimiser = optim.SGD(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        for i, (images, labels) in enumerate(trainLoader):
            optimiser.zero_grad()
            outputs = model(images.view(-1, 28,28))
            loss = criterion(outputs, labels)
            loss.backward()
            optimiser.step()
    print('Accuracy: {0:.4f}'.format(accuracy(testLoader,model)))
    print('Training time: {0:.2f}'.format(time.time() - start))
    
def accuracy(testLoader,model):    
    correct, total = 0, 0
    with torch.no_grad():
        for data in testLoader:
            images, labels = data
            outputs = model(images.view(-1, 28,28))
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()      
    return(correct / total)

In [24]:
benchmark(trainLoader,testLoader,model5_1, epochs=1, lr=0.1)
benchmark(trainLoader,testLoader,model5_1b, epochs=1, lr=0.1)
benchmark(trainLoader,testLoader,model5_1c, epochs=1, lr=0.1)

Accuracy: 0.8332
Training time: 46.85
Accuracy: 0.8948
Training time: 91.73


RuntimeError: Expected hidden size (3, 100, 200), got (2, 100, 200)

In [8]:
dataiter = iter(trainLoader)
images, labels = dataiter.next()
rnn = nn.RNN(28,100, 2, batch_first=True)
h0 = torch.zeros(2, images.size(0), 100)
output, hn = rnn(images.view(-1, 28,28), h0)

In [9]:
output.size()

torch.Size([100, 28, 100])

In [10]:
hn.size()

torch.Size([2, 100, 100])

In [11]:
images.size()

torch.Size([100, 1, 28, 28])

In [12]:
images.view(-1, 28,28).size()

torch.Size([100, 28, 28])

In [13]:
fc=nn.Linear(100,10)
output2=fc(output[:, -1, :])

In [14]:
output2.size()

torch.Size([100, 10])

In [15]:
output[:, -1, :].size()

torch.Size([100, 100])