# RNN

In [4]:
import torch

In [2]:
# Toy 1-D sequence: the floats from 1.0 up to (but not including) 16.0
seq = torch.arange(start=1., end=16.)

# Report the container type, the values, and the shape, one per line
print(type(seq), seq, seq.shape, sep='\n')

<class 'torch.Tensor'>
tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14.,
        15.])
torch.Size([15])


In [3]:
# Number of features observed at each time step
input_size = 1
# Number of consecutive data points that make up one sample
seq_length = 5
# The sequence is cut into non-overlapping samples of seq_length points each
batch_size = seq.size(0) // seq_length

In [4]:
# Reinterpret the flat sequence as (batch_size, seq_length, input_size)
X = seq.view((batch_size, seq_length, input_size))

print(X.shape)

torch.Size([3, 5, 1])


In [11]:
# Display X to inspect how the flat sequence was split into samples
X

tensor([[[ 1.],
         [ 2.],
         [ 3.],
         [ 4.],
         [ 5.]],

        [[ 6.],
         [ 7.],
         [ 8.],
         [ 9.],
         [10.]],

        [[11.],
         [12.],
         [13.],
         [14.],
         [15.]]])

In [7]:
import torch.nn as nn

In [6]:
# How many RNN layers are stacked on top of each other
num_layers = 1
# Width of the hidden state vector
hidden_size = 10

In [7]:
# Unidirectional tanh RNN without dropout that takes batch-first input,
# i.e. tensors laid out as (batch_size, seq_length, input_size)
singleRNN = nn.RNN(
    input_size,
    hidden_size,
    num_layers,
    nonlinearity='tanh',
    bidirectional=False,
    dropout=0,
    batch_first=True,
)

In [8]:
# Feed the whole batch through the RNN; no initial hidden state is supplied
y, h = singleRNN(X)

# y: (batch_size, seq_length, hidden_size * num_directions)
# h: (num_layers * num_directions, batch_size, hidden_size)
print(y.shape, h.shape, sep='\n')

torch.Size([3, 5, 10])
torch.Size([1, 3, 10])


In [9]:
# Display the RNN output for every time step of every sample
y

tensor([[[ 0.1261, -0.0305,  0.4943, -0.6666,  0.3857,  0.5619, -0.2774,
          -0.0452,  0.0134, -0.6958],
         [-0.1222, -0.6567,  0.8285, -0.8451,  0.5680,  0.7884, -0.1008,
          -0.1422, -0.1972, -0.7993],
         [-0.3199, -0.8624,  0.9120, -0.8545,  0.7633,  0.8909, -0.2065,
          -0.1905,  0.0501, -0.8643],
         [-0.3947, -0.9080,  0.9570, -0.9121,  0.8698,  0.9438, -0.3571,
          -0.0884,  0.2237, -0.9117],
         [-0.4424, -0.9282,  0.9781, -0.9494,  0.9312,  0.9703, -0.4367,
          -0.0158,  0.3264, -0.9463]],

        [[-0.3291, -0.6548,  0.9654, -0.9758,  0.9630,  0.9619, -0.5394,
           0.3976,  0.7301, -0.9795],
         [-0.5235, -0.9409,  0.9940, -0.9836,  0.9821,  0.9902, -0.5874,
           0.1473,  0.4197, -0.9811],
         [-0.5839, -0.9645,  0.9967, -0.9903,  0.9899,  0.9949, -0.6014,
           0.2097,  0.6633, -0.9900],
         [-0.6343, -0.9723,  0.9983, -0.9944,  0.9945,  0.9972, -0.6865,
           0.3081,  0.7437, -0.9939],

In [10]:
# Name and shape of every learnable tensor held by the RNN
[(label, tensor.size()) for label, tensor in singleRNN.named_parameters()]

[('weight_ih_l0', torch.Size([10, 1])),
 ('weight_hh_l0', torch.Size([10, 10])),
 ('bias_ih_l0', torch.Size([10])),
 ('bias_hh_l0', torch.Size([10]))]

## Image Classification with RNN

In [1]:
import torchvision
import torchvision.transforms as transforms

In [2]:
# The only preprocessing step is converting each image to a tensor
transform = transforms.Compose([transforms.ToTensor()])

# Training split; fetched into ./mnist when it is not already there
trainset = torchvision.datasets.MNIST(
    root='./mnist',
    train=True,
    download=True,
    transform=transform,
)
# Held-out split, read from the same root directory
testset = torchvision.datasets.MNIST(
    root='./mnist',
    train=False,
    transform=transform,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./mnist/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./mnist/MNIST/raw/train-images-idx3-ubyte.gz to ./mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./mnist/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./mnist/MNIST/raw/train-labels-idx1-ubyte.gz to ./mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to ./mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./mnist/MNIST/raw



In [5]:
# Loader settings shared by the training and test splits
num_workers = 0
batch_size = 1000

# Training batches are drawn in shuffled order
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
# Test batches keep the dataset order
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

In [8]:
class ImageRNN(nn.Module):
    """Image classifier that reads an image as a sequence with an RNN.

    Each input is reshaped to ``(batch, seq_length, input_size)``, run through
    an ``nn.RNN``, and the outputs of *all* time steps are concatenated and
    passed to a single linear layer that produces the class scores.

    Args:
        batch_size: Nominal batch size. Stored for reference only; the
            forward pass works with whatever batch size it receives.
        seq_length: Number of time steps per image (28 for MNIST rows).
        input_size: Number of features per time step (28 for MNIST columns).
        hidden_size: Width of the RNN hidden state.
        num_layers: Number of stacked RNN layers.
        num_classes: Number of output classes.
    """

    def __init__(self, batch_size, seq_length, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes

        self.rnn = nn.RNN(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
        # The classifier sees the hidden outputs of every time step, flattened.
        self.fc = nn.Linear(self.hidden_size * self.seq_length, self.num_classes)

    def forward(self, x, h0=None):
        """Compute class scores for a batch of images.

        Args:
            x: Tensor whose elements can be arranged as
                ``(batch, seq_length, input_size)``, e.g. ``(batch, 1, 28, 28)``
                images when ``seq_length == input_size == 28``.
            h0: Optional initial hidden state of shape
                ``(num_layers, batch, hidden_size)``. When omitted, ``nn.RNN``
                starts from an all-zero hidden state.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised scores.
        """
        # Use the configured dimensions instead of a hard-coded 28x28 so the
        # model works for any (seq_length, input_size) it was built with.
        # For (batch, 1, H, W) images this makes each image row one time step
        # (assumes the usual channel-first layout with a single channel).
        x = x.reshape(-1, self.seq_length, self.input_size)
        out, _ = self.rnn(x, h0)    # (batch, seq_length, hidden_size)
        out = out.reshape(out.size(0), -1)    # (batch, seq_length * hidden_size)
        return self.fc(out)    # (batch, num_classes)

In [9]:
import torch.optim as optim

In [10]:
# Each image is fed to the RNN as 28 time steps of 28 features
seq_length, input_size = 28, 28
# Width of the RNN hidden state and number of stacked RNN layers
hidden_size, num_layers = 50, 1
# Number of target classes
num_classes = 10

In [11]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [12]:
# Build the classifier and place its parameters on the selected device
model = ImageRNN(
    batch_size=batch_size,
    seq_length=seq_length,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
).to(device)
# Adam optimises all model parameters against a cross-entropy objective
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [13]:
# Number of full passes over the training set
epochs = 10

model.train()
for epoch in range(epochs):
    # Running totals for this epoch
    train_loss = 0
    train_correct = 0

    for x, y in trainloader:
        x, y = x.to(device), y.to(device)
        # Size the initial hidden state from the batch actually received, not
        # the configured batch_size: the final batch is smaller whenever the
        # dataset size is not a multiple of batch_size. Allocating directly on
        # `device` also avoids a CPU allocation followed by a copy.
        # Shape: (num_layers * num_directions, batch, hidden_size)
        h0 = torch.zeros(num_layers, x.size(0), hidden_size, device=device)

        optimizer.zero_grad()
        outputs = model(x, h0)
        loss = criterion(outputs, y)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        # Predicted class = index of the largest score along the class axis
        _, predicted = outputs.max(1)
        train_correct += predicted.eq(y).sum().item()

    # Loss is the mean of the per-batch losses; accuracy is per sample
    train_loss = train_loss / len(trainloader)
    train_acc = train_correct / len(trainset)

    print('[%2d] TRAIN loss: %.4f, acc: %.4f' % (epoch + 1, train_loss, train_acc))

[ 1] TRAIN loss: 0.9446, acc: 0.7599
[ 2] TRAIN loss: 0.3634, acc: 0.8922
[ 3] TRAIN loss: 0.2920, acc: 0.9135
[ 4] TRAIN loss: 0.2474, acc: 0.9265
[ 5] TRAIN loss: 0.2140, acc: 0.9370
[ 6] TRAIN loss: 0.1869, acc: 0.9449
[ 7] TRAIN loss: 0.1671, acc: 0.9516
[ 8] TRAIN loss: 0.1514, acc: 0.9558
[ 9] TRAIN loss: 0.1373, acc: 0.9594
[10] TRAIN loss: 0.1260, acc: 0.9630


In [14]:
# Running totals over the test set, plus every predicted label in order
test_loss = 0
test_correct = 0
test_preds = []

model.eval()
# Gradients are not needed for evaluation
with torch.no_grad():
    for x, y in testloader:
        x, y = x.to(device), y.to(device)
        # Size the initial hidden state from the batch actually received, not
        # the configured batch_size, so a smaller final batch does not fail.
        # Shape: (num_layers * num_directions, batch, hidden_size)
        h0 = torch.zeros(num_layers, x.size(0), hidden_size, device=device)

        outputs = model(x, h0)
        loss = criterion(outputs, y)

        test_loss += loss.item()
        # Predicted class = index of the largest score along the class axis
        _, predicted = outputs.max(1)
        test_correct += predicted.eq(y).sum().item()

        test_preds.extend(predicted.tolist())

# Loss is the mean of the per-batch losses; accuracy is per sample
print('TEST loss: %.4f, acc: %.4f' % (test_loss/len(testloader), test_correct/len(testset)))

TEST loss: 0.1146, acc: 0.9659
