In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
from torch.nn import Parameter
from torch import Tensor
import math
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Detect GPU support once and reuse the flag for everything below.
cuda = torch.cuda.is_available()

# Float tensor constructor matching the available device.
# NOTE: this rebinds the `Tensor` name imported from torch above.
if cuda:
    Tensor = torch.cuda.FloatTensor
else:
    Tensor = torch.FloatTensor

# Fix the RNG seeds so that weight initialisation and shuffling are repeatable.
torch.manual_seed(125)
if cuda:
    torch.cuda.manual_seed_all(125)
    

In [2]:
# STEP 3: CREATE MODEL CLASS
class LSTMCell(nn.Module):
    """
    A single LSTM cell after Hochreiter & Schmidhuber, 'Long Short-Term Memory'.
    http://www.bioinf.jku.at/publications/older/2604.pdf

    Two linear maps (``x2h`` on the input and ``h2h`` on the previous hidden
    state) each produce ``4 * hidden_size`` features. Their sum is split into
    the input, forget, cell and output gates, in that order.

    Args:
        input_size: number of features in each input row.
        hidden_size: number of features in the hidden and cell states.
        bias: whether the two linear maps carry a bias term.
    """
    def __init__(self, input_size, hidden_size, bias=True):
        super(LSTMCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.x2h = nn.Linear(input_size, 4 * hidden_size, bias=bias)
        self.h2h = nn.Linear(hidden_size, 4 * hidden_size, bias=bias)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw every parameter uniformly from [-1/sqrt(hidden_size), 1/sqrt(hidden_size)]."""
        std = 1.0 / math.sqrt(self.hidden_size)
        # In-place initialisation must not be recorded by autograd.
        with torch.no_grad():
            for w in self.parameters():
                w.uniform_(-std, std)

    def forward(self, x, hidden):
        """
        Advance the cell by one time step.

        Args:
            x: input for this step, viewed as ``(-1, x.size(1))`` before use.
            hidden: tuple ``(hx, cx)`` with the previous hidden and cell states.

        Returns:
            Tuple ``(hy, cy)`` with the new hidden and cell states.
        """
        hx, cx = hidden
        x = x.view(-1, x.size(1))
        gates = self.x2h(x) + self.h2h(hx)
        # Flatten any leading axes into the batch axis while always keeping
        # the batch axis itself. A bare squeeze() would also drop a batch of
        # size 1 and make the chunk along dim 1 below fail.
        gates = gates.reshape(-1, gates.size(-1))
        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
        # torch.sigmoid / torch.tanh replace the deprecated F.sigmoid / F.tanh.
        ingate = torch.sigmoid(ingate)
        forgetgate = torch.sigmoid(forgetgate)
        cellgate = torch.tanh(cellgate)
        outgate = torch.sigmoid(outgate)
        # New cell state: keep part of the old state, add the gated candidate.
        cy = torch.mul(cx, forgetgate) + torch.mul(ingate, cellgate)
        hy = torch.mul(outgate, torch.tanh(cy))

        return (hy, cy)

In [3]:
'''
STEP 1: LOADING DATASET
'''
# MNIST training split; downloaded into ./data when not already present.
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# MNIST test split, read from the same root directory.
test_dataset = dsets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

batch_size = 100
n_iters = 6000
# Whole number of epochs so that epochs * batches-per-epoch is about n_iters.
num_epochs = int(n_iters / (len(train_dataset) / batch_size))

# Training batches are reshuffled on every pass; test batches keep their order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)

'''
STEP 4: INSTANTIATE MODEL CLASS
'''
# Hyperparameters used by the cells further down.
input_dim = 28
hidden_dim = 128
layer_dim = 2  # ONLY CHANGE IS HERE FROM ONE LAYER TO TWO LAYER
output_dim = 10
seq_dim = 28

In [4]:
# Pull a single mini-batch from the training loader to experiment with.
images, labels = next(iter(train_loader))

# Reshape on every machine, not only when CUDA is available. The reshape used
# to sit under a CUDA check with no else branch, so CPU-only runs kept the
# layout delivered by the loader.
# The Variable wrapper is dropped: it is deprecated and returns a plain tensor.
images = images.view(-1, seq_dim, input_dim)

In [5]:
# Shape of the sampled batch after the view(-1, seq_dim, input_dim) reshape.
images.shape

torch.Size([100, 28, 28])

In [6]:
x = images
# NOTE: these two assignments overwrite the layer_dim / hidden_dim values set
# in the hyperparameter cell above (layer_dim was 2 there).
layer_dim = 1
hidden_dim = 128

# Zero initial hidden and cell states, shaped (layer_dim, batch, hidden_dim).
# Plain tensors are used; the deprecated Variable wrapper is not needed.
h0 = torch.zeros(layer_dim, x.size(0), hidden_dim)
c0 = torch.zeros(layer_dim, x.size(0), hidden_dim)

outs = []
# Working states for the first layer: (batch, hidden_dim).
cn = c0[0]
hn = h0[0]

In [7]:
# Show the shapes of the cell state and the hidden state, one per line.
print(cn.shape, hn.shape, sep="\n")

torch.Size([100, 128])
torch.Size([100, 128])


In [8]:
# LSTMCell takes (input_size, hidden_size, bias). Its third parameter is the
# bias flag, not a layer count, so layer_dim must not be passed there. The
# default bias=True is used instead.
alstm = LSTMCell(input_dim, hidden_dim)
# Linear head mapping a hidden state to output_dim scores.
afc = nn.Linear(hidden_dim, output_dim)

In [17]:
# Start from the zero initial states every time, so re-running this cell
# gives the same result instead of continuing from the previous run's state.
hn, cn = h0[0], c0[0]
outs = []
for seq in range(x.size(1)):
    # Call the module itself (not .forward) so that nn.Module hooks run.
    hn, cn = alstm(x[:, seq, :], (hn, cn))
    outs.append(hn)

# Hidden state after the final time step: (batch, hidden_dim).
# No squeeze() here: it would drop the batch axis for a batch of one.
out = outs[-1]
# A classifier head would follow here, e.g. afc(out) --> (100, 10)

In [18]:
# Shape of the hidden state taken from the final time step.
out.shape

torch.Size([100, 128])

In [19]:
# Number of collected hidden states: one is appended per step over x.size(1).
len(outs)

28

In [24]:
# Shape of the hidden state recorded at the first time step.
outs[0].shape

torch.Size([100, 128])

In [20]:
# NOTE(review): this indexes the tensor `out` (its last row), not the list `outs` - confirm which was intended.
out[-1].shape

torch.Size([128])

In [22]:
# Display the final-step hidden state tensor.
out

tensor([[-0.0542, -0.0071,  0.0139,  ...,  0.0089, -0.0030,  0.0079],
        [-0.0630, -0.0346, -0.0046,  ...,  0.0293,  0.0240,  0.0149],
        [-0.0510, -0.0201, -0.0014,  ...,  0.0275,  0.0308,  0.0358],
        ...,
        [-0.0551, -0.0081,  0.0137,  ...,  0.0086, -0.0017,  0.0067],
        [-0.0561, -0.0069,  0.0146,  ...,  0.0116,  0.0002,  0.0108],
        [-0.0641, -0.0427, -0.0079,  ...,  0.0442,  0.0315,  0.0387]],
       grad_fn=<SqueezeBackward0>)

In [23]:
# Last entry of `outs` (the hidden state appended on the final loop iteration), which `out` was taken from.
outs[-1]

tensor([[-0.0542, -0.0071,  0.0139,  ...,  0.0089, -0.0030,  0.0079],
        [-0.0630, -0.0346, -0.0046,  ...,  0.0293,  0.0240,  0.0149],
        [-0.0510, -0.0201, -0.0014,  ...,  0.0275,  0.0308,  0.0358],
        ...,
        [-0.0551, -0.0081,  0.0137,  ...,  0.0086, -0.0017,  0.0067],
        [-0.0561, -0.0069,  0.0146,  ...,  0.0116,  0.0002,  0.0108],
        [-0.0641, -0.0427, -0.0079,  ...,  0.0442,  0.0315,  0.0387]],
       grad_fn=<MulBackward0>)