In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
use_cuda = True
device = torch.device('cuda:4' if use_cuda else 'cpu')

# 1、data loader

Loading the MNIST data

In [3]:
dataMNIST_train = dsets.MNIST(
    root = 'data',
    train = True,
    download = True,
    transform = transforms.ToTensor()
)
dataMNIST_test = dsets.MNIST(
    root = 'data',
    train = False,
    download = True,
    transform = transforms.ToTensor()
)

In [10]:
dataLoaderMNIST_train = torch.utils.data.DataLoader(
    dataset = dataMNIST_train,
    batch_size = 128,
    shuffle = True,
)
dataLoaderMNIST_test = torch.utils.data.DataLoader(
    dataset = dataMNIST_test,
    batch_size = 128,
    shuffle = True,
)

In [11]:
dataMNIST_train

Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [12]:
dataMNIST_test

Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [15]:
x,y = iter(dataLoaderMNIST_train).next()

In [8]:
# (batch_size, channel, height, width)
# RNN input data shape:(batch_size, seq_size, input_size)
x.shape,x.squeeze(1).shape

(torch.Size([128, 1, 28, 28]), torch.Size([128, 28, 28]))

In [9]:
x = x.squeeze(1)

In [10]:
#(batch_size)
y.shape

torch.Size([128])

# 2、RNN model

In [11]:
class modelLSTM(nn.Module):
    def __init__(self, input_size = 28, hidden_size = 32, num_layers = 1):
        super(modelLSTM, self).__init__()
        self.lstm = nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True,
        )
        self.linear = nn.Linear(
            in_features = hidden_size*input_size, 
            out_features = 10
        )
    
    def forward(self, x, state = None):
        x = x.view(-1,x.shape[-2],x.shape[-1])
        y, next_state = self.lstm(x, state)
        y = y.contiguous().view(x.shape[0],-1) # contiguous operation
        y = self.linear(y)
        y = F.softmax(y,dim = 1)
        return y,next_state


In [12]:
class modelGRU(nn.Module):
    def __init__(self, input_size = 28, hidden_size = 32, num_layers = 1):
        super(modelGRU, self).__init__()
        self.gru = nn.GRU(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True,
        )
        self.linear = nn.Linear(
            in_features = hidden_size*input_size, 
            out_features = 10
        )
    
    def forward(self, x, state = None):
        x = x.view(-1,x.shape[-2],x.shape[-1])
        y, next_state = self.gru(x, state)
        y = y.contiguous().view(x.shape[0],-1) # contiguous operation
        y = self.linear(y)
        y = F.softmax(y,dim = 1)
        return y,next_state


In [13]:
mylstm = modelLSTM()
mylstm.cpu()

modelLSTM(
  (lstm): LSTM(28, 32, batch_first=True)
  (linear): Linear(in_features=896, out_features=10, bias=True)
)

In [14]:
mygru = modelGRU()
mygru.cpu()

modelGRU(
  (gru): GRU(28, 32, batch_first=True)
  (linear): Linear(in_features=896, out_features=10, bias=True)
)

In [15]:
x = torch.randn(32,1,28,28)
y,state = mylstm(x)
y,state = mygru(x)

# 3、train_data

In [16]:
optimizerLSTM = optim.Adam(mylstm.parameters(),lr = 0.001,)
optimizerGRU = optim.Adam(mygru.parameters(),lr = 0.001,)
loss_func = nn.CrossEntropyLoss()

In [17]:
mylstm.to(device)
mygru.to(device)

modelGRU(
  (gru): GRU(28, 32, batch_first=True)
  (linear): Linear(in_features=896, out_features=10, bias=True)
)

In [18]:
%%time
mylstm.train()
for epoch in range(5):
    for step,(x,y) in enumerate(dataLoaderMNIST_train):
        x = x.cuda(device)
        y = y.cuda(device)
        y_,state = mylstm(x)
        loss = loss_func(y_,y)
        
        optimizerLSTM.zero_grad()
        loss.backward()
        optimizerLSTM.step()

        print('\repoch:{epoch:3}--step:{step:5}--loss:{loss:.4}'.format(epoch = epoch, step=step, loss=loss),end = '')
    acc = 0
    for _,(x,y) in enumerate(dataLoaderMNIST_test):
        x = x.cuda(device)
        y = y.cuda(device)
        y_,state = mylstm(x)
        acc += torch.sum(y_.argmax(1) == y)
    print('\repoch:{epoch:3}--step:{step:5}--loss:{loss:.4}--acc:{acc:.4}%-----'.format(epoch = epoch, step=step, loss=loss, acc = acc/10000*100))
    print()

epoch:  0--step:  468--loss:1.539--acc:93.39%-----

epoch:  1--step:  468--loss:1.506--acc:95.61%-----

epoch:  2--step:  468--loss:1.489--acc:96.27%-----

epoch:  3--step:  468--loss:1.474--acc:96.72%-----

epoch:  4--step:  468--loss:1.474--acc:96.86%-----

CPU times: user 55 s, sys: 1.66 s, total: 56.7 s
Wall time: 55.7 s


In [19]:
%%time
mygru.train()
for epoch in range(5):
    for step,(x,y) in enumerate(dataLoaderMNIST_train):
        x = x.cuda(device)
        y = y.cuda(device)
        y_,state = mygru(x)
        loss = loss_func(y_,y)
        
        optimizerGRU.zero_grad()
        loss.backward()
        optimizerGRU.step()

        print('\repoch:{epoch:3}--step:{step:5}--loss:{loss:.4}'.format(epoch = epoch, step=step, loss=loss),end = '')
    acc = 0
    for _,(x,y) in enumerate(dataLoaderMNIST_test):
        x = x.cuda(device)
        y = y.cuda(device)
        y_,state = mygru(x)
        acc += torch.sum(y_.argmax(1) == y)
    print('\repoch:{epoch:3}--step:{step:5}--loss:{loss:.4}--acc:{acc:.4}%-----'.format(epoch = epoch, step=step, loss=loss, acc = acc/10000*100))
    print()

epoch:  0--step:  468--loss:1.58--acc:93.05%-----

epoch:  1--step:  468--loss:1.493--acc:95.55%-----

epoch:  2--step:  468--loss:1.486--acc:96.2%-----

epoch:  3--step:  468--loss:1.505--acc:96.92%-----

epoch:  4--step:  468--loss:1.511--acc:97.22%-----

CPU times: user 52.4 s, sys: 1.31 s, total: 53.8 s
Wall time: 52.9 s


In [23]:
FPS_LSTM = 70000*5/55.7
FPS_GRU = 70000*5/52.9
FPS_LSTM,FPS_GRU

(6283.662477558348, 6616.257088846881)

In [25]:
(FPS_GRU-FPS_LSTM)/FPS_LSTM*100

5.293005671077513