# RNN for image recognition

Reference: https://wizardforcel.gitbooks.io/learn-dl-with-pytorch-liaoxingyu/content/5.2.html

<div align="center">
  <img src="css/rnn_image_1.jpg" width="50%"/>
  <img src="css/rnn_image_2.jpg" width="50%"/>
</div>

In [4]:
import torch
from torch.autograd import Variable
from torch import nn
from torch.utils.data import DataLoader

from torchvision import transforms as tfs
from torchvision.datasets import MNIST

# torchvision.datasets
# https://www.aiworkbox.com/lessons/load-mnist-dataset-from-pytorch-torchvision

In [None]:
# Define the data pipeline.
# Each image is converted to a tensor and then normalized with mean 0.5 and
# std 0.5 (single channel).
data_tf = tfs.Compose([
    tfs.ToTensor(),
    tfs.Normalize(mean=[0.5], std=[0.5]),
])

# MNIST training and test splits, stored under ./data (downloaded if missing).
train_set = MNIST(root='./data', train=True, transform=data_tf, download=True)
test_set = MNIST(root='./data', train=False, transform=data_tf, download=True)

# Batched loaders, currently disabled:
#train_data = DataLoader(train_set, 64, True, num_workers=4)
#test_data = DataLoader(test_set, 128, False, num_workers=4)

In [None]:
# Define the model
# LSTM input / output layout: (seq, batch, feature)
# LSTM hidden-state layout: (num_layers * num_directions, batch, hidden_size)

class rnn_classify(nn.Module):
    """Image classifier that reads an image as a sequence with a stacked LSTM.

    The output of the last time step of the top LSTM layer is passed through a
    fully connected layer to produce the class scores.
    """

    def __init__(self, in_feature=28, hidden_feature=100, num_class=10, num_layers=2):
        """Build the LSTM and the linear classifier.

        Args:
            in_feature: Number of features fed to the LSTM at each time step.
            hidden_feature: Size of the LSTM hidden state.
            num_class: Number of output classes.
            num_layers: Number of stacked LSTM layers.
        """
        super(rnn_classify, self).__init__()
        self.rnn = nn.LSTM(in_feature, hidden_feature, num_layers)
        # Maps the last LSTM output to the final class scores.
        self.classifier = nn.Linear(hidden_feature, num_class)

    def forward(self, x):
        """Compute class scores for a batch of single-channel images.

        Args:
            x: Tensor of shape (batch, 1, 28, 28) with the default settings.

        Returns:
            Tensor of shape (batch, num_class).
        """
        # Drop only the channel axis: (batch, 1, 28, 28) -> (batch, 28, 28).
        # A bare squeeze() would also remove the batch axis when batch == 1
        # and make the permute below fail.
        x = x.squeeze(1)
        # Move the last axis to the front so it becomes the sequence axis:
        # (batch, 28, 28) -> (28, batch, 28).
        x = x.permute(2, 0, 1)
        # Default (zero) initial hidden state; out is (28, batch, hidden_feature).
        out, _ = self.rnn(x)
        # Keep the last time step only: (batch, hidden_feature).
        out = out[-1, :, :]
        # Class scores: (batch, num_class).
        out = self.classifier(out)
        return out
    

# Model with its default sizes.
net = rnn_classify()

# Cross-entropy loss on the class scores produced by the model.
criterion = nn.CrossEntropyLoss()

# Adadelta over every model parameter with a learning rate of 0.1.
# NOTE(review): the variable name is spelled "optimzier"; it is kept as-is
# because other cells may refer to it.
optimzier = torch.optim.Adadelta(params=net.parameters(), lr=1e-1)


# squeeze and unsqueeze

In [18]:
# A 3x3 integer matrix wrapped in one extra leading axis: shape (1, 3, 3).
data = torch.tensor([[
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
]])

print('Shape:', data.shape)
print("")

# squeeze(0) removes axis 0 because its size is 1, leaving the 3x3 matrix.
squeeze_data = torch.squeeze(data, 0)
print('squeeze data:', squeeze_data)
print('squeeze(0) shape:', squeeze_data.shape)

Shape: torch.Size([1, 3, 3])

squeeze data: tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])
squeeze(0) shape: torch.Size([3, 3])


In [19]:
# Same (1, 3, 3) integer tensor as in the squeeze example.
data = torch.tensor([[
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
]])

print('Shape:', data.shape)
print("")


# unsqueeze(0) inserts a new axis of size 1 at position 0: (1, 3, 3) -> (1, 1, 3, 3).
unsqueeze_data = torch.unsqueeze(data, 0)
print('unsqueeze data:', unsqueeze_data)
print('unsqueeze(0) shape:', unsqueeze_data.shape)

Shape: torch.Size([1, 3, 3])

unsqueeze data: tensor([[[[0, 1, 2],
          [3, 4, 5],
          [6, 7, 8]]]])
unsqueeze(0) shape: torch.Size([1, 1, 3, 3])


# View

https://pytorch.org/docs/stable/tensors.html?highlight=view#torch.Tensor.view

**`view(*shape) → Tensor`**  
- Returns a new tensor with the same data as the self tensor but of a different shape.

In [21]:
# Random 4x4 tensor that the view() examples below reshape.
x = torch.randn(4, 4)
x.size()

torch.Size([4, 4])

In [22]:
# View the 16 elements of x as a single dimension.
y = x.view(16)
y.size()

torch.Size([16])

In [23]:
# View x as rows of 8 elements; view() works out the number of rows itself.
z = x.view(-1, 8)  # the size -1 is inferred from other dimensions
z.size()

torch.Size([2, 8])

In [24]:
# Random 4-D tensor used to contrast transpose() with view().
a = torch.randn(1, 2, 3, 4)
a.size()

torch.Size([1, 2, 3, 4])

In [25]:
# transpose() exchanges two axes, so the result has shape (1, 3, 2, 4).
b = a.transpose(1, 2)  # Swaps 2nd and 3rd dimension
b.size()

torch.Size([1, 3, 2, 4])

In [26]:
# view() keeps the elements of a in their existing order and only regroups
# them into the requested shape (the same shape as b).
c = a.view(1, 3, 2, 4)  # Does not change tensor layout in memory
c.size()

torch.Size([1, 3, 2, 4])

In [27]:
# b and c have the same shape, but transpose() and view() place the elements
# at different positions, so the tensors are not equal.
torch.equal(b, c)

False