In [22]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dataset
from torch.autograd import Variable
from torch.nn import Parameter
from torch import Tensor
import torch.nn.functional as F
from torch.utils.data import DataLoader
import math

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cuda = True if torch.cuda.is_available() else False

Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

torch.manual_seed(125)
if torch.cuda.is_available():
  torch.cuda.manual_seed_all(125)


In [23]:
import torchvision.transforms as transforms

mnist_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, ), (1.0, ))])

In [24]:
from torchvision.datasets import MNIST

download_root = "/content/drive/MyDrive/딥러닝 파이토치 교과서/"

train_dataset = MNIST(download_root, transform=mnist_transform, train=True, download=True)

valid_dataset = MNIST(download_root, transform=mnist_transform, train=False, download=True)

test_dataset = MNIST(download_root, transform=mnist_transform, train=False, download=True)

In [25]:
batch_size = 64

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

valid_loader = DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=True)

test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

In [26]:
batch_size = 100

n_iters = 6000

num_epochs = int(n_iters / (len(train_dataset) / batch_size))

In [27]:
class LSTMCell(nn.Module):
  def __init__(self, input_size, hidden_size, bias=True):
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.bias = bias
    self.x2h = nn.Linear(input_size, 4 * hidden_size, bias=bias)
    self.h2h = nn.Linear(hidden_size, 4 * hidden_size, bias=bias)
    self.reset_parameters()

  def reset_parameters(self):
    std = 1.0 / math.sqrt(self.hidden_size)
    for w in self.parameters():
      w.data.uniform_(-std, std)

  def forward(self, x, hidden):
    hx, cx = hidden
    x = x.view(-1, x.size(1))

    gates = self.x2h(x) + self.h2h(hx)
    gates = gates.squeeze()
    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)

    ingate = F.sigmoid(ingate)
    forgetgate = F.sigmoid(forgetgate)
    cellgate = F.tanh(cellgate)
    outgate = F.sigmoid(outgate)

    cy = torch.mul(cx, forgetgate) + torch.mul(ingate, cellgate)
    hy = torch.mul(outgate, F.tanh(cy))

    return (hy, cy)

In [28]:
class LSTMModel(nn.Module):
  def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, bias=True):
    super().__init__()
    self.hidden_dim = hidden_dim
    self.layer_dim = layer_dim
    self.lstm = LSTMCell(input_dim, hidden_dim, layer_dim)
    self.fc = nn.Linear(hidden_dim, output_dim)

  def forward(self, x):
    if torch.cuda.is_available():
      h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda())
    else:
      h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))

    if torch.cuda.is_available():
      c0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda())
    else:
      c0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))

    outs = []
    cn = c0[0, :, :]
    hn = h0[0, :, :]

    for seq in range(x.size(1)):
      hn, cn = self.lstm(x[:, seq, :], (hn, cn))
      outs.append(hn)

    out = outs[-1].squeeze()
    out = self.fc(out)
    return out

In [29]:
input_dim = 28
hidden_dim = 128
layer_dim = 1
output_dim = 10

model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)

if torch.cuda.is_available():
  model.cuda()

criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [30]:
seq_dim = 28
loss_list = []
iter = 0

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    images = Variable(images.view(-1, seq_dim, input_dim).cuda())
    labels = Variable(labels.cuda())

    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels).cuda()

    loss.backward()
    optimizer.step()
    loss_list.append(loss.item())
    iter += 1

    if iter % 100 == 0:
      correct = 0
      total = 0
      for images, labels in valid_loader:
        images = Variable(images.view(-1, seq_dim, input_dim).cuda())
        labels = Variable(labels.cuda())
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)

        total += labels.size(0)
        correct += (predicted.cpu() == labels.cpu()).sum()

      accuracy = 100 * correct / total

      print(f'Iteration: {iter} Loss: {loss.item()} Accuracy: {accuracy}')



Iteration: 100 Loss: 2.293569803237915 Accuracy: 11.350000381469727
Iteration: 200 Loss: 2.3025078773498535 Accuracy: 11.550000190734863
Iteration: 300 Loss: 2.293407917022705 Accuracy: 15.260000228881836
Iteration: 400 Loss: 2.299330711364746 Accuracy: 15.5600004196167
Iteration: 500 Loss: 2.237457513809204 Accuracy: 21.420000076293945
Iteration: 600 Loss: 1.948909044265747 Accuracy: 33.18000030517578
Iteration: 700 Loss: 1.719103217124939 Accuracy: 47.04999923706055
Iteration: 800 Loss: 1.2479084730148315 Accuracy: 58.810001373291016
Iteration: 900 Loss: 0.915291428565979 Accuracy: 61.63999938964844
Iteration: 1000 Loss: 0.7579075694084167 Accuracy: 70.6500015258789
Iteration: 1100 Loss: 0.5575475692749023 Accuracy: 81.08999633789062
Iteration: 1200 Loss: 0.6163272261619568 Accuracy: 83.12999725341797
Iteration: 1300 Loss: 0.387583464384079 Accuracy: 84.80999755859375
Iteration: 1400 Loss: 0.37117958068847656 Accuracy: 88.19000244140625
Iteration: 1500 Loss: 0.44921135902404785 Accur

In [35]:
def evaluate(model, val_iter):
  corrects, total, total_loss = 0, 0, 0
  model.eval()
  for images, labels in val_iter:
    images = images.view(-1, seq_dim, input_dim).cuda()
    labels = labels.cuda()

    logit = model(images).cuda()
    loss = F.cross_entropy(logit, labels, reduction='sum')
    _, predicted = torch.max(logit.data, 1)
    total += labels.size(0)
    total_loss += loss.item()
    corrects += (predicted == labels).sum()

  avg_loss = total_loss / len(val_iter.dataset)
  avg_accuracy = corrects / total
  return avg_loss, avg_accuracy


In [36]:
test_loss, test_acc = evaluate(model, test_loader)
print(f'Test Loss : {test_loss:.2f} | Test Accuracy: {test_acc:.2f}')



Test Loss : 0.07 | Test Accuracy: 0.98
