### Use CNN and LSTM to train a classifier on MNIST dataset

In [1]:
# Mount to Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Define Project Folder
FOLDERNAME = 'Colab\ Notebooks/SC201_L16'

%cd drive/MyDrive/$FOLDERNAME

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks/SC201_L16


In [2]:
%ls

[0m[01;34m__MACOSX[0m/                model_RNN.py              [01;34m__pycache__[0m/
MNIST_CNN_pytorch.ipynb  my_pytorch_project.ipynb  [01;34mtrain[0m/
model_CNN.py             NLP.ipynb                 [01;34mval[0m/


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader

import torchvision.datasets as dset
import torchvision.transforms as T

from model_CNN import MyCNN
from model_RNN import MyRNN

In [4]:
# Define device
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
print('Device:', device)

Device: cuda


In [5]:
# Load Existing Dataset
transform = T.ToTensor()
train_data = dset.MNIST('./train', train=True, download=True, transform=transform)
val_data = dset.MNIST('./val', train=False, download=True, transform=transform)

In [6]:
# Check Data Dimension
print(train_data)
print(val_data)

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./train
    Split: Train
    StandardTransform
Transform: ToTensor()
Dataset MNIST
    Number of datapoints: 10000
    Root location: ./val
    Split: Test
    StandardTransform
Transform: ToTensor()


In [7]:
print(train_data[0][0])
print(train_data[0][1])
print(train_data[0][0].shape)

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,

In [8]:
num_train = len(train_data)
num_val = len(val_data)
print('Number of training:', num_train)
print('Number of validation:', num_val)

Number of training: 60000
Number of validation: 10000


In [9]:
# Build Model
model = None

# model = nn.Sequential(
#     # N x 1 x 28 x 28
#     nn.Conv2d(1, 64, 3, 1, 1),
#     nn.BatchNorm2d(64),
#     nn.ReLU(),
#     nn.MaxPool2d(2, 2),

#     # N x 64 x 14 x 14
#     nn.Flatten(),
#     nn.Linear(in_features=64*14*14, out_features=10)
# )

model = MyCNN()

# model = MyRNN(input_size=28, hidden_size=64, output_size=10)

In [10]:
# Move model to GPU
model = model.to(device)

In [11]:
# Create Mini-batches
mini_trains = DataLoader(train_data, batch_size=128, shuffle=True)
mini_vals = DataLoader(val_data, batch_size=128, shuffle=True)

In [12]:
# Training Procedure
def train(num_epoch, model, mini_trains, mini_vals, device, loss_function, optimizer):
  for epoch in range(num_epoch):
    for counter, (x, y) in enumerate(mini_trains):
      model.train()
      x = x.to(device)
      y = y.to(device)
      try:
        scores = model(x)
      except:
        x = x.squeeze()  #RNN需降維(4轉3)
        scores = model(x)

      loss = loss_function(scores, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      if counter % 50 == 0:
        print(f'Epoch {epoch+1}: {loss.item()}', end='/')
        evaluate(model, mini_vals, device)

In [13]:
# Validating Procedure
def evaluate(model, mini_vals, device):
  model.eval()
  with torch.no_grad():
    acc_count = 0
    for x, y in mini_vals:
      x = x.to(device)
      y = y.to(device)
      try:
        scores = model(x)
      except:
        x = x.squeeze()  #RNN需降維(4轉3)
        scores = model(x)

      predictions=scores.max(1)[1]
      acc = predictions.eq(y).sum().item()
      acc_count += acc
    print(f'Acc: {acc_count/num_val}')

In [14]:
# Define loss function & optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [15]:
# Start training 
train(1, model, mini_trains, mini_vals, device, loss_function, optimizer)

Epoch 1: 2.515462636947632/Acc: 0.1086
Epoch 1: 0.5479617714881897/Acc: 0.9368
Epoch 1: 0.0985318049788475/Acc: 0.9611
Epoch 1: 0.15534746646881104/Acc: 0.9709
Epoch 1: 0.08903870731592178/Acc: 0.975
Epoch 1: 0.092035673558712/Acc: 0.9747
Epoch 1: 0.10163386911153793/Acc: 0.9805
Epoch 1: 0.15832054615020752/Acc: 0.979
Epoch 1: 0.04604435712099075/Acc: 0.9808
Epoch 1: 0.08638959378004074/Acc: 0.9795


In [17]:
# 實際預測 20 筆資料
predictions = []
for i in range(0,20):
    data, target = val_data[i][0], val_data[i][1]
    try:
        data = data.to(device)
        output = torch.argmax(model(data), axis=1)
    except:
        data = data.reshape(1, *data.shape).to(device)
        output = torch.argmax(model(data), axis=1)
    predictions.append(str(output.item()))

# 比對
print('actual    :', val_data.targets[0:20].numpy())
print('prediction: ', ' '.join(predictions))

actual    : [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
prediction:  7 2 8 0 4 8 4 9 5 9 0 6 9 0 8 5 9 7 3 4
