<a href="https://colab.research.google.com/github/Seouyang/Today-I-Learned/blob/master/CNN_batchnormalization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Improving a CNN with batch normalization

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms
from torchsummary import summary

from tqdm import tqdm
import time 
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

In [2]:
# MNIST train and test splits, stored under ./data (downloaded when missing).
# ToTensor converts every image to a tensor as it is read.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Mini-batches of 200 samples; only the training loader shuffles,
# the test loader keeps a fixed order.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=200,
    shuffle=True,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=200,
    shuffle=False,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [3]:
class CNN_BN(nn.Module):
  """Small convolutional classifier with batch normalization after each hidden layer.

  Layout:
    conv1 (1 -> 32, 5x5) -> bn1 -> ReLU -> 2x2 max-pool
    conv2 (32 -> 32, 5x5) -> bn2 -> ReLU -> 2x2 max-pool
    flatten -> fc1 (512 -> 128) -> bn3 -> ReLU -> fc2 (128 -> 10)

  fc1 takes 512 flattened features, which is what a 1x28x28 input yields
  after the two conv/pool stages (32 channels * 4 * 4).
  """

  def __init__(self):
    super().__init__()
    # First convolutional stage.
    self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
    self.bn1 = nn.BatchNorm2d(num_features=32)
    # One pooling layer is shared by both convolutional stages.
    self.pool = nn.MaxPool2d(2, stride=2)
    # Second convolutional stage.
    self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
    self.bn2 = nn.BatchNorm2d(num_features=32)
    # Classifier head.
    self.fc1 = nn.Linear(512, 128)
    self.bn3 = nn.BatchNorm1d(num_features=128)
    self.fc2 = nn.Linear(128, 10)

  def forward(self, x):
    """Return the 10 raw class scores (no softmax applied) for each sample in `x`."""
    n_samples = x.size(0)
    # Both stages follow the same conv -> batch norm -> ReLU -> pool recipe.
    for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
      x = self.pool(F.relu(norm(conv(x))))
    # Collapse the channel and spatial axes into one feature axis per sample.
    flat = x.view(n_samples, -1)
    hidden = F.relu(self.bn3(self.fc1(flat)))
    return self.fc2(hidden)

In [9]:
def train(model, n_epoch, loader, optimizer, criterion, device="cpu"):
  """Train `model` in place for `n_epoch` passes over `loader`.

  Args:
    model: module to optimise; it is switched to training mode.
    n_epoch: number of full passes over `loader`.
    loader: iterable yielding `(images, labels)` batches. It does not need
      to support `len()`; batches are counted while iterating.
    optimizer: optimizer holding the parameters of `model`.
    criterion: loss callable, invoked as `criterion(input=outputs, target=labels)`.
    device: device every batch is moved to before the forward pass.

  Prints the mean per-batch loss after each epoch (`nan` if the loader
  yielded no batches) and 'Training Finished' at the end. Returns None.
  """
  model.train()
  for epoch in tqdm(range(n_epoch)):
    running_loss = 0.0
    n_batches = 0
    for images, labels in loader:
      images = images.to(device)
      labels = labels.to(device)
      # Gradients accumulate across backward() calls, so clear them per batch.
      optimizer.zero_grad()

      outputs = model(images)
      loss = criterion(input=outputs, target=labels)
      loss.backward()
      optimizer.step()
      running_loss += loss.item()
      n_batches += 1
    # Counting batches instead of calling len(loader) keeps this working for
    # loaders without __len__ and avoids dividing by zero on an empty loader.
    mean_loss = running_loss / n_batches if n_batches else float('nan')
    print('Epoch {}, loss = {:.3f}'.format(epoch, mean_loss))
  print('Training Finished')

In [10]:
def evaluate(model, loader, device="cpu"):
  """Return the classification accuracy of `model` over `loader`, in percent.

  The model is put in evaluation mode and left there. Every `(images, labels)`
  batch is moved to `device`; the predicted class is the index of the largest
  score along dimension 1 of the model output.
  """
  model.eval()
  n_seen, n_hit = 0, 0
  # Gradients are not needed for scoring.
  with torch.no_grad():
    for images, labels in loader:
      images, labels = images.to(device), labels.to(device)
      predicted = model(images).argmax(dim=1)
      n_seen += labels.size(0)
      n_hit += (predicted == labels).sum().item()

  return 100 * n_hit / n_seen

In [11]:
# Train on the GPU when one is available; otherwise fall back to the CPU so
# this cell still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

cnn_bn_model = CNN_BN().to(device)
# SGD with momentum on the cross-entropy loss, 15 passes over the training set.
optimizer = optim.SGD(params=cnn_bn_model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()
train(model=cnn_bn_model, n_epoch=15, loader=trainloader, optimizer=optimizer, criterion=criterion, device=device)
# Accuracy on the held-out test split, in percent.
acc = evaluate(cnn_bn_model, testloader, device=device)
print('Test accuracy: {:.2f}%'.format(acc))

  7%|▋         | 1/15 [00:06<01:27,  6.23s/it]

Epoch 0, loss = 0.657


 13%|█▎        | 2/15 [00:12<01:19,  6.12s/it]

Epoch 1, loss = 0.204


 20%|██        | 3/15 [00:18<01:13,  6.13s/it]

Epoch 2, loss = 0.134


 27%|██▋       | 4/15 [00:24<01:06,  6.05s/it]

Epoch 3, loss = 0.104


 33%|███▎      | 5/15 [00:29<00:59,  5.95s/it]

Epoch 4, loss = 0.085


 40%|████      | 6/15 [00:35<00:52,  5.86s/it]

Epoch 5, loss = 0.074


 47%|████▋     | 7/15 [00:41<00:47,  5.97s/it]

Epoch 6, loss = 0.065


 53%|█████▎    | 8/15 [00:47<00:42,  6.01s/it]

Epoch 7, loss = 0.058


 60%|██████    | 9/15 [00:53<00:36,  6.02s/it]

Epoch 8, loss = 0.053


 67%|██████▋   | 10/15 [00:59<00:29,  5.90s/it]

Epoch 9, loss = 0.048


 73%|███████▎  | 11/15 [01:05<00:24,  6.00s/it]

Epoch 10, loss = 0.045


 80%|████████  | 12/15 [01:11<00:18,  6.01s/it]

Epoch 11, loss = 0.041


 87%|████████▋ | 13/15 [01:17<00:11,  5.95s/it]

Epoch 12, loss = 0.039


 93%|█████████▎| 14/15 [01:23<00:05,  5.91s/it]

Epoch 13, loss = 0.036


100%|██████████| 15/15 [01:29<00:00,  5.95s/it]

Epoch 14, loss = 0.034
Training Finished





Test accuracy: 99.03%
