<a href="https://colab.research.google.com/github/PANDASANG1231/deeplearn_note/blob/main/027_BatchNorm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import sys
from google.colab import drive
# Mount Google Drive into the Colab filesystem; force_remount=True re-mounts
# even if a mount already exists at this path.
drive.mount('/content/drive', force_remount=True)
# Add the notes folder on Drive to the import path so `tool` can be imported.
sys.path.append('/content/drive/MyDrive/Colab Notebooks/deeplearning_note')
# NOTE(review): the star import hides which names come from `tool`.
# `train_p2`, called later in this notebook, is presumably one of them —
# confirm and consider importing it explicitly.
from tool import *

Mounted at /content/drive


In [2]:
import torch
import torchvision

Below is a simple from-scratch implementation of batch normalization (`batch_norm`).

**Note**:
  
1. For an MLP (tabular data), batch norm averages over `dim=0`, giving one mean/variance per feature.
2. For a CNN (image data), batch norm averages over `dim=(0, 2, 3)`, giving one mean/variance per channel.

In [20]:
def batch_norm(X, gamma, beta, moving_average, moving_var, eps, momentum):
  """Apply batch normalization to ``X`` and return updated running statistics.

  The mode is chosen from the autograd state, not from a module flag:
  when gradients are disabled the running statistics are used (inference),
  otherwise the statistics of the current batch are used (training) and the
  running statistics are updated.

  Args:
    X: Input tensor. In training mode it must be 2-D (statistics over
      ``dim=0``) or 4-D (statistics over ``dim=(0, 2, 3)``).
    gamma: Scale applied after normalization; must broadcast against ``X``.
    beta: Shift applied after normalization; must broadcast against ``X``.
    moving_average: Running mean; must broadcast against ``X``.
    moving_var: Running variance; must broadcast against ``X``.
    eps: Small constant added to the variance before the square root.
    momentum: Weight kept on the previous running statistic; the current
      batch statistic gets weight ``1 - momentum``.

  Returns:
    A tuple ``(Y_hat, moving_average, moving_var)`` where the two running
    statistics are detached from the autograd graph.

  Raises:
    ValueError: If gradients are enabled and ``X`` is neither 2-D nor 4-D.
  """
  if not torch.is_grad_enabled():
    # Inference path: normalize with the accumulated running statistics.
    Xhat = (X - moving_average) / torch.sqrt(moving_var + eps)
  else:
    if X.dim() == 2:
      # Per-feature statistics. keepdim stays off so the returned running
      # statistics keep the same shape as before.
      reduce_dims, keepdim = 0, False
    elif X.dim() == 4:
      # Per-channel statistics, kept as (1, C, 1, 1) so they broadcast.
      reduce_dims, keepdim = (0, 2, 3), True
    else:
      # Previously this fell through and failed later on an undefined `mean`.
      raise ValueError(
          f"batch_norm expects a 2-D or 4-D input, got {X.dim()}-D "
          f"with shape {tuple(X.shape)}")

    mean = X.mean(dim=reduce_dims, keepdim=keepdim)
    # Biased (population) variance of the current batch.
    var = ((X - mean) ** 2).mean(dim=reduce_dims, keepdim=keepdim)

    Xhat = (X - mean) / torch.sqrt(var + eps)
    # Exponential moving average of the batch statistics.
    moving_average = (1 - momentum) * mean + momentum * moving_average
    moving_var = (1 - momentum) * var + momentum * moving_var

  Y_hat = Xhat * gamma + beta

  # detach() drops the autograd history so the running statistics do not
  # keep the batch graph alive between steps.
  return Y_hat, moving_average.detach(), moving_var.detach()




In [21]:
class BatchNorm(torch.nn.Module):
  """Batch normalization layer built on the ``batch_norm`` function.

  Holds a learnable scale (``gamma``) and shift (``beta``) plus running
  mean/variance buffers, all shaped to broadcast against the input.

  Args:
    num_features: Number of features (2-D input) or channels (4-D input).
    nums_dim: Rank of the expected input: 2 gives shape ``(1, num_features)``,
      4 gives shape ``(1, num_features, 1, 1)``.
    eps: Constant added to the variance for numerical stability.
    momentum: Weight kept on the previous running statistic at each update.

  Raises:
    ValueError: If ``nums_dim`` is neither 2 nor 4.
  """

  def __init__(self, num_features, nums_dim, eps=1e-5, momentum=0.9):

    super().__init__()
    self.num_features = num_features
    if nums_dim == 2:
      self.size = (1, num_features)
    elif nums_dim == 4:
      self.size = (1, num_features, 1, 1)
    else:
      # Previously self.size stayed unset and the next line failed with an
      # unrelated AttributeError.
      raise ValueError(f"nums_dim must be 2 or 4, got {nums_dim!r}")

    self.eps = eps
    self.momentum = momentum

    self.gamma = torch.nn.Parameter(torch.ones(self.size))
    self.beta = torch.nn.Parameter(torch.zeros(self.size))
    # Running statistics start at mean 0 / variance 1, i.e. an identity
    # normalization. Registering them as buffers makes them follow
    # .to()/.cuda() and be saved in state_dict().
    self.register_buffer("moving_average", torch.zeros(self.size))
    self.register_buffer("moving_var", torch.ones(self.size))

  def forward(self, X):
    """Normalize ``X`` and refresh the running statistics.

    Whether batch or running statistics are used is decided inside
    ``batch_norm`` from ``torch.is_grad_enabled()``, not from
    ``self.training``.
    """
    # Safety net for inputs that live on a different device than the buffers.
    if self.moving_average.device != X.device:

      self.moving_average = self.moving_average.to(X.device)
      self.moving_var = self.moving_var.to(X.device)

    # Assigning a tensor to a registered buffer name keeps it a buffer.
    Y, self.moving_average, self.moving_var = batch_norm(
        X, self.gamma, self.beta, self.moving_average, self.moving_var,
        eps=self.eps, momentum=self.momentum
        )

    return Y



Test on LeNet with and without batch norm

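1. Batch norm is not expected to change the final accuracy much. (That is not what the runs below show — the batch-norm model scores higher — but LeNet is not fully trained after 10 epochs here.)

2. It speeds up convergence: the loss drops faster per epoch. Note that it does not make each step faster — in the runs below the throughput (examples/sec) is lower with this batch norm implementation.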

In [22]:
class Lenet_withbn(torch.nn.Module):
  """LeNet-style network with a ``BatchNorm`` layer after both convolutions
  and after the first fully connected layer.

  The layers live in ``self.model`` (a flat ``torch.nn.Sequential``) so they
  can be iterated one by one. The 400 input features of the first linear
  layer correspond to a 16 x 5 x 5 feature map, which is what a 1 x 28 x 28
  input produces in the shape trace in this notebook.
  """

  def __init__(self):
    super().__init__()
    nn = torch.nn

    # Built in forward order; the order also fixes the Sequential indices.
    layers = [
        # Convolution block 1: 1 -> 6 channels, padding keeps the spatial size.
        nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
        BatchNorm(num_features=6, nums_dim=4),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        # Convolution block 2: 6 -> 16 channels.
        nn.Conv2d(6, 16, kernel_size=5, stride=1),
        BatchNorm(num_features=16, nums_dim=4),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        # Classifier head: 400 -> 120 -> 84 -> 10.
        nn.Flatten(),
        nn.Linear(400, 120),
        BatchNorm(num_features=120, nums_dim=2),
        nn.Sigmoid(),
        nn.Linear(120, 84),
        nn.Sigmoid(),
        nn.Linear(84, 10),
    ]
    self.model = nn.Sequential(*layers)

  def forward(self, X):
    """Run ``X`` through every layer and return the 10 output scores."""
    scores = self.model(X)
    return scores


class Lenet(torch.nn.Module):
  """Plain LeNet-style network (no batch normalization).

  The layers live in ``self.model`` (a flat ``torch.nn.Sequential``) so they
  can be iterated one by one. The 400 input features of the first linear
  layer correspond to a 16 x 5 x 5 feature map, which is what a 1 x 28 x 28
  input produces in the shape trace in this notebook.
  """

  def __init__(self):
    super().__init__()
    nn = torch.nn

    # Built in forward order; the order also fixes the Sequential indices.
    layers = [
        # Convolution block 1: 1 -> 6 channels, padding keeps the spatial size.
        nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        # Convolution block 2: 6 -> 16 channels.
        nn.Conv2d(6, 16, kernel_size=5, stride=1),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        # Classifier head: 400 -> 120 -> 84 -> 10.
        nn.Flatten(),
        nn.Linear(400, 120),
        nn.Sigmoid(),
        nn.Linear(120, 84),
        nn.Sigmoid(),
        nn.Linear(84, 10),
    ]
    self.model = nn.Sequential(*layers)

  def forward(self, X):
    """Run ``X`` through every layer and return the 10 output scores."""
    scores = self.model(X)
    return scores

In [23]:
# Push a dummy 1 x 1 x 28 x 28 input through each network layer by layer and
# print the output shape after every layer, first for the plain model and
# then for the batch-norm model.
for banner, net_cls in (("Normal Lent .......", Lenet),
                        ("BN Lent .......", Lenet_withbn)):
    x = torch.ones((1, 1, 28, 28))

    print(banner)
    le = net_cls()
    for layer in le.model:
        x = layer(x)
        print(layer.__class__.__name__, x.shape)

Normal Lent .......
Conv2d torch.Size([1, 6, 28, 28])
Sigmoid torch.Size([1, 6, 28, 28])
AvgPool2d torch.Size([1, 6, 14, 14])
Conv2d torch.Size([1, 16, 10, 10])
Sigmoid torch.Size([1, 16, 10, 10])
AvgPool2d torch.Size([1, 16, 5, 5])
Flatten torch.Size([1, 400])
Linear torch.Size([1, 120])
Sigmoid torch.Size([1, 120])
Linear torch.Size([1, 84])
Sigmoid torch.Size([1, 84])
Linear torch.Size([1, 10])
BN Lent .......
Conv2d torch.Size([1, 6, 28, 28])
BatchNorm torch.Size([1, 6, 28, 28])
Sigmoid torch.Size([1, 6, 28, 28])
AvgPool2d torch.Size([1, 6, 14, 14])
Conv2d torch.Size([1, 16, 10, 10])
BatchNorm torch.Size([1, 16, 10, 10])
Sigmoid torch.Size([1, 16, 10, 10])
AvgPool2d torch.Size([1, 16, 5, 5])
Flatten torch.Size([1, 400])
Linear torch.Size([1, 120])
BatchNorm torch.Size([1, 120])
Sigmoid torch.Size([1, 120])
Linear torch.Size([1, 84])
Sigmoid torch.Size([1, 84])
Linear torch.Size([1, 10])


In [24]:
# FashionMNIST train and test splits, downloaded into the current directory
# if not already there; ToTensor converts each image to a tensor.
train_dataset = torchvision.datasets.FashionMNIST(
    root="./",
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
test_dataset = torchvision.datasets.FashionMNIST(
    root="./",
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

In [27]:
# Train the plain LeNet for 10 epochs.
batch_size = 128
# Shuffle only the training data; keep the test order fixed.
train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)

test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

# Use the GPU when one is present, otherwise fall back to the CPU so the
# cell still runs on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loss = torch.nn.CrossEntropyLoss()


# NOTE(review): train_p2 is not defined in this notebook; presumably it comes
# from the `tool` star import — confirm.
train_p2(epoch_num=10, 
         model=Lenet(),  
         loss=loss, 
         lr=0.05, 
         train_data_iter=train_dataloader,
         test_data_iter=test_dataloader,
         device=device,
         optim_type="Adam")


loss 0.902, train acc 0.653, test acc 0.781
loss 0.477, train acc 0.821, test acc 0.833
loss 0.414, train acc 0.846, test acc 0.849
loss 0.382, train acc 0.856, test acc 0.855
loss 0.355, train acc 0.866, test acc 0.863
loss 0.337, train acc 0.873, test acc 0.855
loss 0.322, train acc 0.878, test acc 0.865
loss 0.314, train acc 0.883, test acc 0.867
loss 0.305, train acc 0.886, test acc 0.867
loss 0.297, train acc 0.888, test acc 0.874
Calculation Ability: 35210.3 examples/sec on cuda


In [26]:
# Train the batch-norm LeNet for 10 epochs with the same settings as the
# plain LeNet run, so the two logs can be compared.
batch_size = 128
# Shuffle only the training data; keep the test order fixed.
train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)

test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

# Use the GPU when one is present, otherwise fall back to the CPU so the
# cell still runs on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loss = torch.nn.CrossEntropyLoss()


# NOTE(review): train_p2 is not defined in this notebook; presumably it comes
# from the `tool` star import — confirm.
train_p2(epoch_num=10, 
         model=Lenet_withbn(),  
         loss=loss, 
         lr=0.05, 
         train_data_iter=train_dataloader,
         test_data_iter=test_dataloader,
         device=device,
         optim_type="Adam")

loss 0.528, train acc 0.813, test acc 0.847
loss 0.340, train acc 0.876, test acc 0.872
loss 0.294, train acc 0.893, test acc 0.876
loss 0.266, train acc 0.901, test acc 0.895
loss 0.248, train acc 0.908, test acc 0.880
loss 0.231, train acc 0.915, test acc 0.900
loss 0.222, train acc 0.916, test acc 0.885
loss 0.205, train acc 0.924, test acc 0.893
loss 0.190, train acc 0.930, test acc 0.901
loss 0.186, train acc 0.930, test acc 0.901
Calculation Ability: 18956.4 examples/sec on cuda
