<a href="https://colab.research.google.com/github/Jikhan-Jeong/2019-Computer-Vision-/blob/master/Feb_16%2C_2020_Torch_Batch_Normalization%2C_Adam%2C_LR_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

---
# Feb 16, 2020 Torch Batch Normalization, Adam, LR CNN
---
* Name: Jikhan Jeong
---
### Optimization Methods
---
* SGD
* Momentum
* Nestrov
* Adagrad
* Adadelta
* Adam
* Ref: https://colab.research.google.com/drive/17vefyxasId5GsARxCRdnpPvM9mtQi4gl
---

---
# 1. Preparing
---

In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

In [0]:
batch_size = 256 # 16 height x 16 weight = 256 concantates
learning_rate = 0.0002
num_epoch = 10

* Normalization 

In [0]:
mnist_train = dset.MNIST("./", train=True, transform=transforms.ToTensor(), target_transform=None, download=True)
mnist_test  = dset.MNIST("./", train=False, transform=transforms.ToTensor(), target_transform=None, download=True)

# 1 channel so that it requires 1 mean and variance
# (with Data Normalization)  mnist_train = dset.MNIST("./", train=True, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.1307,), std=(0.3081,))]), target_transform=None, download=True)
# (with Data Normalization)  mnist_test = dset.MNIST("./", train=False, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=(0.1307,), std=(0.3081,))]), target_transform=None, download=True)

In [30]:
print(mnist_train.__getitem__(0)[0].size(),    mnist_train.__len__())
mnist_test.__getitem__(0)[0].size(),           mnist_test.__len__()

torch.Size([1, 28, 28]) 60000


(torch.Size([1, 28, 28]), 10000)

In [0]:
train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=batch_size, shuffle=True,num_workers=2,drop_last=True)
test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=batch_size, shuffle=False,num_workers=2,drop_last=True)

---
#  2. CNN Model
* Plug Batch Normalization in CNN layer (Idea from DenseNet)
---

In [0]:
#  CNN Model
# https://pytorch.org/docs/stable/nn.html?highlight=batchnorm#torch.nn.BatchNorm2d
# nn.BatchNorm2d(x), x input channel number

class CNN(nn.Module):

    def __init__(self):
  
        super(CNN,self).__init__()
  
        self.layer = nn.Sequential(   
            nn.Conv2d(1,16,3,padding=1),  # 28 x 28, in_channel, out_channel, kernel size = 3 x 3 kernel size
            nn.BatchNorm2d(16),           # BatchNorm2d Input channel = 16
            nn.ReLU(),
            nn.Conv2d(16,32,3,padding=1), # 28 x 28
            nn.BatchNorm2d(32),           # BatchNorm2d Input channel = 32
            nn.ReLU(),
            nn.MaxPool2d(2,2),            # 14 x 14
            nn.Conv2d(32,64,3,padding=1), # 14 x 14
            nn.BatchNorm2d(64),           # BatchNorm2d Input channel = 64
            nn.ReLU(),
            nn.MaxPool2d(2,2)             #  7 x 7
        )
  
        self.fc_layer = nn.Sequential(
            nn.Linear(64*7*7,100),         # In Feature, out Feature
            nn.BatchNorm1d(100),           # BatchNorm1d Input channel = 100
            nn.ReLU(),
            nn.Linear(100,10)
        )        
        
    def forward(self,x):
        out = self.layer(x)
        out = out.view(batch_size,-1)
        out = self.fc_layer(out)
        return out

---
# 3. Loss func & Optimizer
---
* Adam : change in 

In [38]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = CNN().to(device)
loss_func = nn.CrossEntropyLoss()
# (SGD optimizer) optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

cuda:0


In [0]:
# 1. StepLR method, decreasing learning rate by using gamma 
# scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma= 0.99)       

# 2. MultiStepLR, decreasing learning rate in step points 10,30,80
#scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[10,30,80], gamma= 0.1)  

# 3. EponentialLR
#scheduler = lr_scheduler.ExponentialLR(optimizer, gamma= 0.99)                             



# 4. ReduceLROnPlateau learning rate decreases as there is no improvement
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,threshold=1,patience=1,mode='min')    

# Ref: https://pytorch.org/docs/stable/optim.html?highlight=lr_scheduler#torch.optim.lr_scheduler.ReduceLROnPlateau

In [40]:
print(dir(scheduler))
print(dir(optimizer))
# Ref: https://www.geeksforgeeks.org/python-dir-function/

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_init_is_better', '_reduce_lr', '_reset', 'best', 'cooldown', 'cooldown_counter', 'eps', 'factor', 'in_cooldown', 'is_better', 'last_epoch', 'load_state_dict', 'min_lrs', 'mode', 'mode_worse', 'num_bad_epochs', 'optimizer', 'patience', 'state_dict', 'step', 'threshold', 'threshold_mode', 'verbose']
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclassh

 ---
 # 4. Train
 ---

In [41]:
for i in range(num_epoch):
    # ReduceLRONPlateau using # code: scheduler.step()  
    #scheduler.step()  
    for j,[image,label] in enumerate(train_loader):
        x = image.to(device)
        y_= label.to(device)
        
        optimizer.zero_grad()
        output = model.forward(x)
        loss = loss_func(output,y_)
        loss.backward()
        optimizer.step()
    
    # ReduceLRONPlateau only using this part : scheduler.step(loss)   
    scheduler.step(loss)      
    
    if i % 10 == 0:
        print(loss)   
            
    #print("Epoch: {}, Learning Rate: {}".format(i,scheduler.get_lr()))  # Other  schedule not ReduceLRONPlateau
    print("Epoch: {}, Learning Rate: {}".format(i,scheduler.optimizer.state_dict()['param_groups'][0]['lr']))

tensor(0.1382, device='cuda:0', grad_fn=<NllLossBackward>)
Epoch: 0, Learning Rate: 0.0002
Epoch: 1, Learning Rate: 2e-05
Epoch: 2, Learning Rate: 2e-05
Epoch: 3, Learning Rate: 2.0000000000000003e-06
Epoch: 4, Learning Rate: 2.0000000000000003e-06
Epoch: 5, Learning Rate: 2.0000000000000004e-07
Epoch: 6, Learning Rate: 2.0000000000000004e-07
Epoch: 7, Learning Rate: 2.0000000000000007e-08
Epoch: 8, Learning Rate: 2.0000000000000007e-08
Epoch: 9, Learning Rate: 2.000000000000001e-09


---
# 5. Test 
* Batch Normalization + **Adam Optimizaer** : 
* **Batch Normalization** and **Drop Out** requires **model.eval()**: Accuracy of Test Data: 78.5456771850586
* with Normalization: Accuracy of Test Data: 10.436698913574219
* without Normalization: Accuracy of Test Data: 10.09615421295166
---

In [42]:
correct = 0
total = 0

# model.eval() requires for drop out and batch normalization
model.eval()

with torch.no_grad():
  for image,label in test_loader:
      x = image.to(device)
      y_= label.to(device)

      output = model.forward(x)
      _,output_index = torch.max(output,1)

      total += label.size(0)
      correct += (output_index == y_).sum().float()

  print("Accuracy of Test Data: {}".format(100*correct/total))

Accuracy of Test Data: 98.94831848144531


---
# 6. Evaluation: Batch Normalization + Adam is the best way
* <font color = blue> Batch Normalization + Adam Optimizaer :  98.94831848144531 </font>
* Batch Normalization Accuracy of Test Data: 78.5456771850586 (Increased a lot)
* With Normalization: Accuracy of Test Data: 10.436698913574219
* Without Normalization: Accuracy of Test Data: 10.09615421295166
---