# Weight Initialization

- https://discuss.pytorch.org/t/weight-initilzation/157/9
- Initialize with small numbers
- Xavier Initialization
- Kaiming He Initialization

## 1. Settings
### 1) Import required libraries

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable

### 2) Set hyperparameters

In [2]:
batch_size = 16
learning_rate = 0.0002
num_epoch = 1

## 2. Data

### 1) Download Data

In [3]:
mnist_train = dset.MNIST("./", train=True, transform=transforms.ToTensor(), target_transform=None, download=True)
mnist_test = dset.MNIST("./", train=False, transform=transforms.ToTensor(), target_transform=None, download=True)

### 2) Check Dataset

In [4]:
print(mnist_train.__getitem__(0)[0].size(), mnist_train.__len__())
mnist_test.__getitem__(0)[0].size(), mnist_test.__len__()

torch.Size([1, 28, 28]) 60000


(torch.Size([1, 28, 28]), 10000)

### 3) Set DataLoader

In [5]:
train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=batch_size, shuffle=True,num_workers=2,drop_last=True)
test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=batch_size, shuffle=False,num_workers=2,drop_last=True)

## 3. Model & Optimizer

### 1) CNN Model

In [6]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(1,16,3,padding=1),  # 28
            nn.ReLU(),
            nn.Conv2d(16,32,3,padding=1), # 28
            nn.ReLU(),
            nn.MaxPool2d(2,2), # 14
            nn.Conv2d(32,64,3,padding=1), #14
            nn.ReLU(),
            nn.MaxPool2d(2,2) # 7
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(64*7*7,100),
            nn.ReLU(),
            nn.Linear(100,10)
        )       
        
        # initialization
        
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                
                '''# Init with small numbers
                m.weight.data.normal_(0.0, 0.02)
                m.bias.data.fill_(0)
                
                # Xavier Initialization
                init.xavier_normal(m.weight.data)
                m.bias.data.fill_(0)'''
                
                # Kaming Initialization
                init.kaiming_normal(m.weight.data)
                m.bias.data.fill_(0)
                
            elif isinstance(m, nn.Linear):
                
                '''# Init with small numbers
                m.weight.data.normal_(0.0, 0.02)
                m.bias.data.fill_(0)
                
                # Xavier Initialization
                init.xavier_normal(m.weight.data)
                m.bias.data.fill_(0)'''
                
                # Kaming Initialization
                init.kaiming_normal(m.weight.data)
                m.bias.data.fill_(0)

    def forward(self,x):
        out = self.layer(x)
        out = out.view(batch_size,-1)
        out = self.fc_layer(out)

        return out

model = CNN()#.cuda()

### 2) Loss func & Optimizer

In [7]:
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

## 4. Train 

In [8]:
for i in range(num_epoch):
    for j,[image,label] in enumerate(train_loader):
        x = Variable(image)#.cuda()
        y_= Variable(label)#.cuda()
        
        optimizer.zero_grad()
        output = model.forward(x)
        loss = loss_func(output,y_)
        loss.backward()
        optimizer.step()
        
        if j % 1000 == 0:
            print(loss)          

Variable containing:
 2.6892
[torch.FloatTensor of size 1]

Variable containing:
 1.3642
[torch.FloatTensor of size 1]

Variable containing:
 1.3983
[torch.FloatTensor of size 1]

Variable containing:
 0.7934
[torch.FloatTensor of size 1]



In [9]:
#param_list = list(model.parameters())
#print(param_list)

## 5. Test

In [10]:
correct = 0
total = 0

for image,label in test_loader:
    x = Variable(image,volatile=True)#.cuda()
    y_= Variable(label)#.cuda()

    output = model.forward(x)
    _,output_index = torch.max(output,1)
        
    total += label.size(0)
    correct += (output_index == y_).sum().float()
    
print("Accuracy of Test Data: {}".format(100*correct/total))

Accuracy of Test Data: Variable containing:
 89.2700
[torch.FloatTensor of size 1]

