## Convolutional Neural Network (CNN)


In [5]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Report which PyTorch build produced the outputs recorded in this notebook.
print ("PyTorch version:[%s]."%(torch.__version__,))

# Use the first CUDA GPU when one is visible, otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print ('device:[%s].'%(device,))


PyTorch version:[1.9.0+cu102].
device:[cuda:0].


### Dataset

In [6]:
from torchvision import datasets, transforms

# Load both MNIST splits from ./data/ (downloading them when requested files are
# missing); every sample is passed through the ToTensor transform.
mnist_train = datasets.MNIST(
    root='./data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
mnist_test = datasets.MNIST(
    root='./data/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Show the dataset summaries (size, root location, split, transform).
print('mnist_train:\n', mnist_train, '\n')
print('mnist_test:\n', mnist_test, '\n')
print('Done')

mnist_train:
 Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data/
    Split: Train
    StandardTransform
Transform: ToTensor() 

mnist_test:
 Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data/
    Split: Test
    StandardTransform
Transform: ToTensor() 

Done


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


### Data Iterator

In [7]:
BATCH_SIZE = 256

# Training batches are reshuffled on every pass so each epoch sees a new sample order.
train_iter = torch.utils.data.DataLoader(
    mnist_train,batch_size=BATCH_SIZE,shuffle=True,num_workers=1)
# Accuracy does not depend on the order of the samples, so the test split is
# iterated sequentially; this keeps the evaluation order deterministic.
test_iter = torch.utils.data.DataLoader(
    mnist_test,batch_size=BATCH_SIZE,shuffle=False,num_workers=1)

print('Done.')

Done.


### Define Model

In [8]:
class ConvolutionalNeuralNetworkClass(nn.Module):
    """
        Convolutional Neural Network (CNN) Class

        Architecture (all layers are collected in `self.net`, an nn.Sequential):
          * for every entry of `cdims`:
              Conv2d -> (BatchNorm2d) -> ReLU -> MaxPool2d(2x2) -> Dropout2d(0.5)
          * Flatten
          * for every entry of `hdims`: Linear -> ReLU
          * final Linear producing `ydim` raw scores (no activation)

        Args:
            name: label stored on the instance.
            xdim: input size as [channels, height, width].
            ksize: integer side length of the square convolution kernel;
                   every convolution uses stride 1 and padding ksize//2.
            cdims: output channels of each convolutional stage.
            hdims: widths of the hidden dense layers.
            ydim: number of outputs of the final layer.
            USE_BATCHNORM: if True, add BatchNorm2d after every convolution.
    """
    def __init__(self,name='cnn',xdim=[1,28,28],
                 ksize=3,cdims=[32,64],hdims=[1024,128],ydim=10,
                 USE_BATCHNORM=False):
        super(ConvolutionalNeuralNetworkClass,self).__init__()
        self.name = name
        # Copy the size lists so an instance never aliases the caller's list
        # (or the shared default list objects of this signature).
        self.xdim = list(xdim)
        self.ksize = ksize
        self.cdims = list(cdims)
        self.hdims = list(hdims)
        self.ydim = ydim
        self.USE_BATCHNORM = USE_BATCHNORM

        # Convolutional layers
        self.layers = []
        prev_cdim = self.xdim[0]
        # Track the spatial size of the feature map through every layer, so the
        # first dense layer gets the right input width for even kernels as well
        # (with padding=ksize//2 an even kernel grows the map by one pixel).
        feat_h,feat_w = self.xdim[1],self.xdim[2]
        pad = self.ksize//2
        for cdim in self.cdims: # for each convolutional stage
            self.layers.append(
                nn.Conv2d(in_channels=prev_cdim,
                          out_channels=cdim,
                          kernel_size=self.ksize,
                          stride=(1,1),
                          padding=pad)) # convolution
            feat_h = feat_h + 2*pad - self.ksize + 1 # stride-1 conv output size
            feat_w = feat_w + 2*pad - self.ksize + 1
            if self.USE_BATCHNORM:
                self.layers.append(nn.BatchNorm2d(cdim)) # batch-norm
            self.layers.append(nn.ReLU(inplace=True))  # activation
            self.layers.append(nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))) # max-pooling
            feat_h,feat_w = feat_h//2,feat_w//2 # 2x2 pooling halves (floors) each side
            self.layers.append(nn.Dropout2d(p=0.5))  # dropout
            prev_cdim = cdim

        # Dense layers
        self.layers.append(nn.Flatten())
        prev_hdim = prev_cdim*feat_h*feat_w # size of the flattened feature map
        for hdim in self.hdims:
            self.layers.append(nn.Linear(prev_hdim,hdim,bias=True))
            self.layers.append(nn.ReLU(inplace=True))  # activation
            prev_hdim = hdim
        # Final layer (without activation)
        self.layers.append(nn.Linear(prev_hdim,self.ydim,bias=True))

        # Concatenate all layers; each one is registered as '<classname>_<index>'
        self.net = nn.Sequential()
        for l_idx,layer in enumerate(self.layers):
            layer_name = "%s_%02d"%(type(layer).__name__.lower(),l_idx)
            self.net.add_module(layer_name,layer)
        self.init_param() # initialize parameters

    def init_param(self):
        """
            (Re-)initialize all parameters in place: Kaiming-normal weights and
            zero biases for Conv2d/Linear layers, weight 1 and bias 0 for BatchNorm2d.
        """
        for m in self.modules():
            if isinstance(m,nn.Conv2d): # init conv
                nn.init.kaiming_normal_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m,nn.BatchNorm2d): # init BN
                nn.init.constant_(m.weight,1)
                nn.init.constant_(m.bias,0)
            elif isinstance(m,nn.Linear): # init dense
                nn.init.kaiming_normal_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self,x):
        """
            Run `x` through the sequential stack and return the `ydim` raw scores
            per sample.
        """
        return self.net(x)

# Build the MNIST classifier (two conv stages, one 32-unit hidden dense layer)
# and place it on the selected device.
C = ConvolutionalNeuralNetworkClass(
    name='cnn',
    xdim=[1,28,28],
    ksize=3,
    cdims=[32,64],
    hdims=[32],
    ydim=10,
).to(device)

# Cross-entropy objective and an Adam optimizer over every parameter of the model.
loss = nn.CrossEntropyLoss()
optm = optim.Adam(C.parameters(),lr=1e-3)
print ("Done.")

Done.


### Check Parameters

In [9]:
# List every trainable parameter (name, shape, first five values) and count them.
np.set_printoptions(precision=3)
n_param = 0
for p_idx,(param_name,param) in enumerate(C.named_parameters()):
    if not param.requires_grad:
        continue # frozen parameters are neither printed nor counted
    param_numpy = param.detach().cpu().numpy() # to numpy array
    n_param += len(param_numpy.reshape(-1)) # number of scalar entries
    print ("[%d] name:[%s] shape:[%s]."%(p_idx,param_name,param_numpy.shape))
    print ("    val:%s"%(param_numpy.reshape(-1)[:5]))
print ("Total number of parameters:[%s]."%(format(n_param,',d')))

[0] name:[net.conv2d_00.weight] shape:[(32, 1, 3, 3)].
    val:[ 0.686 -0.755  0.474  0.157 -0.531]
[1] name:[net.conv2d_00.bias] shape:[(32,)].
    val:[0. 0. 0. 0. 0.]
[2] name:[net.conv2d_04.weight] shape:[(64, 32, 3, 3)].
    val:[ 0.07  -0.002 -0.071  0.053 -0.041]
[3] name:[net.conv2d_04.bias] shape:[(64,)].
    val:[0. 0. 0. 0. 0.]
[4] name:[net.linear_09.weight] shape:[(32, 3136)].
    val:[ 0.004 -0.014 -0.004  0.035 -0.037]
[5] name:[net.linear_09.bias] shape:[(32,)].
    val:[0. 0. 0. 0. 0.]
[6] name:[net.linear_11.weight] shape:[(10, 32)].
    val:[-0.481 -0.507  0.176  0.488 -0.118]
[7] name:[net.linear_11.bias] shape:[(10,)].
    val:[0. 0. 0. 0. 0.]
Total number of parameters:[119,530].


### Simple Forward Path of the CNN Model

In [10]:
# Push a random batch of two 1x28x28 inputs through the network as a smoke test.
torch.set_printoptions(precision=3)
np.set_printoptions(precision=3)

x_numpy = np.random.rand(2,1,28,28)
x_torch = torch.from_numpy(x_numpy).float().to(device) # float32 copy on the device

# NOTE(review): no earlier cell shown here switches C to eval mode, so dropout is
# presumably active during this forward pass - confirm if exact values matter.
y_torch = C.forward(x_torch) # forward path
y_numpy = y_torch.detach().cpu().numpy() # torch tensor to numpy array

print ("x_torch:\n",x_torch)
print ("y_torch:\n",y_torch)
print ("\nx_numpy %s:\n"%(x_numpy.shape,),x_numpy)
print ("y_numpy %s:\n"%(y_numpy.shape,),y_numpy)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


x_torch:
 tensor([[[[0.981, 0.719, 0.606,  ..., 0.561, 0.603, 0.772],
          [0.835, 0.103, 0.329,  ..., 0.813, 0.913, 0.392],
          [0.250, 0.006, 0.795,  ..., 0.527, 0.455, 0.992],
          ...,
          [0.869, 0.735, 0.886,  ..., 0.124, 0.718, 0.416],
          [0.305, 0.862, 0.737,  ..., 0.545, 0.374, 0.170],
          [0.406, 0.806, 0.079,  ..., 0.564, 0.719, 0.109]]],


        [[[0.827, 0.137, 0.529,  ..., 0.652, 0.266, 0.786],
          [0.221, 0.643, 0.939,  ..., 0.282, 0.506, 0.117],
          [0.672, 0.585, 0.596,  ..., 0.943, 0.081, 0.580],
          ...,
          [0.816, 0.041, 0.202,  ..., 0.391, 0.031, 0.575],
          [0.316, 0.690, 0.126,  ..., 0.243, 0.623, 0.373],
          [0.140, 0.609, 0.034,  ..., 0.272, 0.849, 0.079]]]], device='cuda:0')
y_torch:
 tensor([[ 7.118, -2.334, -2.910, -0.727, -2.049, -1.110,  7.007, -2.895,  0.321,
          2.478],
        [-0.583,  0.270, -0.245,  0.940, -2.292, -3.005,  0.554, -2.064, -0.141,
          0.659]], device=

In [11]:
def func_eval(model,data_iter,device):
    """
    Compute the classification accuracy of `model` over `data_iter`.

    The model is switched to evaluation mode (affects DropOut and BN) for the
    duration of the call and is put back into whichever mode it was in before,
    even if evaluation raises.

    Args:
        model: nn.Module taking a (N,1,28,28) batch and returning per-class scores.
        data_iter: iterable of (batch_in, batch_out) pairs; batch_in is reshaped
                   to (-1,1,28,28) and batch_out holds the target class indices.
        device: torch device the batches are moved to before the forward pass.

    Returns:
        float: number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: if `data_iter` yields no samples.
    """
    was_training = model.training # remember the caller's mode
    n_total,n_correct = 0,0
    model.eval() # evaluate (affects DropOut and BN)
    try:
        with torch.no_grad():
            for batch_in,batch_out in data_iter:
                y_trgt = batch_out.to(device)
                model_pred = model(batch_in.view(-1,1,28,28).to(device))
                _,y_pred = torch.max(model_pred,1) # index of the largest score
                n_correct += (y_pred==y_trgt).sum().item()
                n_total += batch_in.size(0)
    finally:
        model.train(was_training) # restore the previous mode, not always train

    val_accr = (n_correct/n_total)
    return val_accr


print ("Done")


Done


In [12]:
# Re-draw the weights, then measure the accuracy of the freshly initialized
# network on the training split followed by the test split.
C.init_param() # initialize parameters
train_accr,test_accr = func_eval(C,train_iter,device),func_eval(C,test_iter,device)
print ("train_accr:[%.3f] test_accr:[%.3f]."%(train_accr,test_accr))

train_accr:[0.179] test_accr:[0.174].


In [13]:
print ("Start training.")
C.init_param() # initialize parameters
C.train() # to train mode
EPOCHS,print_every = 10,1
for epoch in range(EPOCHS):
    loss_val_sum = 0.0
    for batch_in,batch_out in train_iter:
        # Forward path
        y_pred = C.forward(batch_in.view(-1,1,28,28).to(device))
        loss_out = loss(y_pred,batch_out.to(device))
        # Update
        optm.zero_grad()    # reset gradient
        loss_out.backward() # backpropagate
        optm.step()         # optimizer update
        # Accumulate a plain Python float: adding the loss tensor itself would
        # keep extending the autograd history across the whole epoch.
        loss_val_sum += loss_out.item()
    loss_val_avg = loss_val_sum/len(train_iter) # mean of the per-batch losses
    # Print
    if ((epoch%print_every)==0) or (epoch==(EPOCHS-1)):
        train_accr = func_eval(C,train_iter,device)
        test_accr = func_eval(C,test_iter,device)
        print ("epoch:[%d] loss:[%.3f] train_accr:[%.3f] test_accr:[%.3f]."%
               (epoch,loss_val_avg,train_accr,test_accr))
print ("Done")

Start training.
epoch:[0] loss:[0.569] train_accr:[0.953] test_accr:[0.956].
epoch:[1] loss:[0.184] train_accr:[0.973] test_accr:[0.975].
epoch:[2] loss:[0.132] train_accr:[0.979] test_accr:[0.980].
epoch:[3] loss:[0.106] train_accr:[0.983] test_accr:[0.982].
epoch:[4] loss:[0.096] train_accr:[0.984] test_accr:[0.983].
epoch:[5] loss:[0.082] train_accr:[0.987] test_accr:[0.987].
epoch:[6] loss:[0.078] train_accr:[0.987] test_accr:[0.986].
epoch:[7] loss:[0.073] train_accr:[0.989] test_accr:[0.988].
epoch:[8] loss:[0.065] train_accr:[0.989] test_accr:[0.987].
epoch:[9] loss:[0.063] train_accr:[0.992] test_accr:[0.989].
Done


In [None]:
# Draw 25 distinct test images at random and compare predictions with labels.
n_sample = 25
sample_indices = np.random.choice(len(mnist_test.targets),n_sample,replace=False)
test_x = mnist_test.data[sample_indices]
test_y = mnist_test.targets[sample_indices]

with torch.no_grad():
    C.eval() # to evaluation mode
    # The stored images are used directly here, so they are cast to float and
    # divided by 255 by hand before the forward pass.
    net_input = test_x.view(-1,1,28,28).float().to(device)/255.
    y_pred = C.forward(net_input)
y_pred = y_pred.argmax(dim=1) # predicted class = index of the largest score

# 5x5 grid of the sampled digits, titled with prediction and ground truth.
plt.figure(figsize=(10,10))
for idx in range(n_sample):
    plt.subplot(5, 5, idx+1)
    plt.imshow(test_x[idx], cmap='gray')
    plt.axis('off')
    plt.title("Pred:%d, Label:%d"%(y_pred[idx],test_y[idx]))
plt.show()
print ("Done")