# Problem 7

Import necessary libraries and load dataset.

In [15]:
import torch
import torch.nn as nn
from torch.optim import Optimizer
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import transforms

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [16]:
train_dataset = datasets.MNIST(root='./mnist_data/',
                               train=True, 
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='./mnist_data/',
                              train=False, 
                              transform=transforms.ToTensor())

Our reduced conv2d layer requires different input channel for each output channel. To do this, we build own conv2d layer for each output channel. \
More specifically, suppose an output channel requires $k$ input channels. Then we allocate nn.Conv2d(k, 1, kernel_size=5) to this channel. \
We create 16 such individual conv2d layer, which can be done by nn.ModuleList. The result is following:

In [17]:
class C3_layer_full(nn.Module):
    def __init__(self):
        super(C3_layer_full, self).__init__()
        self.conv_layer = nn.Conv2d(6, 16, kernel_size=5)

    def forward(self, x):
        return self.conv_layer(x)


class C3_layer(nn.Module):
    def __init__(self):
        super(C3_layer, self).__init__()
        self.ch_in_3 = [[0, 1, 2],
                        [1, 2, 3],
                        [2, 3, 4],
                        [3, 4, 5],
                        [0, 4, 5],
                        [0, 1, 5]] # filter with 3 subset of input channels
        self.ch_in_4 = [[0, 1, 2, 3],
                        [1, 2, 3, 4],
                        [2, 3, 4, 5],
                        [0, 3, 4, 5],
                        [0, 1, 4, 5],
                        [0, 1, 2, 5],
                        [0, 1, 3, 4],
                        [1, 2, 4, 5],
                        [0, 2, 3, 5]] # filter with 4 subset of input channels
        # put implementation here
        self.layer = nn.ModuleList([nn.Conv2d(3,1,kernel_size=5) for _ in range(6)] + \
                                      [nn.Conv2d(4,1,kernel_size=5) for _ in range(9)] + \
                                      [nn.Conv2d(6,1,kernel_size=5)] )

    def forward(self, x):
        # put implementation here
        out_channel_list = []
        for i in range(6):
            out_channel_list.append( self.layer[i](x[:, self.ch_in_3[i], :, :]) )
        for i in range(9):
            out_channel_list.append( self.layer[6+i](x[:, self.ch_in_4[i], :, :]) )
        out_channel_list.append( self.layer[15](x) )
        
        out = torch.cat(out_channel_list, dim=0)
        return out

Next, we construct original LeNet. \
Before then, first we calculate the number of parameters to estimate the 'reduction' effect of parameter, which is the main purpose of original LeNet. \
 \
As you can see below, parameters of network appears only at conv2d and linear. \
For each conv2d network nn.Conv2d(a,b,kernel_size=k), the number of parameter is $b*(a*k^2+1)$. Also, each FC layer nn.Linear(a,b) has $a*b+b = (a+1)b$. \
Thus the number of parameter may become $6*26 + (6*76 + 9*101 + 1*126) + 48120 + 10164 + 850 = 60806$.

In [18]:
class LeNet(nn.Module) :
    def __init__(self) :
        super(LeNet, self).__init__()
        self.C1_layer = nn.Sequential(
                nn.Conv2d(1, 6, kernel_size=5, padding=2),
                nn.Tanh()
                )
        self.P2_layer = nn.Sequential(
                nn.AvgPool2d(kernel_size=2, stride=2),
                nn.Tanh()
                )
        self.C3_layer = nn.Sequential(
                #C3_layer_full(),
                C3_layer(),
                nn.Tanh()
                )
        self.P4_layer = nn.Sequential(
                nn.AvgPool2d(kernel_size=2, stride=2),
                nn.Tanh()
                )
        self.C5_layer = nn.Sequential(
                nn.Linear(5*5*16, 120),
                nn.Tanh()
                )
        self.F6_layer = nn.Sequential(
                nn.Linear(120, 84),
                nn.Tanh()
                )
        self.F7_layer = nn.Linear(84, 10)
        self.tanh = nn.Tanh()
        
    def forward(self, x) :
        output = self.C1_layer(x)
        output = self.P2_layer(output)
        output = self.C3_layer(output)
        output = self.P4_layer(output)
        output = output.view(-1,5*5*16)
        output = self.C5_layer(output)
        output = self.F6_layer(output)
        output = self.F7_layer(output)
        return output

In [19]:
model = LeNet().to(device)
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)

param_ct = sum([p.numel() for p in model.parameters()])
print(f"Total number of trainable parameters: {param_ct}")

Total number of trainable parameters: 60806


As you can see, there are exactly 60806 parameters as we expected. \
To verify our network works well, let us train and test it.

In [24]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100, shuffle=True)

import time
start = time.time()
for epoch in range(10) :
    print("{}th epoch starting.".format(epoch))
    for images, labels in train_loader :
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        train_loss = loss_function(model(images), labels)
        train_loss.backward()

        optimizer.step()
end = time.time()

print("Time ellapsed in training is: {}".format(end - start))

0th epoch starting.
1th epoch starting.
2th epoch starting.
3th epoch starting.
4th epoch starting.
5th epoch starting.
6th epoch starting.
7th epoch starting.
8th epoch starting.
9th epoch starting.
Time ellapsed in training is: 115.97027707099915


In [25]:
test_loss, correct, total = 0, 0, 0

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=100, shuffle=False)

for images, labels in test_loader :
    images, labels = images.to(device), labels.to(device)

    output = model(images)
    test_loss += loss_function(output, labels).item()

    pred = output.max(1, keepdim=True)[1]
    correct += pred.eq(labels.view_as(pred)).sum().item()
    
    total += labels.size(0)
            
print('[Test set] Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss /total, correct, total,
        100. * correct / total))

[Test set] Average loss: 0.0228, Accuracy: 1619/10000 (16.19%)



The result is quite poor, because we didn't go through enoughly many training epoch. \
However our object is accomplished, because our model shows larger accuracy than random choice(10% accuracy). \
In fact, when I repeat training process (i.e., increase epoch), accuracy gets higher.