## Structure of LeNet
<img src="lenet.svg">

LeNet consists of:
1. two convolutional layers with kernel size 5 (the first one padded by 2);
2. two average pooling layers with kernel size 2 and stride 2;
3. three fully connected layers, the last of which is the final 10-way classifier;
4. sigmoid as the activation function.

In [1]:
import torch
from torch import nn
from torch.nn import Module
from torch.utils.data import DataLoader
import torchvision
import sys
# Extend the module search path with the shared helper directory. The cell
# output shows `dataset` resolving to ../dlutils/dataset.py; `model`, `loss`
# and `train` presumably live there as well (TODO confirm).
sys.path.append("../dlutils")
import importlib
import model
import loss
import train
import dataset
# Re-execute the helper modules so that edits made to them on disk are picked
# up without restarting the kernel.
importlib.reload(model)
importlib.reload(loss)
importlib.reload(train)
# Last expression of the cell: its return value (the reloaded module) is what
# the notebook displays as this cell's output.
importlib.reload(dataset)

<module 'dataset' from '/dssg/home/acct-hpc/hpchxj/deep_learning/lenet/../dlutils/dataset.py'>

In [11]:
# LeNet with sigmoid activations, written one stage per line.
# Input is expected as (N, 1, 28, 28); the per-layer shapes are printed by the
# shape-check cell that follows.
lenet = nn.Sequential(
    # Feature extractor: two conv -> sigmoid -> average-pool stages.
    nn.Conv2d(1, 6, kernel_size=5, padding=2),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    # Classifier head: flatten the 16 x 5 x 5 feature maps, then three linear layers.
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120),
    nn.Sigmoid(),
    nn.Linear(120, 84),
    nn.Sigmoid(),
    nn.Linear(84, 10),
)


In [12]:
# Push one dummy single-channel 28x28 image through the network layer by
# layer and print the shape each layer produces.
X = torch.rand(size=(1, 1, 28, 28), dtype=torch.float32)
# This is a shape inspection only, so skip autograd bookkeeping.
with torch.no_grad():
    for layer in lenet:
        X = layer(X)
        print(layer.__class__.__name__, 'output shape: \t', X.shape)
# The model has not been moved to another device at this point, so its
# parameters are still on the CPU. `next(...)` reads the first parameter
# without materialising the whole parameter list.
print(next(lenet.parameters()).device)

Conv2d output shape: 	 torch.Size([1, 6, 28, 28])
Sigmoid output shape: 	 torch.Size([1, 6, 28, 28])
AvgPool2d output shape: 	 torch.Size([1, 6, 14, 14])
Conv2d output shape: 	 torch.Size([1, 16, 10, 10])
Sigmoid output shape: 	 torch.Size([1, 16, 10, 10])
AvgPool2d output shape: 	 torch.Size([1, 16, 5, 5])
Flatten output shape: 	 torch.Size([1, 400])
Linear output shape: 	 torch.Size([1, 120])
Sigmoid output shape: 	 torch.Size([1, 120])
Linear output shape: 	 torch.Size([1, 84])
Sigmoid output shape: 	 torch.Size([1, 84])
Linear output shape: 	 torch.Size([1, 10])
cpu


## Load Dataset 

In [14]:
from dataset import load_fashion_mnist_dataset

# Fetch the Fashion-MNIST train and test loaders; both are built with the same batch size.
batch_size = 256
train_loader, test_loader = load_fashion_mnist_dataset(batch_size)

## Loss

In [13]:
# Cross-entropy criterion; it is later handed to train_3ch together with the model.
# NOTE(review): this rebinds the name `loss`, which the import cell bound to the
# local `loss` module, so that module is no longer reachable under this name
# until the import cell is run again.
loss = nn.CrossEntropyLoss()

## Optimizer

In [None]:
# Plain SGD (no momentum or weight decay configured) over all LeNet parameters.
lr = 0.9
optimizer = torch.optim.SGD(params=lenet.parameters(), lr=lr)

## Training

In [None]:
num_epochs = 10
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
def init_params(m):
    """Xavier-uniform initialise the weight of a linear or convolutional layer.

    Written to be passed to ``nn.Module.apply``, which calls it once for every
    submodule of a network.

    Args:
        m: Any module. Modules that are not ``nn.Linear`` / ``nn.Conv2d``
            (or subclasses of them) are left untouched.
    """
    # isinstance with a tuple replaces the chained exact-type comparisons and
    # also covers subclasses of the two layer types.
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        # Only the weight tensor is re-initialised in place; the bias is not touched.
        nn.init.xavier_uniform_(m.weight)
    
# Re-initialise the layer weights via init_params, then place the model and the
# criterion on the selected device. Both apply() and to() return the module itself.
lenet.apply(init_params).to(device)
loss.to(device)

from train import train_3ch

train_3ch(lenet, loss, num_epochs, train_loader, optimizer, test_loader, device)

## LeNet using ReLU

In [15]:
# Same LeNet layout as before, with every sigmoid swapped for a ReLU.
lenet = nn.Sequential(
    # Feature extractor: two conv -> ReLU -> average-pool stages.
    nn.Conv2d(1, 6, kernel_size=5, padding=2),
    nn.ReLU(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5),
    nn.ReLU(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    # Classifier head: flatten the 16 x 5 x 5 feature maps, then three linear layers.
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120),
    nn.ReLU(),
    nn.Linear(120, 84),
    nn.ReLU(),
    nn.Linear(84, 10),
)

# Push one dummy single-channel 28x28 image through the ReLU network layer by
# layer and print the shape each layer produces.
X = torch.rand(size=(1, 1, 28, 28), dtype=torch.float32)
# This is a shape inspection only, so skip autograd bookkeeping.
with torch.no_grad():
    for layer in lenet:
        X = layer(X)
        print(layer.__class__.__name__, 'output shape: \t', X.shape)

Conv2d output shape: 	 torch.Size([1, 6, 28, 28])
ReLU output shape: 	 torch.Size([1, 6, 28, 28])
AvgPool2d output shape: 	 torch.Size([1, 6, 14, 14])
Conv2d output shape: 	 torch.Size([1, 16, 10, 10])
ReLU output shape: 	 torch.Size([1, 16, 10, 10])
AvgPool2d output shape: 	 torch.Size([1, 16, 5, 5])
Flatten output shape: 	 torch.Size([1, 400])
Linear output shape: 	 torch.Size([1, 120])
ReLU output shape: 	 torch.Size([1, 120])
Linear output shape: 	 torch.Size([1, 84])
ReLU output shape: 	 torch.Size([1, 84])
Linear output shape: 	 torch.Size([1, 10])


In [16]:
num_epochs = 10
lr = 0.9
# The ReLU model was freshly rebuilt and has not been moved since, so place it
# on `device` explicitly (as the sigmoid run does) before the optimizer is
# created over its parameters.
# NOTE(review): `device`, `loss` and `train_3ch` come from earlier cells; this
# cell fails on a fresh kernel unless those have been run first.
# Unlike the sigmoid run, init_params is not applied here, so the layers keep
# the initial weights they were constructed with.
lenet.to(device)
optimizer = torch.optim.SGD(lenet.parameters(), lr=lr)
train_3ch(lenet, loss, num_epochs, train_loader, optimizer, test_loader, device)

epoch 0, training loss 0.005942, training accuracy 0.464633, testing loss 0.006130, testing accuracy 0.465200
epoch 1, training loss 0.002113, training accuracy 0.785850, testing loss 0.002232, testing accuracy 0.774400
epoch 2, training loss 0.001731, training accuracy 0.834567, testing loss 0.001864, testing accuracy 0.823900
epoch 3, training loss 0.002049, training accuracy 0.800083, testing loss 0.002210, testing accuracy 0.791000
epoch 4, training loss 0.001322, training accuracy 0.870917, testing loss 0.001482, testing accuracy 0.860500
epoch 5, training loss 0.001381, training accuracy 0.865467, testing loss 0.001577, testing accuracy 0.853200
epoch 6, training loss 0.001491, training accuracy 0.856450, testing loss 0.001690, testing accuracy 0.844400
epoch 7, training loss 0.001597, training accuracy 0.838183, testing loss 0.001785, testing accuracy 0.829000
epoch 8, training loss 0.001450, training accuracy 0.857300, testing loss 0.001676, testing accuracy 0.840800
epoch 9, t