# LeNet-5 from scratch

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from torchvision import datasets, transforms 

from modules.activation import SoftMax, SquashedTanh
from modules.criterion import CrossEntropy
from modules.init import LeCun
from modules.layer import Linear, Conv, Flatten
from modules.model import Sequential, train, test
from modules.optimizer import Adam
from modules.pooling import AvgPool
from constant import C3_MAPPING

In [2]:
# Preprocessing pipeline applied to each MNIST image as it is read.
preprocessing_steps = [
    # Upscale the images to the 32x32 input size the first Conv layer is built for.
    transforms.Resize(size=(32, 32)),
    # PIL image -> float tensor (torchvision scales pixel values into [0, 1]).
    transforms.ToTensor(),
    # NOTE(review): mean=0 / std=1 computes (x - 0) / 1, i.e. it leaves the
    # values unchanged; kept as-is so the recorded results stay reproducible.
    transforms.Normalize(mean=0, std=1),
]
composed_transform = transforms.Compose(preprocessing_steps)


In [3]:
# MNIST train / test splits; files are downloaded into ./data when missing and
# every sample goes through `composed_transform` on access.
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=composed_transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=composed_transform)

# Materialise the training split as NumPy arrays in a single pass over the
# dataset. Each access re-runs the transform pipeline, so walking the dataset
# once (instead of once for images and once for labels) halves that work while
# producing the same arrays.
train_images, train_labels = zip(
    *((image.numpy(), label) for image, label in train_dataset)
)
X_train = np.array(train_images)
y_train = np.array(train_labels)

## LeNet-5 Architecture

<img src='img/architecture.png' width=100%>

The activation function of the LeNet model, <code>SquashedTanh</code>, is a scaled hyperbolic tangent $f(a) = A\tanh(Sa)$ with $S=\frac{2}{3}$ and $A=1.7159$.

<img src='img/activation.png' width=50%>

Each feature map in layer C3 takes only a specific subset of the S2 output channels (<code>C3_MAPPING</code>), according to the following table:

<img src='img/mappings.png' width=50%>

>

In [10]:
# Weight initialiser handed to the two Linear layers below.
init_method = LeCun()

# Convolutional part: C1 -> S2 -> C3 -> S4 -> C5, with the squashed-tanh
# activation applied after each pooling stage and after C5.
# NOTE(review): the second Conv argument (128) equals the batch size passed to
# train(); presumably it is the batch size - confirm against modules.layer.Conv.
conv_stages = [
    Conv((32, 32), 128, 1, 6, 5),  # c1
    AvgPool(2),  # s2
    SquashedTanh(),
    Conv((14, 14), 128, 6, 16, 5, mapping=C3_MAPPING),  # c3
    AvgPool(2),  # s4
    SquashedTanh(),
    Conv((5, 5), 128, 16, 120, 5),  # c5
    SquashedTanh(),
]

# Fully connected part: flatten, F6 (120 -> 84), output layer (84 -> 10), softmax.
fc_stages = [
    Flatten(),
    Linear(120, 84, init_method),  # f6
    SquashedTanh(),
    Linear(84, 10, init_method),  # output
    SoftMax(),
]

# Layers are constructed and stacked in exactly the order listed above.
model = Sequential(conv_stages + fc_stages)

In [11]:
# Loss used for both training and evaluation.
criterion = CrossEntropy()

# Adam optimiser bound to the model, learning rate 1e-3.
optimizer = Adam(model, lr=1e-3)

In [13]:
# NOTE(review): the last two positional arguments of train() are presumably
# the epoch count and the batch size (the log shows 5 epochs of 468 steps,
# i.e. 60000 // 128) - confirm against modules.model.train.
n_epochs = 5
batch_size = 128

train_acc, train_loss = train(
    model,
    X_train,
    y_train,
    criterion,
    optimizer,
    n_epochs,
    batch_size,
)



Epoch 1


Training: 100%|██████████| 468/468 [30:36<00:00,  3.92s/it]


Accuracy: 0.8699333333333333 | Loss: 189.93679164666403

Epoch 2


Training: 100%|██████████| 468/468 [33:34<00:00,  4.30s/it]


Accuracy: 0.8877833333333334 | Loss: 162.62234119030012

Epoch 3


Training: 100%|██████████| 468/468 [33:29<00:00,  4.29s/it]


Accuracy: 0.9019833333333334 | Loss: 142.21548088366814

Epoch 4


Training: 100%|██████████| 468/468 [38:00<00:00,  4.87s/it]   


Accuracy: 0.9115166666666666 | Loss: 127.5669516141283

Epoch 5


Training: 100%|██████████| 468/468 [32:32<00:00,  4.17s/it]

Accuracy: 0.9202666666666667 | Loss: 115.67366511924223





In [None]:
# Evaluate on the held-out MNIST test split. The original cell re-used the
# training arrays and overwrote train_acc / train_loss, so it never measured
# generalisation and discarded the training results.
# The test split is converted to NumPy arrays in one pass, the same way the
# training split is prepared.
test_images, test_labels = zip(
    *((image.numpy(), label) for image, label in test_dataset)
)
X_test = np.array(test_images)
y_test = np.array(test_labels)

# NOTE(review): the trailing arguments (optimizer, 5, 128) are carried over
# unchanged from the train() call; check modules.model.test to confirm that it
# expects them.
test_acc, test_loss = test(
    model, X_test, y_test, criterion, optimizer, 5, 128
)