# LeNet-5 
cnn architecture to classify numbers from 0 to 999

In [5]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from matplotlib import pyplot as plt
from torch.utils.data import Dataset
import random
from PIL import Image

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [39]:
# Define a CNN to classify the images
import torch.nn as nn
import torch.nn.functional as F


class LeNet5(nn.Module):
    def __init__(self):
        super().__init__()
        # first convolutional block
        # initial size = 1x32x32
        self.conv_block1 = nn.Sequential(nn.Conv2d(in_channels= 1, out_channels=32, kernel_size=5, stride=1, padding=2), #32x32x32 [(W−K+2P)/S]+1
                                         nn.Tanh(),
                                         nn.MaxPool2d(2, 2)) #32x16x16
        # second convolutional block
        self.conv_block2 = nn.Sequential(nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2), #64x16x16
                                         nn.Tanh(),
                                         nn.MaxPool2d(2, 2)) #64x8x8
        # fully connected blocks
        self.fc1 = nn.Flatten(1)
        self.fc2 = nn.Sequential(nn.Linear(64 * 8 * 8, 3072),
                                nn.Tanh())
        self.fc3 = nn.Sequential(nn.Linear(3072, 2048),
                                 nn.Tanh())
        self.fc4 = nn.Linear(2048, 1000)

    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        return x


net = LeNet5().to(device)
print(net)

LeNet5(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): Tanh()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): Tanh()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Flatten(start_dim=1, end_dim=-1)
  (fc2): Sequential(
    (0): Linear(in_features=4096, out_features=3072, bias=True)
    (1): Tanh()
  )
  (fc3): Sequential(
    (0): Linear(in_features=3072, out_features=2048, bias=True)
    (1): Tanh()
  )
  (fc4): Linear(in_features=2048, out_features=1000, bias=True)
)


In [8]:
# define Loss and Optimizer
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
class DynamicWMNIST(torchvision.datasets.MNIST):
    def __init__(self, root="./data", train=True, min_digits=1, max_digits=3, dataset_size=500000, transform=None, download=True):
        super().__init__(root=root, train=train, transform=transform, download=download)

        self.min_digits = min_digits
        self.max_digits = max_digits
        self.dataset_size = dataset_size
        self.transform = transform

        self.label_to_indices = {
            i: torch.where(self.targets == i)[0] for i in range(10)
        }

    def __len__(self):
        return self.dataset_size
    
    def __getitem__(self, idx):
        n_digits = random.randint(self.min_digits,self.max_digits)
        number = random.randint(0 if n_digits == 1 else 10**(n_digits-1), 10**n_digits - 1)
        digits = list(str(number))

        digit_images = []
        for d in digits:
            label = int(d)
            indices = self.label_to_indices[label]
            chosen_idx = indices[torch.randint(len(indices), (1,)).item()]
            img = self.data[chosen_idx]
            img = img.unsqueeze(0)
            digit_images.append(img)
        concat_img = torch.cat(digit_images, dim=2)

        if self.transform:
            concat_img = self.transform(concat_img)

        return concat_img, number


In [None]:
mnist_mean = 0.1307
mnist_std = 0.3081

batch_size_train = 64

transform_train = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mnist_mean, mnist_std)
])

to_pil = transforms.ToPILImage()

def denormalize_train(img):
    img = img * mnist_std
    return img + mnist_mean

In [None]:
train_w_dataset = DynamicWMNIST(transform=transform_train)
train_w_dataloader = torch.utils.data.DataLoader(
    train_w_dataset,
    batch_size=batch_size_train,
    shuffle=True
    
)

In [None]:
# TRAIN!
# put net into train mode
net.train()
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_w_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # put data on correct device
        labels = torch.tensor(labels, dtype=torch.long, device=device)
        inputs, labels = inputs.to(device), labels.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 50 == 49:    # print every 50 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 50:.3f}')
            running_loss = 0.0

print('Finished Training')

  labels = torch.tensor(labels, dtype=torch.long, device=device)


[1,    50] loss: 6.914
[1,   100] loss: 6.880
[1,   150] loss: 6.836
[1,   200] loss: 6.779
[1,   250] loss: 6.701
[1,   300] loss: 6.571
[1,   350] loss: 6.421
[1,   400] loss: 6.215
[1,   450] loss: 6.039
[1,   500] loss: 5.960
[1,   550] loss: 5.751
[1,   600] loss: 5.590
[1,   650] loss: 5.607
[1,   700] loss: 5.484
[1,   750] loss: 5.450
[1,   800] loss: 5.317
[1,   850] loss: 5.323
[1,   900] loss: 5.279
[1,   950] loss: 5.229
[1,  1000] loss: 5.267
[1,  1050] loss: 5.152
[1,  1100] loss: 5.077
[1,  1150] loss: 5.168
[1,  1200] loss: 5.082
[1,  1250] loss: 5.000
[1,  1300] loss: 4.983
[1,  1350] loss: 4.914
[1,  1400] loss: 4.932
[1,  1450] loss: 4.875
[1,  1500] loss: 4.887
[1,  1550] loss: 4.825
[1,  1600] loss: 4.759
[1,  1650] loss: 4.753
[1,  1700] loss: 4.716
[1,  1750] loss: 4.731
[1,  1800] loss: 4.608
[1,  1850] loss: 4.618
[1,  1900] loss: 4.610
[1,  1950] loss: 4.497
[1,  2000] loss: 4.559
[1,  2050] loss: 4.545
[1,  2100] loss: 4.505
[1,  2150] loss: 4.539
[1,  2200] 

In [None]:
# if you want to save the model
PATH = './res/LeNet5_1.pth'
torch.save(net.state_dict(), PATH)

In [40]:
# if you want to load the model
PATH = './res/LeNet5_1.pth'
net.load_state_dict(torch.load(PATH, device))

<All keys matched successfully>

In [41]:
transformW = torchvision.transforms.Compose([
    torchvision.transforms.Resize((32,32)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mnist_mean, mnist_std)
])

def denormalizeW(img):
    img = img * mnist_std
    return img + mnist_mean

In [42]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T

class testMNISTDataset(Dataset):
    def __init__(self, root, transform=None):
        self.transform = transform
        self.samples = []
        self.class_to_idx = {}
        
        # leggo le cartelle e ordino numericamente
        classes = sorted([d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))], key=int)
        self.class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
        
        # creo lista (path_file, label)
        for cls_name in classes:
            cls_idx = self.class_to_idx[cls_name]
            folder_path = os.path.join(root, cls_name)
            files = sorted(os.listdir(folder_path), key = lambda x: int(x.removesuffix('.png')))
            for fname in files:
                self.samples.append((os.path.join(folder_path, fname), cls_idx))

    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        path, label = self.samples[idx]
        img = Image.open(path)
        if self.transform:
            img = self.transform(img)
        return img, label

In [43]:
testWMNIST = testMNISTDataset('./data/testWMNIST/', transformW)

test_dataloaderW = DataLoader(testWMNIST, # dataset to iterate
                              batch_size=1000, # how many images to load every iteration
                              shuffle=False) # sample data

In [45]:
# now lets evaluate the model on the test set
correct = 0
total = 0

# put net into evaluation mode
net.eval()

# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for i, data in enumerate(test_dataloaderW):
        if i%10 == 0:
            print(i)
        inputs, labels = data
        # put data on correct device
        inputs, labels = inputs.to(device), labels.to(device)
        # calculate outputs by running images through the network
        outputs = net(inputs)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {total} test images: {100 * correct // total} %')

0
10
20
30
40
Accuracy of the network on the 45330 test images: 90 %


In [52]:
# now lets evaluate the model on the test set
correct = 0
total = 0
err = 0
# put net into evaluation mode
net.eval()

# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for i, data in enumerate(test_dataloaderW):
        if i%10 == 0:
            print(i)
        inputs, labels = data
        # put data on correct device
        inputs, labels = inputs.to(device), labels.to(device)
        # calculate outputs by running images through the network
        outputs = net(inputs)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        for p, l in zip(predicted, labels):
            if p != l:
                err += 1
                print(f'predicted: {p} label = {l}')
print(f'Accuracy of the network on the {total} test images: {100 * correct // total} %')
print(f'err: {err}')

0
predicted: 13 label = 0
predicted: 6 label = 0
predicted: 7 label = 0
predicted: 6 label = 0
predicted: 7 label = 0
predicted: 6 label = 0
predicted: 7 label = 0
predicted: 7 label = 0
predicted: 8 label = 0
predicted: 3 label = 0
predicted: 6 label = 0
predicted: 8 label = 0
predicted: 7 label = 0
predicted: 3 label = 0
predicted: 8 label = 1
predicted: 6 label = 1
predicted: 2 label = 1
predicted: 8 label = 1
predicted: 5 label = 1
predicted: 6 label = 1
predicted: 8 label = 1
predicted: 2 label = 1
predicted: 8 label = 1
predicted: 8 label = 1
predicted: 8 label = 1
predicted: 6 label = 1
predicted: 7 label = 1
predicted: 8 label = 1
predicted: 6 label = 1
predicted: 8 label = 1
predicted: 8 label = 1
predicted: 5 label = 1
predicted: 8 label = 1
predicted: 3 label = 2
predicted: 0 label = 2
predicted: 8 label = 2
predicted: 7 label = 2
predicted: 0 label = 2
predicted: 0 label = 2
predicted: 7 label = 2
predicted: 7 label = 2
predicted: 4 label = 2
predicted: 1 label = 2
predicte