# Classifier

A CNN architecture that classifies numbers from 0 to 999.

This classifier uses a dataset of images concatenated along the horizontal axis (width).

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import torch.nn.functional as F

In [None]:
import utils_datasets
import utils
import importlib
# Re-import the project helper modules so that changes made to their source
# files are picked up when this cell is run again.
importlib.reload(utils)
importlib.reload(utils_datasets)

In [None]:
# Prefer the first CUDA GPU when one is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

### Model

In [None]:
class LeNet5(nn.Module):
    """LeNet-style CNN: three convolutional blocks and a three-layer classifier.

    Every convolutional block is
    ``Conv2d(5x5, stride 1, padding 2) -> BatchNorm2d -> ReLU -> MaxPool2d(2, 2)``:
    the convolution keeps the spatial size and the pooling halves it. The
    classifier head is sized for 32x32 inputs (32 -> 16 -> 8 -> 4 after the
    three poolings, i.e. 256 * 4 * 4 features).

    Submodule names and their order are part of the checkpoint format
    (``state_dict`` keys), so they must not be renamed.

    Args:
        num_classes: Number of output scores. Defaults to 1000, one per
            number from 0 to 999.
        in_channels: Number of channels of the input images. Defaults to 1.
    """

    def __init__(self, num_classes: int = 1000, in_channels: int = 1) -> None:
        super().__init__()
        # first convolutional block
        # input size = in_channels x 32 x 32; conv output size is [(W-K+2P)/S]+1
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=5, stride=1, padding=2),  # 32x32x32
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 32x16x16
        )
        # second convolutional block
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),  # 64x16x16
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 64x8x8
        )
        # third convolutional block
        self.conv_block3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=256, kernel_size=5, stride=1, padding=2),  # 256x8x8
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 256x4x4
        )
        # fully connected blocks; fc1 only flattens everything but the batch axis
        self.fc1 = nn.Flatten(1)
        self.fc2 = nn.Sequential(
            nn.Linear(256 * 4 * 4, 2048),
            nn.ReLU(),
            nn.Dropout(p=0.5),
        )
        self.fc3 = nn.Sequential(
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
        )
        # final layer returns raw class scores (no softmax applied here)
        self.fc4 = nn.Linear(1024, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run a batch of images through the network.

        Args:
            x: Batch of images of size (N, in_channels, 32, 32).

        Returns:
            Raw class scores of size (N, num_classes).
        """
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.conv_block3(x)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        return x


# Build the network with its default arguments, move it to the selected
# device, and print its layer structure.
net = LeNet5().to(device)
print(net)

### TensorBoard setup

In [None]:
# class names as strings, one per number: "0", "1", ..., "999"
classes = [str(number) for number in range(1000)]

# log directory in which TensorBoard saves its statistics
writer = SummaryWriter('./runs/large_mnist')
# NOTE(review): the writer is closed right after creation, yet later cells keep
# calling writer.add_*; presumably SummaryWriter reopens its event file on the
# next write - confirm against the installed PyTorch version.
writer.close()


### Training

In [None]:
# Loss: cross-entropy computed on the raw class scores returned by the network.
criterion = nn.CrossEntropyLoss()
# Optimizer: Adam over all the parameters of the network.
optimizer = optim.Adam(params=net.parameters(), lr=1e-3)

In [None]:
train_batch_size = 64

# Preprocessing applied, in order, to every training sample: convert to a PIL
# image, resize to 32x32, convert to a tensor, then normalize with the
# mean/std exposed by utils_datasets.
transform_train = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(size=(32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=utils_datasets.mnist_mean, std=utils_datasets.mnist_std),
    ]
)

def denormalize(img, std, mean):
    """Undo a mean/std normalization so the image can be plotted.

    Note the argument order: ``std`` comes before ``mean``.

    Args:
        img: Normalized image values.
        std: Standard deviation to scale back by.
        mean: Mean to add back.

    Returns:
        ``img * std + mean``.
    """
    return img * std + mean

Define the dataset and dataloader from which the training data will be extracted.

In [None]:
# Training dataset built by TrainDatasetW with dataset_size=150000 and the
# training transform.
train_dataset = utils_datasets.TrainDatasetW(dataset_size=150000, transform=transform_train)
# Mini-batches of train_batch_size samples, reshuffled at every epoch.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=train_batch_size, shuffle=True)

training

In [None]:
# TRAIN!
# Runs 10 epochs over train_dataloader. Every `mini_batch_size` mini-batches the
# mean loss and the accuracy of that window are printed and logged to
# TensorBoard together with a gradient histogram and a prediction figure.

# number of mini-batches after which statistics are printed
mini_batch_size = 50
# put net into train mode
net.train()
for epoch in range(10):  # loop over the dataset multiple times
    # loss accumulated over the current window of mini-batches
    running_loss = 0.0
    # correct predictions accumulated over the current window
    correct = 0.0
    # samples actually seen in the current window; used as the accuracy
    # denominator so that a window containing a smaller batch is still exact
    seen = 0

    for i, data in enumerate(train_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # put data on correct device
        inputs, labels = inputs.to(device), labels.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # accumulate the live statistics of the current window
        running_loss += loss.item()
        # presumably returns the number of correct predictions in the batch - verify in utils
        correct += utils.get_num_correct(outputs, labels)
        seen += labels.size(0)

        if i % mini_batch_size == mini_batch_size - 1:    # report every mini_batch_size mini-batches
            # number of mini-batches processed since the start of training
            global_step = epoch * len(train_dataloader) + i + 1
            mean_loss = running_loss / mini_batch_size
            accuracy = correct / seen

            # log the gradients
            writer.add_figure('gradients',
                              utils.add_gradient_hist(net),
                              global_step=global_step)

            # log the running loss
            writer.add_scalar('training loss', mean_loss, global_step)
            # log the training accuracy
            writer.add_scalar('training accuracy', accuracy, global_step)

            print('[Epoch, It]: [{},{}] Loss: {} Accuracy: {}'.format(epoch + 1, global_step, mean_loss, accuracy))

            # log prediction vs. real labels comparison on the current mini-batch
            writer.add_figure('predictions vs. actuals',
                              utils.plot_classes_preds(net, inputs, labels, classes, denormalize),
                              global_step=global_step)

            # reset the live statistics before the next window
            running_loss = 0.0
            correct = 0.0
            seen = 0

writer.close()
print('Finished Training')

### Save/Load the model

In [None]:
# Save the model
import os

PATH = './res/LeNet5W_7.pth'
# torch.save does not create missing parent directories, so make sure the
# target directory exists before writing the checkpoint
save_dir = os.path.dirname(PATH)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)
# only the parameters/buffers (state_dict) are stored, not the model object
torch.save(net.state_dict(), PATH)

In [None]:
# Load the model
# Read the saved state_dict, mapping its tensors onto the selected device,
# and copy it into the existing network.
PATH = './res/LeNet5W_7.pth'
net.load_state_dict(torch.load(PATH, map_location=device))

### Test

In [None]:
test_batch_size = 1000

# Preprocessing applied, in order, to every test sample: resize to 32x32,
# convert to a tensor, then normalize with the mean/std exposed by
# utils_datasets.
transform_test = transforms.Compose(
    [
        transforms.Resize(size=(32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=utils_datasets.mnist_mean, std=utils_datasets.mnist_std),
    ]
)

Define the dataset and dataloader from which the test data will be extracted.

In [None]:
# Test dataset read from './data/testW/' with the test transform.
test_dataset = utils_datasets.TestDataset('./data/testW/', transform_test)
# Fixed order (no shuffling), test_batch_size samples per mini-batch.
test_dataloader = DataLoader(dataset=test_dataset, batch_size=test_batch_size, shuffle=False)

testing

In [None]:
# Evaluate the network on the test set: the accuracy of every window of
# `mini_batch_size` mini-batches is printed and logged to TensorBoard, and the
# overall accuracy is printed at the end.

# number of correct predictions
correct = 0.0
# total number of predictions
total = 0.0
# number of correct predictions in the current window of mini-batches
running_accuracy = 0.0
# number of samples in the current window; used as the denominator so that a
# window containing a smaller batch is still reported exactly
running_total = 0
# number of mini-batches after which statistics are printed
mini_batch_size = 5
# put net into evaluation mode
net.eval()

with torch.no_grad():
    for i, data in enumerate(test_dataloader):
        inputs, labels = data
        # put data on correct device
        inputs, labels = inputs.to(device), labels.to(device)
        # calculate outputs by running images through the network
        outputs = net(inputs)
        # picking the class with the highest score as prediction
        _, predicted = torch.max(outputs, 1)
        # updating the stats (count the hits of this batch only once)
        batch_correct = (predicted == labels).sum().item()
        total += labels.size(0)
        correct += batch_correct
        running_accuracy += batch_correct
        running_total += labels.size(0)

        # print statistics
        if i % mini_batch_size == mini_batch_size - 1:    # print every mini_batch_size mini-batches
            writer.add_scalar(
                'testing accuracy',
                running_accuracy / running_total,
                i + 1
            )

            # floor division: the printed percentage is rounded down
            print(f'[batch {i + 1:5d}] accuracy: {100 * running_accuracy // running_total} %')
            running_accuracy = 0.0
            running_total = 0

writer.close()
print(f'Accuracy of the network on the {total} test images: {100 * correct // total} %')

In [None]:
# Evaluate the network on the test set and break the wrong predictions down
# by the number of digits of the true label (one, two or three digits).

# number of correct predictions
correct = 0.0
# total number of predictions
total = 0.0
# number of correct predictions in the current window of mini-batches
running_accuracy = 0.0
# number of samples in the current window; used as the denominator so that a
# window containing a smaller batch is still reported exactly
running_total = 0
# total erroneous predictions
err = 0
# erroneous predictions on one digit numbers (label < 10)
unit_err = 0
# erroneous predictions on two digit numbers (10 <= label < 100)
dec_err = 0
# erroneous predictions on three digit numbers (label >= 100)
cent_err = 0
# number of mini-batches after which statistics are printed
mini_batch_size = 5
# put net into evaluation mode
net.eval()

with torch.no_grad():
    for i, data in enumerate(test_dataloader):
        inputs, labels = data
        # put data on correct device
        inputs, labels = inputs.to(device), labels.to(device)
        # calculate outputs by running images through the network
        outputs = net(inputs)
        # picking the class with the highest score as prediction
        _, predicted = torch.max(outputs, 1)
        # updating the stats (count the hits of this batch only once)
        batch_correct = (predicted == labels).sum().item()
        total += labels.size(0)
        correct += batch_correct
        running_accuracy += batch_correct
        running_total += labels.size(0)

        # true labels of the misclassified samples, bucketed with tensor masks
        # instead of a Python loop over every element of the batch
        wrong_labels = labels[predicted != labels]
        err += wrong_labels.numel()
        unit_err += (wrong_labels < 10).sum().item()
        dec_err += ((wrong_labels >= 10) & (wrong_labels < 100)).sum().item()
        cent_err += (wrong_labels >= 100).sum().item()

        # print the statistics
        if i % mini_batch_size == mini_batch_size - 1:    # print every mini_batch_size mini-batches
            # floor division: the printed percentage is rounded down
            print(f'[batch {i + 1:5d}] accuracy: {100 * running_accuracy // running_total} %')
            print(f'[batch {i + 1:5d}] total error: {err}, unit_err: {unit_err}, dec_err: {dec_err}, cent_err: {cent_err}')
            running_accuracy = 0.0
            running_total = 0

print(f'Accuracy of the network on the {total} test images: {100 * correct // total} %')
print(f'err: {err}, unit_err: {unit_err}, dec_err: {dec_err}, cent_err: {cent_err}')

### Computing precision-recall curve for each class

In [None]:
# Collect, for every test sample, the softmax probabilities over all classes
# and the true label, then write one precision-recall curve per class to
# TensorBoard.

# per-batch tensors of class probabilities (one row per sample)
class_probs = []
# per-batch tensors of true labels
class_label = []

# evaluation mode is set here as well, so the probabilities do not depend on
# which cell was run before this one
net.eval()
with torch.no_grad():
    # for each mini-batch in the test dataset
    for data in test_dataloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        output = net(images)
        # softmax over the class axis for the whole batch at once, moved to
        # the CPU so the accumulated results do not stay on the GPU
        class_probs.append(F.softmax(output, dim=1).cpu())
        class_label.append(labels.cpu())

# concatenating the per-batch results into single tensors
test_probs = torch.cat(class_probs)
test_label = torch.cat(class_label)

# plot the pr curves for all the classes
for i in range(len(classes)):
    utils.add_pr_curve_tensorboard(i, test_probs, test_label, classes, writer)

# flush the curves to disk, as the training and test cells do after logging
writer.close()

In [None]:
# Run the helper utils.predict_img on a single image file with the network and
# the test transform.
# NOTE(review): this path uses './data/TestW/' while the test dataset above is
# read from './data/testW/'; on a case-sensitive filesystem only one of the two
# spellings can exist - confirm which is correct.
utils.predict_img('./data/TestW/797/7.png', net, transform_test)