Dataset: https://www.kaggle.com/datasets/tongpython/cat-and-dog

## Requirements:

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch, torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms

In [2]:
# LeNet was developed by Yann LeCun and colleagues at AT&T Bell Labs and is named after him
# Input images are resized to 32x32, the same size as CIFAR-10 images
# The first linear layer must be sized from the conv/pool kernel sizes and from the image size used for training/testing after resizing

class LeNet(nn.Module):
    """LeNet-style convolutional classifier for 3-channel 32x32 images.

    Spatial sizes through the network for a 32x32 input:
    conv 5x5 -> 28x28x6, pool 2 -> 14x14x6, conv 5x5 -> 10x10x16,
    pool 2 -> 5x5x16, which is flattened to 400 features and passed through
    three fully connected layers (120, 84, ``num_classes``).

    Args:
        num_classes: Number of logits produced per image by the last linear
            layer. Defaults to 10, the value that used to be hard-coded, so
            ``LeNet()`` builds exactly the same architecture as before.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # 3 input image channels, 6 output feature maps and 5x5 conv kernel
        self.cn1 = nn.Conv2d(3, 6, 5)  # (32-5+0)/1 + 1 => 28 * 28 * 6
        self.maxpool1 = nn.MaxPool2d(2)
        # After pooling, 28/2 => 14 * 14 * 6
        # 6 input channels, 16 output feature maps and 5x5 conv kernel
        self.cn2 = nn.Conv2d(6, 16, 5)  # (14-5+0)/1 + 1 = 10 * 10 * 16
        self.maxpool2 = nn.MaxPool2d(2)
        # After pooling, 10/2 => 5 * 5 * 16
        self.flatten1 = nn.Flatten()
        # Fully connected layers of size 120, 84 and num_classes
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5*5 is the spatial dimension at this layer
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch of images."""
        # Convolution with 5x5 kernel, then max pooling over a (2, 2) window
        x = self.maxpool1(F.relu(self.cn1(x)))
        # Second convolution / pooling stage
        x = self.maxpool2(F.relu(self.cn2(x)))
        # Flatten spatial and depth dimensions into a single vector per image
        x = self.flatten1(x)
        # Fully connected operations; the last layer has no activation
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def flattened_features(self, x):
        """Return the product of all dimensions of ``x`` except the first.

        ``forward`` flattens with ``nn.Flatten`` and does not call this
        helper; it is kept for callers that want the per-sample feature count.
        """
        # all except the first (batch) dimension
        size = x.size()[1:]
        num_feats = 1
        for s in size:
            num_feats *= s
        return num_feats

# Device-agnostic setup: use the GPU when PyTorch can see one, else the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Build the network, move its parameters to the chosen device and show its layers
lenet = LeNet().to(device)
print(lenet)

LeNet(
  (cn1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (cn2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten1): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [9]:
def train(net, trainloader, optim, epoch, log_interval=50):
    """Train ``net`` for one pass over ``trainloader`` and return the mean batch loss.

    Args:
        net: Model to optimise; it is switched to training mode first.
        trainloader: Iterable yielding ``(inputs, labels)`` mini-batches.
        optim: Optimizer that updates the parameters of ``net``.
        epoch: Zero-based epoch index, used only in the progress message.
        log_interval: Print the mean loss of the last ``log_interval``
            mini-batches every ``log_interval`` batches. A falsy value
            disables the progress output. Defaults to 50.

    Returns:
        Sum of the per-batch losses divided by the number of batches.

    Raises:
        ZeroDivisionError: If ``trainloader`` yields no batches.
    """
    # Layers such as dropout / batch norm must run in training mode, even if
    # an earlier evaluation left the model in eval mode.
    net.train()

    # The loss module is stateless, so build it once instead of per batch
    criterion = nn.CrossEntropyLoss()

    # Move each batch to wherever the model lives rather than relying on a
    # notebook-level global; a parameter-free model leaves batches untouched.
    first_param = next(net.parameters(), None)
    model_device = None if first_param is None else first_param.device

    window_loss = 0.0   # loss accumulated since the last progress message
    epoch_loss = 0.0    # loss accumulated over the whole epoch
    num_batches = 0

    for num_batches, (ip, ground_truth) in enumerate(trainloader, 1):
        # ip holds the input images, ground_truth the class index of each image
        if model_device is not None:
            ip = ip.to(model_device)
            ground_truth = ground_truth.to(model_device)

        # zero the parameter gradients
        optim.zero_grad()

        # forward pass + backward pass + optimization step
        op = net(ip)
        loss = criterion(op, ground_truth)
        loss.backward()
        optim.step()

        # update loss
        batch_loss = loss.item()
        window_loss += batch_loss
        epoch_loss += batch_loss

        # print loss statistics at the interval of log_interval mini-batches
        if log_interval and num_batches % log_interval == 0:
            print('[Epoch number : %d, Mini-batches: %5d] loss: %.3f' %
                  (epoch + 1, num_batches, window_loss / log_interval))
            window_loss = 0.0

    return epoch_loss / num_batches

def test(net, testloader):
    success = 0
    counter = 0
    count = 0
    loss_total = 0.0
    with torch.no_grad():
        for data in testloader:
            count += 1
            im, ground_truth = data

            im = im.to(device)
            ground_truth = ground_truth.to(device)
            
            op = net(im)
            _, pred = torch.max(op.data, 1)
            loss = nn.CrossEntropyLoss()(op, ground_truth)
            # update loss
            loss_total += loss.item()
            
            counter += ground_truth.size(0)
            success += (pred == ground_truth).sum().item()

    accuracy = 100 * success / counter
    print(f"LeNet accuracy on {len(testloader.dataset)} images from test dataset: {accuracy}")
    # Return loss
    return loss_total / count, accuracy

In [10]:
# Per-channel mean / std handed to Normalize for both splits
norm_mean = (0.485, 0.456, 0.406)
norm_std = (0.229, 0.224, 0.225)

# Training images get a random horizontal flip before being resized to 32x32
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.Resize(size=(32, 32), antialias=True),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])
train_dataset = torchvision.datasets.ImageFolder(
    root="catdog/training_set", transform=train_transform)
trainloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=32, shuffle=True)

# Test images are only resized and normalised (no augmentation)
test_transform = transforms.Compose([
    transforms.Resize(size=(32, 32), antialias=True),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])
test_dataset = torchvision.datasets.ImageFolder(
    root="catdog/test_set", transform=test_transform)
# The whole test split is delivered as a single batch
testloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=len(test_dataset), shuffle=False)

classes = ('cat', 'dog')

# Dataset sizes and per-label counts (label 0 is reported as cat, 1 as dog)
print(f"Number of training data items: {len(train_dataset)}")
print(f"Number of test data items: {len(test_dataset)}")
print(f"Number of cat labels in training: {sum(label == 0 for label in train_dataset.targets)}")
print(f"Number of dog labels in training: {sum(label == 1 for label in train_dataset.targets)}")
print(f"Number of cat labels in test: {sum(label == 0 for label in test_dataset.targets)}")
print(f"Number of dog labels in test: {sum(label == 1 for label in test_dataset.targets)}")

Number of training data items: 8005
Number of test data items: 2023
Number of cat labels in training: 4000
Number of dog labels in training: 4005
Number of cat labels in test: 1011
Number of dog labels in test: 1012


In [11]:
# Adam optimizer over every LeNet parameter
optim = torch.optim.Adam(lenet.parameters(), lr=0.001)

# Per-epoch history; the "val" numbers are measured on the test loader
epochs = 2
training_loss, val_loss, val_acc = [], [], []

# One training pass followed by one evaluation pass per epoch
for epoch in range(epochs):
    t_loss = train(lenet, trainloader, optim, epoch)
    training_loss.append(t_loss)
    print()  # blank line between the training log and the evaluation summary

    v_loss, v_acc = test(lenet, testloader)
    val_loss.append(v_loss)
    val_acc.append(v_acc)
    print()

print('Finished Training')
for history in (training_loss, val_loss, val_acc):
    print(history)

[Epoch number : 1, Mini-batches:    50] loss: 0.642
[Epoch number : 1, Mini-batches:   100] loss: 0.593
[Epoch number : 1, Mini-batches:   150] loss: 0.612
[Epoch number : 1, Mini-batches:   200] loss: 0.611
[Epoch number : 1, Mini-batches:   250] loss: 0.617

LeNet accuracy on 2023 images from test dataset: 65.79337617399901

[Epoch number : 2, Mini-batches:    50] loss: 0.593
[Epoch number : 2, Mini-batches:   100] loss: 0.579
[Epoch number : 2, Mini-batches:   150] loss: 0.592
[Epoch number : 2, Mini-batches:   200] loss: 0.594
[Epoch number : 2, Mini-batches:   250] loss: 0.567

LeNet accuracy on 2023 images from test dataset: 70.39050914483441

Finished Training
[0.614186506584821, 0.5849051323544932]
[0.61527419090271, 0.5782939195632935]
[65.79337617399901, 70.39050914483441]


In [12]:
# Overall accuracy of the trained model on the test loader
success = 0
counter = 0
with torch.no_grad():
    for im, ground_truth in testloader:
        # Inputs and labels go to the same device as the model
        im, ground_truth = im.to(device), ground_truth.to(device)

        # Predicted class = index of the largest score in each row
        pred = lenet(im).argmax(dim=1)
        counter += ground_truth.shape[0]
        success += (pred == ground_truth).sum().item()

percent_correct = 100 * success / counter
print(f'Model accuracy on {len(testloader.dataset)} images from test dataset: %d %%' % (
    percent_correct))

Model accuracy on 2023 images from test dataset: 70 %


In [13]:
# Per-class tallies, indexed by label: correct predictions and samples seen
class_success = [0.0] * len(classes)
class_counter = [0.0] * len(classes)

with torch.no_grad():
    for im, ground_truth in testloader:
        # Forward pass on the model's device
        op = lenet(im.to(device))

        # Compare on the CPU; labels come straight from the loader
        pred = op.argmax(dim=1).cpu()
        ground_truth = ground_truth.cpu()
        correct = pred == ground_truth

        # Walk over exactly the samples in this batch, so any batch size and
        # any dataset size work (no hard-coded sample count).
        for label, hit in zip(ground_truth.tolist(), correct.tolist()):
            class_success[label] += hit
            class_counter[label] += 1

for i in range(len(classes)):
    print('Model accuracy for class %5s : %2d %%' % (
        classes[i], 100 * class_success[i] / class_counter[i]))

Model accuracy for class   cat : 76 %
Model accuracy for class   dog : 64 %


In [14]:
# Final evaluation on the test loader: mean batch loss, then accuracy in percent
loss, accuracy = test(lenet, testloader)
print(loss, accuracy, sep='\n')

LeNet accuracy on 2023 images from test dataset: 70.39050914483441
0.5782939195632935
70.39050914483441
