In [1]:
# CNN to classify Simpsons' Characters

In [2]:
# for kaggle
import kaggle
# for lolading imgs
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
# for ml
import torch

In [3]:
# # get and unzip dataset - only had to run once, done
# dataset = 'alexattia/the-simpsons-characters-dataset'
# kaggle.api.dataset_download_files(dataset, path=".", unzip=True)

In [4]:
training_character_folders = os.listdir('simpsons_dataset')
print(training_character_folders)

['abraham_grampa_simpson', 'agnes_skinner', 'apu_nahasapeemapetilon', 'barney_gumble', 'bart_simpson', 'carl_carlson', 'charles_montgomery_burns', 'chief_wiggum', 'cletus_spuckler', 'comic_book_guy']


In [5]:
training_images = [] # has images
labels = [] # has character names
label_to_index_map = {} # has character name to num index mapping
index = 0

for folder in training_character_folders:
    label_to_index_map[folder] = index
    print(f"At index: {index}")
    for img_file in os.listdir(os.path.join('simpsons_dataset',folder)):
        img = load_img(os.path.join('simpsons_dataset',folder,img_file),target_size=(224,224))
        img_array = img_to_array(img)
        training_images.append(img_array)
        labels.append(index)
    index += 1

training_images = np.array(training_images)
labels = np.array(labels)

print("Is np ndarray")

training_images = torch.tensor(training_images)
training_images = training_images.permute(0, 3, 1, 2) # apparently there is a size mismatch if i dont do this
# it expects: [batch_size, channels, height, width]
# and i was giving: [batch_size, height, width, channels]
labels = torch.tensor(labels).to(torch.long)

print(f"Train images size: {training_images.size()}")
print(f"Train labels size: {labels.size()}")

At index: 0
At index: 1
At index: 2
At index: 3
At index: 4
At index: 5
At index: 6
At index: 7
At index: 8
At index: 9
Is np ndarray
Train images size: torch.Size([5819, 3, 224, 224])
Train labels size: torch.Size([5819])


In [6]:
# dataset making
dataset = torch.utils.data.TensorDataset(training_images, labels)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=100, shuffle=True)

In [7]:
# cnn making
class SimpsonCnn(torch.nn.Module):
    def __init__(self, num_classes):
        super(SimpsonCnn,self).__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=3, out_channels=16, stride=1, padding=1, kernel_size=3)
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, stride=1, padding=1, kernel_size=3)
        self.conv3 = torch.nn.Conv2d(in_channels=32, out_channels=64, stride=1, padding=1, kernel_size=3)
        self.pool = torch.nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
        self.fc1 = torch.nn.Linear(64*28*28, 512)
        self.fc2 = torch.nn.Linear(512,num_classes)
    def forward(self, inputs):
        layer_output = self.pool(torch.relu(self.conv1(inputs)))
        # new size after conv1 = floor((n-f+2p)/s) + 1 => floor((n-3+2)/1) + 1 => floor(n-1) + 1 => n
        # new size after pooling = floor((n-f+2p)/s) + 1 => floor((n-2+0)/2) + 1 => floor((224-2)/2) + 1 => floor(111) + 1 = 112
        # n = 112 now
        layer_output = self.pool(torch.relu(self.conv2(layer_output)))
        # new size after conv2 = floor((n-f+2p)/s) + 1 => floor((n-3+2)/1) + 1 => floor(n-1) + 1 => n
        # new size after pooling = floor((n-f+2p)/s) + 1 => floor((n-2+0)/2) + 1 => floor((112-2)/2) + 1 => floor(110) + 1 = 56
        # n = 56 now
        layer_output = self.pool(torch.relu(self.conv3(layer_output)))
        # new size after conv3 = floor((n-f+2p)/s) + 1 => floor((n-3+2)/1) + 1 => floor(n-1) + 1 => n
        # new size after pooling = floor((n-f+2p)/s) + 1 => floor((n-2+0)/2) + 1 => floor((56-2)/2) + 1 => floor(27) + 1 = 28
        # n = 28 now
        # each input of form 28x28x64 rn, 3d and then total inputs = 5819 so 4d
        layer_output = layer_output.view(-1,64*28*28) # reshape to 2 dim of size = (total_input_size, 28*28*64)
        layer_output = torch.relu(self.fc1(layer_output))
        layer_output = self.fc2(layer_output) # dont apply activation here on last
        return layer_output

In [8]:
model = SimpsonCnn(index)
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [9]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        predicted_output = model.forward(inputs)
        loss = loss_function(predicted_output, labels)
        loss.backward()
        optimizer.step()
    print(f"In epoch {epoch} the loss is {loss.item():.4f}")

In epoch 0 the loss is 2.2429
In epoch 1 the loss is 2.1461
In epoch 2 the loss is 0.8479
In epoch 3 the loss is 0.5689
In epoch 4 the loss is 0.1808
In epoch 5 the loss is 0.1064
In epoch 6 the loss is 0.0072
In epoch 7 the loss is 0.0118
In epoch 8 the loss is 0.0343
In epoch 9 the loss is 0.0120


In [10]:
test_labels = []
test_imgs = []
for img_file in os.listdir('kaggle_simpson_testset/kaggle_simpson_testset'):
    name = img_file.split('_')
    label = '_'.join(name[:-1])
    index = label_to_index_map[label]
    img = load_img(os.path.join('kaggle_simpson_testset/kaggle_simpson_testset',img_file),target_size=(224,224))
    img_array = img_to_array(img)
    # print(f"{name[:-1]} gets label {label} and index {index}")
    test_imgs.append(img_array)
    test_labels.append(index)

test_imgs = np.array(test_imgs)
test_labels = np.array(test_labels)

test_imgs = torch.tensor(test_imgs)
test_labels = torch.tensor(test_labels).to(torch.long)

test_imgs = test_imgs.permute(0,3,1,2) # reshape

print(f"Test imgs dims: {test_imgs.size()}")
print(f"Test labels dims: {test_labels.size()}")

Test imgs dims: torch.Size([295, 3, 224, 224])
Test labels dims: torch.Size([295])


In [11]:
dataset = torch.utils.data.TensorDataset(test_imgs, test_labels)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=100, shuffle=False)

In [14]:
model.eval()
correct = total = 0

with torch.no_grad():
    for inputs, labels in dataloader:
        predicted_output = model.forward(inputs)
        _, predicted_output = torch.max(predicted_output.data,1)
        total += labels.size(0) # increment by batch_size
        correct += (labels == predicted_output).sum().item()

accuracy = correct/total
print(f"Testing Accuracy: {accuracy:.2f}")

Testing Accuracy: 0.97
