In [24]:
import os

from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter
import torchvision.datasets
import torch
import PIL
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 128
number_of_labels = 42
learning_rate = 0.001
num_epochs = 10
classes = ('abraham_grampa_simpson',
            'agnes_skinner',
            'apu_nahasapeemapetilon',
            'barney_gumble',
            'bart_simpson',
            'carl_carlson',
            'charles_montgomery_burns',
            'chief_wiggum',
            'cletus_spuckler',
            'comic_book_guy',
            'disco_stu',
            'edna_krabappel',
            'fat_tony',
            'gil',
            'groundskeeper_willie',
            'homer_simpson',
            'kent_brockman',
            'krusty_the_clown',
            'lenny_leonard',
            'lionel_hutz',
            'lisa_simpson',
            'maggie_simpson',
            'marge_simpson',
            'martin_prince',
            'mayor_quimby',
            'milhouse_van_houten',
            'miss_hoover',
            'moe_szyslak',
            'ned_flanders',
            'nelson_muntz',
            'otto_mann',
            'patty_bouvier',
            'principal_skinner',
            'professor_john_frink',
            'rainier_wolfcastle',
            'ralph_wiggum',
            'selma_bouvier',
            'sideshow_bob',
            'sideshow_mel',
            'snake_jailbird',
            'troy_mcclure',
            'waylon_smithers')
class_encoder = {}
for i in range(len(classes)):
    class_encoder[classes[i]]=i
class CustomImageDataset(Dataset):
    def __init__(self, img_dir, transform=None, target_transform=None):
        self.img_labels = os.listdir(img_dir)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        
        img_path = os.path.join(self.img_dir,self.img_labels[idx])
        image = PIL.Image.open(img_path)
        label = self.img_labels[idx]
        class_indicator = label.rfind('_')
        class_str = label[:class_indicator]
        label = class_encoder[class_str]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label
transformations = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],std=[0.2023, 0.1994, 0.2010]),
    transforms.Resize((32,32))
])


full_dataset = torchvision.datasets.ImageFolder("./characters",transformations)
train_dataset,valid_dataset = torch.utils.data.random_split(full_dataset,[0.7, 0.3])
train_dataset, test_set = torch.utils.data.random_split(full_dataset,[0.8, 0.2])
train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True,num_workers=16)
print("The number of images in a training set is: ", len(train_loader)*batch_size)

test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=16)
print("The number of images in a test set is: ", len(test_loader)*batch_size)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=16)
print("The number of images in validation set is: ",len(valid_loader)*batch_size)
print("The number of batches per epoch is: ", len(train_loader))


The number of images in a training set is:  16768
The number of images in a test set is:  4224
The number of images in validation set is:  6400
The number of batches per epoch is:  131


In [25]:
import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        
        self.conv = nn.Sequential(
        nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU()
        )
        self.pool = nn.AdaptiveAvgPool2d((1,1))
        self.fc1 = nn.Linear(512, 42)

    def forward(self, input):
        output = self.conv(input)
        output = self.pool(output)
        output = output.view(-1, 512)
        output = self.fc1(output)
        return output

model = Network().to(device)

In [26]:
from torch.optim import SGD
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=learning_rate, momentum=0.9)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, factor=0.1, patience=3, verbose=True, threshold=1e-2
)

In [27]:
def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [28]:
from torch.autograd import Variable
import tqdm

def saveModel():
    path = "./simpsons.pth"
    torch.save(model.state_dict(), path)

def testAccuracy():
    
    model.eval()
    metric = torchmetrics.F1Score(task="multiclass", num_classes=42).to(device)
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            metric(predicted, labels)
    f1 = metric.compute()

    return f1

def testAccuracy_1():

    model.eval()
    accuracy = 0.0
    total = 0.0
    metric = torchmetrics.F1Score(task="multiclass", num_classes=42).to(device)
    with torch.no_grad():
        for data in valid_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            accuracy += (predicted == labels).sum().item()
            metric(predicted, labels)
    f1 = metric.compute()
    print("F1 metric: ",f1)
loss_metric =[]
recall_metric=[]
accuracy_metric=[]
lr_metric=[]

def train():
    
    best_f1 = 0.0
    print("The model will be running on", device, "device")
    comment = f' batch_size = {batch_size} lr = {learning_rate}'
    tb = SummaryWriter(comment=comment)
    for epoch in tqdm.tnrange(num_epochs,position=0,desc="Epochs"):
        losses = []
        total_correct=0;
        total_f1=0;
        for _, (images, labels) in enumerate(tqdm.tqdm_notebook(train_loader,position=1,desc="Batch iter",leave=True), 0):

            images = Variable(images.to(device))
            labels = Variable(labels.to(device))
            optimizer.zero_grad()
            outputs = model(images)
            total_correct+= get_num_correct(outputs, labels)
            loss = loss_fn(outputs, labels)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
        mean_loss = sum(losses) / len(losses)
        scheduler.step(mean_loss)
        print(f"Loss at epoch {epoch} = {mean_loss}")
        f1 = testAccuracy()
        print(f"For epoch {epoch} F1: {f1}")
        tb.add_scalar("Loss", mean_loss, epoch)
        tb.add_scalar("Correct", total_correct, epoch)
        tb.add_scalar("F1", f1, epoch)

        if f1 > best_f1:
            saveModel()
            best_f1 = f1

    grid = torchvision.utils.make_grid(images)
    tb.add_image("images", grid)
    tb.add_graph(model, images)
    tb.close()

In [29]:
import torchmetrics

def testClassess():
    metric = torchmetrics.F1Score(task="multiclass", num_classes=42,average=None).to(device)
    with torch.no_grad():
        for data in valid_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            metric(predicted, labels)
    acc = metric.compute()
    for i in range(number_of_labels):
        print(f'F1 of {classes[i]} : {acc[i]}')

In [30]:
import torchinfo


torchinfo.summary(model, depth=2, input_size=(128, 3, 32,32), row_settings=["var_names"], verbose=0, col_names=[
"input_size", "output_size", "num_params", "params_percent", "kernel_size", "mult_adds", "trainable"])

  action_fn=lambda data: sys.getsizeof(data.storage()),
  return super().__sizeof__() + self.nbytes()


Layer (type (var_name))                  Input Shape               Output Shape              Param #                   Param %                   Kernel Shape              Mult-Adds                 Trainable
Network (Network)                        [128, 3, 32, 32]          [128, 42]                 --                             --                   --                        --                        True
├─Sequential (conv)                      [128, 3, 32, 32]          [128, 512, 32, 32]        --                             --                   --                        --                        True
│    └─Conv2d (0)                        [128, 3, 32, 32]          [128, 32, 32, 32]         896                         0.01%                   [3, 3]                    117,440,512               True
│    └─BatchNorm2d (1)                   [128, 32, 32, 32]         [128, 32, 32, 32]         64                          0.00%                   --                        8,192           

In [31]:
if __name__ == "__main__":
    train()
    print('Finished Training')
    model = Network().to(device)
    path = "simpsons.pth"
    model.load_state_dict(torch.load(path))
    testClassess()
    testAccuracy_1()

The model will be running on cuda device


  for epoch in tqdm.tnrange(num_epochs,position=0,desc="Epochs"):


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for _, (images, labels) in enumerate(tqdm.tqdm_notebook(train_loader,position=1,desc="Batch iter",leave=True), 0):


Batch iter:   0%|          | 0/131 [00:00<?, ?it/s]

Loss at epoch 0 = 3.130803936310397
For epoch 0 F1: 0.23053033649921417


Batch iter:   0%|          | 0/131 [00:00<?, ?it/s]

Loss at epoch 1 = 2.5713713059898553
For epoch 1 F1: 0.4147157073020935


Batch iter:   0%|          | 0/131 [00:00<?, ?it/s]

Loss at epoch 2 = 2.1095694403611978
For epoch 2 F1: 0.49976110458374023


Batch iter:   0%|          | 0/131 [00:00<?, ?it/s]

Loss at epoch 3 = 1.6577868761907097
For epoch 3 F1: 0.567367434501648


Batch iter:   0%|          | 0/131 [00:00<?, ?it/s]

Loss at epoch 4 = 1.3786659122423361
For epoch 4 F1: 0.6669852137565613


Batch iter:   0%|          | 0/131 [00:00<?, ?it/s]

Loss at epoch 5 = 1.1735272871628972
For epoch 5 F1: 0.7152412533760071


Batch iter:   0%|          | 0/131 [00:00<?, ?it/s]

Loss at epoch 6 = 1.0344073522181911
For epoch 6 F1: 0.6992355585098267


Batch iter:   0%|          | 0/131 [00:00<?, ?it/s]

Loss at epoch 7 = 0.8993698981882051
For epoch 7 F1: 0.785953164100647


Batch iter:   0%|          | 0/131 [00:00<?, ?it/s]

Loss at epoch 8 = 0.8210013768145146
For epoch 8 F1: 0.7914476990699768


Batch iter:   0%|          | 0/131 [00:00<?, ?it/s]

Loss at epoch 9 = 0.7244975398515017
For epoch 9 F1: 0.8050644993782043
Finished Training
F1 of abraham_grampa_simpson : 0.5903307795524597
F1 of agnes_skinner : 0.0
F1 of apu_nahasapeemapetilon : 0.5116279125213623
F1 of barney_gumble : 0.0
F1 of bart_simpson : 0.5117772817611694
F1 of carl_carlson : 0.0
F1 of charles_montgomery_burns : 0.5548996329307556
F1 of chief_wiggum : 0.5811403393745422
F1 of cletus_spuckler : 0.0
F1 of comic_book_guy : 0.3612903356552124
F1 of disco_stu : 0.0
F1 of edna_krabappel : 0.41999998688697815
F1 of fat_tony : 0.0
F1 of gil : 0.0
F1 of groundskeeper_willie : 0.0
F1 of homer_simpson : 0.5641025900840759
F1 of kent_brockman : 0.20359280705451965
F1 of krusty_the_clown : 0.7317073345184326
F1 of lenny_leonard : 0.336448609828949
F1 of lionel_hutz : 0.0
F1 of lisa_simpson : 0.6090425252914429
F1 of maggie_simpson : 0.0
F1 of marge_simpson : 0.7880299091339111
F1 of martin_prince : 0.0
F1 of mayor_quimby : 0.0
F1 of milhouse_van_houten : 0.5872340202331543