In [41]:
#Library imports
import os
import pandas as pd
import numpy as np

import torch
import torch.nn as nn  
import torch.optim as optim  
import torchvision.transforms as transforms  
import torchvision
from torch.utils.data import (Dataset,DataLoader) 

from PIL import Image
from skimage import io

import time

In [42]:
# Show the working directory; the dataset paths used later are relative to it.
directory = os.getcwd()
print(directory)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f" {device} being used")

C:\Users\Andreas\Desktop\AMLS_22-23_SN18086046
 cpu being used


In [43]:
# Training configuration shared by the cells below.
batch_size = 200      # samples per mini-batch (a very large batch size can exhaust memory)
learning_rate = 0.01  # NOTE(review): the optimizer cell passes lr=0.001 as a literal - confirm which value is intended
num_classes = 5       # five types of face shapes / eye colours
num_epochs = 5        # complete passes over the data set

In [44]:
class cartoonDataset(Dataset):
    """Image-classification dataset described by a CSV annotation table.

    Column 0 of the CSV is joined onto ``root_dir`` to locate the image file
    and column 1 is converted to an integer class label.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """Read the annotation table and remember where the images live.

        Args:
            csv_file: Path of the CSV file, read with ``pd.read_csv``.
            root_dir: Directory the file names in column 0 are relative to.
            transform: Optional callable applied to every loaded image.
        """
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load the sample at row ``index`` (the DataLoader picks the index).

        Returns:
            Tuple ``(image, label)``: the image as returned by ``io.imread``
            (after ``transform`` when one was given) and the label as a
            0-dimensional ``torch.long`` tensor.
        """
        # Row `index`, column 0 holds the image file name.
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)

        # Row `index`, column 1 holds the class label; build it as int64 directly.
        label = torch.tensor(int(self.annotations.iloc[index, 1]), dtype=torch.long)

        if self.transform:
            image = self.transform(image)
        return (image, label)

In [49]:
# Reduce the image size before batching
tf = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((250, 250)),  # from [500,500]
    transforms.ToTensor()
])
dataset = cartoonDataset(csv_file='dataset_AMLS_22-23/cartoon_set/face_shape_labels.csv',
                         root_dir='dataset_AMLS_22-23/cartoon_set/img', transform=tf)

# Seeded generator so the random subset and the train/validation split are
# the same on every Restart & Run All.
split_generator = torch.Generator().manual_seed(0)

# Keep a random 5000-image subset, then split it into training and validation sets
partial_dataset, discard_dataset = torch.utils.data.random_split(
    dataset, [5000, 5000], generator=split_generator)
train_dataset, validation_dataset = torch.utils.data.random_split(
    partial_dataset, [4000, 1000], generator=split_generator)

# Only the training loader is shuffled, so batches are not always drawn in the
# same order; the batch size comes from the configuration cell.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(dataset=validation_dataset, batch_size=batch_size, shuffle=False)

print('Train dataset size:', len(train_dataset)) #must give 4000
print('Validation dataset size:',len(validation_dataset)) #must give 1000
print(batch_size)

Train dataset size: 4000
Validation dataset size: 1000
200


In [50]:
#CNN model
class convNet(nn.Module):
    """Small three-convolution CNN mapping an image batch to class scores.

    Layer order: conv1 -> bn1 -> ReLU -> 2x2 max-pool -> conv2 -> ReLU ->
    conv3 -> bn3 -> ReLU -> flatten -> fc.

    Args:
        num_classes: Number of scores produced per image.
        in_channels: Channel count of the input images. Defaults to 4, the
            value the first convolution previously hard-coded (presumably
            RGBA images - TODO confirm against the data).
        image_size: Input height/width, either an int for square images or a
            ``(height, width)`` pair. Defaults to 250, the size the fully
            connected layer was previously dimensioned for (125*125*32).
    """

    def __init__(self, num_classes=5, in_channels=4, image_size=250):
        super().__init__()

        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        # Input shape: (batch, in_channels, height, width).
        # Every convolution uses kernel 3 / stride 1 / padding 1, so from
        # (size - kernel + 2*padding) / stride + 1 the spatial size is kept.
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=12, kernel_size=3, stride=1, padding=1)
        # Shape: (batch, 12, height, width)
        self.bn1 = nn.BatchNorm2d(num_features=12)  # normalisation, shape unchanged
        self.relu1 = nn.ReLU()

        # The only layer that changes the spatial size: halves it (rounding down).
        self.pool = nn.MaxPool2d(kernel_size=2)
        pooled_height, pooled_width = height // 2, width // 2
        # Shape: (batch, 12, height//2, width//2)

        # Second convolutional layer
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        # Shape: (batch, 20, height//2, width//2)
        self.relu2 = nn.ReLU()

        # Third convolutional layer
        self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        # Shape: (batch, 32, height//2, width//2)
        self.bn3 = nn.BatchNorm2d(num_features=32)
        self.relu3 = nn.ReLU()

        # Fully connected layer over the flattened feature map
        self.flat_features = 32 * pooled_height * pooled_width
        self.fc = nn.Linear(in_features=self.flat_features, out_features=num_classes)

    def forward(self, input):
        """Return class scores of shape ``(batch, num_classes)`` for ``input``.

        ``input`` must have shape ``(batch, in_channels, height, width)`` with
        the height and width the network was built for.
        """
        output = self.relu1(self.bn1(self.conv1(input)))
        output = self.pool(output)
        output = self.relu2(self.conv2(output))
        output = self.relu3(self.bn3(self.conv3(output)))

        # Flatten everything except the batch dimension. Unlike view(-1, N),
        # this can never fold a wrongly sized image into extra batch rows; a
        # size mismatch surfaces as a shape error in the linear layer instead.
        output = torch.flatten(output, start_dim=1)

        return self.fc(output)

In [51]:
# Build the network with the configured class count and send it to cuda/cpu
model = convNet(num_classes=num_classes).to(device)

# Loss and optimizer functions.
# lr stays at the literal 0.001 this cell has always used; the `learning_rate`
# constant from the configuration cell holds a different value (0.01), so
# substituting it here would change training.
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
criterion = nn.CrossEntropyLoss()

In [52]:
# Train the network, reporting the mean batch loss after every epoch
start_time = time.time()  # reference point for the timing printed at the end of this cell

for epoch in range(num_epochs):
    losses = []  # loss of every batch seen in this epoch

    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move the batch to the selected device (cuda if possible)
        data, targets = data.to(device=device), targets.to(device=device)

        # Forward pass: class scores and their loss against the labels
        output = model(data)
        loss = criterion(output, targets)
        losses.append(loss.item())

        # Backward pass: clear old gradients, backpropagate, take an Adam step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    mean_loss = sum(losses) / len(losses)
    print(f"Cost at epoch {epoch} is {mean_loss}")

# Check accuracy on training to see how good our model is
def check_accuracy(loader, model):
    """Print how many samples from ``loader`` the model classifies correctly.

    The model is put in evaluation mode for the pass and afterwards returned
    to whichever mode (train/eval) it was in when the function was called.
    An empty loader is reported as 0 / 0 with 0.00% accuracy.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches.
        model: Module whose output holds one score per class along dimension 1.
    """
    num_correct = 0
    num_total = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in loader:
                # Both tensors must be on the model's device before comparing.
                images = images.to(device=device)
                labels = labels.to(device=device)

                output = model(images)
                _, predictions = output.max(1)  # index of the highest score per sample
                num_correct += (predictions == labels).sum().item()
                num_total += predictions.size(0)
    finally:
        # Restore the caller's mode even if a batch raised.
        model.train(was_training)

    accuracy = float(num_correct) / float(num_total) * 100 if num_total else 0.0
    print( f" {num_correct} / {num_total} correct images with accuracy {accuracy:.2f}%")


# Score the trained model on both splits, then report the wall-clock time
for heading, loader in (("Training set accuracy....", train_loader),
                        ("Validation set accuracy....", validation_loader)):
    print(heading)
    check_accuracy(loader, model)

elapsed_time = time.time()  # end timestamp; the duration is the difference printed below
print('Time taken:', (elapsed_time - start_time), 'seconds')

Cost at epoch 0 is 46.87455826997757
Cost at epoch 1 is 2.733470377326012
Cost at epoch 2 is 0.3335039782919921
Cost at epoch 3 is 0.07996633845614269
Cost at epoch 4 is 0.028385126073385437
Training set accuracy....
 3989 / 4000 correct images with accuracy 99.72%
Validation set accuracy....
 993 / 1000 correct images with accuracy 99.30%
Time taken: 1484.8630084991455 seconds
