Loading Dataset and Preprocessing

In [32]:
#loading files 

import os
import re
import cv2
from keras.preprocessing.image import img_to_array 

#images 
images = []
#automobile names 
automobile = [] 
#path to image 
images_path = []
#pixel values of images 
images_pixels = []
#label numerical value associated to image 
labels = []
dict = {}

i=0 
path = ["./Bus", "./Car", "./Bicycle", "./Motorcycle"] 
# Bus - 1229 images 
# Car - 3578 images 
# Bicycle - 800 images
# Motorcycle - 101 images
# Total - 5708

for dir_path in path: 
    #check if dir_path exists 
    if os.path.isdir(dir_path): 
        automobile.append(dir_path[2:])
        for img in os.listdir(dir_path): 
            #all images must be png 
            if len(re.findall('.png',img.lower())) != 0: 
                img_path = os.path.join(dir_path,img) 
                images.append(img) 
                images_path.append(img_path) 
                img_pix = cv2.imread(img_path,1) 
                #all images must be 120x120 
                images_pixels.append(cv2.resize(img_pix, (120, 120)))
                labels.append(i) 
    i = i+1 

#check 
print("images: ✅" if len(images) == 5708 else f"images: ❌\nError: {len(images)} =/= 5708") 
print("image paths: ✅" if len(images_path) == 5708 else f"image paths: ❌\nError: {len(images_path)} =/= 5708") 
print("Automobiles: ✅" if len(automobile) == 4 else f"images: ❌\nError: {len(automobile)} =/= 4") 
print("image pixels: ✅" if len(images_pixels) == 5708 else f"image pixels: ❌\nError: {len(images_pixels)} =/= 5708") 
print("Shape of image: ✅" if images_pixels[0].shape == (120, 120, 3) else f"Shape of image: ❌\nError: {images_pixels[0].shape} =/= (120, 120, 3)") 

images: ✅
image paths: ✅
Automobiles: ✅
image pixels: ✅
Shape of image: ✅


In [33]:
import random 

shuffled = list(zip(images_pixels, labels))
random.shuffle(shuffled) 

train_data, labels_data = zip(*shuffled) 

In [34]:
from keras.utils import to_categorical
import numpy as np 

X_data = np.array(train_data)/255 
Y_data = to_categorical(labels_data, num_classes=4)

In [35]:
from sklearn.model_selection import train_test_split 

X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=1)

In [36]:
#cehck shape of X_train, X_test, Y_train, Y_test 
print("X_train shape: ", X_train.shape) 
print("Y_train shape: ", Y_train.shape) 
print("X_test shape: ", X_test.shape) 
print("Y_test shape: ", Y_test.shape)

X_train shape:  (4566, 120, 120, 3)
Y_train shape:  (4566, 4)
X_test shape:  (1142, 120, 120, 3)
Y_test shape:  (1142, 4)


CNN

In [37]:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset 

class CustomDataset(Dataset): 
    def __init__(self, X, Y, transform=None): 
        self.X = X 
        self.Y = Y 
        self.transform = transform

    def __len__(self): 
        return len(self.X) 

    def __getitem__(self, idx): 
        x_sample = self.X[idx]
        y_sample = self.Y[idx]

        if self.transform: 
            x_sample = self.transform(x_sample) 
        
        return x_sample, y_sample 


In [38]:
from torchvision import transforms 

img_transform = transforms.Compose([
    transforms.ToTensor(), 
])

train_dataset = CustomDataset(X_train, Y_train, transform=img_transform)
test_dataset = CustomDataset(X_test, Y_test, transform=img_transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True) 
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

image_check, labels_check = next(iter(train_loader))
print(f"Feature batch shape: {image_check.size()}")
print(f"Labels batch shape: {labels_check.size()}")

Feature batch shape: torch.Size([64, 3, 120, 120])
Labels batch shape: torch.Size([64, 4])


In [39]:
from torch import nn, optim 

class NeuralNetwork(nn.Module): 
    def __init__(self): 
        super().__init__() 
        #creating sequential to utilize for optimizer parameters
        self.cnn_stack = nn.Sequential( 
            # applies a 2D convolution over an input signal composed of several input planes, inchannels=3 since we're using RGB, outchannels=16 since we're using 16 filters 
            # kernel_size=3 since we're using 3x3 filter, padding=1 since we're using 3x3 filter
            # input ex: [64, 3, 100, 100]
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            # applies Batch Normalization over a 4D input (a mini-batch of 3D inputs with additional channel dimension), 
            # since we're directly going from convolutional layer the outchannel = inchannel 
            nn.BatchNorm2d(16), 
            # to introduce non-linearity helping neural networks learn a wide variety of phenomena (converts negative values to 0)
            nn.ReLU(inplace=True), 
            # applies a 2D max pooling over an input signal composed of several input planes to reduce the size of image (since it's 2 we went from 100 -> 100/2)
            nn.MaxPool2d(kernel_size=2, stride=2),
            # applies a ２D convolution over an input signal composed of several input planes, since there's no change in channels the input channel will be 16 and the output will be the same
            nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1),
            # again, converting negative values into 0 
            nn.ReLU(inplace=True), 
            # applies a 2D max pooling over an input signal composed of several input planes (50 -> 50/2) 
            nn.MaxPool2d(kernel_size=2, stride=2),
            # applies linear transformation on the input using its stored weights and biases <-- basically linear transformation for weight*height, label classes 
            # since we utilized 2 MaxPool2d we'll be getting (100/2)/2 and end up with 25*25 along with the channels it'll be 625*16 = 10000 with 9 classes it can map to 
            # input ex: [64, 16, 25, 25] 
            nn.Flatten(),
            nn.Linear(10000, 9) 
        )
    
    def forward(self, x) : 
        #x: [64, 3, 120, 120] 
        #print("x shape: ", x.shape) <-- [64, 3, 120, 120]
        logits = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)(x) 
        #print("logits shape: ", logits.shape) <-- [64, 16, 120, 120]
        logits = nn.BatchNorm2d(16)(logits) 
        #print("logits shape: ", logits.shape) <-- [64, 16, 120, 120]
        logits = nn.ReLU(inplace=True)(logits)
        #print("logits shape: ", logits.shape) <-- [64, 16, 120, 120]
        logits = nn.MaxPool2d(kernel_size=2, stride=2)(logits)
        #print("logits shape: ", logits.shape) <-- [64, 16, 60, 60]
        logits = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)(logits)
        #print("logits shape: ", logits.shape) <-- [64, 16, 60, 60]
        logits = nn.ReLU(inplace=True)(logits)
        #print("logits shape: ", logits.shape) <-- [64, 16, 60, 60]
        logits = nn.MaxPool2d(kernel_size=2, stride=2)(logits)
        #print("logits shape: ", logits.shape) <-- [64, 16, 30, 30]
        #testing if flatten helps -> it works 
        logits = nn.Flatten()(logits)
        #print("logits shape: ", logits.shape) <-- [64, 14400]
        logits = nn.Linear(14400, 4)(logits)
        return logits 
    
model = NeuralNetwork() #testing CrossEntropyLoss

loss_fn = nn.CrossEntropyLoss()

optimizer = optim.NAdam(model.parameters(), lr=0.001) 

model.train() 
for epoch in range(200): 
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad() 
        x_batch = x_batch.float() 
        y_pred = model(x_batch)
        loss = loss_fn(y_pred, y_batch)
        loss.backward() 
        optimizer.step()

#test model 
model.eval()
i = 1
for x_test, y_test in test_loader: 
    x_test = x_test.float()
    y_pred = model(x_test) 
    acc = (y_pred.round() == y_test).float().mean()
    acc = float(acc) 
    print("Accuracy of CrossEntropyLoss batch ", i, ": %.2f%%" % (acc*100)) 
    i += 1

Accuracy of CrossEntropyLoss batch  1 : 73.44%
Accuracy of CrossEntropyLoss batch  2 : 65.62%
Accuracy of CrossEntropyLoss batch  3 : 66.02%
Accuracy of CrossEntropyLoss batch  4 : 75.39%
Accuracy of CrossEntropyLoss batch  5 : 72.27%
Accuracy of CrossEntropyLoss batch  6 : 75.00%
Accuracy of CrossEntropyLoss batch  7 : 74.22%
Accuracy of CrossEntropyLoss batch  8 : 75.00%
Accuracy of CrossEntropyLoss batch  9 : 74.22%
Accuracy of CrossEntropyLoss batch  10 : 68.36%
Accuracy of CrossEntropyLoss batch  11 : 64.84%
Accuracy of CrossEntropyLoss batch  12 : 74.61%
Accuracy of CrossEntropyLoss batch  13 : 73.44%
Accuracy of CrossEntropyLoss batch  14 : 52.34%
Accuracy of CrossEntropyLoss batch  15 : 73.83%
Accuracy of CrossEntropyLoss batch  16 : 75.00%
Accuracy of CrossEntropyLoss batch  17 : 60.16%
Accuracy of CrossEntropyLoss batch  18 : 64.81%


In [40]:
model = NeuralNetwork() #testing HingeEmbeddingLoss

loss_fn = nn.HingeEmbeddingLoss()

optimizer = optim.NAdam(model.parameters(), lr=0.001) 

model.train() 
for epoch in range(200): 
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad() 
        x_batch = x_batch.float() 
        y_pred = model(x_batch)
        loss = loss_fn(y_pred, y_batch)
        loss.backward() 
        optimizer.step() 

#test model 
model.eval()
i = 1
for x_test, y_test in test_loader: 
    x_test = x_test.float()
    y_pred = model(x_test) 
    acc = (y_pred.round() == y_test).float().mean()
    acc = float(acc) 
    print("Accuracy of HingeEntropyLoss batch ", i, ": %.2f%%" % (acc*100)) 
    i += 1

Accuracy of HingeEntropyLoss batch  1 : 74.61%
Accuracy of HingeEntropyLoss batch  2 : 52.73%
Accuracy of HingeEntropyLoss batch  3 : 76.17%
Accuracy of HingeEntropyLoss batch  4 : 64.06%
Accuracy of HingeEntropyLoss batch  5 : 74.61%
Accuracy of HingeEntropyLoss batch  6 : 74.61%
Accuracy of HingeEntropyLoss batch  7 : 73.83%
Accuracy of HingeEntropyLoss batch  8 : 72.27%
Accuracy of HingeEntropyLoss batch  9 : 74.61%
Accuracy of HingeEntropyLoss batch  10 : 74.22%
Accuracy of HingeEntropyLoss batch  11 : 75.00%
Accuracy of HingeEntropyLoss batch  12 : 75.00%
Accuracy of HingeEntropyLoss batch  13 : 74.61%
Accuracy of HingeEntropyLoss batch  14 : 73.44%
Accuracy of HingeEntropyLoss batch  15 : 73.83%
Accuracy of HingeEntropyLoss batch  16 : 73.83%
Accuracy of HingeEntropyLoss batch  17 : 74.22%
Accuracy of HingeEntropyLoss batch  18 : 56.94%
