In [None]:
# Download the zipped image dataset from the project's GitHub repository into the working directory.
# NOTE(review): presumably a re-run fetches a second copy instead of reusing dataset.zip -
# consider `wget -nc`; confirm against the wget manual.
!wget https://github.com/Harikrishnan6336/ShipORTruck-ImageClassifier/raw/main/ML_model/dataset.zip

In [None]:
# Extract the archive; later cells expect it to provide dataset/train and dataset/valid.
# NOTE(review): on a re-run the files already exist, so unzip may stop to ask about overwriting -
# consider `unzip -o` (or `-n`); confirm against the unzip manual.
!unzip dataset.zip

In [None]:
from PIL import Image
import torch
import torchvision
from torchvision.transforms import ToTensor
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import os
import pickle

In [None]:
class Dataset:
    """In-memory image dataset read from ``<root>/<dir_name>/<category>/<image>``.

    Every image is opened, converted to a 3-channel RGB tensor with values in
    [0, 1] (``ToTensor``) and resized to 256x256 while the object is being
    constructed, so all samples are held in memory afterwards.

    Args:
        dir_name: Name of the split folder under ``root`` (e.g. "train", "valid").
        max_per_class: Maximum number of images loaded from each category
            folder; ``None`` loads every image. Defaults to 3960.
        root: Folder that contains the split folders. Defaults to "dataset".

    Attributes:
        labels: dict mapping sample index -> integer category id.
        images: dict mapping sample index -> image tensor.

    Raises:
        ValueError: If ``max_per_class`` is negative.
    """

    def __init__(self, dir_name, max_per_class=3960, root="dataset"):
        if max_per_class is not None and max_per_class < 0:
            raise ValueError("max_per_class must be None or a non-negative integer")
        self.dir_name = dir_name
        self.max_per_class = max_per_class
        self.root = root
        self.labels, self.images = self.load_data()

    # To load images and labels for dataloader
    def load_data(self):
        """Read the split from disk and return ``(labels, images)`` dicts keyed by sample index."""
        labels = {}
        images = {}
        # Resize transforms the input image to size (256, 256)
        resize = transforms.Resize((256, 256))
        to_tensor = ToTensor()
        split_dir = os.path.join(self.root, self.dir_name)
        count = 0
        # traversing through the categories/directories (here SHIP and TRUCK) in the split directory.
        # NOTE(review): label ids follow os.listdir() order, which Python documents as arbitrary.
        # The train and valid splits (and any saved label map) only agree if every listing
        # returns the categories in the same order - consider sorting in all places at once.
        for label, category in enumerate(os.listdir(split_dir)):
            category_dir = os.path.join(split_dir, category)
            # Slicing keeps at most max_per_class files; a None bound keeps them all.
            for img_name in os.listdir(category_dir)[:self.max_per_class]:
                # The context manager closes the file handle as soon as the pixels are read.
                with Image.open(os.path.join(category_dir, img_name)) as image:
                    # Force three channels so grayscale/palette/RGBA files yield
                    # tensors of the same shape as ordinary RGB images.
                    tensor = to_tensor(image.convert("RGB"))
                images[count] = resize(tensor)
                labels[count] = label
                count += 1
            # Running total of images loaded so far, printed once per category.
            print(count)
        return labels, images

    def __len__(self):
        """Number of samples loaded."""
        return len(self.labels)

    # To return x,y values in each iteration over dataloader as batches.
    def __getitem__(self, idx):
        """Return the ``(image_tensor, label)`` pair stored under ``idx``."""
        return (
            self.images[idx],
            self.labels[idx],
        )

In [None]:
# Load the "train" split (dataset/train) into memory; Dataset prints a running image count per category.
dataset = Dataset("train")

In [None]:
# Load the "valid" split (dataset/valid) into memory; used only to measure validation loss.
validdataset = Dataset("valid")

In [None]:
# Model Architecture
class Network(nn.Module):
    """Small CNN: four conv -> ReLU -> 2x2 max-pool stages, then three linear layers.

    The classifier head is sized for 48 feature maps of 12x12, which is what the
    conv stack yields for a 3x256x256 input (256 -> 252 -> 126 -> 122 -> 61 ->
    57 -> 28 -> 24 -> 12). The final layer emits 2 raw scores (logits) per sample.
    """

    def __init__(self):
        super().__init__()

        # Convolutional layers (5x5 kernels, no padding, channels doubling each stage)
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        self.conv3 = nn.Conv2d(12, 24, kernel_size=5)
        self.conv4 = nn.Conv2d(24, 48, kernel_size=5)

        # Fully Connected layers
        self.fc1 = nn.Linear(48 * 12 * 12, 240)
        self.fc2 = nn.Linear(240, 120)
        self.out = nn.Linear(120, 2)

    def forward(self, t):
        """Map a batch of image tensors ``t`` to a ``(batch, 2)`` tensor of logits."""
        # Every convolutional stage is conv -> ReLU -> 2x2 max-pool with stride 2.
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            t = F.max_pool2d(F.relu(conv(t)), kernel_size=2, stride=2)

        # Flatten the feature maps to one row per sample for the linear layers.
        t = t.reshape(-1, 48 * 12 * 12)
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))

        # No softmax here: the raw scores are returned.
        return self.out(t)

In [None]:
# Create a fresh, untrained instance of the CNN; train() below updates it in place.
model = Network()

In [None]:
def train(dataset, validdataset, model, num_of_epochs=20, lr=0.001,
          batch_size=64, val_batch_size=32):
    """Train ``model`` in place with Adam + cross-entropy, validating after every epoch.

    After each epoch the mean per-sample validation loss is printed and the
    model's ``state_dict`` is written to ``model_ep<N>.pth`` (N is 1-based) in
    the current working directory. When all epochs are done, a validation
    loss vs. epoch curve is drawn on the current matplotlib axes.

    Args:
        dataset: Map-style training dataset yielding ``(image, label)`` pairs.
        validdataset: Map-style validation dataset with the same sample format.
        model: ``nn.Module`` producing class logits; its parameters are updated.
        num_of_epochs: Number of passes over ``dataset``. Defaults to 20.
        lr: Adam learning rate. Defaults to 0.001.
        batch_size: Training batch size. Defaults to 64.
        val_batch_size: Validation batch size. Defaults to 32.

    Returns:
        None. The model is left in training mode, as it was before this change.
    """
    # dataloader in pytorch to load validation and train dataset.
    # Only the training data is shuffled; order does not affect the mean validation loss.
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    valdataloader = torch.utils.data.DataLoader(validdataset, batch_size=val_batch_size, shuffle=False)

    # Defining the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    epochs = []
    losses = []
    # Epochs are numbered from 1 so the printed/plotted epoch matches the checkpoint file name.
    for epoch in range(1, num_of_epochs + 1):
        # --- training pass ---
        model.train()
        for x, y in dataloader:
            # Sets the gradients of all optimized tensors to zero
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

        # --- validation pass: evaluation mode, no gradient tracking, no optimizer calls ---
        model.eval()
        tot_loss = 0.0
        num_samples = 0
        with torch.no_grad():
            for x, y in valdataloader:
                # Sum the per-sample losses so a short final batch is weighted correctly.
                tot_loss += F.cross_entropy(model(x), y, reduction="sum").item()
                num_samples += len(y)
        # An empty validation set yields NaN instead of a ZeroDivisionError.
        val_loss = tot_loss / num_samples if num_samples else float("nan")

        epochs.append(epoch)
        losses.append(val_loss)
        print("Epoch", epoch, "loss:", val_loss)
        # Save model after each epoch, so that we can choose the best model, later
        torch.save(model.state_dict(), "model_ep" + str(epoch) + ".pth")

    # Hand the model back in training mode, the state callers previously received.
    model.train()

    # Plot a Validation Loss vs Epochs graph
    plt.plot(epochs, losses, color='green', linewidth=3,
             marker='o', markerfacecolor='blue', markersize=8)
    plt.xlabel('epochs ---->', color='m', fontsize='xx-large')
    plt.ylabel('loss ------>', color='m', fontsize='xx-large')
    axes = plt.gca()        # 'gca' - get current axes
    axes.set_facecolor('c')  # 'c' - cyan (color name)
    axes.tick_params(axis='y', which='both', colors='tomato')
    axes.tick_params(axis='x', which='both', colors='#20ff14')
    plt.title("Val Loss vs Epoch", color='m', fontsize='xx-large')

In [None]:
# Run training: prints the validation loss per epoch, writes a model_ep<N>.pth checkpoint
# after every epoch, and plots validation loss against epoch at the end.
train(dataset,validdataset, model)

In [None]:
# Save the class-name -> label-id mapping used to interpret the model's predictions.
# NOTE: despite the ".json" file name, the file is written with pickle, so it must be
# read back with pickle.load (file name and format kept as-is; consumers are not visible here).
# The ids are assigned in os.listdir() order of dataset/train, the same way Dataset numbers
# its categories; that order is not guaranteed to be sorted or stable across machines.
main_dir = os.listdir(os.path.join("dataset","train"))
# A comprehension keeps the loop variables out of the notebook's global namespace
# (the previous loop rebound the builtin `dir` to a folder name).
reference = {name: idx for idx, name in enumerate(main_dir)}
print(reference)
with open('labels.json', 'wb') as label_file:
    pickle.dump(reference, label_file)

In [None]:
# Save the final trained weights (the state_dict only, not the Network object) to "model--.pth".
torch.save(model.state_dict(), "model--.pth")

In [None]:
# prediction function to test
def predict(img_path):
    """Classify the image at ``img_path`` and print the predicted class name.

    Uses the notebook-level ``model`` for inference and ``reference``
    (class name -> label id) to translate the predicted id back to a name.
    The image goes through the same preprocessing as the training data:
    ``ToTensor`` followed by a resize to 256x256.

    Args:
        img_path: Path to an image file readable by PIL.

    Returns:
        None. The class name is printed; nothing is printed if the predicted
        id has no entry in ``reference``.
    """
    resize = transforms.Resize((256, 256))
    # Force three channels so grayscale/palette/RGBA files match the model's
    # 3-channel input; the context manager closes the file once it is read.
    with Image.open(img_path) as image:
        tensor = ToTensor()(image.convert("RGB"))
    batch = resize(tensor).unsqueeze(0)  # add the batch dimension

    # Inference only: evaluation mode and no autograd graph. The previous
    # mode is restored so calling predict() does not disturb later training.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            y_result = model(batch)
    finally:
        model.train(was_training)

    result_idx = y_result.argmax(dim=1).item()
    for key, value in reference.items():
        if value == result_idx:
            # Print the class name (the key); the id alone is not meaningful to the reader.
            print(key)
            break

In [None]:
# Replace the placeholder with the path to a real image file before running this cell.
predict("<IMG_PATH_HERE>")