In [2]:
import torch
import torch.nn as nn
from torchvision.transforms import transforms
import numpy as np
from torch.autograd import Variable
from torchvision.models import squeezenet1_1
import torch.functional as F
from io import open
import os
from PIL import Image
import pathlib
import glob
import cv2

In [4]:
# Dataset root. Set the SCENE_DATA_DIR environment variable to run the notebook
# on another machine; the default keeps the original local layout unchanged.
data_root = os.environ.get(
    'SCENE_DATA_DIR',
    '/home/sadam/Aletheia-AI/Aletheia-AI Developments/Pytorch_Training/intel_image_dataset_training/scene_detection',
)
# The split folders are doubly nested (seg_train/seg_train, seg_pred/seg_pred).
train_path = os.path.join(data_root, 'seg_train', 'seg_train')
pred_path = os.path.join(data_root, 'seg_pred', 'seg_pred')

In [5]:
# categories
# Class labels are the names of the sub-directories of the training folder,
# sorted so that the index -> label mapping is deterministic. Plain files
# (e.g. .DS_Store, notebook checkpoints) are skipped so they cannot shift the
# label indices.
# NOTE(review): the order must match the one used when the checkpoint was
# trained — confirm against the training notebook.
root = pathlib.Path(train_path)
classes = sorted(entry.name for entry in root.iterdir() if entry.is_dir())

In [6]:
# CNN Network

class ConvNet(nn.Module):
    """Small three-convolution CNN classifier for 150x150 RGB images.

    Layer layout (N = batch size), using out = ((w - f + 2P) / s) + 1 for the
    spatial size after each convolution:

        input                     (N,  3, 150, 150)
        conv1 -> bn1 -> relu1     (N, 12, 150, 150)
        pool (2x2 max)            (N, 12,  75,  75)
        conv2 -> relu2            (N, 20,  75,  75)
        conv3 -> bn3 -> relu3     (N, 32,  75,  75)
        flatten -> fc             (N, num_classes)

    Args:
        num_classes: Number of output scores produced by the final linear layer.
    """

    def __init__(self, num_classes=6):
        super(ConvNet, self).__init__()

        # Stage 1: 3 -> 12 channels; kernel 3 / stride 1 / padding 1 keeps H and W.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        self.relu1 = nn.ReLU()

        # Reduce the image size by a factor of 2: 150x150 -> 75x75.
        self.pool = nn.MaxPool2d(kernel_size=2)

        # Stage 2: 12 -> 20 channels (no batch norm on this stage).
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()

        # Stage 3: 20 -> 32 channels.
        self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=32)
        self.relu3 = nn.ReLU()

        # Classifier over the flattened (32, 75, 75) feature map.
        self.fc = nn.Linear(in_features=75 * 75 * 32, out_features=num_classes)

    # Feed Forward function
    def forward(self, input):
        """Compute class scores for a batch of images.

        Args:
            input: Tensor of shape (N, 3, 150, 150).

        Returns:
            Tensor of shape (N, num_classes) with the unnormalized class scores.

        Raises:
            RuntimeError: If the spatial size of ``input`` does not produce the
                75 * 75 * 32 features expected by the linear layer.
        """
        output = self.relu1(self.bn1(self.conv1(input)))
        output = self.pool(output)
        output = self.relu2(self.conv2(output))
        output = self.relu3(self.bn3(self.conv3(output)))

        # The output above is in matrix form, with shape (N, 32, 75, 75).
        # Flatten per sample and keep the batch dimension fixed: a hard-coded
        # view(-1, 32 * 75 * 75) would silently fold a wrong-sized image into
        # extra "samples" instead of failing at the linear layer.
        output = output.view(output.size(0), -1)

        return self.fc(output)
        
        

In [None]:
# Load the trained weights onto the CPU so the notebook also runs on machines
# without CUDA, whatever device the checkpoint was saved from.
# NOTE(review): torch.load unpickles the file — only load checkpoints that come
# from a trusted source.
checkpoint = torch.load('best_checkpoint.model', map_location='cpu')
model = ConvNet(num_classes=6)
model.load_state_dict(checkpoint)
# eval() puts the BatchNorm layers in inference mode (running statistics).
model.eval()

In [None]:
# Preprocessing pipeline applied to every image before it reaches the network.
transformer = transforms.Compose([
    # Bring every image to 150x150.
    transforms.Resize((150, 150)),
    # Convert to a tensor and rescale pixel values from 0-255 to 0-1.
    transforms.ToTensor(),
    # (x - mean) / std per channel: maps 0-1 onto [-1, 1].
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [None]:
# prediction function
def prediction(img_path, transformer):
    """Classify a single image file and return its predicted class label.

    Relies on the module-level ``model`` (the network to run) and ``classes``
    (the list of labels indexed by the network's output position).

    Args:
        img_path: Path of the image file to classify.
        transformer: Callable that converts a PIL image into a (C, H, W) tensor.

    Returns:
        The entry of ``classes`` whose score is the highest for this image.
    """
    # Force three channels: the network's first convolution takes 3 input
    # channels, so grayscale / RGBA files must be converted. The context manager
    # closes the file handle once the pixel data has been copied.
    with Image.open(img_path) as img_file:
        image = img_file.convert('RGB')

    # Add the batch dimension: (C, H, W) -> (1, C, H, W). The BatchNorm2d layers
    # reject a 3D tensor.
    image_tensor = transformer(image).float().unsqueeze(0)

    # Run on whatever device the model lives on. Tensor.cuda() / .to() return a
    # new tensor, so the result has to be assigned to take effect.
    device = next(model.parameters()).device
    image_tensor = image_tensor.to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(image_tensor)

    index = int(output.argmax(dim=1).item())
    pred = classes[index]

    return pred


In [None]:
# Collect every .jpg in the prediction folder. glob returns matches in
# arbitrary order, so sort them to make the run reproducible.
images_path = sorted(glob.glob(os.path.join(pred_path, '*.jpg')))

In [None]:
# Map each image's file name (directory stripped) to its predicted class.
# os.path.basename handles the platform's path separator, unlike slicing on '/'.
pred_dict = {
    os.path.basename(image_file): prediction(image_file, transformer)
    for image_file in images_path
}

In [None]:
# Display the file name -> predicted class mapping built above.
pred_dict