In [1]:
import torch
import torch.nn as nn
from torchvision.transforms import transforms
import numpy as np
from torch.autograd import Variable
from torchvision.models import squeezenet1_1
import torch.functional as F
from io import open
import os
from PIL import Image
import pathlib
import glob
import cv2

In [2]:
# Dataset locations. The dataset root can be overridden through the
# SCENE_DATA_DIR environment variable so the notebook is not tied to one
# machine; when the variable is unset the original local path is used.
data_root = os.environ.get(
    'SCENE_DATA_DIR',
    '/home/sadam/Aletheia-AI/Aletheia-AI Developments/Pytorch_Training/intel_image_dataset_training/scene_detection',
)
# Training images: one sub-folder per class.
train_path = os.path.join(data_root, 'seg_train', 'seg_train')
# Unlabelled images to run predictions on.
pred_path = os.path.join(data_root, 'seg_pred', 'seg_pred')

In [3]:
# Categories: every sub-directory of the training folder is one class.
# Only directories are kept so that stray files (e.g. hidden OS files) cannot
# enter the list, and the names are sorted so that a position in `classes`
# is a stable class index.
root = pathlib.Path(train_path)
classes = sorted(entry.name for entry in root.iterdir() if entry.is_dir())

In [4]:
# CNN Network

class ConvNet(nn.Module):
    """Small three-convolution CNN classifier for 150x150 RGB images.

    The attribute names (conv1, bn1, ..., fc) determine the state_dict keys,
    so they must not be renamed if existing checkpoints are to keep loading.

    Args:
        num_classes: Number of scores produced by the final linear layer.
    """

    def __init__(self, num_classes=6):
        super(ConvNet, self).__init__()
        # Spatial size after a convolution: ((W - F + 2P) / S) + 1.
        # With kernel 3, stride 1 and padding 1, every convolution below keeps
        # the height and width unchanged.
        # Shapes in the comments assume an input of (N, 3, 150, 150).

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        # Shape = (N, 12, 150, 150)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        # Shape = (N, 12, 150, 150)
        self.relu1 = nn.ReLU()
        # Shape = (N, 12, 150, 150)

        # Halves the height and width.
        self.pool = nn.MaxPool2d(kernel_size=2)
        # Shape = (N, 12, 75, 75)

        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        # Shape = (N, 20, 75, 75)
        self.relu2 = nn.ReLU()
        # Shape = (N, 20, 75, 75)

        self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        # Shape = (N, 32, 75, 75)
        self.bn3 = nn.BatchNorm2d(num_features=32)
        # Shape = (N, 32, 75, 75)
        self.relu3 = nn.ReLU()
        # Shape = (N, 32, 75, 75)

        # The classifier consumes the flattened 32 x 75 x 75 feature map.
        self.fc = nn.Linear(in_features=75 * 75 * 32, out_features=num_classes)

    # Feed-forward function
    def forward(self, input):
        """Run the network on a batch and return the raw class scores.

        Args:
            input: Image batch of shape (N, 3, 150, 150).

        Returns:
            Tensor of shape (N, num_classes); the output of the final linear
            layer with no softmax applied.

        Raises:
            RuntimeError: If the spatial size of ``input`` does not produce
                the 32 * 75 * 75 features that ``fc`` expects.
        """
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)

        output = self.pool(output)

        output = self.conv2(output)
        output = self.relu2(output)

        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)

        # The output above is a feature map of shape (N, 32, 75, 75).
        # Flatten per sample while keeping the batch dimension fixed. Using
        # view(-1, 32 * 75 * 75) instead would silently fold surplus spatial
        # data into the batch dimension for inputs of the wrong size.
        output = output.view(output.size(0), -1)

        output = self.fc(output)
        return output
        
        

In [5]:
# Load the trained weights. map_location='cpu' remaps tensors that were saved
# from a GPU so the checkpoint also loads on machines without CUDA.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
checkpoint = torch.load('best_checkpoint.model', map_location='cpu')
model = ConvNet(num_classes=6)
model.load_state_dict(checkpoint)
# Switch to inference mode (BatchNorm uses its running statistics). As the
# last expression of the cell this also echoes the architecture.
model.eval()

ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=180000, out_features=6, bias=True)
)

In [6]:
#Transforms
# Preprocessing pipeline applied to every image before it reaches the network.
transformer = transforms.Compose([
    # Resize to 150x150.
    transforms.Resize(size=(150, 150)),
    # Convert to a tensor and rescale pixel values from 0-255 to 0-1.
    transforms.ToTensor(),
    # (x - mean) / std per channel: maps the 0-1 range onto [-1, 1].
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [10]:
# prediction function
def prediction(img_path, transformer):
    """Classify a single image file and return its predicted class name.

    Relies on two notebook-level globals: ``model`` (the network to run) and
    ``classes`` (list mapping an output index to a class name).

    Args:
        img_path: Path of the image file to open with PIL.
        transformer: Callable that converts a PIL image into the tensor for
            one image (without a batch dimension).

    Returns:
        The entry of ``classes`` at the index of the highest model output.
    """
    # Use a context manager so the file handle is closed promptly. Converting
    # to RGB lets grayscale / RGBA files pass through a three-channel
    # normalisation instead of failing on a channel-count mismatch.
    with Image.open(img_path) as image:
        image_tensor = transformer(image.convert('RGB')).float()

    # Add the leading batch dimension: the model expects a batch of images.
    batch = image_tensor.unsqueeze(0)

    # Tensor.cuda() returns a copy and does not move the tensor in place, so
    # the result has to be assigned. Follow whatever device the model lives on
    # rather than assuming CUDA, so input and weights never end up mismatched.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        batch = batch.to(first_param.device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(batch)

    # Index of the largest score for the single image in the batch.
    index = int(output.argmax(dim=1)[0])
    return classes[index]

In [11]:
# Collect every .jpg file located directly inside the prediction folder.
jpg_pattern = '{}/*.jpg'.format(pred_path)
images_path = glob.glob(jpg_pattern)

In [12]:
# Map each image's file name (the text after the last '/') to the class
# predicted for it.
pred_dict = {
    img_file.rsplit('/', 1)[-1]: prediction(img_file, transformer)
    for img_file in images_path
}

In [13]:
# Echo the file-name -> predicted-class mapping as the cell output.
pred_dict

{'563.jpg': 'mountain',
 '13918.jpg': 'forest',
 '3714.jpg': 'glacier',
 '15832.jpg': 'mountain',
 '13342.jpg': 'street',
 '13447.jpg': 'sea',
 '8157.jpg': 'glacier',
 '4045.jpg': 'forest',
 '6701.jpg': 'forest',
 '14496.jpg': 'sea',
 '19902.jpg': 'street',
 '7739.jpg': 'glacier',
 '3906.jpg': 'mountain',
 '7283.jpg': 'street',
 '4011.jpg': 'glacier',
 '14650.jpg': 'buildings',
 '9845.jpg': 'street',
 '20919.jpg': 'sea',
 '21005.jpg': 'glacier',
 '16822.jpg': 'mountain',
 '12201.jpg': 'mountain',
 '7104.jpg': 'buildings',
 '7577.jpg': 'buildings',
 '3673.jpg': 'buildings',
 '18624.jpg': 'forest',
 '3171.jpg': 'street',
 '20248.jpg': 'buildings',
 '15177.jpg': 'mountain',
 '11849.jpg': 'buildings',
 '9039.jpg': 'street',
 '21282.jpg': 'glacier',
 '14405.jpg': 'street',
 '6774.jpg': 'mountain',
 '13585.jpg': 'sea',
 '22781.jpg': 'street',
 '5179.jpg': 'street',
 '22497.jpg': 'forest',
 '6010.jpg': 'mountain',
 '21122.jpg': 'sea',
 '21112.jpg': 'buildings',
 '948.jpg': 'glacier',
 '18970.