In [1]:
pip install opencv-python

Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
import torch.nn as nn
from torchvision.transforms import transforms
import numpy as np
from torch.autograd import Variable
from torchvision.models import squeezenet1_1
import torch.functional as f
from io import open 
import os
from PIL import Image
import pathlib
import glob
import cv2

In [3]:
pip install --upgrade numpy


Note: you may need to restart the kernel to use updated packages.


In [4]:
# Dataset locations. Each one can be overridden with an environment variable so the
# notebook can run on another machine without editing this cell; the defaults are the
# original hard-coded folders.
train_path = os.environ.get(
    'SCENE_TRAIN_PATH',
    '/Users/gourabsaha/Desktop/scene_detection/seg_train/seg_train',
)
pred_path = os.environ.get(
    'SCENE_PRED_PATH',
    '/Users/gourabsaha/Desktop/scene_detection/seg_pred/seg_pred',
)


In [5]:
# Class labels: the names of the sub-directories of the training folder, sorted so that
# a label's position in the list is stable between runs (prediction() indexes this list
# with the position of the model's highest score).
# Only directories are kept: a stray file such as macOS's ".DS_Store" would otherwise be
# listed as a class and shift every label index.
root = pathlib.Path(train_path)
classes = sorted(entry.name for entry in root.iterdir() if entry.is_dir())

In [6]:
#CNN Network 
class ConvNet(nn.Module):
    """Small three-convolution CNN producing class scores for 150x150 RGB images.

    Layer order: conv1 -> bn1 -> ReLU -> 2x2 max-pool -> conv2 -> ReLU
    -> conv3 -> bn3 -> ReLU -> flatten -> fc.

    Every convolution uses a 3x3 kernel with stride 1 and padding 1, so by
    out = (w - f + 2p) / s + 1 it keeps the spatial size; only the pooling
    layer changes it (150 -> 75). The linear layer is sized for 32 * 75 * 75
    features, i.e. for 150x150 inputs.
    """

    def __init__(self, num_classes=6):
        """Build the layers; ``num_classes`` is the number of output scores."""
        super().__init__()

        # Stage 1: (N, 3, 150, 150) -> (N, 12, 150, 150)
        self.conv1 = nn.Conv2d(3, 12, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(12)
        self.relu1 = nn.ReLU()

        # Down-sampling by a factor of 2: (N, 12, 150, 150) -> (N, 12, 75, 75)
        self.pool = nn.MaxPool2d(2)

        # Stage 2 (no batch norm): (N, 12, 75, 75) -> (N, 20, 75, 75)
        self.conv2 = nn.Conv2d(12, 20, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()

        # Stage 3: (N, 20, 75, 75) -> (N, 32, 75, 75)
        self.conv3 = nn.Conv2d(20, 32, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(32)
        self.relu3 = nn.ReLU()

        # Classifier over the flattened feature maps.
        self.fc = nn.Linear(32 * 75 * 75, num_classes)

    def forward(self, input):
        """Run the network on a batch and return one row of raw class scores per image."""
        features = self.relu1(self.bn1(self.conv1(input)))
        features = self.pool(features)
        features = self.relu2(self.conv2(features))
        features = self.relu3(self.bn3(self.conv3(features)))

        # Flatten each (32, 75, 75) feature volume into a single vector for the linear layer.
        flattened = features.view(-1, 32 * 75 * 75)
        return self.fc(flattened)

In [7]:
# Rebuild the network and restore its trained weights for inference.
# NOTE(security): torch.load unpickles the file, so only load checkpoints you trust.
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a CPU-only
# machine; the model below is created on the CPU, so its weights end up there either way.
checkpoint=torch.load('best_checkpoint.model', map_location='cpu')
model=ConvNet(num_classes=6)
model.load_state_dict(checkpoint)
# Switch to evaluation mode (BatchNorm then uses its running statistics). eval() returns
# the module, so as the last expression of the cell it also displays the architecture.
model.eval()

ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=180000, out_features=6, bias=True)
)

In [8]:
# Preprocessing pipeline applied to every image before it is passed to the model.
transformer = transforms.Compose([
    # Bring every image to 150x150 pixels.
    transforms.Resize((150, 150)),
    # Convert the image to a tensor, rescaling pixel values from 0-255 to 0-1.
    transforms.ToTensor(),
    # Per channel (x - mean) / std; with mean = std = 0.5 this maps 0-1 onto -1..1.
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [13]:
#prediction function
def prediction(img_path,transformer):
    """Classify one image file and return the name of its predicted class.

    Uses the notebook-level ``model`` (the network) and ``classes`` (list of
    label names) defined in earlier cells.

    Args:
        img_path: Path of the image file to classify.
        transformer: Callable applied to the opened PIL image; it must return
            an image tensor without a batch dimension.

    Returns:
        The element of ``classes`` at the index of the model's highest score.
    """
    # Convert to RGB so grayscale / palette / RGBA files still provide the three
    # channels the network expects. The with-block closes the file handle.
    with Image.open(img_path) as image:
        image_tensor = transformer(image.convert('RGB')).float()

    # The model works on batches, so add a leading batch dimension of size 1.
    image_tensor = image_tensor.unsqueeze(0)

    # Run on whichever device holds the model's weights. The previous
    # `image_tensor.cuda()` call discarded its result (Tensor.cuda() is not
    # in-place), so it never moved anything; following the model's device keeps
    # the input and the weights together on both CPU and GPU setups.
    device = next(model.parameters()).device
    image_tensor = image_tensor.to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(image_tensor)

    # Position of the highest score for the single image in the batch.
    index = int(output.argmax(dim=1).item())

    return classes[index]
    

In [14]:
# Collect the paths of all .jpg files in the prediction folder. glob returns them in
# arbitrary order, so they are sorted to make the processing order reproducible.
images_path = sorted(glob.glob(os.path.join(pred_path, '*.jpg')))

In [15]:
# Map each image's file name (key) to its predicted class (value).
# os.path.basename copes with the platform's path separator, unlike slicing the
# string after the last '/'.
pred_dict = {
    os.path.basename(image_file): prediction(image_file, transformer)
    for image_file in images_path
}

In [21]:
# Display the complete file name -> predicted class mapping as this cell's output.
pred_dict

{'63.jpg': 'sea',
 '24084.jpg': 'mountain',
 '13628.jpg': 'glacier',
 '3578.jpg': 'forest',
 '10321.jpg': 'buildings',
 '10447.jpg': 'forest',
 '12250.jpg': 'glacier',
 '1409.jpg': 'forest',
 '23099.jpg': 'sea',
 '9041.jpg': 'mountain',
 '11995.jpg': 'buildings',
 '4203.jpg': 'buildings',
 '9727.jpg': 'buildings',
 '77.jpg': 'forest',
 '24090.jpg': 'mountain',
 '837.jpg': 'street',
 '3544.jpg': 'sea',
 '13614.jpg': 'glacier',
 '7044.jpg': 'mountain',
 '17300.jpg': 'glacier',
 '18033.jpg': 'buildings',
 '5647.jpg': 'mountain',
 '2128.jpg': 'buildings',
 '11771.jpg': 'glacier',
 '4559.jpg': 'forest',
 '7050.jpg': 'buildings',
 '12278.jpg': 'street',
 '1347.jpg': 'forest',
 '7736.jpg': 'buildings',
 '8405.jpg': 'forest',
 '3550.jpg': 'street',
 '11017.jpg': 'sea',
 '18966.jpg': 'mountain',
 '638.jpg': 'buildings',
 '7905.jpg': 'mountain',
 '12293.jpg': 'street',
 '11942.jpg': 'sea',
 '20547.jpg': 'forest',
 '10484.jpg': 'mountain',
 '13199.jpg': 'mountain',
 '9082.jpg': 'buildings',
 '223