In [1]:
#Load libraries
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

In [2]:
#checking for device
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
device

device(type='cpu')

In [4]:
# data preprocessing..just to check we are usig images all of same size
#Transforms...
transformer=transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [5]:
## in pytorch we feed data in the foerm of data loader...to feed it in the batches
## we can also make batches for test images
#Dataloader

#Path for training and testing directory
train_path=r'C:\Users\nitin\scene\seg_train\seg_train'
test_path=r'C:\Users\nitin\scene\seg_test\seg_test'

train_loader=DataLoader(
    torchvision.datasets.ImageFolder(train_path,transform=transformer),
    batch_size=64, shuffle=True
)
test_loader=DataLoader(
    torchvision.datasets.ImageFolder(test_path,transform=transformer),
    batch_size=32, shuffle=True
)

In [6]:
#categories.. to print classes
root=pathlib.Path(train_path)
classes=sorted([j.name.split('/')[-1] for j in root.iterdir()])

In [7]:
print(classes)

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']


In [8]:
#CNN Network....


class ConvNet(nn.Module):
    def __init__(self,num_classes=6):
        super(ConvNet,self).__init__()
        
        #Output size after convolution filter
        #((w-f+2P)/s) +1
        
        #Input shape= (256,3,150,150)..256 batch size...3..is RGB
        
        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        #Shape= (256,12,150,150)...again output 150....down is batc norm
        self.bn1=nn.BatchNorm2d(num_features=12)
        #Shape= (256,12,150,150)
        self.relu1=nn.ReLU()    # relu added for non linearity
        #Shape= (256,12,150,150)
        
        self.pool=nn.MaxPool2d(kernel_size=2)
        #Reduce the image size be factor 2
        #Shape= (256,12,75,75)
        
        
        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        #Shape= (256,20,75,75)
        self.relu2=nn.ReLU()
        #Shape= (256,20,75,75)
        
        
        
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        #Shape= (256,32,75,75)
        self.bn3=nn.BatchNorm2d(num_features=32)
        #Shape= (256,32,75,75)
        self.relu3=nn.ReLU()
        #Shape= (256,32,75,75)
        
        
        self.fc=nn.Linear(in_features=75 * 75 * 32,out_features=num_classes)
        
        
        
        #Feed forwad function
        
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
            
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
            
            
            #Above output will be in matrix form, with shape (256,32,75,75)
            
        output=output.view(-1,32*75*75)  # to feed the matrix to nn.linear change it size to 32*75*75
            
            
        output=self.fc(output)
            
        return output
            
        

In [9]:
model=ConvNet(num_classes=6).to(device)

In [10]:
#Optmizer and loss function
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [11]:
num_epochs=2

In [12]:
#calculating the size of training and testing images
train_count=len(glob.glob(train_path+'/**/*.jpg'))
test_count=len(glob.glob(test_path+'/**/*.jpg'))

In [13]:
print(train_count,test_count)

2253 1405


In [14]:
#Model training and saving best model

best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()   ## set to trainig mode..like dropout is actiated..normalization is activated
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()   # we dont want gradient to mix up between zero batches
        
        outputs=model(images)   ## it will give predection 
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
    
    test_accuracy=test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    #Save the best model
    if test_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy=test_accuracy
    
       

Epoch: 0 Train Loss: tensor(12.7970) Train Accuracy: 0.465601420328451 Test Accuracy: 0.31743772241992885
Epoch: 1 Train Loss: tensor(4.1796) Train Accuracy: 0.6054150022192633 Test Accuracy: 0.6206405693950178


In [16]:
## after training we have to load the saved model...


In [17]:
import torch
import torch.nn as nn
from torchvision.transforms import transforms
import numpy as np
from torch.autograd import Variable
from torchvision.models import squeezenet1_1
import torch.functional as F
from io import open
import os
from PIL import Image
import pathlib
import glob
import cv2

In [19]:
train_path=r'C:\Users\nitin\scene\seg_train\seg_train'
pred_path=r'C:\Users\nitin\scene\seg_pred\seg_pred'

In [20]:
checkpoint=torch.load('best_checkpoint.model')
model=ConvNet(num_classes=6)
model.load_state_dict(checkpoint)
model.eval()  ## model eveluation wil set pytorch to evaluation mode

ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=180000, out_features=6, bias=True)
)

In [21]:
#Transforms..of prdection image
transformer=transforms.Compose([
    transforms.Resize((150,150)),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [22]:
## for image opening..PIL package is used
#prediction function
def prediction(img_path,transformer):
    
    image=Image.open(img_path)    ## using pillow library
    
    image_tensor=transformer(image).float()
    
    
    image_tensor=image_tensor.unsqueeze_(0)   ## to maintain batch size for predection
    
    if torch.cuda.is_available():
        image_tensor.cuda()
        
    input=Variable(image_tensor)     ## tensor converted to vaiable
    
    
    output=model(input)
    
    index=output.data.numpy().argmax()
    
    pred=classes[index]
    
    return pred

In [24]:
## path for all predected images
images_path=glob.glob(pred_path+'/*.jpg')

In [25]:
images_path

['C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10004.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10005.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10012.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10013.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10017.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10021.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\1003.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10034.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10038.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10040.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10043.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10045.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10047.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10048.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10052.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10054.jpg',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10059.jpg'

In [26]:
pred_dict={}

for i in images_path:
    pred_dict[i[i.rfind('/')+1:]]=prediction(i,transformer)

In [27]:
pred_dict

{'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10004.jpg': 'street',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10005.jpg': 'mountain',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10012.jpg': 'street',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10013.jpg': 'mountain',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10017.jpg': 'glacier',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10021.jpg': 'forest',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\1003.jpg': 'sea',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10034.jpg': 'glacier',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10038.jpg': 'mountain',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10040.jpg': 'street',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10043.jpg': 'sea',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10045.jpg': 'buildings',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10047.jpg': 'glacier',
 'C:\\Users\\nitin\\scene\\seg_pred\\seg_pred\\10048.jpg': 'buildings',
 'C:\\Users\\nitin\\scene\