In [1]:
import os
import numpy as np
import glob
import torch
import torchvision
import torchvision.transforms as transform
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import pathlib


In [2]:
# Report whether a CUDA device is usable; later cells move the model and batches to the GPU.
torch.cuda.is_available()

True

In [3]:
# Training-time preprocessing pipeline, applied to every image by the loaders.
transformer=transform.Compose([
    # Bring every image to a fixed 256x256 resolution.
    transform.Resize(size=(256,256)),
    # Augmentation: mirror the image left/right at random.
    transform.RandomHorizontalFlip(),
    # PIL image -> float tensor with values in [0,1].
    transform.ToTensor(),
    # (x-mean)/std per channel, i.e. [0,1] -> [-1,1].
    transform.Normalize(mean=[0.5,0.5,0.5],std=[0.5,0.5,0.5]),
])

In [4]:
# Dataset roots; each holds one sub-directory per class (ImageFolder layout).
train_path='seg_train/seg_train'
test_path='seg_test/seg_test'

# Evaluation preprocessing: same resize/normalisation as `transformer`, but
# without the random flip, so test accuracy is deterministic between runs.
eval_transformer=transform.Compose([
    transform.Resize((256,256)),
    transform.ToTensor(),
    transform.Normalize([0.5,0.5,0.5],
                        [0.5,0.5,0.5])
])

# Training batches are reshuffled every epoch and use the augmenting transform.
trainloader=DataLoader(
    torchvision.datasets.ImageFolder(train_path,transform=transformer),
    batch_size=64,shuffle=True
)
# Test batches need no shuffling; order does not affect the accuracy sum.
testloader=DataLoader(
    torchvision.datasets.ImageFolder(test_path,transform=eval_transformer),
    batch_size=64,shuffle=False
)

In [5]:
root=pathlib.Path(train_path)
# One class per sub-directory, sorted by name. Stray files (e.g. .DS_Store) are
# skipped so that list positions line up with the directory-derived class indices.
classes=sorted(entry.name for entry in root.iterdir() if entry.is_dir())
classes

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']

In [6]:
# Pull a single batch from the training loader to sanity-check types and shapes.
dataiter=iter(trainloader)
images,labels=next(dataiter)
print(type(images),images.shape,labels.shape,sep='\n')

<class 'torch.Tensor'>
torch.Size([64, 3, 256, 256])
torch.Size([64])


In [7]:
class ConvNet(nn.Module):
    """Small four-convolution CNN that maps RGB images to class logits.

    Layout: conv1-bn1-relu -> maxpool(2) -> conv2-relu -> conv3-relu
    -> maxpool(2) -> conv4-bn4-relu -> flatten -> linear.

    Every convolution uses a 3x3 kernel with stride 1 and padding 1, so
    ((w-f+2p)/s)+1 == w: the convolutions keep H and W, and only the two
    2x2 max-pools shrink them (by a factor of 2 each, 4 overall).

    Args:
        num_classes: number of output logits.
        input_size: side length S of the square input images. Defaults to
            256, which yields the original 32*64*64 flattened features.

    Raises:
        ValueError: if ``input_size`` is smaller than 4 (nothing would be
            left after the two pooling stages).
    """

    def __init__(self,num_classes=6,input_size=256):
        super().__init__()
        if input_size<4:
            raise ValueError('input_size must be at least 4, got '+str(input_size))

        # Input shape: (N,3,S,S)
        self.conv1=nn.Conv2d(in_channels=3,out_channels=20,kernel_size=3,stride=1,padding=1)
        self.bn1=nn.BatchNorm2d(num_features=20)
        self.relu1=nn.ReLU()
        # shape=(N,20,S,S)
        self.pool=nn.MaxPool2d(kernel_size=2)
        # shape=(N,20,S/2,S/2)

        self.conv2=nn.Conv2d(in_channels=20,out_channels=8,kernel_size=3,stride=1,padding=1)
        self.relu2=nn.ReLU()
        # shape=(N,8,S/2,S/2)

        self.conv3=nn.Conv2d(in_channels=8,out_channels=12,kernel_size=3,stride=1,padding=1)
        self.relu3=nn.ReLU()
        # shape=(N,12,S/2,S/2)
        self.pool3=nn.MaxPool2d(kernel_size=2)
        # shape=(N,12,S/4,S/4)

        self.conv4=nn.Conv2d(in_channels=12,out_channels=32,kernel_size=3,stride=1,padding=1)
        self.bn4=nn.BatchNorm2d(num_features=32)
        self.relu4=nn.ReLU()
        # shape=(N,32,S/4,S/4)

        # Two floor-halvings equal one floor-division by 4.
        side=input_size//4
        self.fc=nn.Linear(in_features=32*side*side,out_features=num_classes)

    def forward(self,input):
        """Return logits of shape (N,num_classes) for a batch of shape (N,3,S,S)."""
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)

        output=self.pool(output)

        output=self.conv2(output)
        output=self.relu2(output)

        output=self.conv3(output)
        output=self.relu3(output)
        output=self.pool3(output)

        output=self.conv4(output)
        output=self.bn4(output)
        output=self.relu4(output)

        # Flatten per sample. Keeping the batch dimension explicit means a
        # wrong input resolution fails loudly in `fc` instead of being
        # silently reinterpreted as a larger batch.
        output=output.view(output.size(0),-1)

        output=self.fc(output)

        return output
        
        
        
        
        
        
        
        

In [8]:
# class ConvNet(nn.Module):
#     def __init__(self,num_classes=6):
#         super(ConvNet,self).__init__()
        
#         #Output size after convolution filter
#         #((w-f+2P)/s) +1
        
#         #Input shape= (256,3,150,150)
        
#         self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
#         #Shape= (256,12,150,150)
#         self.bn1=nn.BatchNorm2d(num_features=12)
#         #Shape= (256,12,150,150)
#         self.relu1=nn.ReLU()
#         #Shape= (256,12,150,150)
        
#         self.pool=nn.MaxPool2d(kernel_size=2)
#         #Reduce the image size be factor 2
#         #Shape= (256,12,75,75)
        
        
#         self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
#         #Shape= (256,20,75,75)
#         self.relu2=nn.ReLU()
#         #Shape= (256,20,75,75)
        
        
        
#         self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
#         #Shape= (256,32,75,75)
#         self.bn3=nn.BatchNorm2d(num_features=32)
#         #Shape= (256,32,75,75)
#         self.relu3=nn.ReLU()
#         #Shape= (256,32,75,75)
        
        
#         self.fc=nn.Linear(in_features=75 * 75 * 32,out_features=num_classes)
        
        
        
#         #Feed forward function
        
#     def forward(self,input):
#         output=self.conv1(input)
#         output=self.bn1(output)
#         output=self.relu1(output)
            
#         output=self.pool(output)
            
#         output=self.conv2(output)
#         output=self.relu2(output)
            
#         output=self.conv3(output)
#         output=self.bn3(output)
#         output=self.relu3(output)
            
            
#             #Above output will be in matrix form, with shape (256,32,75,75)
            
#         output=output.view(-1,32*75*75)
            
            
#         output=self.fc(output)
            
#         return output

In [9]:
# Instantiate the network on the GPU when one is available, otherwise on the CPU,
# so the notebook also runs on CPU-only machines.
model=ConvNet(num_classes=6).to('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:
# Classification loss computed from the model's raw outputs and integer labels.
loss_function=nn.CrossEntropyLoss()
# Adam over all model parameters, with a small weight decay.
optimiser=Adam(params=model.parameters(),lr=1e-3,weight_decay=1e-4)

In [11]:
# Number of passes over the training set; the training loop iterates range(num_epochs).
num_epochs=250

In [12]:
# Accuracy denominators. Take them from the datasets the loaders actually
# iterate, rather than globbing for '*.jpg', so they always equal the number of
# samples seen (whatever the file extension or directory depth).
train_count=len(trainloader.dataset)
test_count=len(testloader.dataset)

In [13]:
# Sanity-check the sample counts that the training loop divides by.
print(train_count,test_count)

14034 3000


In [14]:
# Train for `num_epochs` epochs, evaluate on the test loader after each one,
# and checkpoint the weights whenever test accuracy improves.
best_accuracy=0.0

# Follow the model's own device so batches always land where the weights are.
device=next(model.parameters()).device

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    train_accuracy=0.0
    train_loss=0.0

    for images,labels in trainloader:
        images=images.to(device)
        labels=labels.to(device)

        optimiser.zero_grad()
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimiser.step()

        # `loss` is the batch mean; weight it by the batch size so the epoch
        # figure is a true per-sample average. `.item()` keeps it a plain float.
        train_loss+=loss.item()*images.size(0)
        # Named `predicted` (not `prediction`) so re-running this cell does not
        # overwrite the `prediction` helper function defined later.
        predicted=outputs.argmax(dim=1)
        train_accuracy+=int((predicted==labels).sum())

    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count

    # ---- evaluation pass ----
    model.eval()
    # Reset every epoch: the counter must not carry over from the previous one.
    test_accuracy=0.0

    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for images,labels in testloader:
            images=images.to(device)
            labels=labels.to(device)

            outputs=model(images)
            predicted=outputs.argmax(dim=1)
            test_accuracy+=int((predicted==labels).sum())

    test_accuracy=test_accuracy/test_count

    if epoch%10==0:
        print('Epoch: '+str(epoch)+' Train loss: '+str(train_loss)+' Train accuracy '+str(train_accuracy)+' Test accuracy '+str(test_accuracy))

    # Keep only the best-performing weights on disk.
    if test_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy=test_accuracy



KeyboardInterrupt: 

In [None]:
# Print the bytes of GPU memory allocated before and after emptying the caching allocator.
# NOTE(review): empty_cache() only releases cached blocks that no tensor occupies;
# memory held by live tensors (model, last batch) is unaffected, so both prints will
# likely show the same value unless those objects are deleted first (see the
# commented-out `del` below). torch.cuda.memory_reserved() may be the more telling counter.
print(torch.cuda.memory_allocated())
#del images,labels,model
torch.cuda.empty_cache()
print(torch.cuda.memory_allocated())
# Second call repeats the one above.
torch.cuda.empty_cache()

In [None]:
# Restore the best weights saved during training into a fresh CPU model.
# map_location='cpu' lets a checkpoint written from the GPU load on any machine
# and avoids allocating GPU memory just to copy the tensors into a CPU model.
checkpoint=torch.load('best_checkpoint.model',map_location='cpu')
model=ConvNet(num_classes=6)
model.load_state_dict(checkpoint)
# Inference mode: BatchNorm layers use their running statistics.
model.eval()

In [None]:
# Inference-time preprocessing: resize and normalise only, no augmentation.
transformer=transform.Compose([
    transform.Resize(size=(256,256)),
    # PIL image -> float tensor, pixel values 0-255 rescaled to 0-1.
    transform.ToTensor(),
    # 0-1 to [-1,1], formula (x-mean)/std.
    transform.Normalize(mean=[0.5,0.5,0.5],std=[0.5,0.5,0.5]),
])

In [None]:
from PIL import Image
def prediction(img_path,transformer):
    """Classify a single image file with the notebook-level `model`.

    Args:
        img_path: path of the image file to classify.
        transformer: callable that turns a PIL image into a (C,H,W) tensor.

    Returns:
        The predicted class name, looked up in the notebook-level `classes` list.

    Note:
        Reads the globals `model` and `classes`. The model is used as-is;
        it is expected to have been put in eval mode by the caller.
    """
    # Close the file promptly and force 3 channels, so greyscale or RGBA files
    # do not break the 3-channel normalisation.
    with Image.open(img_path) as img:
        image=img.convert('RGB')

    # Add the batch dimension the model expects: (C,H,W) -> (1,C,H,W).
    image_tensor=transformer(image).float().unsqueeze(0)

    # Put the input on whatever device the model's weights live on.
    # (`Tensor.cuda()` returns a copy, so calling it without assignment did nothing.)
    device=next(model.parameters()).device
    image_tensor=image_tensor.to(device)

    # Pure inference: no autograd graph needed.
    with torch.no_grad():
        output=model(image_tensor)

    # argmax on the tensor works on CPU and GPU alike (no .numpy() round-trip).
    index=int(output.argmax(dim=1).item())

    return classes[index]

In [None]:
# Folder of unlabeled images to classify, and the .jpg files directly inside it.
pred_path='seg_pred/seg_pred'
images_path=glob.glob(f'{pred_path}/*.jpg')


In [None]:
# Map each image's file name to its predicted class.
# os.path.basename handles whichever path separator glob returned on this
# platform; slicing after the last '/' gave wrong keys for Windows-style paths.
pred_dict={os.path.basename(path):prediction(path,transformer) for path in images_path}

In [None]:
# Display the full file-name -> predicted-class mapping.
# NOTE(review): this prints one entry per image; consider showing only a slice.
pred_dict

In [None]:
# Open one image from the prediction folder for visual inspection; as the cell's
# last expression it is rendered inline.
Image.open(r"seg_pred/seg_pred/10631.jpg")

In [None]:
# Highest test accuracy recorded by the training loop (it starts at 0.0 and is
# raised whenever an epoch beats it).
best_accuracy