In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline

import os


import torch 
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision
from torchvision import transforms 

# Image processing lib
import albumentations as album
import cv2
from PIL import Image

from tqdm.notebook import tqdm

import warnings
warnings.filterwarnings('ignore')

!pip install -q segmentation-models-pytorch
import segmentation_models_pytorch as smp

from sklearn.model_selection import train_test_split

In [3]:
import torch.nn.functional as F

In [4]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # device the model and every batch are moved to

In [5]:
# All three folders live under the same dataset directory.
DATA_ROOT = '../input/offroad-terrain-attention-region-images/TrainingImages/TrainingImages'
MASK_ROOT = DATA_ROOT + '/EnumMasks'

images_path = DATA_ROOT + '/OriginalImages'  # original images
mask_path_1 = MASK_ROOT + '/png_0_1'         # first mask variant (folder name suggests 0/1 pixel values)
mask_path_2 = MASK_ROOT + '/png_0_255'       # second mask variant (folder name suggests 0/255 pixel values)

In [6]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
def dataset_ser(path):
    """
    List every file found under ``path`` (sub-folders included).

    input : path, str, directory to scan
    output : pd.Series of bare file names (no directory part), sorted
             in ascending order
    """
    file_names = sorted(
        file_name
        for _, _, files_here in os.walk(path)
        for file_name in files_here
    )
    return pd.Series(file_names)

# Sorted file-name listings for the images and for both mask variants.
image_df, mask_df_1, mask_df_2 = map(dataset_ser, (images_path, mask_path_1, mask_path_2))

In [7]:
# Sanity check: overlay the first mask on the first image.

img = Image.open(os.path.join(images_path, image_df[0]))
msk = Image.open(os.path.join(mask_path_1, mask_df_1[0]))

plt.imshow(img)
# Drawn semi-transparently on the same axes so the image stays visible underneath.
plt.imshow(msk, alpha = 0.3)

In [8]:
# Dataset summary: number of files per folder and the array shape of the first image / mask.
print('Number of images : ', len(image_df))
print('Number of masks_1 : ', len(mask_df_1))
print('Number of masks_2 : ', len(mask_df_2))
print('size of image : ', np.array(Image.open(os.path.join(images_path, image_df[0]))).shape)
print('size of masks : ', np.array(Image.open(os.path.join(mask_path_1, mask_df_1[0]))).shape)

In [9]:
def visualize_masks(**images):
    """
    Show several images side by side in a single row.

    input : images, keyword arguments mapping a panel title to anything
            plt.imshow accepts, e.g. visualize_masks(Image=img, Mask=msk)
    """
    n_panels = len(images)
    plt.figure(figsize=(30, 10))  # one wide canvas shared by all panels
    for position, panel_title in enumerate(images, start=1):
        plt.subplot(1, n_panels, position)  # panel `position` of a 1 x n_panels grid
        plt.title(panel_title)
        plt.imshow(images[panel_title])

In [10]:
def img_to_nparray(img):
    """
    Convert an image (e.g. one opened with PIL.Image) to a numpy array.

    input : img, any object accepted by np.array
    output : np.ndarray built from img
    """
    pixels = np.array(img)
    return pixels

def read_image(i):
    """
    Load one image together with both of its masks.

    input : i, int, position of the image / masks in image_df, mask_df_1
            and mask_df_2
    output : tuple (img, mask_1, mask_2) of numpy arrays
    """
    sources = (
        (images_path, image_df),
        (mask_path_1, mask_df_1),
        (mask_path_2, mask_df_2),
    )
    img, mask_1, mask_2 = (
        img_to_nparray(Image.open(os.path.join(folder, names[i])))
        for folder, names in sources
    )
    return img, mask_1, mask_2
    


In [11]:
# Image number 6 next to both of its mask variants.
img, mask_1, mask_2 = read_image(6)
visualize_masks(Image=img, Mask_1=mask_1, Mask_2=mask_2)

There is no visible difference between the two masks; they differ only in the non-zero pixel value
(1 vs. 255). We will therefore continue with mask_1 only.

In [12]:
# Hold out 20% of the data for testing, then 20% of the remainder for validation.
# A fixed random_state keeps the three subsets identical across kernel restarts,
# so that metrics from different runs are comparable.
SPLIT_SEED = 42
dummy_images, test_images, dummy_masks, test_masks = train_test_split(
    image_df, mask_df_1, test_size=0.2, shuffle=True, random_state=SPLIT_SEED)
train_images, val_images, train_masks, val_masks = train_test_split(
    dummy_images, dummy_masks, test_size=0.2, shuffle=True, random_state=SPLIT_SEED)

In [13]:
# Number of samples in each subset.
print('Train Images   : ', len(train_images))
print('Val Images     : ', len(val_images))
print('Test Images    : ', len(test_images))

In [14]:
class OffRoadData(Dataset):
    """
    Dataset yielding (image, mask) pairs for segmentation.

    image_path : str, folder containing the images
    mask_path  : str, folder containing the masks
    images     : image file names, indexed with .iloc (e.g. a pd.Series)
    masks      : mask file names, indexed with .iloc, aligned with `images`
    transform  : optional callable invoked as transform(image=..., mask=...)
                 that returns a dict with the keys 'image' and 'mask'
    """

    def __init__(self,
                 image_path, mask_path,
                 images, masks,
                 transform=None):

        self.image_path = image_path
        self.mask_path = mask_path
        self.images = images
        self.masks = masks
        self.transform = transform

        # Built once here rather than on every __getitem__ call.
        self._to_tensor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def __len__(self):
        return len(self.images)

    def __getitem__(self, i):
        """
        Return sample `i` as (img, msk): img is the output of ToTensor + Normalize,
        msk is an int64 tensor built from the grayscale mask.

        Raises FileNotFoundError when the image or the mask cannot be read.
        """
        image_file = os.path.join(self.image_path, self.images.iloc[i])
        mask_file = os.path.join(self.mask_path, self.masks.iloc[i])

        bgr = cv2.imread(image_file)
        msk = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)

        # cv2.imread reports failure by returning None instead of raising; fail here
        # with the offending path rather than deeper in the pipeline.
        if bgr is None:
            raise FileNotFoundError("Could not read image file: {}".format(image_file))
        if msk is None:
            raise FileNotFoundError("Could not read mask file: {}".format(mask_file))

        img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            aug = self.transform(image=img, mask=msk)
            img = aug['image']
            msk = aug['mask']

        img = self._to_tensor(img)
        msk = torch.from_numpy(msk).long()

        return img, msk
             

In [15]:
# Both pipelines resize to 704 x 960 (both multiples of 32).
# NOTE(review): INTER_NEAREST is passed for the resize; a smoother interpolation may
# suit the photos better, while masks presumably stay nearest-neighbour — confirm in
# the albumentations docs before changing.

# Training: random flip, brightness/contrast jitter and occasional grid distortion.
train_transforms = album.Compose([
    album.Resize(704, 960, interpolation = cv2.INTER_NEAREST),
    album.OneOf([album.HorizontalFlip(p=1), album.VerticalFlip(p=1)], p=0.75),
    album.RandomBrightnessContrast((0.1, 0.4), (0.1, 0.4), p=0.9),
    album.GridDistortion(p=0.2),
])

# Validation: resize only. Random augmentation here would make the validation loss and
# mIoU differ between epochs for reasons unrelated to the model, so it is left out.
val_transforms = album.Compose([
    album.Resize(704, 960, interpolation = cv2.INTER_NEAREST),
])

In [16]:
# Training and validation datasets; both read masks from mask_path_1.
train_data = OffRoadData(images_path, mask_path_1, train_images, train_masks, train_transforms)
val_data   = OffRoadData(images_path, mask_path_1, val_images, val_masks, val_transforms)

In [17]:
batch_size = 3

# Training batches are reshuffled every epoch. Validation metrics are averaged over
# all batches, so shuffling adds nothing there; a fixed order keeps runs repeatable.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

In [18]:
# U-Net with an ImageNet-pretrained ResNet-34 encoder and two output classes.
# activation=None: no final activation is requested (the loss used later is CrossEntropyLoss).
model = smp.Unet(
    encoder_name='resnet34',
    encoder_weights='imagenet',
    encoder_depth=5,
    decoder_channels=[256, 128, 64, 32, 16],
    classes=2,
    activation=None,
)

In [19]:
model.to(device) # move the model to the selected device (GPU if available, otherwise CPU)

In [20]:
# Number of passes over the training set
n_epochs = 12

# Adam optimizer with a small learning rate and weight decay
optimizer = torch.optim.Adam(params= model.parameters(), lr = 1e-5, weight_decay= 1e-4)

# Classification loss; train_loop applies it to the model output and the integer mask
loss_fn = nn.CrossEntropyLoss()

# One-cycle schedule sized for n_epochs * len(train_loader) steps; train_loop steps it
# once per training batch.
# NOTE(review): OneCycleLR presumably drives the learning rate itself (peaking at max_lr),
# so the lr passed to Adam above may be overridden — confirm against the PyTorch docs.
sched = torch.optim.lr_scheduler.OneCycleLR(optimizer,max_lr= 1e-5, epochs= n_epochs,steps_per_epoch= len(train_loader))


In [21]:
#inspired by another notebook(https://www.kaggle.com/ligtfeather/semantic-segmentation-is-easy-with-pytorch)
def mIoU(pred_mask, mask, smooth=1e-10, n_classes=2):
    """
    Mean intersection-over-union between predicted and reference class maps.

    input : pred_mask, tensor of per-class scores with the class axis at dim 1
            mask, integer tensor of class ids with as many elements as
                  pred_mask has after the class axis is reduced
            smooth, float, added to numerator and denominator of each ratio
            n_classes, int, class ids 0 .. n_classes-1 are evaluated
    output : mean of the per-class IoU values; classes that do not occur in
             `mask` are ignored (NaN is returned if none of them occurs)
    """
    with torch.no_grad():
        # Softmax is monotonic, so argmax over the raw scores selects the same
        # class and saves a full pass over the tensor.
        predicted = torch.argmax(pred_mask, dim=1).reshape(-1)
        target = mask.reshape(-1)

        iou_per_class = []
        for clas in range(n_classes):
            is_predicted = predicted == clas
            is_labelled = target == clas

            if not is_labelled.any().item():
                # class absent from the reference: excluded from the mean
                iou_per_class.append(np.nan)
                continue

            # integer pixel counts (no float32 rounding for large inputs)
            intersect = torch.logical_and(is_predicted, is_labelled).sum().item()
            union = torch.logical_or(is_predicted, is_labelled).sum().item()
            iou_per_class.append((intersect + smooth) / (union + smooth))

        return np.nanmean(iou_per_class)

In [22]:
def _run_epoch(model, loader, loss_fn, optimizer=None, scheduler=None):
    """Run one pass over `loader`: trains when an optimizer is given, otherwise evaluates.

    Returns (mean loss per batch, mean mIoU per batch).
    """
    training = optimizer is not None
    model.train(training)  # train(False) is the same as eval()

    total_loss = 0.0
    total_iou = 0.0

    with torch.set_grad_enabled(training):
        for images, masks in tqdm(loader):
            image = images.to(device)
            mask = masks.to(device)

            # Drop only an explicit channel axis. A bare squeeze() would also remove
            # the batch axis of a one-sample batch and break the loss.
            if mask.dim() == 4:
                mask = mask.squeeze(1)

            output = model(image)
            loss = loss_fn(output, mask)
            total_iou += mIoU(output, mask)

            if training:
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                if scheduler is not None:
                    scheduler.step()  # per-batch schedule

            total_loss += loss.item()

    n_batches = len(loader)
    return total_loss / n_batches, total_iou / n_batches


def train_loop(model, train_loader, val_loader, loss_fn, optimizer, scheduler, n_epochs):
    """
    Train `model` for `n_epochs`, evaluating on `val_loader` after every epoch.

    input : model, network to optimise (expected to be on `device` already)
            train_loader / val_loader, iterables of (images, masks) batches
                                       that support len()
            loss_fn, callable(output, mask) returning a scalar loss
            optimizer, optimizer stepped once per training batch
            scheduler, learning-rate scheduler stepped once per training batch
                       (may be None)
            n_epochs, int, number of epochs
    output : dict with the keys 'train_loss', 'val_loss', 'train_miou' and
             'val_miou' (in that order), each a list with one value per epoch
    """
    torch.cuda.empty_cache()

    # Key order matters: plot_loss_metrics picks the curves by position.
    history = {'train_loss': [], 'val_loss': [],
               'train_miou': [], 'val_miou': []}

    for epoch in range(n_epochs):
        train_loss, train_iou = _run_epoch(model, train_loader, loss_fn, optimizer, scheduler)
        val_loss, val_iou = _run_epoch(model, val_loader, loss_fn)

        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_miou'].append(train_iou)
        history['val_miou'].append(val_iou)

        print("Epoch:{}/{}..".format(epoch+1, n_epochs),
              "Train Loss: {:.3f}..".format(train_loss),
              "Val Loss: {:.3f}..".format(val_loss),
              "Train mIoU:{:.3f}..".format(train_iou),
              "Val mIoU: {:.3f}..".format(val_iou),
              )

    return history

A lot of time was spent finding good values for the hyperparameters.

In [23]:
# Train the model; `a` receives the per-epoch loss / mIoU history returned by train_loop.
a = train_loop(model,train_loader,val_loader ,loss_fn, optimizer, sched, n_epochs)

In [24]:
# Saves the whole model object, not only its state_dict.
# NOTE(review): loading this file later presumably requires the same class definitions /
# library versions to be importable — confirm, or consider saving model.state_dict().
torch.save(model, 'Model_1.pt')

In [25]:
def plot(history, val_1, val_2, title, x_label, y_label):
    """
    Draw two curves from `history` in one figure and show it.

    input : history, mapping from key to a sequence of values
            val_1, key of the curve labelled 'train'
            val_2, key of the curve labelled 'val'
            title, x_label, y_label, str, figure title and axis labels
    """
    curves = (
        (val_1, 'train', 'o'),
        (val_2, 'val', '*'),
    )
    for key, legend_name, marker_style in curves:
        plt.plot(history[key], label=legend_name, marker=marker_style)

    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel(x_label)
    plt.legend()
    plt.show()

In [26]:
def plot_loss_metrics(history, loss = False , mIoU = False):
    """
    Plot the train / validation curves stored in a training history.

    input : history, dict whose first two keys hold the train / val loss and
                     whose next two keys hold the train / val mIoU (the keys
                     are picked by position, not by name)
            loss, bool, draw the loss figure
            mIoU, bool, draw the mean IoU figure
    """
    figures = (
        (loss, 0, 'Loss per epoch', 'loss'),
        (mIoU, 2, 'Mean IoU', 'mIoU'),
    )
    for wanted, first, title, y_label in figures:
        if not wanted:
            continue
        names = list(history.keys())
        plot(history, names[first], names[first + 1], title, 'epoch', y_label)

In [27]:
# Loss and mIoU curves (train vs. validation) of the run stored in `a`.
plot_loss_metrics(history= a, loss = True,mIoU = True)

In [28]:
# Test-time pipeline: resize only, to the same 704 x 960 size used for training.
test_transforms = album.Compose([album.Resize(704, 960, interpolation = cv2.INTER_NEAREST)])

In [29]:
# Held-out test split, with masks read from mask_path_1.
test_data   = OffRoadData(images_path, mask_path_1, test_images, test_masks, test_transforms)

In [30]:
def predict_image_mask_miou(model, image, mask):
    """
    Predict the class map of a single image and score it against its mask.

    input : model, network; it is switched to eval mode and moved to `device`
            image, tensor of one image without a batch axis
            mask, tensor holding the reference mask of that image
    output : (masked, score) where masked is the per-pixel argmax over dim 1
             of the model output, on the CPU and without the batch axis, and
             score is the value returned by mIoU for this sample
    """
    model.eval()
    model.to(device)

    on_device_image = image.to(device)
    on_device_mask = mask.to(device)

    with torch.no_grad():
        # the model and mIoU both work on batches: add a batch axis of size 1
        batch_image = on_device_image.unsqueeze(0)
        batch_mask = on_device_mask.unsqueeze(0)

        logits = model(batch_image)
        score = mIoU(logits, batch_mask)
        masked = logits.argmax(dim=1).cpu().squeeze(0)

    return masked, score

In [67]:
# Pick one test sample to inspect.
# NOTE(review): index 100 assumes the test split holds more than 100 items — confirm.
image , mask = test_data[100]

In [68]:
# Predicted class map and mIoU for the selected sample.
pred_mask, score = predict_image_mask_miou(model, image, mask)

In [69]:
# Reference mask next to the prediction.
visualize_masks(original_mask = mask, predicted_mask = pred_mask)

In [37]:
def miou_score(model, test_set):
    """
    Score every sample of a dataset individually.

    input : model, network passed on to predict_image_mask_miou
            test_set, indexable collection of (image, mask) pairs that
                      supports len()
    output : list with one mIoU score per sample, in dataset order
    """
    def score_of(sample):
        img, mask = sample
        _, score = predict_image_mask_miou(model, img, mask)
        return score

    return [score_of(test_set[idx]) for idx in tqdm(range(len(test_set)))]

In [38]:
# One mIoU score per test sample.
mob_miou = miou_score(model, test_data)

In [39]:
# Average of the per-sample scores over the test set.
print('Test Set mIoU', np.mean(mob_miou))