In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
import seaborn as sns

import os
import random

from PIL import Image
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, Dataset
from torchvision import datasets, transforms
# from utils import *
import albumentations as album
#import extractors
import segmentation_models_pytorch as smp

# Notebook-wide seaborn theme applied to every figure drawn below
sns.set(
    context='notebook',
    style='white',
    palette='deep',
)

In [2]:
# Model configuration -- needs to be run only one time per kernel session
ENCODER = 'resnext50_32x4d'
ENCODER_WEIGHTS = 'imagenet'
# Number of segmentation classes; this is the single source of truth for the
# model's output channel count (previously duplicated as a literal 27 below).
CLASSES = 27
# Could be None for logits or 'softmax2d'/'softmax' for multiclass segmentation
ACTIVATION = 'softmax2d'

# Create the segmentation model with a pretrained encoder
model = smp.PSPNet(
    encoder_name=ENCODER,
    encoder_weights=ENCODER_WEIGHTS,
    classes=CLASSES,
    activation=ACTIVATION,
)

# Input preprocessing function looked up for this encoder / weights pair
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

Downloading: "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth" to /Users/rishikeshavanrengarajan/.cache/torch/hub/checkpoints/resnext50_32x4d-7cdf4587.pth


  0%|          | 0.00/95.8M [00:00<?, ?B/s]

In [3]:
def keep_image_size_open(path, size=(640, 640)):
    """Load an image, pad it to a square on a black canvas, and resize it.

    The source image is pasted at the top-left corner of a black RGB square
    whose side equals the image's longest side, so the aspect ratio of the
    content is preserved before the final resize.

    Args:
        path: location of the image file, passed to ``PIL.Image.open``.
        size: ``(width, height)`` of the returned image.

    Returns:
        A new RGB ``PIL.Image`` of the requested size.
    """
    # The context manager closes the underlying file once the pixels have been
    # copied; the original left the handle open for every sample read.
    with Image.open(path) as img:
        side = max(img.size)  # longest side decides the square canvas
        canvas = Image.new('RGB', (side, side), (0, 0, 0))
        canvas.paste(img, (0, 0))  # top-left anchored; the rest stays black
    return canvas.resize(size)

def keep_mask_size_open(path, size=(640, 640)):
    """Load a label mask, pad it to a square with zeros, and resize it.

    The mask is pasted at the top-left corner of a zero-filled single-channel
    ('L') square whose side equals the mask's longest side, then resized.

    Args:
        path: location of the mask file, passed to ``PIL.Image.open``.
        size: ``(width, height)`` of the returned mask.

    Returns:
        A new single-channel ``PIL.Image`` of the requested size whose pixel
        values are all values that were present on the padded canvas.
    """
    # The context manager closes the underlying file once the pixels have been
    # copied onto the canvas.
    with Image.open(path) as img:
        side = max(img.size)  # longest side decides the square canvas
        canvas = Image.new('L', (side, side), 0)
        canvas.paste(img, (0, 0))  # top-left anchored; padding stays 0
    # Pixel values are class ids, so they must never be interpolated: a
    # smoothing filter would blend neighbouring ids into labels that do not
    # exist. Nearest-neighbour keeps every output pixel a genuine input value.
    return canvas.resize(size, resample=Image.NEAREST)

In [4]:
# helper function for data visualization
def visualize(**images):
    """
    Draw all images passed as keyword arguments side by side in a single row.

    The keyword name of each image becomes its panel title, with underscores
    replaced by spaces and the result title-cased.
    """
    total = len(images)
    plt.figure(figsize=(20, 8))
    position = 0
    for name, image in images.items():
        position += 1
        plt.subplot(1, total, position)
        # hide the tick marks on both axes
        plt.xticks([])
        plt.yticks([])
        panel_title = name.replace('_', ' ').title()
        plt.title(panel_title, fontsize=20)
        plt.imshow(image)
    plt.show()

# Perform one hot encoding on label
def one_hot_encode(image, n_classes):
    """
    One-hot encode a tensor of integer class indices.

    Thin wrapper around ``torch.nn.functional.one_hot``.
    # Arguments
        image: integer tensor of class indices
        n_classes: number of classes, i.e. the size of the added dimension

    # Returns
        A tensor with the same leading dimensions as the input plus a
        trailing dimension of size n_classes
    """
    encoded = F.one_hot(image, n_classes)
    return encoded
 
    
# Perform reverse one-hot-encoding on labels / preds
def reverse_one_hot(image, axis=-1):
    """
    Collapse a one-hot (or per-class score) array back to class indices.

    # Arguments
        image: array whose `axis` dimension enumerates the classes
        axis: dimension holding the classes. Defaults to -1 (channel-last);
            pass 0 for channel-first arrays such as (classes, H, W).

    # Returns
        An array of the index of the largest entry along `axis`, with that
        dimension removed
    """
    return np.argmax(image, axis=axis)

# Perform colour coding on the reverse-one-hot outputs
def colour_code_segmentation(image, label_values):
    """
    Map every class key in an array to its entry in a colour table.
    # Arguments
        image: array of class keys (cast to int before the lookup)
        label_values: sequence indexed by class key; entry k is the colour
            used for class k

    # Returns
        Array shaped like `image` with each key replaced by its
        `label_values` entry
    """
    palette = np.array(label_values)
    class_keys = image.astype(int)
    return palette[class_keys]

In [5]:
def get_training_augmentation():
    """Build the albumentations pipeline used for training samples.

    The pipeline pads to at least 640x640 and then, with probability 0.5,
    applies exactly one of: horizontal flip, vertical flip, 90-degree rotation.
    """
    pad_to_640 = album.PadIfNeeded(min_height=640, min_width=640, always_apply=True, border_mode=0)
    flip_or_rotate = album.OneOf(
        [
            album.HorizontalFlip(p=1),
            album.VerticalFlip(p=1),
            album.RandomRotate90(p=1),
        ],
        p=0.5,
    )
    # Currently disabled augmentations, kept for reference:
    #   album.ShiftScaleRotate(scale_limit=0.5,rotate_limit=0,shift_limit=0.1,p=0.5,border_mode=0)
    #   album.GridDistortion(p=0.5)
    return album.Compose([pad_to_640, flip_or_rotate])


def get_validation_augmentation():
    """Build the albumentations pipeline used for validation samples.

    Only pads to at least 640x640 (a multiple of 32); no random transforms.
    """
    pad_to_640 = album.PadIfNeeded(min_height=640, min_width=640, always_apply=True, border_mode=0)
    return album.Compose([pad_to_640])


def to_tensor(x, **kwargs):
    """Move the last axis of a 3-D array to the front and cast to float32.

    Extra keyword arguments are accepted and ignored.
    """
    channels_first = x.transpose(2, 0, 1)
    return channels_first.astype('float32')


def get_preprocessing(preprocessing_fn=None):
    """Assemble the preprocessing pipeline applied after augmentation.

    Args:
        preprocessing_fn (callable): data normalization function applied to
            the image only (can be specific for each pretrained network)
    Return:
        transform: albumentations.Compose that first runs `preprocessing_fn`
            on the image, then runs `to_tensor` on both image and mask
    """
    normalize_image = album.Lambda(image=preprocessing_fn)
    channels_first = album.Lambda(image=to_tensor, mask=to_tensor)
    return album.Compose([normalize_image, channels_first])

In [8]:
# (Removed a leftover debugging expression that evaluated a bare `path` name.
# No cell above defines it, so this cell raised NameError on a fresh kernel.)

In [14]:
# torchvision pipeline consisting of a single ToTensor step
transform = transforms.Compose(
    [
        transforms.ToTensor(),
    ]
)

class BackgroundDataset(torch.utils.data.Dataset):

    """Segmentation dataset pairing every mask file with its same-named image.

    Masks are listed from ``<path>/../data/train_masks_copy``; the image for a
    mask is looked up in ``<path>/../data/train_images_copy`` under the mask's
    base name with a ``.jpg`` extension.

    Args:
        path (str): base directory the two relative data folders are joined to
        augmentation (callable, optional): called as
            ``augmentation(image=..., mask=...)`` and must return a mapping
            with ``'image'`` and ``'mask'`` keys (e.g. albumentations.Compose).
            Skipped when ``None``.
        preprocessing (callable, optional): same calling convention, applied
            after augmentation. Skipped when ``None``.
        n_classes (int): number of classes passed to ``one_hot_encode`` when
            expanding the mask (default 27, the value previously hard-coded).

    """
    # Data folders, relative to the `path` handed to the constructor
    MASK_DIR = '../data/train_masks_copy'
    IMAGE_DIR = '../data/train_images_copy'

    def __init__(
            self,path, 
            augmentation=None, 
            preprocessing=None,
            n_classes=27,
    ):
        self.path = path
        self.n_classes = n_classes
        # Sort so that index -> file is stable across runs and machines, and
        # drop hidden entries (e.g. macOS '.DS_Store') that are not masks.
        self.name = sorted(
            entry
            for entry in os.listdir(os.path.join(path, self.MASK_DIR))
            if not entry.startswith('.')
        )
        self.augmentation = augmentation
        self.preprocessing = preprocessing

    def _resolve_paths(self, idx):
        """Return ``(image_path, mask_path)`` for the sample at ``idx``."""
        mask_name = self.name[idx]
        # Swap only the extension; a plain str.replace('png', 'jpg') would
        # also rewrite a 'png' occurring elsewhere in the file name.
        stem, _ = os.path.splitext(mask_name)
        mask_path = os.path.join(self.path, self.MASK_DIR, mask_name)
        img_path = os.path.join(self.path, self.IMAGE_DIR, stem + '.jpg')
        return img_path, mask_path

    def __getitem__(self, idx):
        """Load, one-hot encode and transform the sample at ``idx``.

        Returns:
            ``(image, mask)``. The mask carries a trailing one-hot dimension
            of size ``n_classes`` before any transform is applied; the final
            layout and dtype are whatever the configured transforms produce.
        """
        img_path, mask_path = self._resolve_paths(idx)

        # read image and mask as int64 arrays
        image = np.asarray(keep_image_size_open(img_path)).astype('int64')
        mask = np.asarray(keep_mask_size_open(mask_path)).astype('int64')

        # one-hot-encode the mask, then hand it back to numpy for the
        # array-based transform pipelines
        mask = one_hot_encode(torch.from_numpy(mask).to(torch.int64), self.n_classes)
        mask = np.asarray(mask).astype('int64')

        # Both transforms are optional (their defaults are None), so only
        # call the ones that were actually supplied.
        if self.augmentation is not None:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        if self.preprocessing is not None:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        return image,mask
        
    def __len__(self):
        """Number of mask files found, i.e. the number of samples."""
        return len(self.name)

In [15]:
if __name__ == '__main__':
    # Sanity check: build the dataset and inspect one sample's shapes/dtypes.
    data = BackgroundDataset('',augmentation=get_training_augmentation(),preprocessing=get_preprocessing(preprocessing_fn))
    # Fetch image and mask with a SINGLE lookup of sample 100. The previous
    # code indexed twice, the second time with `10|0` (bitwise OR == 10), so
    # the mask came from a different sample and from a separate random
    # augmentation draw than the image.
    check_image, check_mask = data[100]
    print(check_image.shape,check_mask.shape)
    print(check_image.dtype,check_mask.dtype)
    print(len(data))


(3, 640, 640) (27, 640, 640)
float32 float32
261


In [16]:
# p  = os.path.join('train_images',os.listdir('train_images')[11])
# ip = Image.open(p)
# ipa = np.asarray(ip)
# ic = cv2.cvtColor(cv2.imread(p), cv2.COLOR_BGR2RGB)
# tp=np.transpose(ic,(2,1,0))
# tp.shape
# tt = torch.from_numpy(ipa).transpose(2,0)
# tt.shape


In [17]:
bs = 8   # batch size
nw = 0   # DataLoader worker processes
SPLIT_SEED = 42  # fixes the train/val partition so it is identical on every run

# Two views over the same files: training samples get the random training
# augmentation, validation samples get the deterministic validation one.
# (Previously both subsets shared one dataset built with the training
# augmentation, so validation images were randomly flipped/rotated too.)
full_dataset = BackgroundDataset('',augmentation=get_training_augmentation(),preprocessing=get_preprocessing(preprocessing_fn))
val_source = BackgroundDataset('',augmentation=get_validation_augmentation(),preprocessing=get_preprocessing(preprocessing_fn))
# Share the file list so an index refers to the same sample in both views
val_source.name = full_dataset.name

# Splitting into Train and Val (90% / 10%)
train_size = int(0.9 * len(full_dataset))
val_size   = len(full_dataset) - train_size
split_rng = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_split = torch.utils.data.random_split(
    full_dataset, [train_size, val_size], generator=split_rng
)
# Re-point the held-out indices at the validation view
val_dataset = torch.utils.data.Subset(val_source, val_split.indices)

# Creating  data_loader
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=bs,num_workers=nw,shuffle=True)
# Validation order does not affect the metrics, so keep it deterministic
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=bs,num_workers=nw,shuffle=False)





In [18]:
# Pull one training batch and report the shape / dtype of images and labels
it, lt = next(iter(train_loader))
print(*(t.shape for t in (it, lt)))
print(*(t.dtype for t in (it, lt)))

# Batches per loader multiplied by the batch size
print(bs * len(train_loader), bs * len(val_loader))

torch.Size([8, 3, 640, 640]) torch.Size([8, 27, 640, 640])
torch.float32 torch.float32
240 32


In [23]:
# Set flag to train the model or not. If set to 'False', only prediction is performed (using an older model checkpoint)
TRAINING = False

# Set num of epochs
EPOCHS = 20

# Set device: `cuda` or `cpu`
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the best saved model checkpoint from a previous run (if present).
# This must happen BEFORE the optimizer is created: the optimizer captures
# `model.parameters()` at construction time, so replacing `model` afterwards
# would leave it updating the parameters of the discarded network.
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints you created yourself.
if os.path.exists('best_model_pspnet.pth'):
    model = torch.load('best_model_pspnet.pth', map_location=DEVICE)

# define loss function
loss = smp.utils.losses.DiceLoss()

# define metrics
metrics = [
    smp.utils.metrics.IoU(threshold=0.5),
]

# define optimizer (built from the model that will actually be trained)
optimizer = torch.optim.Adam([ 
    dict(params=model.parameters(), lr=0.0001),
])

# define learning rate scheduler (not used in this NB)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer, T_0=1, T_mult=2, eta_min=5e-5,
)

In [24]:
# Settings common to the training and the validation epoch runner
_shared_epoch_args = dict(
    loss=loss,
    metrics=metrics,
    device=DEVICE,
    verbose=True,
)

# The training runner additionally receives the optimizer
train_epoch = smp.utils.train.TrainEpoch(model, optimizer=optimizer, **_shared_epoch_args)

valid_epoch = smp.utils.train.ValidEpoch(model, **_shared_epoch_args)

In [25]:
%%time

if TRAINING:

    best_iou_score = 0.0
    train_logs_list, valid_logs_list = [], []

    for i in range(0, EPOCHS):

        # Perform training & validation
        print('\nEpoch: {}'.format(i))
        train_logs = train_epoch.run(train_loader)
        valid_logs = valid_epoch.run(val_loader)
        train_logs_list.append(train_logs)
        valid_logs_list.append(valid_logs)

        # Save model if a better val IoU score is obtained
        if best_iou_score < valid_logs['iou_score']:
            best_iou_score = valid_logs['iou_score']
            torch.save(model, 'best_model_pspnet.pth')
            print('Model saved!')

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 6.91 µs


In [285]:
# Write the current model object (the whole module, not a state_dict) to disk
torch.save(obj=model, f='best_model_pspnet.pth')

In [22]:
PATH = 'best_model_pspnet.pth'
device = torch.device('cpu')

# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints you created yourself.
checkpoint = torch.load(PATH, map_location=device)

# This notebook writes the checkpoint with torch.save(model, ...), i.e. as a
# whole pickled module. Passing that object to load_state_dict fails with
# "'PSPNet' object has no attribute 'copy'", so accept both formats:
# a full module replaces `model`, a state_dict is loaded into it.
if isinstance(checkpoint, nn.Module):
    model = checkpoint
else:
    model.load_state_dict(checkpoint)

# switch to inference mode (trailing ';' suppresses the module repr)
model.eval();