In [1]:
!pip install segmentation-models-pytorch



In [None]:
#!pip install git+https://github.com/qubvel/segmentation_models.pytorch --upgrade

Collecting git+https://github.com/qubvel/segmentation_models.pytorch
  Cloning https://github.com/qubvel/segmentation_models.pytorch to /tmp/pip-req-build-2bkvx0ir
  Running command git clone -q https://github.com/qubvel/segmentation_models.pytorch /tmp/pip-req-build-2bkvx0ir
Building wheels for collected packages: segmentation-models-pytorch
  Building wheel for segmentation-models-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for segmentation-models-pytorch: filename=segmentation_models_pytorch-0.1.3-cp37-none-any.whl size=83178 sha256=fcc0a40d4e807e7ca9135315cb9775a5de553db364bd32c10a6889b56a8de28e
  Stored in directory: /tmp/pip-ephem-wheel-cache-fmloz17d/wheels/79/3f/09/1587a252e0314d26ad242d6d2e165622ab95c95e5cfe4b942c
Successfully built segmentation-models-pytorch
Installing collected packages: segmentation-models-pytorch
  Found existing installation: segmentation-models-pytorch 0.1.3
    Uninstalling segmentation-models-pytorch-0.1.3:
      Successfully uninstalled s

In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import numpy as np
import cv2
import matplotlib.pyplot as plt

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
DATA_DIR = '/content/drive/MyDrive/Henry_Lab/Cap08102020_seg/'

x_finetune_dir = '/playpen1/qiuyang/refined_segmentation/pascal_image_50'
y_finetune_dir = '/playpen1/qiuyang/refined_segmentation/pascal_task_50'
x_test_dir = '/playpen1/qiuyang/refined_segmentation/pascal_image_400'

In [4]:
%ls

June_17_Correct_Copy_of_toy_sample_image_segmentation.ipynb  [0m[01;34mpascal_image_50[0m/
June_6_sample_image_segmentation.ipynb                       [01;34mpascal_mask_50[0m/
[01;34mpascal_image_400[0m/                                            [01;34mpascal_refined[0m/


In [5]:
# helper function for data visualization
def visualize(**images):
    """PLot images in one row."""
    n = len(images)
    plt.figure(figsize=(16, 5))
    for i, (name, image) in enumerate(images.items()):
        plt.subplot(1, n, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.title(' '.join(name.split('_')).title())
        plt.imshow(image)
    plt.show()

def bbox2_padding(img, mask):
    rows = np.any(mask, axis=1)
    cols = np.any(mask, axis=0)
    rmin, rmax = np.where(rows)[0][[0, -1]]
    height = rmax - rmin + 1
    #print(f"height is {height}")
                      
    cmin, cmax = np.where(cols)[0][[0, -1]]
    width = cmax - cmin + 1
    #print(f"width is {width}")

    rcenter = int((rmin+rmax) / 2)
    ccenter = int((cmin+cmax) / 2)
    
    half_length = max(abs(rcenter - rmin), abs(ccenter-cmin))
    length = max(height, width)
    # initialize new_image and new_mask
    new_image = np.zeros((length, length, 3), dtype=np.int32)
    new_mask = np.zeros((length, length, mask.shape[2]), dtype=np.int32)
    new_center = half_length

    start_row = max(0,new_center-int(height/2))
    start_col = max(0, new_center-int(width/2))
    new_image[start_row:start_row+height, start_col:start_col+width] = img[rmin:rmax+1,cmin:cmax+1]
    new_mask[start_row:start_row+height, start_col:start_col+width] = mask[rmin:rmax+1,cmin:cmax+1]

    #new_rmin = rcenter - max_dist
    #new_rmax = rcenter + max_dist
    #new_cmin = ccenter - max_dist
    #new_cmax = ccenter + max_dist
    new_image = new_image.astype('uint8')
    new_mask = new_mask.astype('uint8')
    return new_image, new_mask

In [6]:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset
!pip install -U git+https://github.com/albu/albumentations --no-cache-dir
import albumentations as albu

Collecting git+https://github.com/albu/albumentations
  Cloning https://github.com/albu/albumentations to /tmp/pip-req-build-bile5b6q
  Running command git clone -q https://github.com/albu/albumentations /tmp/pip-req-build-bile5b6q


In [7]:
class Dataset(BaseDataset):
    """Dataset. Read images, apply augmentation and preprocessing transformations.
    
    Args:
        images_dir (str): path to images folder
        masks_dir (str): path to segmentation masks folder
        class_values (list): values of classes to extract from segmentation mask
        augmentation (albumentations.Compose): data transfromation pipeline 
            (e.g. flip, scale, etc.)
        preprocessing (albumentations.Compose): data preprocessing 
            (e.g. noralization, shape manipulation, etc.)
    
    """
    
    CLASSES = ['sky', 'building', 'pole', 'road', 'pavement', 
               'tree', 'signsymbol', 'fence', 'car', 
               'pedestrian', 'bicyclist', 'unlabelled']
    
    def __init__(
            self, 
            images_dir, 
            masks_dir, 
            classes=None, 
            augmentation=None, 
            preprocessing=None,
            needBbox=False
    ):
        self.ids = os.listdir(images_dir)
        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
        self.masks_fps = [os.path.join(masks_dir, image_id.replace("img", "seg")) for image_id in self.ids]

        # convert str names to class values on masks
        # self.class_values = [self.CLASSES.index(cls.lower()) for cls in classes]    

        self.augmentation = augmentation
        self.preprocessing = preprocessing
        self.needBbox = needBbox
        self.resize = albu.Compose([albu.Resize(height=512, width=512)])
    def __getitem__(self, i):
        # read data
        image = cv2.imread(self.images_fps[i])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(self.masks_fps[i], 0)
        # unsqueeze to (512, 512, 1)
        mask = np.expand_dims(mask,-1)

        if self.needBbox:
          # cut out areas outside of bbox and pad with black pixels
          image, mask = bbox2_padding(image, mask)
          # resize to (512, 512) and change back type to int32
          sample = self.resize(image=image, mask=mask)
          image, mask = sample['image'], sample['mask']

        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']
            mask = mask.squeeze(0)
        return image, mask
    def __len__(self):
        return len(self.ids)

In [8]:
def get_training_augmentation():
    train_transform = [

        albu.HorizontalFlip(p=0.5),
        albu.ShiftScaleRotate(scale_limit=0.01, rotate_limit=10, shift_limit=0.0625, p=1, border_mode=0),

        albu.PadIfNeeded(min_height=512, min_width=512, always_apply=True, border_mode=0),
        albu.RandomCrop(height=320, width=320, always_apply=True),
        albu.Resize(height=512, width=512)
        #albu.RandomResizedCrop(height=512, width=512, scale=(0.8, 0.8))
    ]
    return albu.Compose(train_transform)

def to_tensor(x, **kwargs):
    return x.transpose(2, 0, 1).astype('float32')

def to_image_scale(x, **kwargs):
    x = x.astype(np.float32)
    x = x /255.0
    x = x - 0.5
    x = x * 2
    return x
def to_mask_scale(x, **kwargs):
    x  = x.astype(np.float32)
    x = x/255.0
    return x
def get_transposed():
    _transform = [
      albu.Lambda(image=to_tensor, mask=to_tensor),
    ]
    return albu.Compose(_transform)

def get_preprocessing():
    _transform = [
      albu.Lambda(image=to_image_scale, mask=to_mask_scale),
      albu.Lambda(image=to_tensor, mask=to_tensor),
    ]
    return albu.Compose(_transform)
'''def get_preprocessing(preprocessing_fn):
    """Construct preprocessing transform
    
    Args:
        preprocessing_fn (callbale): data normalization function 
            (can be specific for each pretrained neural network)
    Return:
        transform: albumentations.Compose
    
    """
    
    _transform = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=to_tensor, mask=to_tensor),
    ]
    return albu.Compose(_transform)'''

'def get_preprocessing(preprocessing_fn):\n    """Construct preprocessing transform\n    \n    Args:\n        preprocessing_fn (callbale): data normalization function \n            (can be specific for each pretrained neural network)\n    Return:\n        transform: albumentations.Compose\n    \n    """\n    \n    _transform = [\n        albu.Lambda(image=preprocessing_fn),\n        albu.Lambda(image=to_tensor, mask=to_tensor),\n    ]\n    return albu.Compose(_transform)'

In [9]:
import torch
import numpy as np
import segmentation_models_pytorch as smp
import matplotlib.pyplot as plt

dict_keys(['encoder.conv1.weight', 'encoder.bn1.weight', 'encoder.bn1.bias', 'encoder.bn1.running_mean', 'encoder.bn1.running_var', 'encoder.bn1.num_batches_tracked', 'encoder.layer1.0.conv1.weight', 'encoder.layer1.0.bn1.weight', 'encoder.layer1.0.bn1.bias', 'encoder.layer1.0.bn1.running_mean', 'encoder.layer1.0.bn1.running_var', 'encoder.layer1.0.bn1.num_batches_tracked', 'encoder.layer1.0.conv2.weight', 'encoder.layer1.0.bn2.weight', 'encoder.layer1.0.bn2.bias', 'encoder.layer1.0.bn2.running_mean', 'encoder.layer1.0.bn2.running_var', 'encoder.layer1.0.bn2.num_batches_tracked', 'encoder.layer1.1.conv1.weight', 'encoder.layer1.1.bn1.weight', 'encoder.layer1.1.bn1.bias', 'encoder.layer1.1.bn1.running_mean', 'encoder.layer1.1.bn1.running_var', 'encoder.layer1.1.bn1.num_batches_tracked', 'encoder.layer1.1.conv2.weight', 'encoder.layer1.1.bn2.weight', 'encoder.layer1.1.bn2.bias', 'encoder.layer1.1.bn2.running_mean', 'encoder.layer1.1.bn2.running_var', 'encoder.layer1.1.bn2.num_batches_tra

In [None]:
# # Use pretrained model to predict (without bbox)
# new_model = smp.Unet(
#             'resnet18',
#             encoder_weights='imagenet', 
#             classes=2, 
#             # activation='softmax'
#         ).to(device)

# new_model.load_state_dict(state_dict2)
# test_dataset = Dataset(
#     x_valid_dir, 
#     y_valid_dir,  
#     preprocessing=get_preprocessing(),
#     #needBbox = True,
#     classes=CLASSES,
# )

# test_dataset_vis = Dataset(
#     x_valid_dir, y_valid_dir, 
#     #needBbox = True,
#     classes=CLASSES,
# )
# for i in range(5):
#   n = np.random.choice(len(test_dataset))

#   image_vis = test_dataset_vis[n][0].astype('uint8')
#   image, gt_mask = test_dataset[n]

#   gt_mask = gt_mask.squeeze()

#   x_tensor = torch.from_numpy(image).to(DEVICE).unsqueeze(0)
#   #pr_mask =  model.predict(x_tensor)
#   pr_mask =  new_model.predict(x_tensor)
#   pr_mask = (pr_mask.squeeze().cpu().numpy().round())

#   mask = pr_mask[1] > pr_mask[0]
#   new_img = np.zeros(gt_mask.shape)
#   new_img[mask] = 1

#   visualize(
#       image=image_vis,
#       ground_truth_mask=gt_mask, 
#       mask1 = pr_mask[0],
#       mask2 = pr_mask[1],
#       seg_image=new_img
#   )

OSError: ignored

In [None]:
# # Use pretrained model to predict (withbbox)
# new_model = smp.Unet(
#             'resnet18',
#             encoder_weights='imagenet', 
#             classes=2, 
#             # activation='softmax'
#         ).to(device)

# new_model.load_state_dict(state_dict2)
# test_dataset = Dataset(
#     x_valid_dir, 
#     y_valid_dir,  
#     preprocessing=get_preprocessing(),
#     needBbox = True,
#     classes=CLASSES,
# )

# test_dataset_vis = Dataset(
#     x_valid_dir, y_valid_dir, 
#     needBbox = True,
#     classes=CLASSES,
# )
# for i in range(5):
#   n = np.random.choice(len(test_dataset))

#   image_vis = test_dataset_vis[n][0].astype('uint8')
#   image, gt_mask = test_dataset[n]

#   gt_mask = gt_mask.squeeze()

#   x_tensor = torch.from_numpy(image).to(DEVICE).unsqueeze(0)
#   #pr_mask =  model.predict(x_tensor)
#   pr_mask =  new_model.predict(x_tensor)
#   pr_mask = (pr_mask.squeeze().cpu().numpy().round())

#   mask = pr_mask[1] > pr_mask[0]
#   new_img = np.zeros(gt_mask.shape)
#   new_img[mask] = 1

#   visualize(
#       image=image_vis,
#       ground_truth_mask=gt_mask, 
#       seg_image=new_img
#   )

In [None]:
pretrained_model = model

In [None]:
!pip install ninja



In [None]:
%cd /content/
!git clone https://github.com/PeikeLi/Self-Correction-Human-Parsing
%cd Self-Correction-Human-Parsing
!mkdir checkpoints
!mkdir inputs
!mkdir outputs

/content
fatal: destination path 'Self-Correction-Human-Parsing' already exists and is not an empty directory.
/content/Self-Correction-Human-Parsing
mkdir: cannot create directory ‘checkpoints’: File exists
mkdir: cannot create directory ‘inputs’: File exists
mkdir: cannot create directory ‘outputs’: File exists


In [None]:
dataset = 'pascal'         #select from ['lip', 'atr', 'pascal']
import gdown

if dataset == 'lip':
    url = 'https://drive.google.com/uc?id=1k4dllHpu0bdx38J7H28rVVLpU-kOHmnH'
elif dataset == 'atr':
    url = 'https://drive.google.com/uc?id=1ruJg4lqR_jgQPj-9K0PP-L2vJERYOxLP'
elif dataset == 'pascal':
    url = 'https://drive.google.com/uc?id=1E5YwNKW2VOEayK9mWCS3Kpsxf-3z04ZE'

output = 'checkpoints/final.pth'
gdown.download(url, output, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1E5YwNKW2VOEayK9mWCS3Kpsxf-3z04ZE
To: /content/Self-Correction-Human-Parsing/checkpoints/final.pth
267MB [00:01, 167MB/s]


'checkpoints/final.pth'

In [10]:
import networks
#device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#dataset_settings
input_size = [512, 512]
num_classes = 7
label = ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs']

model = networks.init_model('resnet101', num_classes=num_classes, pretrained=None)
state_dict = torch.load('/content/Self-Correction-Human-Parsing/checkpoints/final.pth')['state_dict']
from collections import OrderedDict
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k[7:]  # remove `module.`
    new_state_dict[name] = v


ModuleNotFoundError: No module named 'networks'

In [None]:
# fine-tune the pretrained_model

from torch.optim.lr_scheduler import StepLR
import cv2
import matplotlib.pyplot as plt
import torch.nn as nn

train_dataset = Dataset(
    x_train_dir, 
    y_train_dir, 
    augmentation=get_training_augmentation(), 
    preprocessing = get_preprocessing(),
    needBbox = True,
    classes=CLASSES,
)
test_dataset = Dataset(
    x_valid_dir, 
    y_valid_dir,  
    preprocessing=get_preprocessing(),
    needBbox = True,
    classes=CLASSES,
)

test_dataset_vis = Dataset(
    x_valid_dir, y_valid_dir, 
    #needBbox = True,
    classes=CLASSES,
)

#initialize dataloader
batch_size = 2
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)


# initialize parameters
loss_Softmax = nn.CrossEntropyLoss(ignore_index=255)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=1, gamma=0.99)
max_score = 0
best_epoch = 0
best_iou = 0
training_loss = []

# def train
def train(model, device, train_loader, optimizer, epoch):
  model.train()
  running_loss = 0
  iou = 0
  for batch_idx, (input, target) in enumerate(train_loader):
      input, target = input.to(device),target.to(device, dtype=torch.int64)
      optimizer.zero_grad()
      output = model(input)
      upsample = torch.nn.Upsample(size=input_size, mode='bilinear', align_corners=True)
      upsample_output = upsample(output[0][-1])
      logit0 = upsample_output[:, 0, :, :]
      logit1 = torch.sum(upsample_output[:, 1:, :, :], 1)
      pr_mask = torch.stack((logit0, logit1), 1)
      # pred = output[:, 1, :, :].detach().float()
      # pred = pred > 0.5
      # label = target > 0.5
      # iou += ((pred & label).sum((1, 2)).sum() + 1e-6) / ((pred | label).sum((1, 2)).sum() + 1e-6) 
      loss = loss_Softmax(pr_mask, target)
      loss.backward()
      optimizer.step()

      running_loss += loss.item()
      # print every 15 mini-batches
  print('[%d, %5d] loss: %.3f' %
        (epoch, batch_idx + 1, running_loss / batch_idx + 1))
  print(f"iou: {iou / len(train_loader.dataset) * pred.size(0)}")
  return iou
# load model
# model.load_state_dict(state_dict2)
model.load_state_dict(new_state_dict)
model = model.to(device)
model = model.eval()

# train model for 40 epochs
epoches = np.arange(0, 10)
for i in epoches:  
    image, gt_mask = test_dataset[0]
    gt_mask = gt_mask.squeeze()
    x_tensor = torch.from_numpy(image).to(DEVICE).unsqueeze(0)
    output = model(x_tensor)
    #prob = model(x_tensor)
    upsample = torch.nn.Upsample(size=input_size, mode='bilinear', align_corners=True)
    upsample_output = upsample(output[0][-1])
    logit0 = upsample_output[:, 0, :, :]
    logit1 = torch.sum(upsample_output[:, 1:, :, :], 1)
    pr_mask = torch.stack((logit0, logit1), 1)
    pr_mask = pr_mask.softmax(dim=1).argmax(dim=1)
    pr_mask = (pr_mask != 0)
    pr_mask = (pr_mask.squeeze().cpu().numpy().round().astype('uint8'))
    visualize(img=pr_mask)
    

    print('\nEpoch: {}'.format(i))
    print(f"lr: {optimizer.param_groups[0]['lr']}")
    iou = train(model, device, train_dataloader, optimizer, i)
    # do something (save model, change lr, etc.)
    if iou > best_iou:
      best_epoch = i+1
      best_iou = iou
      torch.save(model, os.path.join(DATA_DIR, 'best_model%d.pth'% (i+1)))
      print('Model saved!')
    '''if max_score < train_logs['iou_score']:
        best_epoch = i
        max_score = train_logs['iou_score']
        torch.save(model, os.path.join(DATA_DIR, 'best_model.pth'))
        print('Model saved!')'''
        
    '''if i % 1 == 0:
        image, gt_mask = test_dataset[0]
        gt_mask = gt_mask.squeeze()
        x_tensor = torch.from_numpy(image).to(DEVICE).unsqueeze(0)
        pr_mask = model.predict(x_tensor)
        pr_mask = (pr_mask.squeeze().cpu().numpy().round())
        mask = pr_mask[1] > pr_mask[0]
        new_img = np.zeros(gt_mask.shape)
        new_img[mask] = 1
        visualize(mask = gt_mask, seg_image=new_img)'''

    scheduler.step()
  
#print(f"Best epoch is {best_epoch}")
#plt.plot(epoches, training_loss)

RuntimeError: ignored

In [None]:
 best_model = torch.load(os.path.join(DATA_DIR, 'best_model8.pth'))
test_dataset = Dataset(
    x_valid_dir, 
    y_valid_dir,  
    preprocessing=get_preprocessing(),
    needBbox = True,
    classes=CLASSES,
)

test_dataset_vis = Dataset(
    x_valid_dir, y_valid_dir, 
    #needBbox = True,
    classes=CLASSES,
)
for i in range(5):
  n = np.random.choice(len(test_dataset))

  image_vis = test_dataset_vis[n][0].astype('uint8')
  image, gt_mask = test_dataset[n]

  gt_mask = gt_mask.squeeze()

  x_tensor = torch.from_numpy(image).to(DEVICE).unsqueeze(0)
  #pr_mask =  model.predict(x_tensor)
  pr_mask =  best_model.predict(x_tensor)
  pr_mask = (pr_mask.squeeze().cpu().numpy().round())

  mask = pr_mask[1] > pr_mask[0]
  new_img = np.zeros(gt_mask.shape)
  new_img[mask] = 1

  visualize(
      image=image_vis,
      ground_truth_mask=gt_mask, 
      seg_image=new_img
  )

In [None]:
# Use fine-tuned model to predict
'''test_dataset = Dataset(
    x_valid_dir, 
    y_valid_dir,  
    preprocessing=get_preprocessing(),
    needBbox = True,
    classes=CLASSES,
)

test_dataset_vis = Dataset(
    x_valid_dir, y_valid_dir, 
    classes=CLASSES,
)
for i in range(5):
  n = np.random.choice(len(test_dataset))

  image_vis = test_dataset_vis[n][0].astype('uint8')
  image, gt_mask = test_dataset[n]

  gt_mask = gt_mask.squeeze()

  x_tensor = torch.from_numpy(image).to(DEVICE).unsqueeze(0)
  pr_mask =  model.predict(x_tensor)
  #pr_mask =  pretrained_model.predict(x_tensor)
  pr_mask = (pr_mask.squeeze().cpu().numpy().round())

  mask = pr_mask[1] > pr_mask[0]
  new_img = np.zeros(gt_mask.shape)
  new_img[mask] = 1

  visualize(
      image=image_vis,
      ground_truth_mask=gt_mask, 
      seg_image=new_img
  )
'''
