In [1]:
import os
import sys
import time
import json
import argparse
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import logging
from datetime import datetime
from tqdm import tqdm
from skimage.segmentation import relabel_sequential

import glob
import matplotlib.pyplot as plt
from torchvision import transforms
from PIL import Image
import cv2

In [2]:
from torch.nn.parallel.scatter_gather import gather
from src.models.model import LaneNet
from src.models.loss import DiscriminativeLoss
from src.utils.utils import AverageMeter, adjust_learning_rate
from src.utils.metrics import batch_pix_accuracy, batch_intersection_union
from src.utils.parallel import DataParallelModel



In [3]:
class dict2class():
    """Expose the entries of a mapping as attributes of a plain object.

    Args:
        opt: mapping providing ``keys()`` and item access; every key becomes
            an attribute name and the matching value its attribute value.
    """

    def __init__(self, opt):
        # opt["seed"] becomes reachable as instance.seed, and so on.
        for name in opt.keys():
            value = opt[name]
            setattr(self, name, value)

In [4]:
# dataset and dataloader
class PlotDigitizerDataset(torch.utils.data.Dataset):
    """Dataset of plot images with binary and per-instance segmentation targets.

    Images are the ``*.png`` files found under ``root/data_type/mode``. The
    mask of an image is read from the same path with ``"leftImg8bit"``
    replaced by ``"gtFine"``. In the mask, 0 is treated as background and
    every other distinct value as one instance.

    Args:
        root: base directory of the images.
        data_type: sub-directory of ``root`` (e.g. ``"train"``).
        mode: sub-directory of ``root/data_type`` (e.g. ``"simu"``).
        transforms: optional callable applied to the sample dict
            ``{"img": ..., "target": {...}}``; ``None`` disables it.
    """

    def __init__(self, root, data_type, mode, transforms):
        super(PlotDigitizerDataset, self).__init__()
        self.root = root
        self.transforms = transforms
        # Upper bound on instances per image; it fixes the number of channels
        # of the instance target returned by __getitem__.
        self.max_num_plots = 20
        # load all image files (sorted for a deterministic index -> file map)
        self.imglist = sorted(glob.glob(os.path.join(root, data_type, mode, "*.png")))

    def __len__(self):
        """Return the number of image files found at construction time."""
        return len(self.imglist)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            A tuple ``(img, seg_img, instance_img, num_instance)``:
            ``seg_img`` stacks ``[background, foreground]`` binary maps,
            ``instance_img`` holds one binary map per instance in its first
            ``num_instance`` channels (``max_num_plots`` channels in total),
            and ``num_instance`` is the instance count capped at
            ``max_num_plots``. Types are those produced by ``transforms``.
        """
        # load images and masks
        img_path = self.imglist[idx]
        mask_path = img_path.replace("leftImg8bit", "gtFine")
        img = Image.open(img_path).convert("RGB")
        mask_img = np.array(Image.open(mask_path))
        w, h = img.size

        foreground = mask_img > 0

        # generate binary segmentation image: channel 0 = background, 1 = foreground
        seg_img = np.zeros_like(mask_img)
        seg_img[foreground] = 1
        seg_img = np.stack([1 - seg_img, seg_img])

        # number of instances in the image: count the distinct non-zero labels
        # directly, so a mask without any background pixel is not undercounted
        labels = np.unique(mask_img[foreground])
        num_instance = min(len(labels), self.max_num_plots)

        # generate instance image: relabel the foreground to 1..N, then one
        # binary channel per instance (instances beyond max_num_plots are dropped)
        ins_img = np.zeros_like(mask_img)
        if labels.size > 0:
            # skipped for an all-background mask, where there is nothing to relabel
            ins_img[foreground] = relabel_sequential(mask_img[foreground])[0]
        instance_img = np.zeros((self.max_num_plots, h, w))
        for i in range(1, num_instance + 1):
            instance_img[i - 1, ins_img == i] = 1

        sample = {
            "img": img,
            "target": {
                "num_instance": num_instance,
                "seg_img": seg_img,
                "instance_img": instance_img,
            },
        }

        if self.transforms is not None:
            sample = self.transforms(sample)
        target = sample["target"]
        img = sample["img"]

        return img, target["seg_img"], target["instance_img"], target["num_instance"]

In [5]:
# custom transform
class ToTensor(object):
    """Convert a sample's image and target entries to torch tensors.

    The image goes through ``torchvision.transforms.ToTensor`` (H x W x C
    image -> C x H x W tensor). The ``num_instance``, ``seg_img`` and
    ``instance_img`` entries of the target dict are replaced in place by
    ``int64`` tensors.
    """

    _TARGET_KEYS = ("num_instance", "seg_img", "instance_img")

    def __call__(self, sample):
        picture, labels = sample['img'], sample['target']

        # numpy / PIL image: H x W x C  ->  torch image: C x H x W
        tensor_img = transforms.ToTensor()(picture)

        # Same conversion for every target entry, in the original order.
        for key in self._TARGET_KEYS:
            labels[key] = torch.as_tensor(labels[key], dtype=torch.int64)

        return {"img": tensor_img, "target": labels}
    
class Normalize():
    """Normalize the ``img`` entry of a sample with fixed per-channel statistics.

    The sample dict is modified in place and returned; its other entries are
    left untouched.
    """

    def __init__(self):
        # Per-channel mean / std handed to torchvision's Normalize (the values
        # match the ImageNet statistics commonly used with torchvision models).
        self.mean = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)

    def __call__(self, sample):
        picture = sample['img']
        normalizer = transforms.Normalize(self.mean, self.std)
        sample['img'] = normalizer(picture)
        return sample

class RandomRescale(object):
    """Resize a sample to fit a square canvas and paste it onto that canvas.

    The image is scaled, keeping its aspect ratio, so that its longer side
    equals ``size``. The result is pasted on a ``size x size`` canvas: at a
    random offset when ``mode == "train"``, at the top-left corner otherwise.
    ``target["seg_img"]`` and ``target["instance_img"]`` (channel-first numpy
    arrays) get the same nearest-neighbour resize and offset, with zeros in
    the padded area.

    Args:
        mode: ``"train"`` enables the random placement; any other value
            disables it.
        size: side length in pixels of the square output canvas.
    """

    def __init__(self, mode="train", size=512):
        self.size = size
        self.mode = mode

    def __call__(self, sample):
        """Return ``{"img": ..., "target": ...}`` resized and padded to ``size``.

        The ``target`` dict of ``sample`` is updated in place.
        """
        img, target = sample['img'], sample['target']
        w, h = img.size

        # Longer side -> self.size. Both dimensions must be plain ints because
        # cv2.resize, PIL's resize and the slicing below reject floats;
        # max(1, ...) keeps extremely elongated inputs from collapsing to 0.
        if w > h:
            nw = self.size
            nh = max(1, int(nw / w * h))
        else:
            nh = self.size
            nw = max(1, int(nh / h * w))

        # Offset of the resized content inside the square canvas.
        if self.mode == "train":
            dw = np.random.randint(self.size - nw + 1)
            dh = np.random.randint(self.size - nh + 1)
        else:
            dw, dh = 0, 0

        seg_img = target["seg_img"]
        instance_img = target["instance_img"]

        # cv2 works channel-last and takes the size as (width, height); nearest
        # neighbour keeps the masks binary.
        new_seg_img = cv2.resize(seg_img.transpose(1, 2, 0),
                                 (nw, nh),
                                 interpolation=cv2.INTER_NEAREST).transpose(2, 0, 1)
        new_instance_img = cv2.resize(instance_img.transpose(1, 2, 0),
                                      (nw, nh),
                                      interpolation=cv2.INTER_NEAREST).transpose(2, 0, 1)
        new_img = img.resize((nw, nh))

        # Paste everything onto zero-initialised square canvases.
        img = Image.new(mode=new_img.mode, size=(self.size, self.size))
        img.paste(new_img, (dw, dh))

        seg_img = np.zeros((new_seg_img.shape[0], self.size, self.size))
        seg_img[:, dh:dh + nh, dw:dw + nw] = new_seg_img
        instance_img = np.zeros((new_instance_img.shape[0], self.size, self.size))
        instance_img[:, dh:dh + nh, dw:dw + nw] = new_instance_img

        target["seg_img"] = seg_img
        target["instance_img"] = instance_img
        return {"img": img, "target": target}

In [19]:
# Module-level logger used by the logger.info(...) calls in the cells below.
# NOTE(review): no handler or level is configured in the cells shown here, so
# with Python's default WARNING threshold those INFO messages are presumably
# not displayed — confirm, or call logging.basicConfig(level=logging.INFO).
logger = logging.getLogger(__name__)

In [30]:
# Training configuration, wrapped in dict2class so entries read as attributes
# (opt.seed, opt.batch_size, ...).
opt = {
    "seed": 123,
    "batch_size": 4,
    "num_workers": 8,
    # Dataset root. Set the PLOT_DIGITIZER_ROOT environment variable to run the
    # notebook on another machine; the original location stays the default.
    "root": os.environ.get(
        "PLOT_DIGITIZER_ROOT",
        "/home/weixin/Documents/GitProjects/SpatialEmbeddings/data/tmp/leftImg8bit/"),
    "output_file": None,
    "cnn_type": "unet",
    "embed_dim": 4,
    "learning_rate": 1e-4,
    # presumably read by adjust_learning_rate (project helper) — TODO confirm
    "lr_update": 50,
}
opt = dict2class(opt)

In [13]:
# Sample pipeline, applied in this order:
#   1. RandomRescale – resize/pad the PIL image and the numpy targets to a square canvas
#   2. ToTensor      – convert image and targets to torch tensors
#   3. Normalize     – normalize the image tensor channel-wise
# The order matters: RandomRescale reads img.size and calls img.resize (PIL
# API), so it has to run before ToTensor.
custom_transform = transforms.Compose([RandomRescale(), 
                                       ToTensor(), 
                                       Normalize()])

In [14]:
# Training split: images are globbed from <opt.root>/<data_type>/<mode>/*.png.
data_type = "train"
mode = "simu"
dataset = PlotDigitizerDataset(opt.root, data_type, mode, custom_transform)

In [15]:
len(dataset)

400

In [16]:
# Batched, shuffled loader over the training dataset. pin_memory=True asks the
# loader for page-locked host memory, which only matters for transfers to a GPU.
shuffle = True
train_loader = torch.utils.data.DataLoader(dataset,
                                          batch_size=opt.batch_size,
                                          num_workers=opt.num_workers,
                                          shuffle=shuffle,
                                          pin_memory=True)

In [17]:
len(train_loader)

100

In [22]:
# Build the network and wrap it for multi-GPU execution.
# LaneNet and DataParallelModel are project classes (src.models.model and
# src.utils.parallel); train() gathers the wrapper's outputs itself when more
# than one GPU is visible.
logger.info('Building model...')
model = LaneNet(cnn_type=opt.cnn_type, embed_dim=opt.embed_dim)
model = DataParallelModel(model)

In [25]:
# Losses and optimizer.
# criterion_disc is applied to the instance embeddings and criterion_ce to the
# binary segmentation logits (see train()).
# DiscriminativeLoss is a project class (src.models.loss); delta_var /
# delta_dist / norm are presumably the pull margin, push margin and norm order
# of the discriminative loss — TODO confirm against its implementation.
# NOTE(review): usegpu=True is hard-coded, while the rest of the notebook only
# moves objects to CUDA when torch.cuda.is_available() — confirm CPU-only runs.
criterion_disc = DiscriminativeLoss(delta_var=0.5,
                                    delta_dist=1.5,
                                    norm=2,
                                    usegpu=True)

criterion_ce = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr=opt.learning_rate)

In [27]:
# Move the losses and the model to the GPU when one is available.
# NOTE(review): the optimizer was created from model.parameters() in an earlier
# cell, before this move; nn.Module.cuda() normally moves parameters in place
# so the optimizer should still reference them — confirm for DataParallelModel.
if torch.cuda.is_available():
    criterion_disc.cuda()
    criterion_ce.cuda()
    model = model.cuda()

In [31]:
# Learning rate for the first epoch. adjust_learning_rate is a project helper
# (src.utils.utils); it presumably applies a schedule based on opt.learning_rate
# and opt.lr_update and returns the rate in effect — TODO confirm.
epoch = 0
learning_rate = adjust_learning_rate(opt, optimizer, epoch)
logger.info('===> Learning rate: %f: ', learning_rate)

In [33]:
 # Run one training epoch. train() is defined in a cell further down this
 # file, so that cell has to be executed before this one.
 # NOTE(review): the ValueError recorded below ("expected 5, got 4") does not
 # match any unpacking in the train() shown in this notebook — presumably stale
 # output or an error raised inside a project helper; confirm and re-run.
 train(
    opt,
    model,
    criterion_disc,
    criterion_ce,
    optimizer,
    train_loader)

  0%|          | 0/100 [00:02<?, ?it/s]


ValueError: not enough values to unpack (expected 5, got 4)

In [32]:
def train(opt, model, criterion_disc, criterion_ce, optimizer, loader):
    """
    Train the network for one epoch.

    For every batch the loss is the sum of a cross-entropy loss on the binary
    segmentation output and a discriminative loss on the instance embedding
    output. The progress bar shows the current loss together with the running
    average data-loading time and total time per batch.

    Args:
        opt (Namespace): training options (not read by this function)
        model (LaneNet): model returning (bin_preds, ins_preds, hnet_preds)
        criterion_disc: a DiscriminativeLoss criterion, called as
            criterion_disc(ins_preds, ins_labels, n_lanes)
        criterion_ce: a CrossEntropyLoss criterion
        optimizer: optimizer (SGD, Adam, etc)
        loader: data loader yielding
            (images, bin_labels, ins_labels, n_lanes) batches
    Returns:
        None
    """

    batch_time = AverageMeter()
    data_time = AverageMeter()

    # switch to train mode
    model.train()

    # These do not change while the epoch runs, so query them once.
    use_cuda = torch.cuda.is_available()
    multi_gpu = torch.cuda.device_count() > 1

    end = time.time()
    pbar = tqdm(loader)
    for images, bin_labels, ins_labels, n_lanes in pbar:
        # measure data loading time
        data_time.update(time.time() - end)

        # Plain tensors are used directly: the torch.autograd.Variable wrapper
        # has been a deprecated no-op since PyTorch 0.4.
        if use_cuda:
            images = images.cuda()
            bin_labels = bin_labels.cuda()
            ins_labels = ins_labels.cuda()
            n_lanes = n_lanes.cuda()

        outputs = model(images)
        if multi_gpu:
            # the parallel wrapper returns per-device outputs; merge on device 0
            outputs = gather(outputs, 0, dim=0)
        # the third output (hnet_preds) is not used by the loss
        bin_preds, ins_preds, _ = outputs

        # bin_labels stacks [background, foreground] maps along dim 1; the index
        # of the maximum is the per-pixel class id expected by cross-entropy.
        _, bin_labels_ce = bin_labels.max(1)
        ce_loss = criterion_ce(
            bin_preds.permute(0, 2, 3, 1).contiguous().view(-1, 2),
            bin_labels_ce.view(-1))

        disc_loss = criterion_disc(ins_preds, ins_labels, n_lanes)
        loss = ce_loss + disc_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_time.update(time.time() - end)

        pbar.set_description(
            '>>> Training loss={:.6f}, i/o time={data_time.avg:.3f}s, gpu time={batch_time.avg:.3f}s'.format(
                loss.item(),
                data_time=data_time,
                batch_time=batch_time))
        end = time.time()

In [10]:
# Build the dataset with the signature defined in this notebook,
# PlotDigitizerDataset(root, data_type, mode, transforms): the split ("train")
# is passed as data_type instead of being baked into root. The resulting glob
# pattern (<root>/train/simu/*.png) is the same one the old call meant.
root = "/home/weixin/Documents/GitProjects/SpatialEmbeddings/data/tmp/leftImg8bit/"
data_type = "train"
mode = "simu"
dataset = PlotDigitizerDataset(root=root, data_type=data_type, mode=mode,
                               transforms=custom_transform)

In [12]:
# Sanity check on one sample: image, [background, foreground] segmentation
# target, per-instance target (max_num_plots channels) and instance count.
img, seg, ins, num_ins = dataset[0]
img.shape, seg.shape, ins.shape, num_ins

(torch.Size([3, 512, 512]),
 torch.Size([2, 512, 512]),
 torch.Size([20, 512, 512]),
 tensor(15))

In [None]:
# Loader over `dataset`. The class is referenced through torch.utils.data
# because no name `data` is imported or defined anywhere in this notebook.
data_loader = torch.utils.data.DataLoader(dataset,
                                          batch_size=opt.batch_size,
                                          num_workers=opt.num_workers,
                                          shuffle=shuffle,
                                          pin_memory=True)

In [6]:
class dict2class():
    """Attribute-style view of a mapping: ``opt["x"]`` becomes ``obj.x``.

    NOTE: this notebook defines the same class a second time in another cell;
    one of the two definitions can be removed.
    """

    def __init__(self, opt):
        for attr_name in opt.keys():
            attr_value = opt[attr_name]
            setattr(self, attr_name, attr_value)

In [7]:
# Wrap the options only while they are still a plain dict: dict2class reads
# opt.keys(), which an already wrapped object does not have, so re-running this
# cell (or running it after another cell that wrapped `opt`) would raise.
if isinstance(opt, dict):
    opt = dict2class(opt)
opt.seed

123

In [3]:
# Module-level logger. logging.getLogger returns the same logger object for
# the same name, so repeating this assignment in several cells is harmless.
logger = logging.getLogger(__name__)

In [8]:
# Set the random seeds manually for reproducibility.
# The CPU generator is always seeded, not only when CUDA is missing, because
# DataLoader shuffling draws from it. NumPy is seeded as well because
# RandomRescale picks its offsets with np.random. All visible GPUs are seeded
# explicitly when CUDA is available.
torch.manual_seed(opt.seed)
np.random.seed(opt.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(opt.seed)

In [4]:
logger



In [11]:

# Scratch: seven evenly spaced values from 4 to 10, used as non-sequential
# label values for the relabel_sequential experiment.
a = np.linspace(4,10,7)
a

array([ 4.,  5.,  6.,  7.,  8.,  9., 10.])

In [13]:
# Scratch: relabel_sequential returns (relabeled array, forward map, inverse
# map); the dataset class only uses element [0].
relabel_sequential(a.astype(np.int32))

(array([1, 2, 3, 4, 5, 6, 7], dtype=int32),
 ArrayMap(array([ 4,  5,  6,  7,  8,  9, 10], dtype=int32), array([1, 2, 3, 4, 5, 6, 7], dtype=int32)),
 ArrayMap(array([1, 2, 3, 4, 5, 6, 7], dtype=int32), array([ 4,  5,  6,  7,  8,  9, 10], dtype=int32)))

In [14]:
# Scratch: random 0/1 vector standing in for a binary mask.
a = np.random.randint(2, size=10)
a

array([0, 0, 0, 0, 0, 0, 1, 1, 1, 0])

In [18]:
# Scratch: np.stack adds a new leading axis, as used for the
# [background, foreground] segmentation target.
np.stack([a,a]).shape

(2, 10)

In [17]:
# Scratch: complementary boolean masks (background, foreground) from the 0/1 vector.
~(a==1), a==1

(array([ True,  True,  True,  True,  True,  True, False, False, False,
         True]),
 array([False, False, False, False, False, False,  True,  True,  True,
        False]))

In [5]:

from src.dataloader import get_data_loader

ModuleNotFoundError: No module named 'utils'