### Import Library 

In [5]:
from __future__ import print_function, division
import os
import sys


import numpy as np
import cv2

import math


import time
import importlib
import argparse

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from network.net import net_option
from utils.logger import MyLog
from utils.core_utils import count_parameters



In [8]:
######################################
# Global Parameter
######################################
softmax = torch.nn.Softmax(dim = 1)
SHOW_SEG_RESULT = True
confidence_threshold = 0.7
DEBUG_MIN_OBJ_WIDTH = True
wmin = 100

pick_interval = 8
min_distance_threshold = 50
min_angle_threshold = 60

LINE_WIDTH = 2

color_polate_4cls = {1: "#00FF00",
                     2: "#0000FF",
                     3: "#FFFF00",
                     4: "#00FFFF",
                     5: "#FF0000"}

color_polate_4cls_QT = {3: "FF0",
                        4: "0FF",
                        5: "F00"}


cmap_4cls = {1: (  0,255,  0),
             2: (  0,  0,255),
             3: (255,255,  0),
             4: (  0,255,255),
             5: (255,  0,  0)}
alpha = 0.5


INPUT_SHAPE = (256,512 ,3) #row col




######################################
# Global Parameter
######################################

In [18]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [16]:
def get_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", dest = "model", type = str, help = "Network archtecture.")
    parser.add_argument("-i", dest = "input_size", type = int, nargs = "+", default = [256,512], help = "Network input size.")
    parser.add_argument("-c", dest = "checkpoint", type = str, help = "Checkpoint file.")
    parser.add_argument("-iv", dest = "input_video", type = str, required = True, help = "Input video for demo.")
    parser.add_argument("-ot", dest = "od_threshold",  type = float, default = 0.3, help = "Detection Confidence threshold.")
    parser.add_argument("-ov", dest = "output_video", action = "store_true", help = "Input video for demo.")
    parser.add_argument("-camera", dest = "camera", action = "store_true", help = "Input video for demo.")
    return parser.parse_args()
# args = get_arguments()

model_type = 'Jacinto_256x512_v3'

In [6]:
######################################
# Global Parameter
######################################
cap = cv2.VideoCapture(0)

In [26]:
model_check_point = 'weights/sur_object_detection/Jacinto_ssd_256x512_256x512_detection_v2_sur4_bs_16_lr_1e-05_fixbackbone_False_freeze_bn_False_sampler_False_normalize_coor_False_Jacinto_SSD_neg_pos6_499.pt'

net = net_option(model_type, mode = "end2end")
net = net.to(device)

# resume from checkpoint
assert os.path.exists(model_check_point), "Checkpoint {} does not exist.".format(model_check_point)
state = torch.load(model_check_point)
net.load_state_dict(state["model_state"])



('num_classes_OD', 3)
('num_classes_seg', 8)
('use_focal_loss', False)
('self.pyramid_levels', [3, 4, 4, 5, 5, 6, 6, 7])
('self.min_sizes', [20.48, 25.6, 25.6, 51.2, 51.2, 76.3, 76.3, 128.0])
('self.max_sizes', [51.2, 51.2, 51.2, 76.8, 76.8, 128.0, 128.0, 176.17])
('self.offsets', [0.5, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5])
('self.normalize_anchor', False)


In [27]:
######################################
# Main Process
######################################




skip_frame = 1
count = 0
net.target_available = False
net.eval()


with torch.no_grad():
    while(cap.isOpened()):
        ret,frame = cap.read()
        assert frame.shape[0]>0
        
        # skip until spicific frame
        if(count%skip_frame!=0):
            count += 1
            continue
        else:
            count +=1
        
        start = time.time()
        RGB = np.zeros(INPUT_SHAPE,dtype=np.uint8)
        print("-------------------")
        tic = time.time()
        
        #resize image
        img = cv2.resize(frame, (INPUT_SHAPE[1], INPUT_SHAPE[0]), 0, 0, interpolation = cv2.INTER_LINEAR)
        #BRG to RGB
        img = img[:,:,[2,1,0]]
        # transfrom and expand dim
        input = img.transpose((2, 0, 1))
        input = np.expand_dims(input, axis = 0)
        # push to tensor
        input = torch.from_numpy(input).float()
        input = input.to(device)
        toc = time.time()
        print("Preprocess time: {:.3f}".format(toc - tic))
        # net forward
        tic =time.time()
        scores, classification, transformed_anchors, pred_seg_t = net(input)
        pred_seg = softmax(pred_seg_t)
        toc =time.time()
        print("Net forward time: {}".format(toc - tic))
        tic =time.time()
        
        pred_seg = pred_seg.squeeze(dim = 0)
        pred_seg = pred_seg.data.cpu().numpy()
        pred_seg[pred_seg < confidence_threshold] = 0
        img_draw = img.astype(np.uint8)
        prob_map = np.max(pred_seg[3:, :, :], axis = 0)
        pred_seg_max = np.argmax(pred_seg, axis = 0)
        toc =time.time()
        print("Pre-Post-process time: {}".format(toc - tic))
        
        
        tic = time.time()
        overlay_flag = np.zeros((INPUT_SHAPE[0], INPUT_SHAPE[1]))
        # draw
        for key, color in cmap_4cls.items():
        # for key, color in dataset.color_map.items():
            if key == 0:
                continue
            if key >= 3:
                continue
            else:
                RGB[pred_seg_max_up == key] = np.array(color)
                overlay_flag[pred_seg_max_up == key] = 1
        # overlay
        overlay = cv2.addWeighted(img_draw, alpha, RGB, (1-alpha), 0)
        # overlay2 = cv2.addWeighted(img_draw2, alpha, RGB2, (1-alpha), 0)
        img_draw[overlay_flag == 1] = overlay[overlay_flag == 1]
        img_draw_down = img_draw.copy()
        
        toc =time.time()
        print("Segmentation draw time: {}".format(toc - tic))



-------------------
Preprocess time: 0.001


ValueError: need more than 3 values to unpack

In [21]:
net(input)

NMS took 0.0349678993225


[tensor([0.2526, 0.2567, 0.2548,  ..., 0.2568, 0.2568, 0.2554], device='cuda:0',
        grad_fn=<MaxBackward0>),
 tensor([0, 1, 1,  ..., 1, 1, 0], device='cuda:0'),
 tensor([[  0.0000,   0.0000,  21.7824,   9.8759],
         [  0.0000,   0.0000,   9.9588,  21.6731],
         [  6.0516,   0.0000,  17.9619,  21.6628],
         ...,
         [101.4241, 146.9913, 282.1036, 237.2735],
         [229.4254, 146.9911, 410.1025, 237.2723],
         [384.1982, 128.0954, 512.0000, 255.6854]], device='cuda:0',
        grad_fn=<IndexBackward>)]