In [96]:
import os
from pelops.datasets.chipper import FrameProducer, ExtractedChip, Methods, Chipper
import cv2
from hdfs3 import HDFileSystem
import xml.etree.ElementTree as ET
from collections import namedtuple
import glob
from PIL import Image
import numpy as np
import itertools

%matplotlib inline
import matplotlib.pyplot as plt

In [97]:
# HDFS IP address or node name
hdfs_location = '0.0.0.0'

# Set Texas ('TX') data source, path to videos and camera name
data_source = 'TX'
video_location = '/datasets/texas_dot/Ending20160903/user/*.mp4'
camera_name = 'IH37_Jones' 
#camera_name = 'IH10_Martin'
#camera_name = 'IH37_9th'

# Set Louisiana ('LA') data source, path to videos and camera name
#data_source = 'LA'
#video_location = '/datasets/louisiana_dot/72/*.mp4'
#camera_name = 'Claiborne1'

# Set xml path and labeled path 
xml_basepath = '/data/fs4/teams/pelops/labelme/annotations/'
image_path = '/data/fs4/teams/pelops/labelme/images/{}'

# Set kernel, threshold and chipping method parameter space
candidate_kernel_sizes = [(5,5),(9,9),(11,11),(15,15),(21,21),(29,29),(33,33)]
candidate_thresholds = [2,4,6,8,10,12,15,17,20,27,33,40,50]
candidate_chipping_methods = [Methods.BACKGROUND_SUB, Methods.OPENCV]

# Set maximum number of top chip scoring results to display
topn = 10

# Toggle creation of chipping accuracy visualizations
retain_visualizations = False

# Toggle applying an input mask; Set mask input parameters for apply_mask function in next cell 
apply_input_mask = False

# Toggle expanding the boundary box dimensions; Set box parameter 'amount' for expand_box function in next cell 
apply_box_expander = False

In [98]:
# Connect to HDFS and obtain video filenames
hdfs = HDFileSystem(host=hdfs_location, port=8020)
filenames = hdfs.glob(video_location)

# Format filenames to use as key in sorting
def get_info(filename):
    bname = os.path.basename(filename)
    if data_source=='TX':
        # File naming convention for Texas uses dashes between timestamps
        return bname.split('-')[0], int(bname.split('-')[1])
    elif data_source=='LA':
        # File naming convention for Louisiana uses underscores inside timestamp elements
        return bname.split('_')[0], int(bname.split('_')[1]+bname.split('_')[2]+bname.split('_')[3]+bname.split('_')[4]+bname.split('_')[5].split('.')[0])

# Sort filenames and filter based on camera name, order by timestamp
filenames = sorted(filenames, key=get_info)
filenames_filtered = [filename for filename in filenames if camera_name in filename]

# Creates FrameProducer generator object from first video (by timestamp) in filenames_filtered 
# There needs to be truth data for this video in xml_basepath
fp = FrameProducer([filenames_filtered[0]], hdfs.open)

# Sets numpy array elements on image input_arry to 0 at specific indices based on input data
# ex: input_mask_top=60 will set first 60 rows of image array to 0
def apply_mask(input_arry,input_mask_top=80,input_mask_bottom=300):
    input_arry[:input_mask_top]=0
    input_arry[input_mask_bottom:]=0
    return input_arry

# Increase boundary box size; Used with box_expander parameter of Chipper class
def expand_box(x,y,w,h, amount=5):
    return x-amount, y-amount, w+amount*2, h+amount*2

In [99]:
# Function 
# get_frame_lookup - Takes frame and outputs frame_lookup and chips
# Input
# fp - FrameProducer object 
# kernel_size - Size of convolutional kernel, default=(25,25) and must be odd numbers
# threshold - Threshold value used in OpenCV's threshold function, default=30
# chipping_method - Type of chipping, default = Methods.BACKGROUND_SUB
# Output
# frame_lookup - Dictionary of chip reference values keyed on frame_numbers 
# chips - List of Chipper objects

def get_frame_lookup(fp, 
                     kernel_size=(25,25), 
                     threshold=30, 
                     chipping_method=Methods.BACKGROUND_SUB,
                     mask_modifier=None,
                     box_expander=None
                    ):
    chipper = Chipper(fp, 
                  mask_modifier=mask_modifier,
                  kernel_size = kernel_size,
                  threshold = threshold,
                  box_expander=box_expander,
                  chipping_method=chipping_method)

    # Get chips by frame number
    chips = []
    for frame_chips in chipper:
        chips.extend(frame_chips)

    from collections import defaultdict
    frame_lookup = defaultdict(list)
    for ec in chips:
        frame_lookup[ec.frame_number].append(ec)
    return frame_lookup, chips

In [100]:
# Get XML file name based on orignal image file name and frame
def get_xml_name(filename, frame_number):   
    return '{}_{}_.xml'.format(os.path.basename(filename),frame_number)

# Define namedtuple for bounding box attributes
BBox =namedtuple('BBox', ['x','y', 'w', 'h'])

# Take list of polygon dimensions and return bounding box dimensions of X,Y,width,height 
def get_x_y_w_h(bbox):
    xs = [x for x,y in bbox]
    ys = [y for x,y in bbox]
    min_x = min(xs)
    min_y = min(ys)
    max_x = max(xs)
    max_y = max(ys)
    return BBox(x=min_x, 
                y=min_y, 
                w=max_x-min_x, 
                h=max_y-min_y)

# Obtain bounding boxes from XML objects, return frame_filename and bounding boxes
def get_bboxes_from_xml(xml_obj):
    for top_level_tag in xml_obj.iter('filename'):
        frame_filename = top_level_tag.text
    bboxes = []
    for top_level_tag in xml_obj.iter('object'):
        for polygon in top_level_tag.iter('polygon'):
            pts = []
            for pt in polygon.iter('pt'):
                x = None
                y = None
                for dim in pt.iter():
                    if dim.tag == 'x':
                        x = int(dim.text)
                    elif dim.tag == 'y':
                        y = int(dim.text)
                if x and y:
                    pts.append((x,y))
            bboxes.append(get_x_y_w_h(pts))
    return frame_filename, bboxes

# Obtain bounding boxes from filename
def get_bboxes_from_file(filename):
    tree = ET.parse(filename)
    root = tree.getroot()
    return get_bboxes_from_xml(root)

In [101]:
# Build xml_files_lookup dictionary from xml_basepath
xml_files_lookup = {}
for filename in glob.glob(os.path.join(xml_basepath, '*')):
    xml_files_lookup[os.path.basename(filename)] = filename

In [102]:
# Find intersection union score for two bounding boxes and return X,Y,width,height
def get_intersection_union(bbox1, bbox2):
    # Top left of intersection
    xA = max(bbox1.x, bbox2.x)
    yA = max(bbox1.y, bbox2.y)
    
    # Bottom right
    xB = min(bbox1.x+bbox1.w, bbox2.x+bbox2.w)
    yB = min(bbox1.y+bbox1.h, bbox2.y+bbox2.h)
    width = xB-xA
    height = yB-yA
    if width < 0 or height < 0:
        return None
    return (xA, yA, width, height)

# Calculate intersection over union (IoU) score for two bounding boxes
def get_iou(bbox1, bbox2):
    # Top left of intersection
    xA = max(bbox1.x, bbox2.x)
    yA = max(bbox1.y, bbox2.y)
    
    # Bottom right
    xB = min(bbox1.x+bbox1.w, bbox2.x+bbox2.w)
    yB = min(bbox1.y+bbox1.h, bbox2.y+bbox2.h)
    width = xB-xA
    height = yB-yA
    if width < 0 or height < 0:
        return 0
    else:
        intersectionArea = (xB - xA + 1) * (yB - yA + 1)
        unionArea = bbox1.w*bbox1.h + bbox2.w*bbox2.h
        
        return intersectionArea/float(unionArea - intersectionArea)

    return (xA, yA, width, height)   

In [103]:
# Visually plot performance of predicted bounding box and ground truth bounding box for filename
def visualize_perf(filename, bboxes1, bboxes2):
    frame = Image.open(image_path.format(filename))
    frame = np.uint8(frame)
    for box in bboxes1:
        x, y, w, h = box.x, box.y, box.w, box.h
        cv2.rectangle(frame,(x,y),(x+w,y+h),(0,255,0),1)
    for box in bboxes2:
        x, y, w, h = box.x, box.y, box.w, box.h
        cv2.rectangle(frame,(x,y),(x+w,y+h),(255,0,0),1)    
    intersection = get_intersection_union(bboxes1[0], bboxes2[0])
    if intersection:
        x,y,w,h = intersection
        cv2.rectangle(frame,(x,y),(x+w,y+h),(0,0,255),5)
    return frame

In [104]:
# Score performance of chipping against ground truth for list of frames and associated chips
def score_frame_lookup(frame_lookup, chips):
    video_ious = []
    plots = []
    for frame_number in sorted(frame_lookup):
        chips = frame_lookup[frame_number]
        bname = os.path.basename(chips[0].filename)
        xml_filename = get_xml_name(bname, frame_number)
        if xml_filename in xml_files_lookup:
            xml_full_filename = xml_files_lookup[xml_filename]
            frame_image, truth_bboxes = get_bboxes_from_file(xml_full_filename)
            found_bboxes = []
            for chip in chips:
                found_bboxes.append((chip.x, chip.y, chip.w, chip.h))
            frame_ious = []
            for truth_bbox in truth_bboxes:
                box_ious = []
                for chip in chips:
                    box_ious.append(get_iou(chip, truth_bbox))
                frame_ious.append(max(box_ious))
            video_ious.append(np.mean(frame_ious))
            if retain_visualizations:
                frame = visualize_perf(frame_image, truth_bboxes, chips)
                plots.append(frame)
    return np.mean(video_ious), plots

In [105]:
# Calculate chipping scores
score_strings = []
visualizations = []

if apply_input_mask:
    mask_modifier = apply_mask
else:
    mask_modifier = None
if apply_box_expander:
    box_expander = expand_box
else:
    box_expander = None

for kernel_size in candidate_kernel_sizes:
    print("Processing kernel",kernel_size)
    for threshold in candidate_thresholds:
        for chipping_method in candidate_chipping_methods:
            frame_lookup, chips = get_frame_lookup(fp, 
                            kernel_size=kernel_size,
                            threshold=threshold, 
                            chipping_method=chipping_method,
                            mask_modifier=mask_modifier,
                            box_expander=box_expander)
            score, plots = score_frame_lookup(frame_lookup, chips)
            visualizations.extend(plots)
            score_strings.append([kernel_size, threshold, chipping_method, score])

Processing kernel (5, 5)
Processing kernel (9, 9)
Processing kernel (11, 11)
Processing kernel (15, 15)
Processing kernel (21, 21)
Processing kernel (29, 29)
Processing kernel (33, 33)


In [106]:
# Print out top n results from chipping scores
print("Video Location:",video_location)
print("Camera Name:",camera_name)
print("Input Mask:",apply_input_mask)
print("Box Expander:",apply_box_expander)
print("[Kernel, Threshold, Chipping Method, Score]")
for result in sorted(score_strings, key=lambda x: x[3], reverse=True)[:topn]:
   print('{}, {}, {}: {}'.format(result[0],result[1],result[2],result[3]))

Video Location: /datasets/texas_dot/Ending20160903/user/*.mp4
Camera Name: IH10_Martin
Input Mask: False
Box Expander: False
[Kernel, Threshold, Chipping Method, Score]
(9, 9), 4, Methods.OPENCV: 0.49292362082241353
(9, 9), 6, Methods.OPENCV: 0.49082251696370893
(11, 11), 6, Methods.OPENCV: 0.4906139308344932
(11, 11), 4, Methods.OPENCV: 0.48512228085112696
(11, 11), 8, Methods.OPENCV: 0.48397767725140955
(5, 5), 2, Methods.OPENCV: 0.48310739787825785
(9, 9), 2, Methods.OPENCV: 0.4811673714624358
(15, 15), 6, Methods.OPENCV: 0.4800441441760739
(15, 15), 8, Methods.OPENCV: 0.47229367775426506
(11, 11), 2, Methods.OPENCV: 0.4686243078555041


In [107]:
# Execute cell repeatedly to cycle through frame plots
if retain_visualizations:
    try:
        index+=1
        plt.imshow(visualizations[index])
    except:
        index=0
        if len(visualizations): plt.imshow(visualizations[index])
else:
    print("There are no plots because 'retain_visualizations' is", retain_visualizations)

There are no plots because 'retain_visualizations' is False


In [108]:
# Full experiment results
print("Video Location:",video_location)
print("Camera Name:",camera_name)
print("Input Mask:",apply_input_mask)
print("Box Expander:",apply_box_expander)
print("[Kernel, Threshold, Chipping Method, Score]")
for line in score_strings: 
    print('{}, {}, {}: {}'.format(line[0],line[1],line[2],line[3]))

Video Location: /datasets/texas_dot/Ending20160903/user/*.mp4
Camera Name: IH10_Martin
Input Mask: False
Box Expander: False
[Kernel, Threshold, Chipping Method, Score]
(5, 5), 2, Methods.BACKGROUND_SUB: 0.022396602204211465
(5, 5), 2, Methods.OPENCV: 0.48310739787825785
(5, 5), 4, Methods.BACKGROUND_SUB: 0.23852662814505324
(5, 5), 4, Methods.OPENCV: 0.4312071395481312
(5, 5), 6, Methods.BACKGROUND_SUB: 0.36563493078316023
(5, 5), 6, Methods.OPENCV: 0.41660533034859
(5, 5), 8, Methods.BACKGROUND_SUB: 0.38552925156310425
(5, 5), 8, Methods.OPENCV: 0.38974509471494284
(5, 5), 10, Methods.BACKGROUND_SUB: 0.37016902398817403
(5, 5), 10, Methods.OPENCV: 0.37725362124123063
(5, 5), 12, Methods.BACKGROUND_SUB: 0.3563109153717948
(5, 5), 12, Methods.OPENCV: 0.3552908418972148
(5, 5), 15, Methods.BACKGROUND_SUB: 0.3371846098110606
(5, 5), 15, Methods.OPENCV: 0.3177241766756718
(5, 5), 17, Methods.BACKGROUND_SUB: 0.3220181682957436
(5, 5), 17, Methods.OPENCV: 0.2988214291093638
(5, 5), 20, Meth