In [1]:
import os
from pelops.datasets.chipper import FrameProducer, ExtractedChip, Methods, Chipper
import cv2
from hdfs3 import HDFileSystem
import glob

In [2]:
# Host name or IP address of the HDFS node to connect to
hdfs_location = '0.0.0.0'

# --- Texas ('TX') source: video glob pattern and the camera to process ---
data_source = 'TX'
video_location = '/datasets/texas_dot/Ending20160903/user/*.mp4'
camera_name = 'IH10_Martin'
# Other Texas cameras (uncomment exactly one to use it instead):
#camera_name = 'IH37_Jones'
#camera_name = 'IH37_9th'

# --- Louisiana ('LA') source: uncomment all three lines to switch over ---
#data_source = 'LA'
#video_location = '/datasets/louisiana_dot/72/*.mp4'
#camera_name = 'Claiborne1'

# Chipper tuning: kernel size, threshold and chipping method
kernel_size = (11, 11)
threshold = 6
chipping_method = Methods.OPENCV

# When True, apply_mask (defined in the next cell) is passed to the chipper as
# the mask modifier. Its bounds are set through that function's defaults,
# e.g. input_mask_top=60 zeroes the first 60 entries of the array.
apply_input_mask = False

# When True, expand_box (defined in the next cell) is passed to the chipper to
# enlarge each bounding box; tune its `amount` default there.
apply_box_expander = False

# Directory that receives the generated chips, e.g. '/datasets/chips/'
chip_output_location = '/tmp/'

In [3]:
# Open the HDFS connection, then list every path matching the video glob pattern
hdfs = HDFileSystem(
    host=hdfs_location,
    port=8020,
)
filenames = hdfs.glob(video_location)

def get_info(filename, source=None):
    """Build the sort key for a video file: (name prefix, numeric timestamp).

    Only the basename of ``filename`` is parsed.

    Args:
        filename: Path or bare name of the video file.
        source: Naming convention to apply, 'TX' or 'LA'. When None (the
            default, which is what ``sorted(..., key=get_info)`` uses) the
            notebook-level ``data_source`` setting is read instead.

    Returns:
        A ``(prefix, timestamp)`` tuple. ``prefix`` is the text before the
        first separator of the basename and ``timestamp`` is an int.

    Raises:
        ValueError: If the data source is neither 'TX' nor 'LA'. Previously
            this case returned None, which only surfaced later as a confusing
            comparison error inside ``sorted``.
        IndexError: If the basename has fewer separator-delimited fields than
            the chosen convention requires.
    """
    if source is None:
        source = data_source

    bname = os.path.basename(filename)

    if source == 'TX':
        # Texas convention: dash-separated fields, second field is the timestamp
        parts = bname.split('-')
        return parts[0], int(parts[1])

    if source == 'LA':
        # Louisiana convention: underscore-separated timestamp elements in
        # fields 1..5; the last one still carries the file extension.
        parts = bname.split('_')
        stamp = ''.join(parts[1:5]) + parts[5].split('.')[0]
        return parts[0], int(stamp)

    raise ValueError(
        "Unsupported data source {!r}; expected 'TX' or 'LA'".format(source)
    )

# Order the videos by (prefix, timestamp), then keep only the paths that
# mention the selected camera
filenames = sorted(filenames, key=get_info)
filenames_filtered = list(filter(lambda path: camera_name in path, filenames))

# Build the FrameProducer over the selected videos, opening each through HDFS.
# Note - Chip generation on multiple videos can take a long time
fp = FrameProducer(filenames_filtered, hdfs.open)

def apply_mask(input_arry, input_mask_top=60, input_mask_bottom=250):
    """Zero out the leading and trailing entries of an image array, in place.

    Entries before index ``input_mask_top`` and from index
    ``input_mask_bottom`` onward (along the first axis) are set to 0, e.g.
    ``input_mask_top=60`` zeroes the first 60 entries. The same array object
    is modified and returned.
    """
    blanked_regions = (
        slice(None, input_mask_top),
        slice(input_mask_bottom, None),
    )
    for region in blanked_regions:
        input_arry[region] = 0
    return input_arry

def expand_box(x, y, w, h, amount=5):
    """Grow a bounding box by ``amount`` on every side.

    Intended for the ``box_expander`` parameter of the Chipper class. ``x``
    and ``y`` are each reduced by ``amount`` while ``w`` and ``h`` each grow
    by twice ``amount``. No clamping is applied, so the returned x/y can be
    negative.

    Returns:
        The expanded box as an ``(x, y, w, h)`` tuple.
    """
    growth = amount * 2
    new_x, new_y = x - amount, y - amount
    return new_x, new_y, w + growth, h + growth

In [4]:
def get_frame_lookup(fp,
                     kernel_size=(25,25),
                     threshold=30,
                     chipping_method=Methods.BACKGROUND_SUB,
                     mask_modifier=None,
                     box_expander=None
                    ):
    """Run a Chipper over ``fp`` and index the resulting chips by frame number.

    Args:
        fp: FrameProducer object supplying the frames.
        kernel_size: Size of the convolutional kernel; default (25,25). The
            original notes state both values must be odd.
        threshold: Threshold value used in OpenCV's threshold function;
            default 30.
        chipping_method: Type of chipping; default Methods.BACKGROUND_SUB.
        mask_modifier: Optional callable forwarded unchanged to Chipper
            (None leaves it unset).
        box_expander: Optional callable forwarded unchanged to Chipper
            (None leaves it unset).

    Returns:
        ``(frame_lookup, chips)`` where ``chips`` is the flat list of every
        chip yielded by the Chipper, in iteration order, and ``frame_lookup``
        is a ``defaultdict(list)`` mapping each ``frame_number`` to the chips
        from that frame.
    """
    from collections import defaultdict

    chipper_options = dict(
        mask_modifier=mask_modifier,
        kernel_size=kernel_size,
        threshold=threshold,
        box_expander=box_expander,
        chipping_method=chipping_method,
    )
    chipper = Chipper(fp, **chipper_options)

    # Each iteration of the chipper yields the chips for one frame; flatten them
    chips = [chip for frame_chips in chipper for chip in frame_chips]

    # Group the flat list by the frame each chip came from
    frame_lookup = defaultdict(list)
    for chip in chips:
        frame_lookup[chip.frame_number].append(chip)

    return frame_lookup, chips

In [5]:
# Turn the config toggles into the optional hooks (None disables a hook),
# then create chips and frame_lookup
mask_modifier = apply_mask if apply_input_mask else None
box_expander = expand_box if apply_box_expander else None

frame_lookup, chips = get_frame_lookup(
    fp,
    kernel_size=kernel_size,
    threshold=threshold,
    chipping_method=chipping_method,
    mask_modifier=mask_modifier,
    box_expander=box_expander,
)

In [6]:
# Write created chips to HDFS
# Output format is '[original file basename]-chip-[frame number]-[iterator].jpg'
# TODO - Find a way to have cv2 write directly to hdfs instead of using a local temp file
tmp_filename='temp.jpg'
chips_written = 0
chips_failed = 0
try:
    for frame_chips in frame_lookup.values():
        # The iterator restarts at 0 for every frame and advances for every
        # chip, including ones that fail to encode, so names stay stable.
        for i, chip in enumerate(frame_chips):
            video_stem = os.path.basename(chip.filename).split('.')[0]
            filename = video_stem+"-chip-"+str(chip.frame_number)+'-'+str(i)+'.jpg'
            # cv2.imwrite reports failure through its return value. Without
            # this check the previous chip's temp file would be uploaded
            # again under the new name.
            if not cv2.imwrite(tmp_filename,chip.img_data):
                chips_failed += 1
                continue
            hdfs.put(tmp_filename,os.path.join(chip_output_location,filename))
            chips_written += 1
finally:
    # Delete temp file, even when an upload raised part-way through
    if os.path.isfile(tmp_filename):
        os.remove(tmp_filename)

print("Video Location:",video_location)
print("Camera Name:",camera_name)
# Report what was actually uploaded rather than how many chips were generated
print("Chip images written to HDFS:",chips_written)
if chips_failed:
    print("Chip images that could not be encoded:",chips_failed)

Chip images written to HDFS: 12954
