In [1]:
import numpy as np
import pandas as pd
import matplotlib as mat
import matplotlib.pyplot as plt
import caffe
import cv2
import json
import math
# --- Model selection ---------------------------------------------------
# 'coco' selects the MS-Coco SSD weights. The ImageNet variant ('ILSVRC') is
# left disabled because it wasn't trained on all categories.
MODEL = 'coco'
#MODEL = 'ILSVRC'

# Side length (pixels) of the square network input: 300x300, trained on coco or ILSVRC.
# Open question: could the coco model be trained further on
# http://image-net.org/synset?wnid=n02773838 ?
IMAGE_SIZE = 300
#IMAGE_SIZE = 512 # for 512x512 trained on coco

# Minimum confidence for a detection to be kept by detect().
THRESHOLD = 0.20


def any_in(a, b):
    """Return True when iterables `a` and `b` have at least one element in common."""
    return bool(set(a).intersection(b))


# Box colors: 255 entries sampled evenly across the HSV colormap,
# looked up by numeric label id when drawing.
COLORS = plt.cm.hsv(np.linspace(0, 1, 255)).tolist()

# Run Caffe on GPU 0 (swap in set_mode_cpu() to run on the CPU instead).
caffe.set_device(0)
caffe.set_mode_gpu()
#caffe.set_mode_cpu()

%matplotlib inline

In [2]:
from google.protobuf import text_format
from caffe.proto import caffe_pb2

# Load the label map that matches the selected MODEL
if MODEL == 'coco':
    labelmap_file = 'data/coco/labelmap_coco.prototxt'
else:
    labelmap_file = 'data/ILSVRC2016/labelmap_ilsvrc_det.prototxt'
labelmap = caffe_pb2.LabelMap()
# Use a context manager so the prototxt handle is closed after parsing
# (and avoid shadowing the Python 2 builtin `file`).
with open(labelmap_file, 'r') as labelmap_fh:
    text_format.Merge(str(labelmap_fh.read()), labelmap)

def get_labelname(labelmap, labels):
    """Translate numeric label ids into their display names.

    Args:
        labelmap: object whose ``item`` sequence holds entries exposing
            ``label`` and ``display_name`` attributes.
        labels: a single label id, or a list/tuple of label ids.

    Returns:
        A list of display names, one per requested label, in input order.

    Raises:
        AssertionError: if a requested label has no entry in ``labelmap``.
    """
    if not isinstance(labels, (list, tuple)):
        labels = [labels]

    # Build the id -> name table once instead of rescanning the map for every
    # label; setdefault keeps the first entry should an id appear twice.
    names_by_label = {}
    for item in labelmap.item:
        names_by_label.setdefault(item.label, item.display_name)

    labelnames = []
    for label in labels:
        if label not in names_by_label:
            # Raised explicitly (not via `assert`) so the check survives
            # `python -O`; the exception type callers may catch is unchanged.
            raise AssertionError('label %r not found in labelmap' % (label,))
        labelnames.append(names_by_label[label])
    return labelnames

In [3]:
def loadmodel():
    """Build the SSD network selected by the IMAGE_SIZE and MODEL settings.

    Returns:
        A caffe.Net created from the chosen deploy prototxt and weights
        file, in caffe.TEST phase.
    """
    # (IMAGE_SIZE, MODEL) -> (deploy prototxt, weights file)
    known_configs = {
        (300, 'coco'): ('deploy300.prototxt',
                        'VGG_coco_SSD_300x300_iter_400000.caffemodel'),
        (512, 'coco'): ('deploy512.prototxt',
                        'VGG_coco_SSD_512x512_iter_360000.caffemodel'),
    }
    # Every other combination falls back to this pair.
    # NOTE(review): the fallback weights name has no iteration count or
    # '.caffemodel' extension -- confirm it points at a real file.
    # Disabled ILSVRC alternative:
    #   model = 'deployILSVRC.prototxt'
    #   weights = 'VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel'
    fallback = ('deploy300a.prototxt', 'VGG_coco_SSD_300x300_iter')

    model, weights = known_configs.get((IMAGE_SIZE, MODEL), fallback)
    # This is how a model is loaded together with its weights in Caffe
    return caffe.Net(model, weights, caffe.TEST)

In [4]:
def preprocess(frame):
    """Resize a frame to the network input size and move axis 2 to the front.

    The frame is resized to IMAGE_SIZE x IMAGE_SIZE with Lanczos
    interpolation. A 3-D result is transposed with axes (2, 0, 1)
    (presumably rows, cols, channels -> channels, rows, cols); any other
    result is returned as resized.
    """
    target_size = (IMAGE_SIZE, IMAGE_SIZE)
    resized = cv2.resize(frame, target_size, interpolation=cv2.INTER_LANCZOS4)
    if resized.ndim != 3:
        return resized
    return resized.transpose((2, 0, 1))

In [5]:
def detect(image, net):
    """Run one forward pass and return the detections at or above THRESHOLD.

    Args:
        image: preprocessed frame, written into slot 0 of the 'data' blob.
        net: network exposing ``blobs['data']`` and a ``forward()`` whose
            result contains 'detection_out'.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax, conf, labels, label_indices)``:
        the first five are arrays with one value per kept detection,
        ``labels`` are display names and ``label_indices`` the numeric ids.
    """
    # Single-image batch: (batch size, channels, image size, image size).
    # Open question: a larger batch could carry several images at once,
    # though that is probably more useful for training.
    input_blob = net.blobs['data']
    input_blob.reshape(1, 3, IMAGE_SIZE, IMAGE_SIZE)
    input_blob.data[0, ...] = image

    # Output parsing follows ssd_detect by Wei Liu, author of SSD:
    # https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_detect.py
    # One row per detection; columns 1..6 are
    # label, confidence, xmin, ymin, xmax, ymax.
    rows = net.forward()['detection_out'][0, 0]

    # ssd_detect keeps confidences above 0.6, but that is for the smaller
    # PASCAL VOC category set. Coco has many more categories, so a lower
    # confidence is still a decent probability relative to the others.
    keep = [idx for idx, row in enumerate(rows) if row[2] >= THRESHOLD]

    def column(col):
        # Values of one output column, restricted to the kept detections
        return rows[:, col][keep]

    top_label_indices = column(1).tolist()
    top_labels = get_labelname(labelmap, top_label_indices)
    return (column(3), column(4), column(5), column(6),
            column(2), top_labels, top_label_indices)

In [21]:
def calcDist(coords1, coords2):
    """Return the norm (np.linalg.norm default) of coords1 - coords2."""
    offset = coords1 - coords2
    return np.linalg.norm(offset)

class ItemEntry:
    """A single detection of an object: where it was, how big, and in which frame."""

    def __init__(self, midx, midy, width, height, frame, label):
        # What was detected and the frame value supplied by the caller
        self.label = label
        self.frame = frame
        # Bounding-box size
        self.width, self.height = width, height
        # Box centre kept as an array so distances can be computed directly
        self.midCoords = np.array((midx, midy))

class Item:
    """A tracked object together with its detection history and owner history.

    ``entries`` and ``ownerEntries`` are created per instance. Declaring them
    as class-level lists made every Item share the same two lists, so an
    entry added to one item showed up in all of them.
    """
    # Immutable defaults; instances override them as tracking progresses.
    label = ""
    notMoving = False        # set once the item has stopped moving
    abandoned = False        # set once the owner is too far from the item
    ownerIdentified = False
    offScreen = False
    ownerDist = 999999       # distance recorded by the latest addOwnerEntry call

    def __init__(self, label):
        self.label = label
        self.entries = []        # detections of the item itself, oldest first
        self.ownerEntries = []   # detections attributed to the owner, oldest first

    def addEntry(self, entry):
        """Append a detection of this item."""
        self.entries.append(entry)

    def addOwnerEntry(self, entry):
        """Append an owner detection and refresh ``ownerDist``.

        ``ownerDist`` becomes the distance between the item's latest entry
        and ``entry``; at least one item entry must already exist
        (IndexError otherwise).
        """
        self.ownerEntries.append(entry)
        self.ownerDist = calcDist(self.entries[-1].midCoords, entry.midCoords)
    
def _closestEntry(candidates, target, maxDist=99999):
    """Return (index, distance) of the candidate whose midCoords is nearest to target.

    Only candidates strictly closer than maxDist qualify; (-1, maxDist) is
    returned when none does. Ties keep the earliest candidate.
    """
    closestIndex = -1
    closestDist = maxDist
    for index, candidate in enumerate(candidates):
        dist = calcDist(candidate.midCoords, target)
        if dist < closestDist:
            closestDist = dist
            closestIndex = index
    return closestIndex, closestDist


def _drawOwnerLine(axis, bag, color, linewidth):
    """Draw a line from the bag's latest position to its latest owner position."""
    bagXY = bag.entries[-1].midCoords
    ownerXY = bag.ownerEntries[-1].midCoords
    axis.plot([bagXY[0], ownerXY[0]], [bagXY[1], ownerXY[1]], color=color, linewidth=linewidth)


def loadvideo(filename, net):
    """Run detection over a video and flag bags that have been abandoned.

    For every readable, non-blank frame: detect objects, draw the boxes,
    match detected bags to the bags tracked so far, mark bags that stopped
    moving, attach the nearest person as owner, and mark a bag as abandoned
    once its recorded owner distance exceeds the threshold. Annotated frames
    are saved as 'image_NNNNN.jpg' in the current directory.

    Args:
        filename: path of the video, passed to cv2.VideoCapture.
        net: network passed through to detect().

    Returns:
        None. Results are reported through print() and the saved images.
    """
    bagLabels = ['suitcase', 'handbag', 'backpack', 'luggage', 'purse', 'baggage']
    trackedLabels = ['person'] + bagLabels

    confThresh = 0.1              #Confidence level to accept an object
    notMovingFrameThresh = 5      #Number of frames needed to mark bags as notMoving
    notMovingDistThresh = 2.0     #Max cartesian distance to identify bags as notMoving
    firstOwnerDistThresh = 150.0  #Distance below which people can be chosen as the first owners
    ownerDistThresh = 30.0        #Maximum distance from previous owner entry
    abandonedDistThresh = 100.0   #Pixel distance from owner to identify notMoving bags as abandoned

    # Debugging override: render and save every processed frame, not only
    # the ones containing a bag. Defined before the loop so it always exists.
    print_image = True

    k = 0 #counter for output file names of images
    bagItems = [] #Total catalog of bag items in the video
    frameCount = 0

    cap = cv2.VideoCapture(filename)
    try:
        while cap.isOpened():
            frameCount += 1
            ret, frame = cap.read()
            # End of stream / read failure: stop explicitly instead of relying
            # on cvtColor(None) raising inside a bare except.
            if not ret or frame is None:
                break
            # Blank frame: nothing to detect, and nothing new to save
            if not np.any(frame != 0):
                continue
            try:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            except cv2.error:
                # Frame cannot be converted; stop processing as before
                break

            detections = detect(preprocess(frame), net)
            top_xmin, top_ymin, top_xmax, top_ymax, top_conf, top_labels, top_label_indices = detections

            drawFrame = print_image or any_in(top_labels, bagLabels)
            currentAxis = None
            if drawFrame:
                # Only affects figures created after this point
                plt.rcParams['figure.figsize'] = (50, 50)
                plt.imshow(frame)
                k += 1
                currentAxis = plt.gca()

            framePeople = [] #People found this frame
            frameBags = [] #Bags found this frame
            frameHeight, frameWidth = frame.shape[0], frame.shape[1]
            for i in range(top_conf.shape[0]):
                # Box corners are scaled by the frame size to get pixel coordinates
                xmin = int(round(top_xmin[i] * frameWidth))
                ymin = int(round(top_ymin[i] * frameHeight))
                xmax = int(round(top_xmax[i] * frameWidth))
                ymax = int(round(top_ymax[i] * frameHeight))
                score = top_conf[i]
                label = int(top_label_indices[i])
                label_name = top_labels[i]
                display_txt = '%s: %.2f'%(label_name, score)
                width = xmax-xmin+1
                height = ymax-ymin+1
                midx = xmin+(width)/2
                midy = ymin+(height)/2
                color = COLORS[label]

                if score > confThresh and label_name in trackedLabels:
                    if label_name == 'person':
                        framePeople.append(ItemEntry(midx, midy, width, height, frameCount, 'person'))
                    else:
                        frameBags.append(ItemEntry(midx, midy, width, height, frameCount, 'bag'))

                if drawFrame:
                    currentAxis.add_patch(plt.Rectangle((xmin, ymin), width, height, fill=False, edgecolor=color, linewidth=16))
                    currentAxis.text(xmin, ymin, display_txt, bbox={'facecolor':color, 'alpha':0.5}, fontsize=48)
                    # Small marker at the box centre
                    currentAxis.add_patch(plt.Rectangle((midx, midy), 3, 3, fill=False, edgecolor=color, linewidth=16))

            #If no people/bags found this frame, go to next frame
            if len(framePeople) == 0 and len(frameBags) == 0:
                print('None: ' + str(frameCount))
                # The frame is not saved; clear it so its boxes do not leak
                # into the next frame's figure.
                if drawFrame:
                    plt.clf()
                continue

            #Else if list of bags empty, initialize with first bags
            if len(bagItems) == 0 and len(frameBags) > 0:
                for bagEntry in frameBags:
                    newBag = Item('bag')
                    newBag.addEntry(bagEntry)
                    bagItems.append(newBag)

            #Else if found bags, add frame information to bags that are closest
            #TODO: Unused frameBags should be added to new bags
            elif len(frameBags) > 0:
                for bag in bagItems:
                    # Search restarts for every tracked bag and compares against
                    # each frame bag (the old loop indexed frameBags with the
                    # tracked-bag counter and never reset the best distance).
                    closestIndex, _ = _closestEntry(frameBags, bag.entries[-1].midCoords)
                    if closestIndex != -1:
                        bag.addEntry(frameBags[closestIndex])

            #Check for bags that are not moving, mark them as notMoving
            for bag in bagItems:
                if bag.notMoving or len(bag.entries) < notMovingFrameThresh + 1:
                    continue

                #Total distance travelled over the last notMovingFrameThresh steps
                dist = 0
                for step in range(notMovingFrameThresh):
                    dist += calcDist(bag.entries[-1-step].midCoords, bag.entries[-2-step].midCoords)

                #Bag is not moving
                if dist <= notMovingDistThresh:
                    bag.notMoving = True
                    print('Not Moving: ' + str(k) + ' - ' + str(len(bag.ownerEntries)))

            #For bags that are notMoving, add the closest owner
            for bag in bagItems:
                if bag.notMoving and not bag.abandoned:

                    #If no owner yet, find closest person to the bag as the owner
                    if len(bag.ownerEntries) == 0:
                        closestIndex, closestDist = _closestEntry(framePeople, bag.entries[-1].midCoords)
                        if closestIndex != -1 and closestDist < firstOwnerDistThresh:
                            bag.addOwnerEntry(framePeople[closestIndex])

                    #Otherwise find closest person to previous owner entry
                    else:
                        closestIndex, closestDist = _closestEntry(framePeople, bag.ownerEntries[-1].midCoords)

                        #If owner is close enough to previous owner, add the owner entry
                        if closestIndex != -1 and closestDist < ownerDistThresh:
                            bag.addOwnerEntry(framePeople[closestIndex])

                        #Draw line connecting bag and owner
                        if drawFrame:
                            _drawOwnerLine(currentAxis, bag, 'b', 20)
                        print(str(bag.ownerDist))

                        #Check for bags that are too far from owner, mark them as abandoned
                        if bag.ownerDist > abandonedDistThresh:
                            bag.abandoned = True
                            print('Abandoned! ' + str(k) + ' - ' + str(len(bag.ownerEntries)))

                elif bag.abandoned:
                    #Draw line connecting bag and owner
                    if drawFrame:
                        _drawOwnerLine(currentAxis, bag, 'r', 3)
                    print('Abandoned! ' + str(k) + ' - ' + str(len(bag.ownerEntries)))

            if drawFrame:
                figure_name = '%s_%05d.jpg' %('image', k)
                plt.savefig(figure_name)
                plt.clf()
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('how did we break?')
                break
    finally:
        # Always give the capture handle back, however the loop ended
        cap.release()
    

In [None]:
# Build the detector once, then run the abandoned-bag tracker over the sample clip.
video_path = 'AVSS_AB_Easy_Clipped.mov'
net = loadmodel()
loadvideo(video_path, net)
print('Finished!')

In [None]:
# The annotated frames are no longer displayed inline here; they are saved as image_NNNNN.jpg files in the working directory.