In [1]:
import os
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [2]:
video_files = glob.glob('video_footages/*')
video_files

['video_footages\\Raw Video_ Pittsburg Neighborhood Drive-By Shootings.mp4',
 'video_footages\\Road traffic video for object recognition.mp4',
 'video_footages\\Shooting captured by surveillance camera in Parma.mp4',
 'video_footages\\Shopping, People, Commerce, Mall, Many, Crowd, Walking   Free Stock video footage   YouTube.mp4',
 'video_footages\\Traffic Flow In The Highway - 4K Stock Videos _ NoCopyright _ AllVideoFree.mp4',
 'video_footages\\vecteezy_busy-street-in-evening-hong-kong_28840705 (1) (1).mp4',
 'video_footages\\vecteezy_people-crossing-the-road-on-zebra-tallin_28257759.mp4']

In [3]:
image_files = glob.glob('images/*')
image_files

['images\\test.png',
 'images\\woman1.jpg',
 'images\\woman2.jpg',
 'images\\woman3.jpg']

In [4]:
img = cv2.imread(image_files[0])
height, width, layers = img.shape
new_height = height/2
new_width = width/2
img = cv2.resize(img, (int(new_width), int(new_height)))
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
contours, _ = cv2.findContours(gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
boxes = []
for cnt in contours:
    [x, y, w, h] = cv2.boundingRect(cnt)
    area = w*h
    img = cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 1)
    boxes.append([x,y,w,h, area])
print(boxes)
cv2.imshow('test', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

[[97, 207, 165, 62, 10230], [98, 208, 163, 60, 9780], [503, 199, 62, 154, 9548], [504, 200, 60, 152, 9120], [703, 91, 82, 160, 13120], [704, 92, 80, 158, 12640], [310, 51, 174, 141, 24534], [311, 53, 172, 138, 23736]]


In [113]:
nms(boxes, 20000)

[[311, 53, 172, 138, 23736], [310, 51, 174, 141, 24534]]

In [96]:
def iou(box1, box2):
    """
    the boxes should be in format of (x, y, w, h, area)
    """
    xx = max(box1[0],box2[0])
    yy = max(box1[1],box2[1])
    xf = min(box1[0]+box1[2],box2[0]+box2[2])
    yf = min(box1[1]+box1[3],box2[1]+box2[3])
    area1 = box1[2]*box1[3]
    area2 = box2[2]*box2[3]

    w = max(0, xf-xx)
    h = max(0, yf-yy)

    intersection = w*h
    union = abs(area1) + abs(area2) - intersection + 1e-6
    result = intersection/union
    return result

In [97]:
iou(boxes[0], boxes[1])

0.9560117301118268

In [22]:
test_array = np.array([[1, 3, 2 ,3 ,0.26],
                       [2, 3, 3, 2, 0.98],
                       [-2, 0, 2, 1, 0.54],
                       [6, 2, 4, 2, 0.92]])

In [108]:
nms(test_array.tolist(), 0.9)

[[2.0, 3.0, 3.0, 2.0, 0.98], [6.0, 2.0, 4.0, 2.0, 0.92]]

In [109]:
def nms(bboxes, threshold):
    assert type(bboxes) == list
    bboxes = [box for box in bboxes if box[-1] > threshold]
    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
    bboxes_after_nms = []
    while bboxes:
        chosen_box = bboxes.pop(0)

        bboxes = [box for box in bboxes if iou(chosen_box[:4],box[:4])< threshold]

        bboxes_after_nms.append(chosen_box)

    return bboxes_after_nms

In [114]:
def get_motion_mask(fg_mask, min_thresh=0, kernel=np.array((9,9), dtype=np.uint8)):
    """ Obtains image mask
        Inputs: 
            fg_mask - foreground mask
            kernel - kernel for Morphological Operations
        Outputs: 
            mask - Thresholded mask for moving pixels
        """
    _, thresh = cv2.threshold(fg_mask,min_thresh,255,cv2.THRESH_BINARY)
    motion_mask = cv2.medianBlur(thresh, 3)
    
    # morphological operations
    motion_mask = cv2.morphologyEx(motion_mask, cv2.MORPH_OPEN, kernel, iterations=1)
    motion_mask = cv2.morphologyEx(motion_mask, cv2.MORPH_CLOSE, kernel, iterations=1)

    return motion_mask

In [132]:
sub_type = 'MOG2' # 'KNN'


if sub_type == 'MOG2':
    backSub = cv2.createBackgroundSubtractorMOG2(varThreshold=16, detectShadows=False)
else:
    backSub = cv2.createBackgroundSubtractorKNN(dist2Threshold=1000, detectShadows=False)


thresh = 700


cap = cv2.VideoCapture(video_files[6])

while(cap.isOpened()):
    
    ret, frame = cap.read()
    if ret == True:
      
        height, width, layers = frame.shape
        new_h = height / 2
        new_w = width / 2
        frame = cv2.resize(frame, (int(new_w), int(new_h)))
        fgMask = backSub.apply(frame)
        motion_mask = get_motion_mask(fgMask, min_thresh=127)

        contours, _ = cv2.findContours(motion_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_L1)

        predictions = []
        for cnt in contours:
            x,y,w,h = cv2.boundingRect(cnt)
            area = w*h
            predictions.append([x,y,w,h,area])
        predictions = nms(predictions, thresh)
        for box in predictions:
            #if box[-1] > thresh:
            frame = cv2.rectangle(frame, (box[0], box[1]), (box[0]+box[2], box[1]+box[3]), (0, 255, 0), thickness = 2)


        cv2.imshow('normal video',frame)
        cv2.imshow('fg_mask',motion_mask)

    
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break

    else:
        break

cap.release()

cv2.destroyAllWindows()