In [1]:
import os
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [59]:
import torch
from torchvision.models import resnet50, ResNet50_Weights
from torchvision.models import mobilenet_v3_small, MobileNet_V3_Small_Weights

In [66]:
# Pretrained MobileNetV3-Small classifier; `weights` is kept around because
# later cells look up class names via weights.meta["categories"].
weights = MobileNet_V3_Small_Weights.IMAGENET1K_V1
model = mobilenet_v3_small(weights=weights)
# torchvision builds models in training mode. Switch to eval mode so Dropout is
# disabled and BatchNorm uses its stored running statistics during inference.
# (Trailing ';' suppresses the module repr in the notebook output.)
model.eval();

In [2]:
# Gather every entry found directly inside the footage folder; the ordering is
# whatever the glob module yields for the underlying filesystem.
video_files = list(glob.iglob('video_footages/*'))
video_files

['video_footages\\4K Road traffic video for object detection and tracking - free download now!.mp4',
 'video_footages\\Central Park People Watching.mp4',
 'video_footages\\Creatures of the Night - trail cam videos.mp4',
 'video_footages\\Dutch bike rush hour this morning was smooth as ever.mp4',
 'video_footages\\Free City Street Footage - Royalty Free Stock Footage - People Walking Stock Footage No Copyright.mp4',
 'video_footages\\morning bike traffic during commute to Dutch university.mp4',
 'video_footages\\People Walking Free Stock Footage, Royalty-Free No Copyright Content.mp4',
 'video_footages\\Raw Video_ Pittsburg Neighborhood Drive-By Shootings.mp4',
 'video_footages\\Road traffic video for object recognition.mp4',
 'video_footages\\Shooting captured by surveillance camera in Parma.mp4',
 'video_footages\\Shopping, People, Commerce, Mall, Many, Crowd, Walking   Free Stock video footage   YouTube.mp4',
 'video_footages\\snow dispute.mp4',
 'video_footages\\Surveillance camera 

In [3]:
def get_motion_mask(fg_mask, min_thresh=0, kernel=np.ones((9, 9), dtype=np.uint8)):
    """ Obtains image mask
        Inputs: 
            fg_mask - foreground mask
            min_thresh - pixels strictly above this value are kept as motion
            kernel - structuring element for the Morphological Operations
                     (default: 9x9 block of ones)
        Outputs: 
            motion_mask - thresholded (0/255), median-blurred and
                          opened/closed mask for moving pixels
        """
    # The previous default, np.array((9,9)), was the 1-D array [9, 9] rather
    # than a 9x9 structuring element, so opening/closing barely did anything.
    _, thresh = cv2.threshold(fg_mask, min_thresh, 255, cv2.THRESH_BINARY)

    # Median blur removes isolated speckles before the morphological clean-up.
    motion_mask = cv2.medianBlur(thresh, 5)

    # Opening drops small blobs; closing then fills small holes in what remains.
    motion_mask = cv2.morphologyEx(motion_mask, cv2.MORPH_OPEN, kernel, iterations=1)
    motion_mask = cv2.morphologyEx(motion_mask, cv2.MORPH_CLOSE, kernel, iterations=1)

    return motion_mask

In [None]:
# Motion detection + classification over every video in `video_files`.
# For each frame: downscale by 2, background-subtract, clean the mask, and
# classify every sufficiently large moving region with the ImageNet model.
sub_type = 'MOG2' # 'KNN'

# Minimum bounding-box area (in pixels of the half-size frame) a moving region
# must exceed before it is classified and annotated.
thresh = 500

# Per-channel statistics documented for the torchvision ImageNet weights; the
# classifier expects RGB input scaled to [0, 1] and normalized with these.
imagenet_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
imagenet_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

# Inference only: make sure Dropout/BatchNorm are in evaluation mode.
model.eval()

for video in video_files:

    # Fresh background model per video so statistics learned from the previous
    # (unrelated) footage do not leak into this one.
    if sub_type == 'MOG2':
        backSub = cv2.createBackgroundSubtractorMOG2(varThreshold=16, detectShadows=False)
    else:
        backSub = cv2.createBackgroundSubtractorKNN(dist2Threshold=1000, detectShadows=False)

    cap = cv2.VideoCapture(video)

    try:
        while cap.isOpened():

            ret, frame = cap.read()
            if not ret:
                # End of stream (or unreadable frame): move on to the next video.
                break

            height, width = frame.shape[:2]
            frame = cv2.resize(frame, (width // 2, height // 2))
            fgMask = backSub.apply(frame)
            motion_mask = get_motion_mask(fgMask, min_thresh=30)

            contours, _ = cv2.findContours(motion_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_L1)

            # Draw on a copy so crops fed to the classifier never contain
            # rectangles or labels drawn for earlier contours.
            annotated = frame.copy()

            for cnt in contours:
                x, y, w, h = cv2.boundingRect(cnt)
                if w * h <= thresh:
                    continue

                # Crop from the clean frame, squash to the model's input size,
                # and convert OpenCV's BGR channel order to RGB.
                cropped_frame = cv2.resize(frame[y:y+h, x:x+w], (224, 224))
                cropped_frame = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)

                # HWC uint8 -> CHW float in [0, 1] -> normalized -> add batch dim.
                tensor_img = torch.tensor(cropped_frame, dtype=torch.float).permute(2, 0, 1) / 255.0
                tensor_img = ((tensor_img - imagenet_mean) / imagenet_std).unsqueeze(0)

                with torch.no_grad():
                    prediction = model(tensor_img).squeeze(0).softmax(0)
                class_id = prediction.argmax().item()
                category_name = weights.meta["categories"][class_id]

                cv2.rectangle(annotated, (x, y), (x+w, y+h), (0, 255, 0), thickness=2)
                # Clamp the label baseline so boxes touching the top edge
                # still get a visible label.
                cv2.putText(annotated, category_name,
                            (x, max(y-3, 10)),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5,
                            (255, 255, 255),
                            thickness=1)

            cv2.imshow('normal video', annotated)
            cv2.imshow('fg_mask', motion_mask)

            # 'q' stops the current video only; the outer loop then continues
            # with the next file.
            k = cv2.waitKey(1) & 0xFF
            if k == ord('q'):
                break
    finally:
        # Always free the capture handle and close windows, even if a frame
        # raised part-way through.
        cap.release()
        cv2.destroyAllWindows()

In [54]:
# Classify a single still image with the ImageNet model.
# NOTE(review): `image_files` is not defined in any cell visible here —
# confirm it is created earlier in the notebook before relying on this cell.
img = cv2.imread(image_files[2])
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise FileNotFoundError(f"Could not read image: {image_files[2]}")

# cv2.COLOR_BGR2RGB is a cvtColor conversion code, not an imread flag, so the
# channel swap has to be done explicitly after loading.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (224, 224))

# HWC uint8 -> CHW float in [0, 1], then normalize with the per-channel
# statistics documented for the torchvision ImageNet weights.
tensor_img = torch.tensor(img, dtype=torch.float).permute(2, 0, 1) / 255.0
imagenet_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
imagenet_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
tensor_img = ((tensor_img - imagenet_mean) / imagenet_std).unsqueeze(0)
assert tensor_img.shape == (1, 3, 224, 224)

# Inference only: evaluation mode and no autograd bookkeeping.
model.eval()
with torch.no_grad():
    prediction = model(tensor_img).squeeze(0).softmax(0)
class_id = prediction.argmax().item()
score = prediction[class_id].item()  # softmax probability of the top class
category_name = weights.meta["categories"][class_id]
category_name

torch.Size([1, 3, 224, 224])