In [1]:
from models import *
from utils import *

import os, sys, time, datetime, random
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms as T
from torch.autograd import Variable

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import cv2

import numpy as np
import pandas as pd
import scipy
from scipy.spatial import distance
import argparse
import imutils
from imutils.video import VideoStream
from imutils.video import FPS
import torchvision
import dlib
import glob
import math
import shutil


In [2]:
FRAMES_DIR = "frames"  # Output dir to hold/cache the original frames
OUTPUT_DIR = "output"  # Output dir to hold the annotated frames
SAMPLING = 25 # Classify every n frames (use tracking in between)
CONFIDENCE = 0.80  # Confidence threshold to filter iffy objects
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

In [2]:


# Load model and weights
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()


FasterRCNN(
  (transform): GeneralizedRCNNTransform()
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d()
          )
        )
  

In [3]:
def get_prediction(img, threshold):
    #img = Image.open(img) # Load the image
    #img = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
    transform = T.Compose([T.ToTensor()]) # Defing PyTorch Transform
    img = transform(img) # Apply the transform to the image
    pred = model([img]) # Pass the image to the model
    #pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())] # Get the Prediction Score
    pred_boxes = [[i[0], i[1], i[2], i[3]] for i in list(pred[0]['boxes'].detach().numpy())] # Bounding boxes
    pred_score = list(pred[0]['scores'].detach().numpy())
    pred_labels = list(pred[0]['labels'].detach().numpy())
    
    
    pred_t = [pred_score.index(x) for x in pred_score if x > threshold][-1] # Get list of index with score greater than threshold.
    
    p_boxes = []
    p_score = []
    p_labels = []
    for i in range(pred_t+1):
        if pred_labels[i] == 1:   ## Selects only Person class
            p_boxes.append(pred_boxes[:][i])
            p_score.append(pred_score[i])
            p_labels.append(pred_labels[i])
            
    zipped = zip(p_boxes, p_score, p_labels)
    dets = [[i[0], i[1], i[2], i[3], j, j, k] for i,j, k in zipped]
    
    
    return p_boxes, dets

In [4]:
def get_ref_distances(boxes):
    obj_dtls = {'obj_id': [], 'x1': [], 'y1': [], 'x2': [], 'y2': [],
                'cX': [], 'cY': [], 'ref_cent_dist': [],'ref_botm_dist': [], 'ref_top_dist': []}
    
    ref_pt = (960, 1200)
    cnt=0
    for i in range(len(boxes)):
        #print(boxes[i])
        obj_id=cnt
        cnt=cnt+1
        x1=int(boxes[i][0])
        y1=int(boxes[i][1])
        x2=int(boxes[i][2])
        y2=int(boxes[i][3])
        cX=int((x1+x2)/2)
        cY=int((y1+y2)/2)
        ref_cent_dist = distance.euclidean(ref_pt,(cX, cY))
        ref_botm_dist = distance.euclidean(ref_pt,(cX, y2))
        ref_top_dist = distance.euclidean(ref_pt,(cX, y1))
           
        obj_dtls['obj_id'].append(obj_id)
        obj_dtls['x1'].append(x1)
        obj_dtls['y1'].append(y1)
        obj_dtls['x2'].append(x2)
        obj_dtls['y2'].append(y2)
        obj_dtls['cX'].append(cX)
        obj_dtls['cY'].append(cY)
        obj_dtls['ref_cent_dist'].append(ref_cent_dist)
        obj_dtls['ref_botm_dist'].append(ref_botm_dist)
        obj_dtls['ref_top_dist'].append(ref_top_dist)
    return obj_dtls

In [5]:
def get_spacial_distance(obj_dtls):
    obj_feature = pd.DataFrame(obj_dtls)
    obj_feature['obj_ht']=np.abs(obj_feature.y1-obj_feature.y2)
    
    eu_dist = np.round(scipy.spatial.distance.cdist(obj_feature.iloc[:,5:7], 
                                                    obj_feature.iloc[:,5:7], 
                                                    metric='euclidean'),2)
    for i in range(len(eu_dist)):
        col_name = 'eu_dist_'+str(i)
        obj_feature[col_name]=np.vstack(eu_dist[:,i])
    
    return obj_feature

In [6]:
def get_red_obj(obj_feature, thres):
    no_obj_pf = obj_feature.obj_id.max()+1
    #print('no_obj_pf:', no_obj_pf)
    red_id=[]
    red_pair=[]
    for index, row in obj_feature.iterrows():
        for i in range(no_obj_pf):
            if (row['eu_dist_'+str(i)] < thres) & (row['eu_dist_'+str(i)] != 0.00) :
                #print('ith object:', obj_feature.iloc[i]['ref_botm_dist'])
                #print('current object:', obj_feature.iloc[int(row.obj_id)]['ref_botm_dist'] )
                bt_eu = distance.euclidean(obj_feature.iloc[i]['ref_botm_dist'], obj_feature.iloc[int(row.obj_id)]['ref_botm_dist'])
                #print('diff:', bt_eu )
                if (bt_eu < thres):
                    red_id.append(i)
                    red_id.append(row.obj_id)
                    src = (int(row.cX), int(row.cY))
                    dst = (int(obj_feature.iloc[i]['cX']),int(obj_feature.iloc[i]['cY']))
                    red_pair.append((src,dst))

    red_id=list(set(red_id))   
    
    return red_id, red_pair


In [7]:
videopath = 'StreetVideo2.mp4'

%pylab inline 
import cv2
from IPython.display import clear_output

cmap = plt.get_cmap('tab20b')
colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

# initialize Sort object and video capture
from sort import *
if os.path.isfile(videopath):
    vid = cv2.VideoCapture(videopath)
else:
    raise Exception("File %s doesn't exist!" % input_video)
    
mot_tracker = Sort() 
countFrames = 0


# initialize the frame dimensions (we'll set them as soon as we read
# the first frame from the video)
W = None
H = None
countFrames=0
FRAME_FPS = int(vid.get(cv2.CAP_PROP_FPS))
FRAME_WIDTH = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
FRAME_HEIGHT = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

print("Frame Dimensions: %sx%s" % (FRAME_WIDTH, FRAME_HEIGHT))   
total_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
print("Frame count estimate is %d" % total_frames)

# start the frames per second throughput estimator
fps = FPS().start()
while vid.get(cv2.CAP_PROP_POS_FRAMES) < vid.get(cv2.CAP_PROP_FRAME_COUNT):
    ret, frame = vid.read()
    frame1 = frame.copy()
    frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB)
    pilimg = Image.fromarray(frame1)
    
    rect_th=3
    boxes, detections = get_prediction(pilimg, 0.85)
    obj_dtls=get_ref_distances(boxes)
    obj_feature = get_spacial_distance(obj_dtls)

    red_id, red_pair = get_red_obj(obj_feature, 100) #Get the objects to be marked in Red color
    obj_id_local = obj_feature['obj_id']
    detections = torch.FloatTensor(detections)
    
    #img = np.array(pilimg)
    max_obj = 0
    prev_max = 0
    red_cnt = 0
    green_cnt = 0
    
    if detections is not None:
        tracked_objects = mot_tracker.update(detections.cpu())
        #print('tracked-obj:',tracked_objects)
        unique_labels = detections[:, -1].cpu().unique()
        n_cls_preds = len(unique_labels)

        zipped_obj = zip(boxes, tracked_objects[:,4], obj_id_local)
        for box, obj_id, local_id in zipped_obj:
            
            x1= int(box[0])
            y1= int(box[1])
            x2= int(box[2])
            y2= int(box[3])
            cX=int((x1+x2)/2)
            cY=int((y1+y2)/2)
            cls = 'person'
            max_obj = max(max_obj, obj_id)
            if (local_id not in red_id):
                # Draw Rectangle with the coordinates for Green IDs
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255,0), 3)
                cv2.rectangle(frame, (x1, y1-35), (x1+len(cls)*19+60, y1), (0, 255, 0), -1)
                cv2.putText(frame, cls + "-" + str(int(obj_id)), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)
                cv2.drawMarker(frame, (cX,cY), (0,255,0), markerType=cv2.MARKER_TILTED_CROSS, 
                         markerSize=15, thickness=2, line_type=cv2.LINE_AA)
            else:
                # Draw Rectangle with the coordinates for Red IDs
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 3)
                cv2.rectangle(frame, (x1, y1-35), (x1+len(cls)*19+60, y1), (0, 0, 255), -1)
                cv2.putText(frame, cls + "-" + str(int(obj_id)), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)

                cv2.drawMarker(frame, (cX,cY), (0,0,255), markerType=cv2.MARKER_TILTED_CROSS, 
                         markerSize=15, thickness=2, line_type=cv2.LINE_AA)
                if prev_max <  max_obj:
                    red_cnt=red_cnt+1
                
            for i in red_pair:
                cv2.line(frame, i[0], i[1], (0,0,255), thickness=2)
        if prev_max <  max_obj:
            prev_max = max_obj
            blue_cnt = max_obj - red_cnt
        # construct a tuple of information we will be displaying on the
        # frame
        info = [
            ("Distance Violation", red_cnt),
            ("Distance Maintained", blue_cnt),
            ("Total Persons", red_cnt+blue_cnt)
        ]
        
        if red_cnt >0:
            cv2.putText(frame, "ALERT: Social Distance Violated", (500, 30 ),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
        # loop over the info tuples and draw them on our frame
        for (i, (k, v)) in enumerate(info):
            text = "{}: {}".format(k, v)
            cv2.putText(frame, text, (20, ((i * 40) + 25)),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 139), 2)
        
        cv2.imwrite('{frames_dir}/Output_{num:05d}.jpg'.format(frames_dir=OUTPUT_DIR, num=countFrames), frame)
        print("Processed file {num} of {total_frames}".format(num=countFrames, total_frames=total_frames), end="\r")


        # increment the total number of frames processed thus far and
        # then update the FPS counter
        countFrames += 1
        fps.update()        

# stop the timer and display FPS information
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
video_capture.release()
cv2.destroyAllWindows()

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


Frame Dimensions: 1920x1080
Frame count estimate is 903
Processed file 0 of 903

Compilation is falling back to object mode WITH looplifting enabled because Function "iou" failed type inference due to: [1m[1mnon-precise type pyobject[0m
[0m[1m[1] During: typing of argument at C:\Users\Suvralipi\Desktop\DataAnalytics\Video Analytics\SocialDistancing\pytorch_objectdetecttrack-master\sort.py (37)[0m
[1m
File "sort.py", line 37:[0m
[1mdef iou(bb_test,bb_gt):
    <source elided>
  """
[1m  xx1 = np.maximum(bb_test[0], bb_gt[0])
[0m  [1m^[0m[0m
[0m
  @jit
[1m
File "sort.py", line 33:[0m
[1m@jit
[1mdef iou(bb_test,bb_gt):
[0m[1m^[0m[0m
[0m
  state.func_ir.loc))
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit
[1m
File "sort.py", line 33:[0m
[1m@jit
[1mdef iou(bb_test,bb_gt):
[0m[1m^[0m[0m
[0m
  state.func

Processed file 77 of 903

KeyboardInterrupt: 

In [4]:
img_array = []
for filename in sorted(glob.glob(os.path.join(os.path.abspath(OUTPUT_DIR),
                                              'Output_*.jpg'))):
    frame = cv2.imread(filename)
    height, width, layers = frame.shape
    size = (width,height)
    img_array.append(frame)

out = cv2.VideoWriter('SocialOutput.avi',cv2.VideoWriter_fourcc(*'DIVX'),28, size)
for i in range(len(img_array)):
    out.write(img_array[i])
out.release()
print("\nDone")


Done
