# Input setup

In [13]:
import torch
import torchvision
import torchvision.transforms as T
import cv2
import numpy as np

# Open the input clip and fail fast if it cannot be read, instead of silently
# continuing with an unusable capture object.
cap = cv2.VideoCapture('./testVideos/vid2.mp4')
if not cap.isOpened():
    raise IOError("Could not open input video './testVideos/vid2.mp4'")

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the exact (possibly fractional, e.g. 29.97) source frame rate for the
# writer so the output plays at the original speed; the truncated integer in
# `fps` is kept only for the summary print below.
source_fps = cap.get(cv2.CAP_PROP_FPS)
fps = int(source_fps)
print(width,height,fps)

# Create a video writer object to save the output video in MP4 format
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('./outputs/vid2RCNN_Sort.mp4', fourcc, source_fps, (width, height))
# VideoWriter does not raise when it cannot create the file (e.g. the
# ./outputs directory is missing or the codec is unavailable), so check here.
if not out.isOpened():
    cap.release()
    raise IOError("Could not open output video './outputs/vid2RCNN_Sort.mp4' for writing")

1280 720 29


# FasterRCNN

In [14]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Only convert frames to float tensors in [0, 1]. The detection model already
# applies ImageNet mean/std normalization internally (its repr shows a
# GeneralizedRCNNTransform with Normalize), so normalizing here as well would
# normalize every frame twice and degrade the detections.
transform = T.Compose([T.ToTensor()])

# weights (COCOv1 dataset trained)
weights = torchvision.models.detection.faster_rcnn.FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
# model: `weights` is a keyword-only argument of the builder; passing it
# positionally only works through a deprecated compatibility shim.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights=weights)
model = model.to(device)
# Inference mode: the model then returns detections instead of training losses.
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       

# Sort

In [15]:
from sort.sort import Sort

# Build the SORT tracker instance shared by the detection loop.
# NOTE(review): max_age is presumably the number of frames a track may stay
# unmatched before it is discarded -- confirm against sort.sort.Sort.
TRACK_MAX_AGE = 10000
tracker = Sort(max_age=TRACK_MAX_AGE)

# Detection and Display

In [16]:
import time

# COCO category ids kept for tracking: 3 (car) and 8 (truck).
VEHICLE_LABELS = (3, 8)
# Detections scoring below this confidence are dropped before tracking so that
# weak false positives do not spawn tracks and inflate the count. Tune as needed.
MIN_SCORE = 0.5
# Set to False to skip the OpenCV preview window (e.g. on a headless machine or
# when GUI windows misbehave inside the notebook kernel).
SHOW_PREVIEW = True

start_time = time.time()
num_frames = 0

# Unique track ids seen so far (a set, so counting stays cheap on long videos).
ids = set()
try:
    while cap.isOpened():
        # Capture frame-by-frame; stop at end of stream or on a read error.
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV decodes frames as BGR while the torchvision detector expects
        # RGB, so convert before building the input tensor. Inference runs on
        # the clean frame, before any overlay text is drawn on it.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = transform(rgb_frame).to(device)
        with torch.no_grad():
            result = model([img])[0]

        # Move each result tensor to the CPU once per frame instead of once
        # per detection.
        labels = result['labels'].cpu().numpy()
        scores = result['scores'].cpu().numpy()
        bboxes = result['boxes'].cpu().numpy()

        # Convert the detector output to SORT input: one row per kept
        # detection as [x1, y1, x2, y2, score]. With nothing kept this is a
        # (0, 5) array, which is what SORT expects for frames without
        # detections.
        # NOTE(review): assumes sort.sort.Sort follows the reference SORT
        # implementation's input format -- confirm.
        keep = np.isin(labels, VEHICLE_LABELS) & (scores >= MIN_SCORE)
        boxes = np.column_stack((bboxes[keep], scores[keep]))

        # Use SORT to track the cars across frames
        track_bbs_ids = tracker.update(boxes)

        for track in track_bbs_ids:
            xmin, ymin, xmax, ymax, track_id = track
            # The tracker returns a float array; use an int id so the label
            # reads "7" rather than "7.0".
            track_id = int(track_id)
            ids.add(track_id)
            cv2.rectangle(frame, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255,0,0), 2)
            cv2.putText(frame, str(track_id), (int(xmin), int(ymin) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 2)

        # Processing rate, measured after the frame has actually been handled.
        # Note: this reuses the notebook-level name `fps`.
        num_frames += 1
        elapsed_time = time.time() - start_time
        fps = num_frames / elapsed_time
        cv2.putText(frame, f"FPS: {round(fps, 2)}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, "Total Cars: "+str(len(ids)), (10,10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 2)

        # write frame to output video
        out.write(frame)

        if SHOW_PREVIEW:
            cv2.imshow('Frame', frame)
            # Press Q on keyboard to exit
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
finally:
    # Always release the capture and writer (the MP4 is only finalized on
    # release) and close any preview window, even on error or interrupt.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

1   HIToolbox                           0x00000001a40ced4c _ZN15MenuBarInstance21IsAutoShowHideAllowedEv + 284
2   HIToolbox                           0x00000001a3fa8230 _ZN15MenuBarInstance24UpdateAutoShowVisibilityE5Pointh + 40
3   HIToolbox                           0x00000001a3fac380 _ZL19AutoShowHideHandlerP25OpaqueEventHandlerCallRefP14OpaqueEventRefPv + 104
4   HIToolbox                           0x00000001a3f0c4c0 _ZL23DispatchEventToHandlersP14EventTargetRecP14OpaqueEventRefP14HandlerCallRec + 1092
5   HIToolbox                           0x00000001a3f0b940 _ZL30SendEventToEventTargetInternalP14OpaqueEventRefP20OpaqueEventTargetRefP14HandlerCallRec + 356
6   HIToolbox                           0x00000001a3f0b7d0 SendEventToEventTargetWithOptions + 44
7   HIToolbox                           0x00000001a3f4b9cc _ZL29ToolboxEventDispatcherHandlerP25OpaqueEventHandlerCallRefP14OpaqueEventRefPv + 472
8   HIToolbox                           0x00000001a3f0c968 _ZL23DispatchEventToHandl