In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/videodataset/video1.mp4
/kaggle/input/videodataset/video2.mp4


In [2]:
# !pip install filterpy deep-sort-realtime lap

In [3]:
import torch
import torchvision
import cv2
import numpy as np
from deep_sort_realtime.deepsort_tracker import DeepSort

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device)
model.eval()



FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [6]:
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

In [7]:
def detect_cars(img, model, threshold = 0.9):

    transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
    img1 = transform(img).to(device)

    with torch.no_grad():
        pred = model([img1])

    detections = []

    pred_class1 = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].cpu().numpy())] # Get the Prediction Score
    pred_boxes1 = [[i[0], i[1], i[2], i[3]] for i in list(pred[0]['boxes'].detach().cpu().numpy())] # Bounding boxes
    pred_score1 = list(pred[0]['scores'].detach().cpu().numpy())
    pred_labels1 = list(pred[0]['labels'].detach().cpu().numpy())

    pred_class = []
    pred_boxes = []
    pred_score = []
    pred_labels = []

    for i in range(len(pred_class1)):
        if(pred_class1[i] == 'car'):
            pred_class.append(pred_class1[i])
            pred_boxes.append(pred_boxes1[i])
            pred_score.append(pred_score1[i])
            pred_labels.append(pred_labels1[i])

    for i in range(len(pred_labels)):
        if(pred_score[i] > threshold):
            cv2.rectangle(frame, (int(pred_boxes[i][0]), int(pred_boxes[i][1])), (int(pred_boxes[i][2]), int(pred_boxes[i][3])), (255, 255, 0), 2)
            cv2.putText(frame, f"Car : {pred_score[i]:.2f}", (int(pred_boxes[i][0])-10, int(pred_boxes[i][1])-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 255), 2)
            detections.append((pred_boxes[i], pred_score[i]))

    return detections

In [8]:
def update_track(tracks, count, track_ids):
    for t in tracks:
        if not t.is_confirmed():
            continue
        t_id = t.track_id
        if(t_id not in track_ids):
            count += 1
            track_ids.append(t_id)

        ltrb = t.to_ltrb()
        bbox = ltrb

    return track_ids, count

In [9]:
for i in range(1, 3):
    object_tracker = DeepSort(max_iou_distance=0.3, max_age=30, nms_max_overlap=0.5)

    cap = cv2.VideoCapture(f'/kaggle/input/videodataset/video{i}.mp4')

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    result = cv2.VideoWriter(f'FasterRCNNDeepSort{i}.mp4', fourcc, 25.0, (int(cap.get(3)), int(cap.get(4))))

    track_ids = []
    count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        detections = detect_cars(frame, model, 0.9)
        tracks = object_tracker.update_tracks(detections, frame=frame)
        track_ids, count = update_track(tracks, count, track_ids)
        cv2.putText(frame, f'CAR COUNT: {int(count)}', (20, 100), cv2.FONT_HERSHEY_SIMPLEX, 2.5, (255,255,0), 3)

        result.write(frame)
        
    cap.release()
    result.release()
    
    print(count)


17
31
