In [1]:
! pip install filterpy
! pip install scikit-image

# Install TensorFlow
! pip install tensorflow

# Install TensorFlow-Object-Detection
! pip install tensorflow-object-detection-api

# Install OpenCV
! pip install opencv-python

# Install Keras (usually comes bundled with TensorFlow)
# Check if Keras is already installed:
! pip show keras

# If not installed, install it:
! pip install keras

Name: keras
Version: 3.1.1
Summary: Multi-backend Keras.
Home-page: https://github.com/keras-team/keras
Author: Keras team
Author-email: keras-users@googlegroups.com
License: Apache License 2.0
Location: c:\users\91939\appdata\local\programs\python\python310\lib\site-packages
Requires: absl-py, h5py, ml-dtypes, namex, numpy, optree, rich
Required-by: tensorflow-intel


In [2]:
# to use YOLOv8
! pip install ultralytics
! pip install deep-sort-realtime

# needed libraries
! pip install torch torchvision torchaudio
! pip install opencv-python
! pip install Cython



In [3]:
"""
    SORT: A Simple, Online and Realtime Tracker
    Copyright (C) 2016-2020 Alex Bewley alex@bewley.ai

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
from __future__ import print_function

import os
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from skimage import io

import glob
import time
import argparse
from filterpy.kalman import KalmanFilter

np.random.seed(0)


def linear_assignment(cost_matrix):
  try:
    import lap
    _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
    return np.array([[y[i],i] for i in x if i >= 0]) #
  except ImportError:
    from scipy.optimize import linear_sum_assignment
    x, y = linear_sum_assignment(cost_matrix)
    return np.array(list(zip(x, y)))


def iou_batch(bb_test, bb_gt):
  """
  From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2]
  """
  bb_gt = np.expand_dims(bb_gt, 0)
  bb_test = np.expand_dims(bb_test, 1)
  
  xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0])
  yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
  xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
  yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
  w = np.maximum(0., xx2 - xx1)
  h = np.maximum(0., yy2 - yy1)
  wh = w * h
  o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1])                                      
    + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh)                                              
  return(o)  


def convert_bbox_to_z(bbox):
  """
  Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
    [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
    the aspect ratio
  """
  w = bbox[2] - bbox[0]
  h = bbox[3] - bbox[1]
  x = bbox[0] + w/2.
  y = bbox[1] + h/2.
  s = w * h    #scale is just area
  r = w / float(h)
  return np.array([x, y, s, r]).reshape((4, 1))


def convert_x_to_bbox(x,score=None):
  """
  Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
    [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
  """
  w = np.sqrt(x[2] * x[3])
  h = x[2] / w
  if(score==None):
    return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
  else:
    return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))


class KalmanBoxTracker(object):
  """
  This class represents the internal state of individual tracked objects observed as bbox.
  """
  count = 0
  def __init__(self,bbox):
    """
    Initialises a tracker using initial bounding box.
    """
    #define constant velocity model
    self.kf = KalmanFilter(dim_x=7, dim_z=4) 
    self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0],  [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
    self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])

    self.kf.R[2:,2:] *= 10.
    self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
    self.kf.P *= 10.
    self.kf.Q[-1,-1] *= 0.01
    self.kf.Q[4:,4:] *= 0.01

    self.kf.x[:4] = convert_bbox_to_z(bbox)
    self.time_since_update = 0
    self.id = KalmanBoxTracker.count
    KalmanBoxTracker.count += 1
    self.history = []
    self.hits = 0
    self.hit_streak = 0
    self.age = 0

  def update(self,bbox):
    """
    Updates the state vector with observed bbox.
    """
    self.time_since_update = 0
    self.history = []
    self.hits += 1
    self.hit_streak += 1
    self.kf.update(convert_bbox_to_z(bbox))

  def predict(self):
    """
    Advances the state vector and returns the predicted bounding box estimate.
    """
    if((self.kf.x[6]+self.kf.x[2])<=0):
      self.kf.x[6] *= 0.0
    self.kf.predict()
    self.age += 1
    if(self.time_since_update>0):
      self.hit_streak = 0
    self.time_since_update += 1
    self.history.append(convert_x_to_bbox(self.kf.x))
    return self.history[-1]

  def get_state(self):
    """
    Returns the current bounding box estimate.
    """
    return convert_x_to_bbox(self.kf.x)


def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
  """
  Assigns detections to tracked object (both represented as bounding boxes)

  Returns 3 lists of matches, unmatched_detections and unmatched_trackers
  """
  if(len(trackers)==0):
    return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)

  iou_matrix = iou_batch(detections, trackers)

  if min(iou_matrix.shape) > 0:
    a = (iou_matrix > iou_threshold).astype(np.int32)
    if a.sum(1).max() == 1 and a.sum(0).max() == 1:
        matched_indices = np.stack(np.where(a), axis=1)
    else:
      matched_indices = linear_assignment(-iou_matrix)
  else:
    matched_indices = np.empty(shape=(0,2))

  unmatched_detections = []
  for d, det in enumerate(detections):
    if(d not in matched_indices[:,0]):
      unmatched_detections.append(d)
  unmatched_trackers = []
  for t, trk in enumerate(trackers):
    if(t not in matched_indices[:,1]):
      unmatched_trackers.append(t)

  #filter out matched with low IOU
  matches = []
  for m in matched_indices:
    if(iou_matrix[m[0], m[1]]<iou_threshold):
      unmatched_detections.append(m[0])
      unmatched_trackers.append(m[1])
    else:
      matches.append(m.reshape(1,2))
  if(len(matches)==0):
    matches = np.empty((0,2),dtype=int)
  else:
    matches = np.concatenate(matches,axis=0)

  return matches, np.array(unmatched_detections), np.array(unmatched_trackers)


class Sort(object):
  def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3):
    """
    Sets key parameters for SORT
    """
    self.max_age = max_age
    self.min_hits = min_hits
    self.iou_threshold = iou_threshold
    self.trackers = []
    self.frame_count = 0

  def update(self, dets=np.empty((0, 5))):
    """
    Params:
      dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
    Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections).
    Returns the a similar array, where the last column is the object ID.

    NOTE: The number of objects returned may differ from the number of detections provided.
    """
    self.frame_count += 1
    # get predicted locations from existing trackers.
    trks = np.zeros((len(self.trackers), 5))
    to_del = []
    ret = []
    for t, trk in enumerate(trks):
      pos = self.trackers[t].predict()[0]
      trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
      if np.any(np.isnan(pos)):
        to_del.append(t)
    trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
    for t in reversed(to_del):
      self.trackers.pop(t)
    matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks, self.iou_threshold)

    # update matched trackers with assigned detections
    for m in matched:
      self.trackers[m[1]].update(dets[m[0], :])

    # create and initialise new trackers for unmatched detections
    for i in unmatched_dets:
        trk = KalmanBoxTracker(dets[i,:])
        self.trackers.append(trk)
    i = len(self.trackers)
    for trk in reversed(self.trackers):
        d = trk.get_state()[0]
        if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
          ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) # +1 as MOT benchmark requires positive
        i -= 1
        # remove dead tracklet
        if(trk.time_since_update > self.max_age):
          self.trackers.pop(i)
    if(len(ret)>0):
      return np.concatenate(ret)
    return np.empty((0,5))

def parse_args():
    """Parse input arguments."""
    parser = argparse.ArgumentParser(description='SORT demo')
    parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true')
    parser.add_argument("--seq_path", help="Path to detections.", type=str, default='data')
    parser.add_argument("--phase", help="Subdirectory in seq_path.", type=str, default='train')
    parser.add_argument("--max_age", 
                        help="Maximum number of frames to keep alive a track without associated detections.", 
                        type=int, default=1)
    parser.add_argument("--min_hits", 
                        help="Minimum number of associated detections before track is initialised.", 
                        type=int, default=3)
    parser.add_argument("--iou_threshold", help="Minimum IOU for match.", type=float, default=0.3)
    args = parser.parse_args()
    return args

In [10]:
import cv2
import torch
import torchvision
from torchvision import transforms
import numpy as np
import time as TIME
from deep_sort_realtime.deepsort_tracker import DeepSort

# Load pre-trained Faster R-CNN model (using a more up-to-date approach)
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()  # Set the model to evaluation mode

# Get the class names
COCO_INSTANCE_CATEGORY_NAMES = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

# Print the class names
# print(class_names)


# Define image transformation (consider normalization if needed)
transform = transforms.Compose([
    transforms.ToTensor()
])

# Function to perform object detection on a single frame
def detect_objects(frame):
    # Apply the transformation
    input_tensor = transform(frame).unsqueeze(0)

    # Pass the input through the model
    with torch.no_grad():
        predictions = model(input_tensor)[0]

    # Convert predictions to a more convenient format (optional)
    boxes = predictions['boxes'].numpy()  # Assuming detections are in xyxy format
    labels = predictions['labels'].numpy()
    scores = predictions['scores'].numpy()  # If scores are available

    return boxes, labels, scores  # Return all relevant information

# Function to filter detections by vehicle types
def filter_vehicles(boxes, labels, scores, vehicle_types):
    filtered_boxes = []
    box_labels = []
    for box, label, score in zip(boxes, labels, scores):
        labels_list.append(COCO_INSTANCE_CATEGORY_NAMES[label.item()])
        if (COCO_INSTANCE_CATEGORY_NAMES[label.item()] in vehicle_types) and (score>0.92):  # Adjust the threshold accordingly to omit or include certain boxes.
            filtered_boxes.append([[box[0], box[1], box[2]-box[0], box[3]-box[1]], score, label.item()])
            box_labels.append(COCO_INSTANCE_CATEGORY_NAMES[label.item()])
    return filtered_boxes, box_labels  # Return filtered detections as NumPy array

# Function to calculate centroid of a bounding box
def calculate_centroid(box):
    return ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)

# Function to calculate distance between centroids
def calculate_distance(centroid1, centroid2):
    return np.sqrt((centroid1[0] - centroid2[0])**2 + (centroid1[1] - centroid2[1])**2)

# Function to count distinct vehicles (assuming unique IDs are in the 5th position)
def count_distinct_vehicles(tracked_objects):
    unique_ids = set()
    for obj in tracked_objects:
        unique_ids.add(obj[4])
    return len(unique_ids)

def getClosestBoxIndex(filtered_boxes, x1, y1, x2, y2 ):
    min_dist = 100000000
    idx = -1
    min_idx = -1
    for box in filtered_boxes:
        idx+=1
        x_c, y_c = calculate_centroid(box)
        x_t, y_t = calculate_centroid([x1, y1, x2, y2])
        if(calculate_distance((x_c, y_c), (x_t, y_t)) < min_dist):
            min_idx = idx
    return min_idx


# Read video file
video_path = './TrafficJunction9.mp4'
cap = cv2.VideoCapture(video_path)

# Get video properties (handle potential errors)
try:
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
except:
    print("Error getting video properties. Using defaults.")
    fps = 25
    frame_width = 640
    frame_height = 480

# Define vehicle types to count
vehicle_types = ['car'] #, 'bicycle', 'motorcycle', 'bus', 'train', 'truck']

# Initialize SORT tracker
tracker = DeepSort(max_age=50)

distinct_ids_seen = set()
total_vehicle_count = 0
avg_frame_rate = 0

labels_list = []

GREEN = (0, 255, 0)
WHITE = (255, 255, 255)

N = 0

total_start = TIME.time()
# Loop through the frames of the video
while cap.isOpened():
    N += 1 
    start_time = TIME.time()
    ret, frame = cap.read()
    if not ret:
        break

    # Detect objects in the frame
    boxes, labels, scores = detect_objects(frame)
    print(scores[0])
    # Filter detections by vehicle types
    filtered_boxes, box_labels = filter_vehicles(boxes, labels, scores, vehicle_types)

    # # Update SORT tracker
    # tracked_objects = tracker.update()
    
    # # Count distinct vehicles
    # # total_vehicles += count_distinct_vehicles(tracked_objects)
    # # Count distinct vehicles
    # tracked_ids = {obj[4] for obj in tracked_objects}  # Extract unique IDs from tracked objects
    # distinct_ids_seen = distinct_ids_seen.union(set(tracked_ids))  # Update the set of all distinct IDs seen
    # total_vehicles = len(distinct_ids_seen)
    
    # # Display tracked vehicles on the frame (consider visual improvements)
    # for obj in tracked_objects:
    #     x1, y1, x2, y2, track_id = obj
    #     idx = getClosestBoxIndex(filtered_boxes, x1, y1, x2, y2)
    #     cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    #     cv2.putText(frame, str(track_id), (int(x1), int(y1 - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
    
    # update the tracker with the new detections
    tracks = tracker.update_tracks(filtered_boxes, frame=frame)
    # loop over the tracks
    for track in tracks:
        # if the track is not confirmed, ignore it
        if not track.is_confirmed():
            continue

        # get the track id and the bounding box
        track_id = track.track_id
        ltrb = track.to_ltrb()

        distinct_ids_seen = distinct_ids_seen.union(set([str(track_id)]))  # Update the set of all distinct IDs seen
        total_vehicle_count = len(distinct_ids_seen)

        xmin, ymin, xmax, ymax = int(ltrb[0]), int(
            ltrb[1]), int(ltrb[2]), int(ltrb[3])
        # draw the bounding box and the track id
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), GREEN, 2)
        cv2.putText(frame, str(track_id), (xmin + 5, ymin - 8),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, WHITE, 2)


    end_time = TIME.time()
    time_taken = float(end_time - start_time )
    frame_rate = 1/time_taken
    avg_frame_rate = (avg_frame_rate* (N-1) + frame_rate)/N
    # Display total number of distinct vehicles
    cv2.putText(frame, "Total Vehicles: {} \n Frame Rate: {}".format(total_vehicle_count, frame_rate), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)

    # Display the frame
    cv2.imshow('Object Tracking', frame)

    # Check for key press to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

total_end = TIME.time()
total_time = (total_end - total_start)
print("Total Time Taken: ", total_time, " seconds")

# Release the video capture object and close OpenCV windows
cap.release()
cv2.destroyAllWindows()

print("Total vehicles counted: ", total_vehicle_count)
print("Average Frame Rate: ", avg_frame_rate)

0.99454594
0.99374557
0.9959065
0.99633706
0.99731606
0.9969098
0.99702233
0.9969663
0.99615896
0.9948835
0.9961809
0.99600524
0.9968669
0.99686533
0.9973013
0.9980531
0.9982078
0.9979809
0.9970124
0.9972915
0.99726474
0.9976463
0.9983193
0.9980989
0.99789196
0.9984571
0.9987827
0.99684924
0.99759775
0.9967631
0.9969169
0.99681276
0.99722195
0.9973207
0.99743384
0.9969568
0.9972474
0.9981688
0.99767894
0.9984969
0.9986505
0.9981673
0.99793905
0.9974952
0.9970632
0.9983987
0.99792767
0.99891245
0.9976695
0.99609935
0.9949516
0.99727327
0.9989679
0.9986494
0.9992293
0.99876815
0.9987947
0.99855906
0.998632
0.9986167
0.9975183
0.99895287
0.998906
0.9988311
0.99901223
0.99900824
0.9989901
0.99815756
0.9979386
0.9987006
0.9985065
0.99847287
0.9987998
0.9979419
0.9987287
0.9992343
0.9986676
0.9985892
0.99859935
0.99827576
0.99764293
0.9986123
0.998803
0.998777
0.99815035
Total Time Taken:  355.10262751579285  seconds
Total vehicles counted:  6
Average Frame Rate:  0.24779925018012042


In [None]:
print(labels_list)

['car', 'car', 'car', 'truck', 'car', 'person', 'truck', 'person', 'person', 'fire hydrant', 'person', 'bus', 'bicycle', 'bicycle', 'car', 'potted plant', 'bottle', 'bicycle', 'car', 'motorcycle', 'cow', 'bicycle', 'bicycle', 'truck', 'person', 'person', 'truck', 'truck', 'person', 'person', 'bicycle', 'bicycle', 'bicycle', 'motorcycle', 'bicycle', 'person', 'horse', 'person', 'car', 'umbrella', 'bicycle', 'car', 'person', 'potted plant', 'car', 'bicycle', 'car', 'car', 'truck', 'car', 'car', 'car', 'person', 'person', 'fire hydrant', 'person', 'car', 'bottle', 'truck', 'motorcycle', 'bicycle', 'person', 'car', 'person', 'bicycle', 'potted plant', 'horse', 'person', 'person', 'bus', 'potted plant', 'truck', 'bicycle', 'person', 'bicycle', 'car', 'person', 'person', 'person', 'cow', 'horse', 'motorcycle', 'truck', 'person', 'bicycle', 'person', 'truck', 'truck', 'bicycle', 'person', 'person', 'parking meter', 'car', 'car', 'car', 'car', 'truck', 'car', 'person', 'car', 'bottle', 'bicycl