In [1]:
# Paths shared by every detector/tracker experiment in this notebook.
inp_vid = "./IP_Vid/v1.mp4"  # source video opened with cv2.VideoCapture in each experiment
out_vid = "output_v1.mp4"    # file name appended to each experiment's output folder

**FasterRCNN with SORT**

In [2]:
import cv2
import numpy as np
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from sort import Sort

def load_model():
    """Build the pretrained Faster R-CNN (ResNet-50 FPN) detector in inference mode.

    Returns:
        The torchvision detection model with pretrained weights loaded and
        ``eval()`` already applied.
    """
    # `pretrained=True` is deprecated since torchvision 0.13 and only emits a
    # warning today; COCO_V1 is the checkpoint that flag used to select.
    weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.COCO_V1
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)
    model.eval()
    return model

def detect_cars(model, image, confidence_threshold):
    """Run the detector on one image and keep only confident car detections.

    Args:
        model: Callable detector; it is given a batch of one image tensor and
            must return a sequence whose first item is a dict with 'boxes',
            'labels' and 'scores' tensors.
        image: Image accepted by ``F.to_tensor``.
        confidence_threshold: Detections must score strictly above this value.

    Returns:
        Tuple ``(car_boxes, car_scores)`` of NumPy arrays restricted to
        detections whose label equals 3 (the class this notebook treats as car).
    """
    # Add the leading batch dimension the detector is called with.
    batch = F.to_tensor(image).unsqueeze(0)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        result = model(batch)[0]

    boxes, labels, scores = (
        result[key].cpu().numpy() for key in ('boxes', 'labels', 'scores')
    )

    # One boolean mask shared by boxes and scores keeps them aligned.
    keep = (labels == 3) & (scores > confidence_threshold)
    return boxes[keep], scores[keep]

In [3]:
 # Load the Faster R-CNN model
model = load_model()
# Initialize SORT tracker
tracker = Sort(max_age=2500, min_hits=5)

# cv2.VideoWriter does not create missing folders and fails silently when it
# cannot open its target, so make sure the output directory exists first.
import os  # local import keeps this cell runnable on its own
out_path = './FasterRCNN/SORT/' + out_vid
os.makedirs(os.path.dirname(out_path), exist_ok=True)

# Initialize variables
car_count = 0
prev_car_boxes = set()  # SORT track IDs already counted (a set gives O(1) membership tests)
skip = 1  # Denotes frequency of skipped frames for a faster implementation: only every `skip`-th frame is processed
ind = 0

# Open video file
cap = cv2.VideoCapture(inp_vid)
out = None
# try/finally guarantees the writer is finalised even when the run is
# interrupted from the notebook (KeyboardInterrupt) or a frame fails.
try:
    if not cap.isOpened():
        raise RuntimeError(f'Could not open input video: {inp_vid}')
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 alters playback speed.
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(out_path, fourcc, fps, (frame_width, frame_height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        ind += 1
        if ind == skip:
            ind = 0
        if ind != 0:
            continue

        # OpenCV decodes frames as BGR, while the pretrained torchvision
        # weights were trained on RGB images.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Detect cars in the frame
        car_boxes, car_scores = detect_cars(model, rgb_frame, confidence_threshold=0.97)

        # Apply SORT tracking. The reference SORT API takes rows of
        # [x1, y1, x2, y2, score] and must be called on every processed frame,
        # including frames without detections (an empty (0, 5) array), so that
        # existing tracks keep ageing.
        detections = np.column_stack((car_boxes, car_scores))
        trackers = tracker.update(detections)
        # Update object IDs and count cars
        for track in trackers:
            object_id = int(track[4])
            if object_id not in prev_car_boxes:
                prev_car_boxes.add(object_id)
                car_count += 1

        # Draw bounding boxes, labels, and scores on the frame
        for i, (box, score) in enumerate(zip(car_boxes, car_scores)):
            x1, y1, x2, y2 = box.astype(int)
            label = f'Car {i+1}'
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f'{label} - {score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display car count on the frame
        cv2.putText(frame, f'Car Count: {car_count}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        # Write the frame to the output video
        out.write(frame)
        # cv2.imshow('Traffic Junction', frame)
        # The 'q' shortcut can only fire when the imshow window above is enabled.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()



KeyboardInterrupt: 

**FasterRCNN with DeepSORT**

In [None]:
import cv2
import numpy as np
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from DEEPSORT.deep_sort_realtime.deepsort_tracker import DeepSort


def load_model():
    """Build the pretrained Faster R-CNN (ResNet-50 FPN) detector in inference mode.

    Returns:
        The torchvision detection model with pretrained weights loaded and
        ``eval()`` already applied.
    """
    # `pretrained=True` is deprecated since torchvision 0.13 and only emits a
    # warning today; COCO_V1 is the checkpoint that flag used to select.
    weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.COCO_V1
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)
    model.eval()
    return model

def detect_cars(model, image, confidence_threshold):
    """Detect cars in a single image with a torchvision-style detector.

    Args:
        model: Callable detector; receives a one-image batch tensor and returns
            a sequence whose first element is a dict holding 'boxes', 'labels'
            and 'scores' tensors.
        image: Image accepted by ``F.to_tensor``.
        confidence_threshold: Minimum score (exclusive) a detection must exceed.

    Returns:
        ``(car_boxes, car_scores)`` as NumPy arrays, limited to detections with
        label 3 (treated as car by this notebook) above the threshold.
    """
    # Convert to a tensor and prepend the batch dimension.
    model_input = F.to_tensor(image).unsqueeze(0)

    # No gradients are needed for prediction.
    with torch.no_grad():
        first = model(model_input)[0]

    boxes = first['boxes'].cpu().numpy()
    labels = first['labels'].cpu().numpy()
    scores = first['scores'].cpu().numpy()

    # Build the selection once and reuse it for both outputs.
    is_confident_car = (scores > confidence_threshold) & (labels == 3)
    return boxes[is_confident_car], scores[is_confident_car]

In [None]:
 # Load the Faster R-CNN model
model = load_model()
# Initialize DeepSORT tracker
tracker = DeepSort(max_age=300,max_iou_distance=0.3)

# cv2.VideoWriter does not create missing folders and fails silently when it
# cannot open its target, so make sure the output directory exists first.
import os  # local import keeps this cell runnable on its own
out_path = './FasterRCNN/DeepSORT/' + out_vid
os.makedirs(os.path.dirname(out_path), exist_ok=True)

# Initialize variables
car_count = 0
confirmed_ids = set()  # IDs of every track that reached the confirmed state
skip = 1  # Denotes frequency of skipped frames for a faster implementation: only every `skip`-th frame is processed
ind = 0

# Open video file
cap = cv2.VideoCapture(inp_vid)
out = None
# try/finally guarantees the writer is finalised even when the run is
# interrupted from the notebook (KeyboardInterrupt) or a frame fails.
try:
    if not cap.isOpened():
        raise RuntimeError(f'Could not open input video: {inp_vid}')
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 alters playback speed.
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(out_path, fourcc, fps, (frame_width, frame_height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        ind += 1
        if ind == skip:
            ind = 0
        if ind != 0:
            continue

        # OpenCV decodes frames as BGR, while the pretrained torchvision
        # weights were trained on RGB images.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Detect cars in the frame
        car_boxes, car_scores = detect_cars(model, rgb_frame, confidence_threshold=0.97)

        # update_tracks expects each detection as
        # ([left, top, w, h], confidence, detection_class), so convert the
        # detector's corner-format boxes to width/height.
        car_det = []
        for box, score in zip(car_boxes, car_scores):
            left, top, right, bottom = box
            car_det.append(([left, top, right - left, bottom - top], score, 'car'))

        # Update the tracker before drawing so the frame it receives does not
        # yet contain the overlay boxes and text.
        tracks = tracker.update_tracks(car_det, frame=frame)
        # Count each confirmed track once, without relying on the tracker's
        # private ID counter (which also advances for unconfirmed tracks).
        for track in tracks:
            if not track.is_confirmed():
                continue
            confirmed_ids.add(track.track_id)
        car_count = len(confirmed_ids)

        # Draw bounding boxes, labels, and scores on the frame
        for i, (box, score) in enumerate(zip(car_boxes, car_scores)):
            x1, y1, x2, y2 = box.astype(int)
            label = f'Car {i+1}'
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f'{label} - {score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display car count on the frame
        cv2.putText(frame, f'Car Count: {car_count}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        # Write the frame to the output video
        out.write(frame)
        # cv2.imshow('Traffic Junction', frame)
        # The 'q' shortcut can only fire when the imshow window above is enabled.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

KeyboardInterrupt: 

**YOLOV5 with SORT**

In [None]:
import cv2
import numpy as np
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from sort import Sort

def load_model():
    """Fetch the pretrained YOLOv5s model through torch.hub and put it in eval mode.

    Returns:
        The object returned by ``torch.hub.load`` after ``eval()`` was called on it.
    """
    detector = torch.hub.load(
        'ultralytics/yolov5',
        'yolov5s',
        pretrained=True,
    )
    detector.eval()
    return detector

def detect_cars(model, image, confidence_threshold):
    """Run the YOLOv5 model on one image and keep only confident car detections.

    Args:
        model: Callable detector whose result exposes ``xyxy``; ``xyxy[0]`` is
            read as rows of ``[x1, y1, x2, y2, score, label]``.
        image: Input handed to ``model`` unchanged.
        confidence_threshold: Detections must score strictly above this value.

    Returns:
        Tuple ``(car_boxes, car_scores)`` of NumPy arrays restricted to rows
        whose label equals 2 (the class this notebook treats as car).
    """
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        results = model(image)

    # Move the per-image detection table to NumPy once, then split its columns.
    table = results.xyxy[0].cpu().numpy()
    boxes, scores, labels = table[:, :4], table[:, 4], table[:, 5]

    # One boolean mask shared by boxes and scores keeps them aligned.
    keep = (labels == 2) & (scores > confidence_threshold)
    return boxes[keep], scores[keep]

In [None]:
 # Load the YOLO V5 model
model = load_model()
# Initialize SORT tracker
tracker = Sort(max_age=2500, min_hits=5)

# cv2.VideoWriter does not create missing folders and fails silently when it
# cannot open its target, so make sure the output directory exists first.
import os  # local import keeps this cell runnable on its own
out_path = './YOLOV5/SORT/' + out_vid
os.makedirs(os.path.dirname(out_path), exist_ok=True)

# Initialize variables
car_count = 0
prev_car_boxes = set()  # SORT track IDs already counted (a set gives O(1) membership tests)

# Open video file
cap = cv2.VideoCapture(inp_vid)
out = None
# try/finally guarantees the writer is finalised even when the run is
# interrupted from the notebook (KeyboardInterrupt) or a frame fails.
try:
    if not cap.isOpened():
        raise RuntimeError(f'Could not open input video: {inp_vid}')
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 alters playback speed.
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(out_path, fourcc, fps, (frame_width, frame_height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV decodes frames as BGR; the YOLOv5 hub model treats NumPy
        # inputs as RGB, so convert before detection.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Detect cars in the frame
        car_boxes, car_scores = detect_cars(model, rgb_frame, 0.7)

        # Apply SORT tracking. The reference SORT API takes rows of
        # [x1, y1, x2, y2, score] and must be called on every frame, including
        # frames without detections (an empty (0, 5) array), so that existing
        # tracks keep ageing.
        detections = np.column_stack((car_boxes, car_scores))
        trackers = tracker.update(detections)
        # Update object IDs and count cars
        for track in trackers:
            object_id = int(track[4])
            if object_id not in prev_car_boxes:
                prev_car_boxes.add(object_id)
                car_count += 1

        # Draw bounding boxes, labels, and scores on the frame
        for i, (box, score) in enumerate(zip(car_boxes, car_scores)):
            x1, y1, x2, y2 = box.astype(int)
            label = f'Car {i+1}'
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f'{label} - {score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display car count on the frame
        cv2.putText(frame, f'Car Count: {car_count}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        # Write the frame to the output video
        out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

Using cache found in /home/sheikh/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-3-21 Python-3.10.12 torch-2.2.1+cu121 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


**YOLOV5 with DeepSORT**

In [None]:
import cv2
import numpy as np
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from DEEPSORT.deep_sort_realtime.deepsort_tracker import DeepSort

def load_model():
    """Load pretrained YOLOv5s via torch.hub and switch it to evaluation mode.

    Returns:
        The hub-loaded model object, after its ``eval()`` method has been called.
    """
    hub_repo, variant = 'ultralytics/yolov5', 'yolov5s'
    yolo = torch.hub.load(hub_repo, variant, pretrained=True)
    yolo.eval()
    return yolo

def detect_cars(model, image, confidence_threshold):
    """Detect cars in one image with the YOLOv5 model.

    Args:
        model: Callable detector whose result exposes ``xyxy``; ``xyxy[0]`` is
            read as rows of ``[x1, y1, x2, y2, score, label]``.
        image: Input passed straight through to ``model``.
        confidence_threshold: Minimum score (exclusive) a detection must exceed.

    Returns:
        ``(car_boxes, car_scores)`` as NumPy arrays, limited to rows with
        label 2 (the class this notebook treats as car) above the threshold.
    """
    # No gradients are needed for prediction.
    with torch.no_grad():
        output = model(image)

    rows = output.xyxy[0].cpu().numpy()
    boxes = rows[:, :4]
    scores = rows[:, 4]
    labels = rows[:, 5]

    # The filter uses label 2 for cars; build the mask once and reuse it.
    is_confident_car = (scores > confidence_threshold) & (labels == 2)
    return boxes[is_confident_car], scores[is_confident_car]

In [None]:
 # Load the YOLO V5 model
model = load_model()
# Initialize DeepSORT tracker
tracker = DeepSort(max_age=500,max_iou_distance=0.3)

# cv2.VideoWriter does not create missing folders and fails silently when it
# cannot open its target, so make sure the output directory exists first.
import os  # local import keeps this cell runnable on its own
out_path = './YOLOV5/DeepSORT/' + out_vid
os.makedirs(os.path.dirname(out_path), exist_ok=True)

# Initialize variables
car_count = 0
confirmed_ids = set()  # IDs of every track that reached the confirmed state

# Open video file
cap = cv2.VideoCapture(inp_vid)
out = None
# try/finally guarantees the writer is finalised even when the run is
# interrupted from the notebook (KeyboardInterrupt) or a frame fails.
try:
    if not cap.isOpened():
        raise RuntimeError(f'Could not open input video: {inp_vid}')
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 alters playback speed.
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(out_path, fourcc, fps, (frame_width, frame_height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV decodes frames as BGR; the YOLOv5 hub model treats NumPy
        # inputs as RGB, so convert before detection.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Detect cars in the frame
        car_boxes, car_scores = detect_cars(model, rgb_frame, 0.7)

        # bbs expected to be a list of detections, each in tuples of
        # ( [left,top,w,h], confidence, detection_class ), so convert the
        # detector's corner-format boxes to width/height.
        car_det = []
        for box, score in zip(car_boxes, car_scores):
            left, top, right, bottom = box
            car_det.append(([left, top, right - left, bottom - top], score, 'car'))

        # Update the tracker before drawing so the frame it receives does not
        # yet contain the overlay boxes and text.
        tracks = tracker.update_tracks(car_det, frame=frame)
        # Count each confirmed track once, without relying on the tracker's
        # private ID counter (which also advances for unconfirmed tracks).
        for track in tracks:
            if not track.is_confirmed():
                continue
            confirmed_ids.add(track.track_id)
        car_count = len(confirmed_ids)

        # Draw bounding boxes, labels, and scores on the frame
        for i, (box, score) in enumerate(zip(car_boxes, car_scores)):
            x1, y1, x2, y2 = box.astype(int)
            label = f'Car {i+1}'
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f'{label} - {score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display car count on the frame
        cv2.putText(frame, f'Car Count: {car_count}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        # Write the frame to the output video
        out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

Using cache found in /home/sheikh/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-3-21 Python-3.10.12 torch-2.2.1+cu121 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
