YOLO - You Only Look Once

COCO labels: https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/

Download the yolov3.weights and yolov3.cfg files from

https://github.com/AlexeyAB/darknet#how-to-evaluate-ap-of-yolov4-on-the-ms-coco-evaluation-server

and save them in the 'Data' folder. The object-labelling example also reads 'Data/coco.names' from that folder.

# Example 1

In [None]:
import math
class EuclideanDistTracker:
    """Give bounding boxes persistent integer IDs by matching box centres.

    A detection is treated as the same object as a previously seen one when
    the Euclidean distance between their centres is below ``max_distance``.
    Objects that are not matched in a call to :meth:`update` are forgotten.
    """

    def __init__(self, max_distance=25):
        """Create an empty tracker.

        Args:
            max_distance: Centre-to-centre distance (in pixels) below which a
                detection is considered to be an already known object.
        """
        # Last known centre (cx, cy) of every currently tracked object, by ID.
        self.center_points = {}
        # Next ID to hand out; incremented each time a new object appears.
        self.id_count = 0
        self.max_distance = max_distance

    def update(self, objects_rect):
        """Match the detections of one frame against the tracked objects.

        Args:
            objects_rect: Iterable of ``(x, y, w, h)`` boxes for this frame.

        Returns:
            A list of ``[x, y, w, h, object_id]`` in the order of the input.
        """
        objects_bbs_ids = []

        for x, y, w, h in objects_rect:
            cx = (x + x + w) // 2
            cy = (y + y + h) // 2

            # First known object whose stored centre is close enough wins.
            matched_id = None
            for object_id, (px, py) in self.center_points.items():
                if math.hypot(cx - px, cy - py) < self.max_distance:
                    matched_id = object_id
                    break

            # Nothing nearby: this is a new object, give it a fresh ID.
            if matched_id is None:
                matched_id = self.id_count
                self.id_count += 1

            self.center_points[matched_id] = (cx, cy)
            objects_bbs_ids.append([x, y, w, h, matched_id])

        # Keep only the IDs seen in this frame so that objects which have left
        # the scene can no longer capture new detections.
        self.center_points = {
            object_id: self.center_points[object_id]
            for *_, object_id in objects_bbs_ids
        }
        return objects_bbs_ids

In [None]:
import cv2

tracker = EuclideanDistTracker()
cap = cv2.VideoCapture('Data/highway.mp4')

# Background subtraction separates moving objects from a static scene, so it
# is only meaningful for footage from a stable camera.
object_detector = cv2.createBackgroundSubtractorMOG2()

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of the video (or the file could not be read): stop cleanly
            # instead of relying on an exception from a None frame.
            break

        # Extract region of interest (cropping the region of interest)
        roi = frame[340:720, 500:800]

        # Part1 - Object Detection
        mask = object_detector.apply(roi)
        # Keep only pure-white pixels. NOTE(review): MOG2 with its default
        # shadow detection marks shadows in grey, which this drops - confirm.
        _, mask = cv2.threshold(mask, 254, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        detections = []
        for cnt in contours:
            # Ignore small blobs, which are mostly noise.
            if cv2.contourArea(cnt) > 100:
                x, y, w, h = cv2.boundingRect(cnt)
                detections.append([x, y, w, h])

        # Part2 - Object Tracking
        for x, y, w, h, object_id in tracker.update(detections):
            cv2.putText(roi, str(object_id), (x, y - 15), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0), 2)
            cv2.rectangle(roi, (x, y), (x + w, y + h), (0, 255, 0), 3)

        cv2.imshow("roi", roi)
        cv2.imshow('Frame', frame)
        cv2.imshow("mask", mask)

        # Esc (key code 27) stops playback.
        if cv2.waitKey(30) == 27:
            break
finally:
    # Always free the capture and the windows, but let real errors surface
    # instead of silently swallowing them.
    cap.release()
    cv2.destroyAllWindows()

# Counting the vehicles in video footage

In [6]:
import cv2
import numpy as np

# Read the input video
cap = cv2.VideoCapture('Data/bangkok2.mp4')

# Get the video properties
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Define the codec for the output video
fourcc = cv2.VideoWriter_fourcc(*'XVID')

# Create the video writer
out = cv2.VideoWriter('Data/output_traffic_video.avi', fourcc, fps, (width, height))

# Load the YOLO model
net = cv2.dnn.readNetFromDarknet('Data/yolov3.cfg', 'Data/yolov3.weights')
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

# Get the names of the output layers
output_layers = net.getUnconnectedOutLayersNames()

# COCO class indices that are counted as vehicles: 2 = car, 3 = motorcycle,
# 5 = bus.
vehicle_class_ids = (2, 3, 5)

# Initialize the list of tracked vehicles and their unique IDs
tracked_vehicles = []
vehicle_id = 0

try:
    while True:
        # Read a frame from the video
        ret, frame = cap.read()

        if not ret:
            break

        # Downscale before building the blob. NOTE(review): blobFromImage
        # resizes to 416x416 anyway, so this step may be redundant.
        resized_frame = cv2.resize(frame, None, fx=0.6, fy=0.6)

        # Perform object detection using YOLO
        blob = cv2.dnn.blobFromImage(resized_frame, 0.00392, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        layer_outputs = net.forward(output_layers)

        # Initialize lists for bounding boxes, confidences, and class IDs
        boxes = []
        confidences = []
        class_ids = []

        # Process each output layer
        for output in layer_outputs:
            for detection in output:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]

                if confidence > 0.5 and class_id in vehicle_class_ids:
                    # YOLO coordinates are relative, so scaling by the original
                    # frame size gives pixel positions on the full-size frame.
                    box = detection[0:4] * np.array([width, height, width, height])
                    (center_x, center_y, box_width, box_height) = box.astype('int')
                    x = int(center_x - (box_width / 2))
                    y = int(center_y - (box_height / 2))

                    # Add the bounding box, confidence, and class ID to the respective lists
                    boxes.append([x, y, int(box_width), int(box_height)])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Perform non-maximum suppression to eliminate redundant overlapping boxes
        indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

        # Initialize the list of detected vehicle centroids for this frame
        centroids = []

        # NMSBoxes returns an empty tuple, a flat array or an Nx1 array
        # depending on the OpenCV version; flattening handles all of them.
        for i in np.array(indices, dtype=int).flatten():
            x, y, w, h = boxes[i]

            # Calculate the centroid of the bounding box
            centroid_x = x + (w // 2)
            centroid_y = y + (h // 2)

            # Add the centroid to the list
            centroids.append((centroid_x, centroid_y))

            # Draw the bounding box and centroid on the frame
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.circle(frame, (centroid_x, centroid_y), 4, (0, 255, 255), -1)

        # Update the list of tracked vehicles
        for centroid in centroids:
            for vehicle in tracked_vehicles:
                distance = np.linalg.norm(np.array(centroid) - np.array(vehicle['centroid']))
                if distance < 50:
                    # Follow the vehicle: without moving the stored centroid a
                    # vehicle would be counted again once it has travelled
                    # more than 50 px from where it was first seen.
                    vehicle['centroid'] = centroid
                    break
            else:
                # Not close to any existing vehicle: register it with a new ID
                tracked_vehicles.append({
                    'centroid': centroid,
                    'id': vehicle_id
                })
                vehicle_id += 1

        # Display the count of unique vehicles in real-time
        cv2.putText(frame, f"Vehicle Count: {len(tracked_vehicles)}", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Write the frame with bounding boxes and count to the output video
        out.write(frame)

        # Display the frame with bounding boxes and count
        cv2.imshow('Vehicle Tracking', frame)

        # Check if the 'x' key is pressed to exit
        if cv2.waitKey(1) & 0xFF == ord('x'):
            break
finally:
    # Release the video capture and video writer even if processing failed,
    # so the output file is finalized and the input is not left locked.
    cap.release()
    out.release()

    # Close all OpenCV windows
    cv2.destroyAllWindows()


# Detect and label all objects on the road and save the results, with timestamps, to a CSV file

In [7]:
import cv2
import numpy as np
import csv

# Load the YOLO model
net = cv2.dnn.readNetFromDarknet('Data/yolov3.cfg', 'Data/yolov3.weights')
layer_names = net.getLayerNames()
output_layers = net.getUnconnectedOutLayersNames()

# Load the classes (one label per line; the line number is the class index)
with open('Data/coco.names', 'r') as f:
    classes = [line.strip() for line in f.readlines()]

# Open the video file
cap = cv2.VideoCapture('Data/adas2.mp4')

# Get original video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Output resolution (1080p). Every frame is stretched to this size.
new_width = 1920
new_height = 1080

# Create video writer for resized video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
resized_video = cv2.VideoWriter('resized_traffic_video.mp4', fourcc, fps, (new_width, new_height))

# Set NMS parameters
conf_threshold = 0.6
nms_threshold = 0.25

try:
    # The with-block guarantees the CSV is flushed and closed on any exit path.
    with open('object_detection_results.csv', 'w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['Timestamp', 'Class', 'Confidence', 'X', 'Y', 'Width', 'Height'])

        # Process each frame of the video
        frame_index = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Resize frame to 1080p
            resized_frame = cv2.resize(frame, (new_width, new_height))

            # Detect objects in the resized frame
            blob = cv2.dnn.blobFromImage(resized_frame, 0.00392, (416, 416), swapRB=True, crop=False)
            net.setInput(blob)
            detections = net.forward(output_layers)

            # Candidate detections of this frame, before NMS
            class_ids = []
            confidences = []
            boxes = []

            for detection in detections:
                for obj in detection:
                    scores = obj[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]

                    # All classes are kept: the argmax over the class scores is
                    # always a valid class index, so only confidence filters.
                    if confidence > conf_threshold:
                        # YOLO coordinates are relative; convert them to pixels
                        # of the resized frame and to a top-left anchored box.
                        center_x = int(obj[0] * new_width)
                        center_y = int(obj[1] * new_height)
                        box_width = int(obj[2] * new_width)
                        box_height = int(obj[3] * new_height)
                        x = int(center_x - box_width / 2)
                        y = int(center_y - box_height / 2)

                        class_ids.append(class_id)
                        confidences.append(float(confidence))
                        boxes.append([x, y, box_width, box_height])

            # Apply NMS to eliminate redundant detections
            indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

            # NMSBoxes returns an empty tuple, a flat array or an Nx1 array
            # depending on the OpenCV version; flattening handles all of them.
            for i in np.array(indices, dtype=int).flatten():
                class_id = class_ids[i]
                confidence = confidences[i]
                x, y, box_width, box_height = boxes[i]

                # Timestamp in seconds, derived from frame number and frame rate
                csv_writer.writerow([frame_index / fps, classes[class_id], confidence, x, y, box_width, box_height])

                # Draw object bounding box and label on the frame
                cv2.rectangle(resized_frame, (x, y), (x + box_width, y + box_height), (0, 255, 0), 2)
                cv2.putText(resized_frame, f'{classes[class_id]}: {confidence:.2f}', (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (0, 255, 0), 2)

            # Write the resized frame to the resized video
            resized_video.write(resized_frame)

            # Display the frame
            cv2.imshow('Object Detection', resized_frame)

            # Break the loop if the 'x' key is pressed
            if cv2.waitKey(1) & 0xFF == ord('x'):
                break

            frame_index += 1
finally:
    # Release the capture and the writer even when processing fails, so the
    # output video is finalized and no handles leak.
    cap.release()
    resized_video.release()

    # Close OpenCV windows
    cv2.destroyAllWindows()
