# Vision Hack 2024
Hello, we are the Pixel Pros. For our Computer Vision Hackathon, __Vision Hack 2024__, we decided to create a Traffic Congestion Detection system using YOLOv8.  
While it is not difficult to collect footage from intersections and use it as test input for the model, controlling traffic and its flow is not possible in a real-world scenario.  
To overcome this, we used synthetic data generated with a video game.

To do this, we:
- Used the popular video game GTA-V to create synthetic data of an intersection for about 2 minutes.
- In the game, there are two in-roads the video focuses on, the Left and the Right in-road.
- We detect and count the vehicles (cars, buses and trucks) in both sections and determine whether each section is congested.
- This can be particularly useful when we want to build an automated traffic signal that changes based on the congestion detected.  

Where this approach will truly be beneficial is in using modifications to the game to simulate extremely rare scenarios and to train the model on those scenarios.

Done by:
- Pearl Veola Dsilva (2347245)
- Kenneth Dominic Fernandes (2347231)

In [5]:
# Install the third-party packages used by this notebook (IPython %pip magic,
# so the install targets the kernel's environment).
# NOTE(review): cv2, pandas and numpy are imported below but not listed here;
# presumably they arrive as dependencies of the listed packages - confirm.
%pip install torch ultralytics cvzone shapely

Collecting shapely
  Downloading shapely-2.0.6-cp312-cp312-win_amd64.whl.metadata (7.2 kB)
Downloading shapely-2.0.6-cp312-cp312-win_amd64.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ------- -------------------------------- 0.3/1.4 MB ? eta -:--:--
   ------- -------------------------------- 0.3/1.4 MB ? eta -:--:--
   -------------- ------------------------- 0.5/1.4 MB 929.6 kB/s eta 0:00:01
   --------------------- ------------------ 0.8/1.4 MB 985.5 kB/s eta 0:00:01
   --------------------- ------------------ 0.8/1.4 MB 985.5 kB/s eta 0:00:01
   ----------------------------- ---------- 1.0/1.4 MB 762.0 kB/s eta 0:00:01
   ----------------------------- ---------- 1.0/1.4 MB 762.0 kB/s eta 0:00:01
   ------------------------------------ --- 1.3/1.4 MB 713.8 kB/s eta 0:00:01
   ------------------------------------ --- 1.3/1.4 MB 713.8 kB/s eta 0:00:01
   -----------------------

In [7]:
import cv2
import pandas as pd
from ultralytics import YOLO
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import numpy as np
import math
import cvzone

In [8]:
class Tracker:
    """Assign integer IDs to boxes by matching centre points between calls.

    A box whose centre lies closer than ``max_distance`` to a centre stored
    by the previous ``update`` call reuses that object's ID; otherwise it is
    given the next unused ID.  Matching takes the first stored centre that is
    close enough (dictionary order), not necessarily the nearest one.

    Attributes:
        center_points: Maps object ID -> (cx, cy) for the objects seen in the
            most recent ``update`` call.
        id_count: The next ID that will be handed out.
        max_distance: Match radius; a box matches when its centre is
            strictly closer than this to a stored centre.
    """

    def __init__(self, max_distance=35):
        """Create an empty tracker.

        Args:
            max_distance: Match radius in the same units as the box
                coordinates.  Defaults to 35, the previously hard-coded value.
        """
        self.center_points = {}
        self.id_count = 0
        self.max_distance = max_distance

    def update(self, objects_rect):
        """Match the given boxes against the previous call and assign IDs.

        Args:
            objects_rect: Iterable of ``(x, y, w, h)`` boxes (top-left corner
                plus width and height).

        Returns:
            A list with one ``[x, y, w, h, object_id]`` entry per input box,
            in the same order as the input.

        Every ID that does not appear in this call's result is forgotten, so
        the tracker must receive *all* boxes of a frame in a single call.
        """
        objects_bbs_ids = []

        for rect in objects_rect:
            x, y, w, h = rect
            cx = (x + x + w) // 2
            cy = (y + y + h) // 2

            # Reuse the ID of the first known centre that is close enough.
            matched_id = None
            for object_id, pt in self.center_points.items():
                if math.hypot(cx - pt[0], cy - pt[1]) < self.max_distance:
                    matched_id = object_id
                    break

            # No match: this is a new object, give it a fresh ID.
            if matched_id is None:
                matched_id = self.id_count
                self.id_count += 1

            # Stored immediately, so later boxes in this same call can also
            # match against it.
            self.center_points[matched_id] = (cx, cy)
            objects_bbs_ids.append([x, y, w, h, matched_id])

        # Keep only the centres of objects seen in this call.
        self.center_points = {
            object_id: self.center_points[object_id]
            for _, _, _, _, object_id in objects_bbs_ids
        }
        return objects_bbs_ids

In [9]:
# Load the YOLO detector from the local weights file ./model/yolov8s.pt.
# `model` is used by the frame-processing loop via model.predict(frame).
model = YOLO('./model/yolov8s.pt')

In [19]:
# Open the input video; frames are pulled from `cap` with cap.read().
video_path = './data/CV Traffic.avi'
cap = cv2.VideoCapture(video_path)
# VideoCapture does not raise on a missing/unreadable file; without this check
# the first cap.read() would simply return no frame and nothing would be
# processed.
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

In [20]:
# Read the label file; line i holds the name of class id i, so the list can be
# indexed directly with a detection's class id.
with open("./labels/label.txt", "r") as label_file:
    raw_labels = label_file.read()
class_list = raw_labels.split("\n")

In [21]:
# Frame counter and running per-class tallies, all starting at zero.
count = car_count = bus_count = truck_count = 0

# Single tracker instance used by the processing loop.
tracker = Tracker()

In [22]:
# Colour tuples handed to the OpenCV drawing calls (OpenCV uses B, G, R order).
white, orange, red = (255, 255, 255), (0, 165, 255), (0, 0, 255)

# Offset added to every vertex of the right-hand polygon.
x_shift, y_shift = -250, -100

In [23]:
# Left in-road: four vertices given directly in frame coordinates.
polygon_points_left = [(1, 100), (1, 350), (400, 250), (400, 100)]
polygon_left = Polygon(polygon_points_left)

# Right in-road: four base vertices, each moved by (x_shift, y_shift).
_right_base_points = [(859, 368), (1255, 426), (1245, 204), (854, 206)]
polygon_points_right = [
    (base_x + x_shift, base_y + y_shift)
    for base_x, base_y in _right_base_points
]
polygon_right = Polygon(polygon_points_right)

In [24]:
# Writer for the annotated video: 'mp4v' codec, 20 fps, 1020x500 frames.
# The frame size must match the size the processing loop resizes frames to.
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
out = cv2.VideoWriter(
    './output/output_video.mp4',
    fourcc,
    20.0,
    (1020, 500),
)


In [25]:
# Process the video frame by frame: detect vehicles, track them, count the
# ones whose centre lies inside each in-road polygon, then draw and save the
# annotated frame.


def _congestion_color(total):
    """Return the polygon outline colour for a zone holding `total` vehicles."""
    if total > 10:
        return red
    if total > 5:
        return orange
    return white


while True:
    ret, frame = cap.read()
    if not ret:  # no frame returned (e.g. end of video)
        break

    count += 1
    if count % 3 != 0:  # only every third frame is analysed and written
        continue

    frame = cv2.resize(frame, (1020, 500))

    # Run detection.  Rows are read as [x1, y1, x2, y2, <unused>, class_id].
    # Moving the boxes to host memory first keeps this working when inference
    # ran on a GPU.
    results = model.predict(frame)
    detections = results[0].boxes.cpu().numpy().data

    # Parallel lists: vehicle_rects[i] is the box of a vehicle whose class is
    # vehicle_labels[i].  Boxes are converted to (x, y, w, h) because that is
    # how Tracker.update unpacks each rect.
    vehicle_rects = []
    vehicle_labels = []
    for row in detections:
        x1, y1, x2, y2 = (int(value) for value in row[:4])
        class_name = class_list[int(row[5])]
        for label in ("car", "bus", "truck"):
            if label in class_name:
                vehicle_rects.append([x1, y1, x2 - x1, y2 - y1])
                vehicle_labels.append(label)
                break

    # One tracker call per frame: Tracker.update forgets every object that is
    # not part of the current call, so separate per-class calls would wipe the
    # other classes' history.  The result has one entry per input rect, in
    # input order, which lets the labels be zipped back on.
    tracked = tracker.update(vehicle_rects)

    left_counts = {"car": 0, "bus": 0, "truck": 0}
    right_counts = {"car": 0, "bus": 0, "truck": 0}
    vehicles_in_zone = []  # (x1, y1, x2, y2, object_id) to annotate below

    for (x, y, w, h, object_id), label in zip(tracked, vehicle_labels):
        x2, y2 = x + w, y + h
        center = Point(int((x + x2) / 2), int((y + y2) / 2))
        in_left = polygon_left.contains(center)
        in_right = polygon_right.contains(center)

        if in_left:
            left_counts[label] += 1
        if in_right:
            right_counts[label] += 1

        # Running totals over all processed frames, by the vehicle's own class.
        hits = int(in_left) + int(in_right)
        if label == "car":
            car_count += hits
        elif label == "bus":
            bus_count += hits
        else:
            truck_count += hits

        if in_left or in_right:
            vehicles_in_zone.append((x, y, x2, y2, object_id))

    total_left = sum(left_counts.values())
    total_right = sum(right_counts.values())

    # Outline each zone, coloured by that zone's own vehicle total.
    zones = (
        (polygon_points_left, total_left),
        (polygon_points_right, total_right),
    )
    for zone_points, zone_total in zones:
        cv2.polylines(
            frame,
            [np.array(zone_points, np.int32)],
            isClosed=True,
            color=_congestion_color(zone_total),
            thickness=2,
        )

    # Per-class counts: left zone in the left column, right zone at x=650.
    overlay_lines = (
        (f"Car count (left): {left_counts['car']}", (10, 30)),
        (f"Bus count (left): {left_counts['bus']}", (10, 60)),
        (f"Truck count (left): {left_counts['truck']}", (10, 90)),
        (f"Car count (right): {right_counts['car']}", (650, 30)),
        (f"Bus count (right): {right_counts['bus']}", (650, 60)),
        (f"Truck count (right): {right_counts['truck']}", (650, 90)),
    )
    for text, origin in overlay_lines:
        cv2.putText(
            frame,
            text,
            origin,
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )

    # Box and tracker ID for every vehicle inside either zone.
    for x1, y1, x2, y2, object_id in vehicles_in_zone:
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 255), 2)
        cvzone.putTextRect(frame, f"{object_id}", (x1, y1), 1, 1)

    # Save before checking the keyboard so the last rendered frame is not
    # dropped when the user presses ESC.
    out.write(frame)

    cv2.imshow("Traffic Congestion using Synthetic Data from GTA V", frame)
    if cv2.waitKey(1) & 0xFF == 27:  # 27 == ESC
        break




0: 320x640 9 cars, 1 truck, 145.0ms
Speed: 1.0ms preprocess, 145.0ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 10 cars, 1 truck, 88.8ms
Speed: 2.0ms preprocess, 88.8ms inference, 1.5ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 12 cars, 1 motorcycle, 1 truck, 88.7ms
Speed: 1.0ms preprocess, 88.7ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 12 cars, 1 truck, 87.9ms
Speed: 1.0ms preprocess, 87.9ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 10 cars, 1 motorcycle, 1 truck, 84.4ms
Speed: 1.0ms preprocess, 84.4ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 11 cars, 1 motorcycle, 1 truck, 84.6ms
Speed: 2.0ms preprocess, 84.6ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 10 cars, 1 motorcycle, 1 truck, 85.4ms
Speed: 2.0ms preprocess, 85.4ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 32

In [26]:
# Release the video reader and writer, then close any OpenCV windows.
cap.release()
out.release()
cv2.destroyAllWindows()