In [1]:
import math
import glob
import sys 
import cv2 as cv
import pdb
import numpy as np
import os
import copy

In [2]:
# Load the YOLO network from its weights and configuration files
net = cv.dnn.readNet("yolov4-p6.weights", "yolov4-p6.cfg")

# Load the class names: one name per line, the line position is the class id
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]
print(classes)

# Resolve the names of the network's output layers.
# getUnconnectedOutLayers() yields 1-based layer ids. Depending on the OpenCV
# version they come back either as a flat array or as an (N, 1) array, so the
# ids are flattened and cast to plain ints before being used as list indices.
layer_ids = net.getUnconnectedOutLayers()
layer_names = net.getLayerNames()
output_layers = [layer_names[int(layer_id) - 1] for layer_id in np.array(layer_ids).flatten()]

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [3]:
# Lane polygons: one four-corner outline (x, y image coordinates) per lane.
# The position in this list plus one is the lane number used in the query files.
drawed_lanes = [
    np.array(corners, dtype=np.int32)
    for corners in (
        ((401, 388), (191, 195), (102, 220), (224, 408)),
        ((407, 389), (191, 195), (208, 137), (529, 377)),
        ((531, 376), (208, 137), (268, 120), (645, 370)),
        ((1177, 305), (1648, 305), (1665, 340), (1177, 388)),
        ((1177, 390), (1665, 345), (1682, 364), (1235, 414)),
        ((1235, 415), (1660, 366), (1701, 396), (1288, 440)),
        ((1411, 608), (1878, 839), (1639, 876), (1237, 638)),
        ((1237, 638), (1639, 876), (1396, 876), (1075, 660)),
        ((1075, 660), (1396, 876), (1183, 875), (944, 686)),
    )
]

In [4]:
# Settings shared by solve_all_images and its helpers.
_BLOB_SIZE = (1280, 1280)
# NOTE(review): 1/255 (~0.00392) is the customary YOLO input scale; 0.00892 is
# kept unchanged here - confirm that this value is intentional.
_BLOB_SCALE = 0.00892
_SCORE_THRESHOLD = 0.5
_NMS_THRESHOLD = 0.4
# Class ids that are ignored: 0 = person, 9 = traffic light, 58 = pottedplant
# (positions in coco.names). Every other detected class can occupy a lane.
_IGNORED_CLASS_IDS = frozenset((0, 9, 58))
# 1-based number of the lane that is tested against the centre of a box
# instead of the point in its lower part.
_CENTER_TESTED_LANE = 4


def _detect_boxes(image):
    """Run the detector on `image` and return the boxes that survive NMS.

    Uses the notebook-level `net` and `output_layers`. Each returned box is an
    `[x, y, w, h]` list (top-left corner, width, height) scaled to the size of
    `image`. Detections whose best class is in `_IGNORED_CLASS_IDS` are dropped.
    """
    height, width = image.shape[:2]

    blob = cv.dnn.blobFromImage(image, _BLOB_SCALE, _BLOB_SIZE, (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    boxes = []
    confidences = []
    for out in outs:
        for detection in out:
            # Entries 0-3 are the normalised box, entries from 5 on are class scores
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            if class_id in _IGNORED_CLASS_IDS:
                continue

            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            boxes.append([int(center_x - w / 2), int(center_y - h / 2), w, h])
            confidences.append(float(scores[class_id]))

    kept = cv.dnn.NMSBoxes(boxes, confidences, _SCORE_THRESHOLD, _NMS_THRESHOLD)
    # NMSBoxes may return an empty tuple, a flat array or an (N, 1) array
    kept_ids = sorted(set(np.array(kept).flatten().tolist()))
    return [boxes[int(box_id)] for box_id in kept_ids]


def _find_occupied_lanes(boxes, drawed_lanes):
    """Return the 1-based numbers of the lanes that contain at least one box.

    A box counts for a lane when its probe point lies inside or on the lane
    polygon. The probe point is the box centre for lane `_CENTER_TESTED_LANE`
    and a point on the vertical centre line at `y + int(h * 2 / 3.1)` otherwise.
    """
    occupied = []
    for lane_number, lane_polygon in enumerate(drawed_lanes, start=1):
        for x, y, w, h in boxes:
            if lane_number == _CENTER_TESTED_LANE:
                probe_y = y + h / 2
            else:
                probe_y = y + int(h * 2 / 3.1)

            probe = (float(x + w / 2), float(probe_y))
            if cv.pointPolygonTest(lane_polygon, probe, False) >= 0:
                occupied.append(lane_number)
                break  # one vehicle is enough to mark the lane as occupied
    return occupied


def _predicted_file_path(image_num):
    """Return the output path for the `image_num`-th image (1-based, sorted order).

    The first 30 images are numbered as scenes 01-10 with 3 images each; the
    following images as scenes 11, 12, ... with 4 images each.
    """
    if image_num <= 30:
        scene = (image_num - 1) // 3 + 1
        shot = (image_num - 1) % 3 + 1
    else:
        scene = (image_num - 31) // 4 + 11
        shot = (image_num - 31) % 4 + 1
    return "results_task1/{}_{}_predicted.txt".format("{:02d}".format(scene), shot)


# Main block that generates all the txt files.
def solve_all_images(images_path, queries_path, drawed_lanes):
    """Write one `*_predicted.txt` file per image with the occupancy of the queried lanes.

    Parameters:
        images_path: folder searched for images matching `*_*.jpg`.
        queries_path: folder searched for query files matching `*_*_query.txt`.
            The first non-blank line of a query file is the number of queries,
            every following non-blank line is a lane number.
        drawed_lanes: sequence of lane polygons; lane numbers are 1-based
            positions in this sequence.

    Images and query files are paired by their position in the *sorted* file
    lists, because glob returns names in arbitrary order. Each output file
    repeats the number of queries and then holds one `<lane> <0|1>` line per
    queried lane (1 = occupied).

    Raises:
        ValueError: if there are fewer query files than images.
        IOError: if an image cannot be decoded.
    """
    query_files = sorted(glob.glob(os.path.join(queries_path, "*_*_query.txt")))
    image_files = sorted(glob.glob(os.path.join(images_path, "*_*.jpg")))

    if not image_files:
        return
    if len(query_files) < len(image_files):
        raise ValueError(
            "Found {} images but only {} query files".format(len(image_files), len(query_files))
        )

    # Images are read one at a time so that memory use does not grow with the dataset
    for image_num, (image_file, query_file) in enumerate(zip(image_files, query_files), start=1):
        image = cv.imread(image_file)
        if image is None:
            # cv.imread signals an unreadable file by returning None
            raise IOError("Could not read image: {}".format(image_file))

        occupied_lanes = set(_find_occupied_lanes(_detect_boxes(image), drawed_lanes))

        # Blank lines (e.g. a trailing newline) are skipped
        with open(query_file, "r") as file:
            query_lines = [line.strip() for line in file if line.strip()]
        num_queries = int(query_lines[0])
        lanes = [int(line) for line in query_lines[1:]]

        output_file_name = _predicted_file_path(image_num)
        os.makedirs(os.path.dirname(output_file_name), exist_ok=True)

        with open(output_file_name, "w") as output_file:
            output_file.write(str(num_queries) + "\n")
            for lane in lanes:
                flag = "1" if lane in occupied_lanes else "0"
                output_file.write(str(lane) + " " + flag + "\n")

In [5]:
# The images and their query files are read from the same folder
images = queries = "test/Task1"
solve_all_images(images_path=images, queries_path=queries, drawed_lanes=drawed_lanes)