In [1]:
import cv2
import numpy as np
import json

# Step 1: run YOLOv3 on every frame of the clip and save the detections as JSON.

# Tunable detection settings
CONFIDENCE_THRESHOLD = 0.5  # minimum class score for a candidate to be kept
NMS_THRESHOLD = 0.4         # overlap (IoU) above which same-class boxes are suppressed
INPUT_SIZE = (416, 416)     # spatial size of the blob fed to the network


def _extract_candidates(layer_outputs, frame_width, frame_height):
    """Convert raw network output rows into pixel-space candidate detections.

    Each row of each output array is read as
    ``[center_x, center_y, width, height, <index 4 unused here>, class scores...]``
    with the first four values scaled by the frame size.

    Returns three parallel lists: boxes as ``[x, y, w, h]`` (top-left corner,
    ints), confidences (floats) and class ids (ints), containing only rows
    whose best class score exceeds ``CONFIDENCE_THRESHOLD``.
    """
    boxes, confidences, class_ids = [], [], []
    scale = np.array([frame_width, frame_height, frame_width, frame_height])

    for output in layer_outputs:
        # Pick the best class per row in one vectorised pass instead of a
        # Python-level loop over every candidate row.
        scores = output[:, 5:]
        best_class = np.argmax(scores, axis=1)
        best_score = scores[np.arange(len(output)), best_class]
        keep = best_score > CONFIDENCE_THRESHOLD

        for row, class_id, confidence in zip(output[keep], best_class[keep], best_score[keep]):
            center_x, center_y, width, height = (row[0:4] * scale).astype("int")
            # Convert from centre-based to top-left-based coordinates
            x = int(center_x - (width / 2))
            y = int(center_y - (height / 2))
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(confidence))
            class_ids.append(int(class_id))

    return boxes, confidences, class_ids


def _suppress_overlaps(boxes, confidences, class_ids):
    """Return sorted indices of candidates that survive per-class NMS.

    Suppression is run separately for each class so that overlapping objects
    of different classes do not remove one another.
    """
    survivors = []
    for class_id in sorted(set(class_ids)):
        members = [i for i, c in enumerate(class_ids) if c == class_id]
        picked = cv2.dnn.NMSBoxes(
            [boxes[i] for i in members],
            [confidences[i] for i in members],
            CONFIDENCE_THRESHOLD,
            NMS_THRESHOLD,
        )
        # Depending on the OpenCV version the result is an (N, 1) array, a flat
        # array, or an empty tuple; flatten so all three are handled alike.
        survivors.extend(members[int(j)] for j in np.array(picked).reshape(-1))
    return sorted(survivors)


# Load the YOLOv3 model; the output layer names do not change between frames,
# so look them up once here rather than inside the frame loop.
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
output_layers_names = net.getUnconnectedOutLayersNames()

# Load the COCO class labels (one label per line; line index == class id)
with open('coco_classes.txt', 'r') as f:
    classes = [line.strip() for line in f]

# Read the input video clip
video = cv2.VideoCapture('Intersection.mp4')
if not video.isOpened():
    # Without this check a missing/unreadable file would silently yield an empty JSON.
    raise IOError("Could not open input video 'Intersection.mp4'")

# Initialize variables
frame_id = 0
detected_objects = []

try:
    while True:
        # Read the current frame; ret is False once the stream is exhausted
        ret, frame = video.read()
        if not ret:
            break

        frame_height, frame_width = frame.shape[:2]

        # Perform object detection
        blob = cv2.dnn.blobFromImage(frame, 1/255, INPUT_SIZE, swapRB=True, crop=False)
        net.setInput(blob)
        layer_outputs = net.forward(output_layers_names)

        # Process the detections: threshold, then drop duplicate boxes
        boxes, confidences, class_ids = _extract_candidates(layer_outputs, frame_width, frame_height)
        for i in _suppress_overlaps(boxes, confidences, class_ids):
            # Save the detected object information
            detected_objects.append({
                'FrameID': frame_id,
                'Class': classes[class_ids[i]],
                'BoundingBox': boxes[i],
                'Confidence': confidences[i]
            })

        frame_id += 1
finally:
    # Release the video capture and destroy any remaining windows, even if
    # inference failed part-way through.
    video.release()
    cv2.destroyAllWindows()

# Save the detected information to a JSON file
output_file = 'detected_objects.json'
with open(output_file, 'w') as f:
    json.dump(detected_objects, f)

print(f"Detected objects saved to {output_file}")


Detected objects saved to detected_objects.json


In [2]:
import cv2
import json

# Step 2: draw the saved detections onto the clip and write an annotated video.

SHOW_PREVIEW = True   # set to False to skip the on-screen preview window
FALLBACK_FPS = 30.0   # used only when the container does not report a frame rate

# Read the JSON file with detected object information
with open('detected_objects.json', 'r') as f:
    detected_objects = json.load(f)

# Group detections by frame once so each frame is a dictionary lookup instead
# of a scan over every detection in the clip.
detections_by_frame = {}
for obj in detected_objects:
    detections_by_frame.setdefault(obj['FrameID'], []).append(obj)

# Read the input video clip
video = cv2.VideoCapture('Intersection.mp4')
output_video = None

try:
    if not video.isOpened():
        raise IOError("Could not open input video 'Intersection.mp4'")

    # Get the video's frame rate and dimensions
    fps = video.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        # A zero/invalid rate would be passed straight to the writer below
        print(f"Input reports no valid frame rate; falling back to {FALLBACK_FPS} fps")
        fps = FALLBACK_FPS
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Create a VideoWriter object to save the annotated video
    output_file = 'output_clip.mp4'
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    output_video = cv2.VideoWriter(output_file, fourcc, fps, (width, height))
    if not output_video.isOpened():
        raise IOError(f"Could not open output video '{output_file}' for writing")

    # Process each frame of the input video
    frame_id = 0
    while True:
        ret, frame = video.read()
        if not ret:
            break

        # Draw every detection recorded for the current frame (if any)
        for detection in detections_by_frame.get(frame_id, []):
            class_name = detection['Class']
            confidence = detection['Confidence']
            x, y, w, h = detection['BoundingBox']

            # Draw bounding box and label on the frame
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            label = f"{class_name}: {confidence:.2f}"
            # Keep the label inside the frame when the box touches the top edge
            label_y = max(y - 10, 15)
            cv2.putText(frame, label, (x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Write the annotated frame to the output video
        output_video.write(frame)

        # Display the annotated frame; pressing 'q' stops processing early
        if SHOW_PREVIEW:
            cv2.imshow('Annotated Video', frame)
            # Mask to the low byte so modifier/high bits do not hide the key code
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        frame_id += 1
finally:
    # Release the video capture, video writer, and close any remaining windows,
    # even if an error interrupted processing.
    video.release()
    if output_video is not None:
        output_video.release()
    if SHOW_PREVIEW:
        cv2.destroyAllWindows()

print(f"Annotated video saved to {output_file}")


Annotated video saved to output_clip.mp4
