In [1]:
# Import necessary packages
import numpy as np
import time
import cv2
import os

# Mount Google Drive (if using Google Colab)
from google.colab import drive
drive.mount('/content/drive')

# Define paths
input_video_path = "/content/drive/My Drive/overpass.mp4"
output_video_path = "/content/drive/My Drive/output/overpass_output.avi"
yolo_folder_path = "/content/drive/My Drive/yolo-coco"

# Create the output folder if it doesn't exist. The folder is derived from
# output_video_path so the two can never drift apart, and exist_ok=True
# replaces the separate exists() check (no check-then-create race).
os.makedirs(os.path.dirname(output_video_path), exist_ok=True)

# Create YOLO folder and download files if they don’t exist
if not os.path.exists(yolo_folder_path):
    os.makedirs(yolo_folder_path)

    # Download YOLO files if not found in yolo-coco directory
    !wget -O "{yolo_folder_path}/yolov3.cfg" https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg
    !wget -O "{yolo_folder_path}/yolov3.weights" https://pjreddie.com/media/files/yolov3.weights
    !wget -O "{yolo_folder_path}/coco.names" https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names

# Runtime settings shared by the rest of the script
args = {
    "input": input_video_path,
    "output": output_video_path,
    "yolo": yolo_folder_path,
    "confidence": 0.5,  # minimum class score for a detection to be kept
    "threshold": 0.3,  # overlap threshold handed to non-maxima suppression
}

# Load the COCO class labels YOLO was trained on.
# The context manager closes the file handle as soon as the labels are read.
labelsPath = os.path.join(args["yolo"], "coco.names")
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

# Initialize one colour per class label; the fixed seed keeps the colours
# identical from run to run
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

# Paths to YOLO weights and config
weightsPath = os.path.join(args["yolo"], "yolov3.weights")
configPath = os.path.join(args["yolo"], "yolov3.cfg")

# Build the detector from the Darknet config and weights files
print("[INFO] Loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

# Resolve the names of the output layers. getUnconnectedOutLayers() gives
# 1-based indices that may arrive either as single-element rows or as plain
# scalars; flattening first lets one expression handle both layouts.
all_layer_names = net.getLayerNames()
output_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
ln = [all_layer_names[layer_id - 1] for layer_id in output_layer_ids]

# Initialize video stream
vs = cv2.VideoCapture(args["input"])

# Check if the video was opened successfully
if not vs.isOpened():
    print("[ERROR] Could not open video file.")
else:
    print("[INFO] Video file opened successfully.")

# Probe the first frame to confirm the stream is actually readable;
# stop here (releasing the capture) if it is not
(grabbed, frame) = vs.read()
if not grabbed:
    print("[ERROR] Could not read the first frame. Check the video file.")
    vs.release()
    raise RuntimeError("Failed to capture video. Ensure the video path is correct and accessible.")

print("[INFO] First frame read successfully.")
print(f"[INFO] Frame dimensions: {frame.shape[1]}x{frame.shape[0]}")

# The probe above consumed the first frame. Reopen the capture so the
# processing loop starts from the beginning instead of silently dropping
# that frame from the output.
vs.release()
vs = cv2.VideoCapture(args["input"])

# State filled in lazily by the frame loop
writer = None
(W, H) = (None, None)
frame_count = 0

# Try to determine the total number of frames.
# int() raises ValueError on NaN and OverflowError on infinity; only those
# and OpenCV's own error type are caught, so unrelated failures (and
# KeyboardInterrupt) are no longer swallowed by a bare except.
try:
    total = int(vs.get(cv2.CAP_PROP_FRAME_COUNT))
except (cv2.error, ValueError, OverflowError):
    total = -1

# NOTE(review): a non-positive count is treated as "unknown" on the assumption
# that the backend reports 0 for properties it cannot provide -- confirm
# against the OpenCV VideoCapture.get documentation.
if total > 0:
    print(f"[INFO] {total} total frames in video")
else:
    print("[INFO] Could not determine # of frames in video")
    total = -1

# Loop over frames from video: run YOLO on each frame, draw the surviving
# detections onto it, and append the annotated frame to the output video
while True:
    (grabbed, frame) = vs.read()
    if not grabbed:
        # End of stream (or a read failure)
        break

    frame_count += 1
    print(f"[INFO] Processing frame {frame_count}")

    # Frame size is taken once, from the first frame processed
    if W is None or H is None:
        (H, W) = frame.shape[:2]

    # Factor that maps the network's relative box values to pixels; built
    # once per frame instead of once per detection
    box_scale = np.array([W, H, W, H])

    # YOLO forward pass
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)

    boxes = []
    confidences = []
    classIDs = []

    # Process detections: the first four values of each row are scaled into a
    # centre/size box, values from index 5 onward are the per-class scores
    for output in layerOutputs:
        for detection in output:
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # Skip weak detections early
            if confidence <= args["confidence"]:
                continue

            (centerX, centerY, width, height) = (detection[0:4] * box_scale).astype("int")

            # Convert the centre-based box to its top-left corner
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))

            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(confidence))
            classIDs.append(int(classID))

    # Non-maxima suppression removes overlapping boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, args["confidence"], args["threshold"])

    if len(idxs) > 0:
        for i in np.asarray(idxs).flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            color = [int(c) for c in COLORS[classIDs[i]]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            text = f"{LABELS[classIDs[i]]}: {confidences[i]:.4f}"
            cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    if writer is None:
        # Write at the source frame rate so playback speed matches the input;
        # fall back to 30 when the rate is missing, zero, negative or NaN
        fps = vs.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):
            fps = 30.0

        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        writer = cv2.VideoWriter(args["output"], fourcc, fps, (W, H), True)

        # Fail loudly rather than run detection on every frame and end up
        # with no output file
        if not writer.isOpened():
            vs.release()
            raise RuntimeError(f"Could not open video writer for {args['output']}")
        print("[INFO] VideoWriter initialized")

    writer.write(frame)

# Release pointers.
# The writer is created lazily inside the frame loop, so it is still None
# when no frame was processed; guard the call so the capture is always
# released as well.
print("[INFO] Cleaning up...")
if writer is not None:
    writer.release()
vs.release()


Mounted at /content/drive
[INFO] Loading YOLO from disk...
[INFO] Video file opened successfully.
[INFO] First frame read successfully.
[INFO] Frame dimensions: 1280x720
[INFO] 812 total frames in video
[INFO] Processing frame 1
[INFO] VideoWriter initialized
[INFO] Processing frame 2
[INFO] Processing frame 3
[INFO] Processing frame 4
[INFO] Processing frame 5
[INFO] Processing frame 6
[INFO] Processing frame 7
[INFO] Processing frame 8
[INFO] Processing frame 9
[INFO] Processing frame 10
[INFO] Processing frame 11
[INFO] Processing frame 12
[INFO] Processing frame 13
[INFO] Processing frame 14
[INFO] Processing frame 15
[INFO] Processing frame 16
[INFO] Processing frame 17
[INFO] Processing frame 18
[INFO] Processing frame 19
[INFO] Processing frame 20
[INFO] Processing frame 21
[INFO] Processing frame 22
[INFO] Processing frame 23
[INFO] Processing frame 24
[INFO] Processing frame 25
[INFO] Processing frame 26
[INFO] Processing frame 27
[INFO] Processing frame 28
[INFO] Processing f