In [1]:
import numpy as np
import cv2

In [2]:

# Build the detection network from a Darknet config file and its matching
# weights file. Both paths are given relative to the current directory.
configPath, weightsPath = "yolov3.cfg", "yolov3 (1).weights"
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)


In [3]:

# Resolve the names of the network's unconnected (output) layers so that
# net.forward() can be asked for exactly those layers.
layer_names = net.getLayerNames()

# The indices are 1-based (hence the "- 1" below). Depending on the OpenCV
# release they come back either as a flat array or as an (N, 1) array, so
# flatten first and index with plain ints to work with both layouts.
output_layer_indices = np.asarray(net.getUnconnectedOutLayers()).flatten()
output_layer_names = [layer_names[int(i) - 1] for i in output_layer_indices]


In [4]:

# Open the input clip; later cells pull frames from `cap`.
video_path = (
    "People Walking Free Stock Footage, "
    "Royalty-Free No Copyright Content.mp4"
)
cap = cv2.VideoCapture(video_path)


In [5]:

# Class labels, one per line; the position in the list is the class id used
# to look up both the label text and the drawing colour.
labelsPath = 'coco.names'
with open(labelsPath) as labels_file:  # context manager closes the handle
    LABELS = labels_file.read().strip().split("\n")

# One colour triple per class. A seeded generator keeps the colours identical
# across kernel restarts; the upper bound is exclusive, so 256 is needed for
# the value 255 to be reachable.
COLORS = np.random.default_rng(42).integers(
    0, 256, size=(len(LABELS), 3), dtype="uint8"
)


In [6]:

# Detection settings:
#   "confidence" - minimum class score a detection needs to be kept
#                  (also passed to NMS as its score threshold)
#   "threshold"  - overlap threshold handed to non-maximum suppression
args = dict(confidence=0.5, threshold=0.3)

# A frame is processed only when the running frame counter is a multiple of
# this value, so 1 means "process every frame".
frame_skip_factor = 1

# Running frame counter; the processing loop increments it for each frame read.
frame_count = 2

In [7]:

# Probe one frame to learn the frame size the writer must be created with.
ret, frame = cap.read()
if not ret:
    raise ValueError("Failed to read video frame")
(H, W) = frame.shape[:2]

# Rewind to the start: the processing loop reads from the capture again, so
# without this the probed frame would be missing from the output.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# Write at the source frame rate so playback speed matches the input; fall
# back to 30 when the capture does not report a usable value (0 or NaN).
fps = cap.get(cv2.CAP_PROP_FPS)
if not (fps > 0):
    fps = 30.0

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output.mp4', fourcc, fps, (W, H))


In [8]:

def _extract_detections(layer_outputs, frame_w, frame_h, min_confidence):
    """Collect candidate boxes from the raw network outputs of one frame.

    Each output is treated as a 2-D array whose rows are read as
    ``[cx, cy, w, h, <unused>, score_0, score_1, ...]``, with the box given
    as fractions of the frame size. All rows of an output are handled in one
    vectorised pass instead of a per-row Python loop.

    Args:
        layer_outputs: iterable of 2-D arrays returned by ``net.forward``.
        frame_w: frame width in pixels.
        frame_h: frame height in pixels.
        min_confidence: a row is kept only if its best class score is
            strictly greater than this value.

    Returns:
        ``(boxes, confidences, class_ids)`` - three parallel lists. Each box
        is ``[x, y, w, h]`` in integer pixels with ``(x, y)`` the top-left
        corner; confidences are Python floats; class ids are Python ints.
    """
    # Built once per frame rather than once per detection.
    scale = np.array([frame_w, frame_h, frame_w, frame_h])
    boxes, confidences, class_ids = [], [], []

    for output in layer_outputs:
        scores = output[:, 5:]
        best_class = np.argmax(scores, axis=1)
        best_score = scores[np.arange(scores.shape[0]), best_class]
        keep = best_score > min_confidence
        if not keep.any():
            continue

        # Scale to pixels, then truncate to ints before deriving the corner,
        # mirroring the scalar arithmetic (int(cx - w / 2)) exactly.
        centered = (output[keep, :4] * scale).astype("int")
        widths, heights = centered[:, 2], centered[:, 3]
        lefts = (centered[:, 0] - widths / 2).astype("int")
        tops = (centered[:, 1] - heights / 2).astype("int")

        boxes.extend(np.stack([lefts, tops, widths, heights], axis=1).tolist())
        confidences.extend(best_score[keep].astype(float).tolist())
        class_ids.extend(best_class[keep].tolist())

    return boxes, confidences, class_ids


# Main loop: read a frame, run the detector, draw the boxes that survive
# non-maximum suppression, write the annotated frame and show it.
while True:
    ret, frame = cap.read()
    if not ret:
        break  # no more frames could be read

    frame_count += 1
    if frame_count % frame_skip_factor != 0:
        continue  # skipped frames are neither processed nor written

    # Scale pixels to [0, 1], resize to the 416x416 network input and swap
    # the R and B channels.
    set1 = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
                                 swapRB=True, crop=False)
    net.setInput(set1)
    layerOutputs = net.forward(output_layer_names)

    boxes, confidences, classIDs = _extract_detections(
        layerOutputs, W, H, args["confidence"])

    # Suppress overlapping boxes; the result holds indices into `boxes`.
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, args["confidence"], args["threshold"])

    # np.asarray copes with every return shape (empty tuple, flat array or
    # (N, 1) array), so an empty result simply yields no iterations.
    for i in np.asarray(idxs, dtype=int).flatten():
        (x, y) = (boxes[i][0], boxes[i][1])
        (w, h) = (boxes[i][2], boxes[i][3])

        color = [int(c) for c in COLORS[classIDs[i]]]
        cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
        cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, color, 2)

    out.write(frame)

    # Live preview; pressing 'x' in the window stops processing early.
    cv2.imshow("Video", frame)
    if cv2.waitKey(2) & 0xFF == ord('x'):
        break

In [9]:
# Release the capture and the writer, then close any OpenCV windows.
for handle in (cap, out):
    handle.release()
cv2.destroyAllWindows()