In [1]:
%pip install opencv-python opencv-python-headless numpy pytube


Collecting opencv-python-headless
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl (38.8 MB)
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.1/38.8 MB 812.7 kB/s eta 0:00:48
   ---------------------------------------- 0.1/38.8 MB 1.2 MB/s eta 0:00:33
   ---------------------------------------- 0.2/38.8 MB 1.4 MB/s eta 0:00:29
   ---------------------------------------- 0.3/38.8 MB 1.5 MB/s eta 0:00:26
   ---------------------------------------- 0.3/38.8 MB 1.4 MB/s eta 0:00:27
   ---------------------------------------- 0.4/38.8 MB 1.2 MB/s eta 0:00:32
   ---------------------------------------- 0.5/38.8 MB 1.4 MB/s eta 0:00:28
    --------------------------------------- 0.5/38.8 MB 1.3 MB/s eta 0:00:30
    -------------------------------------


[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import cv2
import numpy as np
import os

# Function to load YOLO model
def load_yolo_model(weights_path="yolov3.weights", config_path="yolov3.cfg",
                    names_path="coco.names"):
    """Load a YOLO network, its class labels and its output-layer names.

    Args:
        weights_path: Path to the model weights file passed to ``cv2.dnn.readNet``.
        config_path: Path to the network configuration file passed to
            ``cv2.dnn.readNet``.
        names_path: Path to a text file holding one class label per line.

    Returns:
        A ``(net, classes, output_layers)`` tuple: the network object returned
        by ``cv2.dnn.readNet``, the list of whitespace-stripped label strings,
        and the list of names of the network's unconnected output layers.
    """
    net = cv2.dnn.readNet(weights_path, config_path)
    with open(names_path, "r") as f:
        classes = [line.strip() for line in f]
    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers() reports 1-based layer ids. Some OpenCV builds
    # return them as an (N, 1) array and others as a flat (N,) array, so
    # flatten first instead of indexing each entry with [0].
    out_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    output_layers = [layer_names[int(i) - 1] for i in out_ids]
    return net, classes, output_layers

# Function to detect objects in a frame
def detect_objects(net, output_layers, frame, conf_threshold=0.5, input_size=(416, 416)):
    """Run the network on one frame and collect detections above a threshold.

    Args:
        net: Network object exposing ``setInput`` and ``forward``.
        output_layers: Names of the output layers handed to ``net.forward``.
        frame: Image array; its first two dimensions are used as height and
            width to scale the detections.
        conf_threshold: Detections whose best class score is not strictly
            greater than this value are discarded. Defaults to 0.5.
        input_size: ``(width, height)`` the frame is resized to when building
            the input blob. Defaults to ``(416, 416)``.

    Returns:
        A ``(boxes, confidences, class_ids)`` tuple of parallel lists. Each box
        is ``[x, y, w, h]`` in pixels, where ``x``/``y`` is the top-left corner
        derived from the predicted centre; boxes are not clipped to the frame,
        so ``x``/``y`` may be negative. Confidences are Python floats and class
        ids are Python ints.
    """
    # The channel count is not needed, so only take the first two dimensions.
    height, width = frame.shape[:2]
    # 0.00392 is approximately 1/255 (scales 8-bit pixels to [0, 1]); True swaps R and B.
    blob = cv2.dnn.blobFromImage(frame, 0.00392, input_size, (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            # Entries 0-3 are the normalised centre x/y and width/height;
            # per-class scores start at index 5 (index 4 is not used here).
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = scores[class_id]
            if confidence > conf_threshold:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Convert the centre-based box to a top-left-based box.
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    return boxes, confidences, class_ids

# Function to save cropped objects
def save_cropped_objects(frame, boxes, output_path, frame_idx):
    """Write one JPEG crop per bounding box that overlaps the frame.

    Args:
        frame: Image array indexed as ``frame[row, column]``.
        boxes: Iterable of ``(x, y, w, h)`` integer boxes in pixels; they may
            extend past the frame edges or have negative ``x``/``y``.
        output_path: Directory the crops are written into.
        frame_idx: Frame number embedded in each file name.

    Each crop is saved as ``frame{frame_idx}_obj{idx}.jpg`` where ``idx`` is the
    box's position in ``boxes``. Boxes with no area inside the frame are skipped,
    so their index simply does not appear among the written files.
    """
    frame_h, frame_w = frame.shape[:2]
    for idx, (x, y, w, h) in enumerate(boxes):
        # Clamp to the frame: a negative slice start would otherwise wrap
        # around to the far edge and yield a wrong or empty crop.
        x0, y0 = max(0, x), max(0, y)
        x1, y1 = min(frame_w, x + w), min(frame_h, y + h)
        if x1 <= x0 or y1 <= y0:
            # Nothing of this box lies inside the frame; an empty image cannot be written.
            continue
        crop = frame[y0:y1, x0:x1]
        filename = os.path.join(output_path, f"frame{frame_idx}_obj{idx}.jpg")
        cv2.imwrite(filename, crop)

def main(video_path, output_path):
    """Detect objects in every frame of a video and save each one as a crop.

    Args:
        video_path: Path of the video handed to ``cv2.VideoCapture``.
        output_path: Directory for the cropped images; created if missing.

    Raises:
        IOError: If the video cannot be opened.
    """
    net, _classes, output_layers = load_yolo_model()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_path, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Fail loudly instead of finishing with an empty output folder.
            raise IOError(f"Could not open video: {video_path!r}")

        frame_idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was returned, so stop reading.
                break
            boxes, _confidences, _class_ids = detect_objects(net, output_layers, frame)
            save_cropped_objects(frame, boxes, output_path, frame_idx)
            frame_idx += 1
    finally:
        # Release the capture even if detection or saving raised.
        cap.release()

# Input video to process.
# NOTE(review): 'path_to_your_video.mp4' looks like a placeholder — point it at a
# real video file before running this cell.
video_path = 'path_to_your_video.mp4'
# Folder that receives the cropped-object JPEGs; main() creates it when it is missing.
output_path = 'cropped_objects'

# Run detection over the whole video and write the crops to output_path.
main(video_path, output_path)
