In [1]:
import cv2
import numpy as np
import os

In [2]:
def load_yolo_model(cfg_path, weights_path, names_path):
    """Load a YOLO network together with its list of class names.

    Args:
        cfg_path: Path to the network configuration file.
        weights_path: Path to the trained weights file.
        names_path: Path to a text file holding one class name per line.

    Returns:
        A ``(net, classes)`` tuple: the object returned by
        ``cv2.dnn.readNet`` and the list of class names read from
        ``names_path``.
    """
    network = cv2.dnn.readNet(weights_path, cfg_path)

    with open(names_path, 'r') as names_file:
        contents = names_file.read()

    # Trim surrounding whitespace before splitting so a trailing newline
    # does not produce an empty final entry.
    class_names = contents.strip().split('\n')
    return network, class_names

# Locations of the YOLO configuration, weights, and class-name files
cfg_path, weights_path, names_path = 'yolov3.cfg', 'yolov3.weights', 'coco.names'

# Build the network, read the class list, then confirm on stdout
yolo_model, classes = load_yolo_model(
    cfg_path=cfg_path,
    weights_path=weights_path,
    names_path=names_path,
)
print("YOLO model loaded.")

YOLO model loaded.


In [3]:
def detect_objects(net, classes, frame, conf_threshold=0.5, nms_threshold=0.4):
    """Run the YOLO network on one frame and return the detections kept by NMS.

    Args:
        net: Network object exposing ``setInput``, ``getLayerNames``,
            ``getUnconnectedOutLayers`` and ``forward`` (for example the
            result of ``cv2.dnn.readNet``).
        classes: List of class names. Not used inside this function; kept
            so existing callers continue to work.
        frame: Image array. Its height and width are taken from
            ``frame.shape[:2]``.
        conf_threshold: Minimum class score a detection needs to be kept.
        nms_threshold: Overlap threshold forwarded to ``cv2.dnn.NMSBoxes``.

    Returns:
        A list with one ``[x1, y1, x2, y2, confidence, class_id]`` entry per
        detection that survives non-maximum suppression. Coordinates are in
        pixels of ``frame`` and are not clipped to the frame bounds.
    """
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layer_names = net.getLayerNames()
    # Depending on the OpenCV version, getUnconnectedOutLayers() yields either
    # a flat array of 1-based ids or an N x 1 array; flattening handles both.
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    output_layers = [layer_names[int(i) - 1] for i in out_layer_ids]
    detections = net.forward(output_layers)

    corner_boxes = []  # [x1, y1, x2, y2] - the format returned to callers
    nms_boxes = []     # [x, y, w, h]     - the format cv2.dnn.NMSBoxes expects
    confidences = []
    class_ids = []
    h, w = frame.shape[:2]

    for output in detections:
        for detection in output:
            # Layout used here: 4 box values, 1 value that is skipped,
            # then one score per class.
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = scores[class_id]
            if confidence > conf_threshold:
                # Box values are fractions of the frame size: centre and extent.
                center_x = int(detection[0] * w)
                center_y = int(detection[1] * h)
                width = int(detection[2] * w)
                height = int(detection[3] * h)
                x = int(center_x - width / 2)
                y = int(center_y - height / 2)
                corner_boxes.append([x, y, x + width, y + height])
                nms_boxes.append([x, y, width, height])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # NMS must see width/height boxes; passing corner coordinates here would
    # make it compute overlaps on rectangles that are too large.
    keep = cv2.dnn.NMSBoxes(nms_boxes, confidences, conf_threshold, nms_threshold)

    # The result may be an empty tuple, a flat array, or an N x 1 array.
    kept_indices = [int(k) for k in np.asarray(keep).flatten()]
    return [
        corner_boxes[i] + [confidences[i], class_ids[i]]
        for i in kept_indices
    ]

In [1]:
def process_video(video_path, yolo_model, classes, output_dir, target_fps=20):
    """Sample frames from a video and build fixed-size feature/detection arrays.

    Frames are read from ``video_path`` and every ``frame_interval``-th frame
    is sampled, up to 50 sampled frames. For each sampled frame the function
    stores a placeholder frame feature, runs ``detect_objects`` and keeps at
    most 19 boxes, stores a placeholder feature per box, and assigns a
    placeholder label and frame ID. Sampled frames 0, 10, 20, 30 and 40 are
    additionally written to ``output_dir`` with their boxes drawn. If fewer
    than 50 frames were sampled, the outputs are padded.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.
        yolo_model: Network forwarded to ``detect_objects``.
        classes: List of class names, used for the drawn captions.
        output_dir: Existing directory that receives the annotated frames.
        target_fps: Desired sampling rate; the sampling interval is the
            source FPS divided by this value, truncated and at least 1.

    Returns:
        A tuple ``(combined_features, detections, labels, frame_ids)``:
        ``combined_features`` has shape (50, 20, 4096) - the frame feature
        followed by 19 box features; ``detections`` has shape (50, 19, 6)
        with rows ``[x1, y1, x2, y2, confidence, class_id]`` and padding rows
        ``[0, 0, 0, 0, 0.0, -1]``; ``labels`` has shape (50, 2);
        ``frame_ids`` has shape (50,) and holds ``'frame_<index>'`` strings.
    """
    num_frames = 50      # sampled frames per video in the returned arrays
    max_boxes = 19       # detections kept per frame
    feature_dim = 4096   # length of every feature vector
    pad_box = [0, 0, 0, 0, 0.0, -1]
    saved_frame_indices = {0, 10, 20, 30, 40}  # sampled frames saved with boxes drawn

    frame_features = []
    box_features_list = []
    detections = []
    labels = []
    frame_ids = []

    cap = cv2.VideoCapture(video_path)
    try:
        original_fps = cap.get(cv2.CAP_PROP_FPS)
        # Truncation gives 0 when the source FPS is below target_fps; clamp to
        # 1 so the modulo below cannot divide by zero.
        frame_interval = max(1, int(original_fps / target_fps))

        frame_count = 0
        # Stop once enough frames were *sampled*; counting raw frames instead
        # would leave most slots as zero padding whenever frame_interval > 1.
        while cap.isOpened() and len(frame_features) < num_frames:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                sample_idx = len(frame_features)
                frame_h, frame_w = frame.shape[:2]

                # Frame-level feature (placeholder: flattened, resized pixels).
                frame_features.append(
                    cv2.resize(frame, (224, 224)).flatten()[:feature_dim]
                )

                # Detect objects and keep at most max_boxes of them.
                boxes = detect_objects(yolo_model, classes, frame)[:max_boxes]

                # Box-level features (same placeholder approach).
                box_features = []
                for box in boxes:
                    # Clamp to the frame: a negative index would otherwise
                    # wrap around and select the wrong region.
                    x1, x2 = (min(max(int(v), 0), frame_w) for v in (box[0], box[2]))
                    y1, y2 = (min(max(int(v), 0), frame_h) for v in (box[1], box[3]))
                    crop = frame[y1:y2, x1:x2]
                    if crop.size > 0:
                        box_features.append(
                            cv2.resize(crop, (64, 64)).flatten()[:feature_dim]
                        )
                    else:
                        box_features.append(np.zeros(feature_dim))  # empty box

                # Pad features and boxes up to max_boxes entries.
                box_features.extend(
                    np.zeros(feature_dim) for _ in range(max_boxes - len(box_features))
                )
                box_features_list.append(box_features)

                padding = [list(pad_box) for _ in range(max_boxes - len(boxes))]
                detections.append(np.array([list(b) for b in boxes] + padding))

                # Save selected frames with their real boxes drawn. Padding
                # rows are skipped: their class_id of -1 would otherwise be
                # captioned with the last class name.
                if sample_idx in saved_frame_indices:
                    for x1, y1, x2, y2, confidence, class_id in boxes:
                        caption = f"{classes[int(class_id)]}: {confidence:.2f}"
                        top_left = (int(x1), int(y1))
                        bottom_right = (int(x2), int(y2))
                        cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)
                        cv2.putText(frame, caption, (int(x1), int(y1) - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                    output_frame_path = os.path.join(output_dir, f"frame_{sample_idx}.jpg")
                    cv2.imwrite(output_frame_path, frame)

                # Placeholder label: alternates by position. Replace with real logic.
                labels.append([1, 0] if (sample_idx + 1) % 2 == 0 else [0, 1])

                # Placeholder frame ID. Replace with real frame/video ID logic.
                frame_ids.append(f'frame_{sample_idx}')

            frame_count += 1
    finally:
        # Release the capture even if detection or file writing raised.
        cap.release()

    # Pad up to exactly num_frames entries.
    while len(frame_features) < num_frames:
        # Take the ID before appending so numbering continues without a gap.
        frame_ids.append(f'frame_{len(frame_features)}')
        frame_features.append(np.zeros(feature_dim))
        detections.append(np.array([pad_box] * max_boxes))
        box_features_list.append([np.zeros(feature_dim)] * max_boxes)
        labels.append([0, 1])

    # Convert lists to numpy arrays.
    frame_features = np.array(frame_features).reshape((num_frames, 1, feature_dim))
    box_features = np.array(box_features_list).reshape((num_frames, max_boxes, feature_dim))
    detections = np.array(detections).reshape((num_frames, max_boxes, 6))
    labels = np.array(labels)
    frame_ids = np.array(frame_ids)

    # Frame feature first, then the box features, along axis 1.
    combined_features = np.concatenate((frame_features, box_features), axis=1)

    return combined_features, detections, labels, frame_ids

In [2]:
# Folder that receives the frames annotated with bounding boxes.
# `os` comes from the import cell at the top of the notebook, so that cell
# has to be run first on a fresh kernel.
output_dir = 'output_frames'
os.makedirs(name=output_dir, exist_ok=True)

NameError: name 'os' is not defined

In [6]:
# Run the full pipeline on the input clip
video_path = 'input_video.mp4'
data, det, labels, ID = process_video(
    video_path=video_path,
    yolo_model=yolo_model,
    classes=classes,
    output_dir=output_dir,
    target_fps=20,
)

In [7]:
# Report the shape of each array returned by process_video, one per line
print("\n".join(
    f"{caption} {array.shape}"
    for caption, array in (
        ("Size of data:", data),
        ("Size of det:", det),
        ("Size of labels:", labels),
        ("Size of ID:", ID),
    )
))

Size of data: (50, 20, 4096)
Size of det: (50, 19, 6)
Size of labels: (50, 2)
Size of ID: (50,)


In [8]:
# Persist all four arrays in a single .npz archive, keyed by name
np.savez(
    'output_data.npz',
    **{'data': data, 'det': det, 'labels': labels, 'ID': ID},
)
print("NPZ file successfully created.")

NPZ file successfully created.


In [9]:
def read_npz_file(npz_file_path):
    """Print the shape of every array stored in a ``.npz`` archive.

    Args:
        npz_file_path: Path to the ``.npz`` file to inspect.

    Returns:
        None. One ``Size of <name>: <shape>`` line is printed per array.
    """
    # np.load keeps the archive open for lazy access; the context manager
    # closes it once all shapes have been printed.
    with np.load(npz_file_path) as npzfile:
        for array_name in npzfile.files:
            array = npzfile[array_name]
            print(f"Size of {array_name}: {array.shape}")

# Location of the archive to inspect
npz_file_path = 'output_data.npz'

# Report the shape of each array stored in that archive
read_npz_file(npz_file_path=npz_file_path)

Size of data: (50, 20, 4096)
Size of det: (50, 19, 6)
Size of labels: (50, 2)
Size of ID: (50,)
