# Task
Computer Vision Engineer Intern Practical Test: Scene-Based Video Segmentation

wget https://pjreddie.com/media/files/yolov3.weights
wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg


In [14]:
import cv2
import numpy as np
import os

# Load pre-trained YOLO model for people detection
def load_yolo(weights_path="yolov3.weights", config_path="yolov3.cfg"):
    """Load a YOLO network and resolve the names of its output layers.

    Args:
        weights_path: Path to the Darknet weights file. Defaults to
            ``"yolov3.weights"`` in the current working directory.
        config_path: Path to the Darknet network configuration file.
            Defaults to ``"yolov3.cfg"`` in the current working directory.

    Returns:
        A ``(net, output_layers)`` tuple: the network returned by
        ``cv2.dnn.readNet`` and the list of layer names to pass to
        ``net.forward``.
    """
    net = cv2.dnn.readNet(weights_path, config_path)

    layer_names = net.getLayerNames()
    unconnected_out_layers = net.getUnconnectedOutLayers()
    # Kept as diagnostic output so the resolved indices stay visible.
    print("Unconnected Out Layers:", unconnected_out_layers)

    # Depending on the OpenCV build the indices arrive either as a flat
    # sequence of ints or as a sequence of one-element rows. Flattening
    # handles both layouts (and an empty result) without branching.
    layer_indices = np.asarray(unconnected_out_layers).flatten()

    # The reported indices are 1-based; layer_names is 0-based.
    output_layers = [layer_names[index - 1] for index in layer_indices]

    return net, output_layers

# Simple heuristic to classify scene as indoor or outdoor using color histograms
def classify_scene(frame):
    """Label a frame as ``"outdoor"`` or ``"indoor"`` from a histogram peak.

    The frame is converted from BGR to HSV and a 256-bin histogram over the
    value range [0, 256) is computed for channel index 1 of the HSV image.
    The label depends only on which bin holds the most pixels.

    NOTE(review): channel index 1 is the saturation channel in OpenCV's
    H, S, V ordering, not hue -- confirm which channel the heuristic is
    meant to inspect.

    Args:
        frame: Image accepted by ``cv2.cvtColor`` with ``COLOR_BGR2HSV``.

    Returns:
        ``"outdoor"`` when the most populated bin is below 128, otherwise
        ``"indoor"``.
    """
    hsv_image = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    channel_hist = cv2.calcHist([hsv_image], [1], None, [256], [0, 256])

    # Index of the most populated histogram bin.
    peak_bin = channel_hist.argmax()

    if peak_bin >= 128:
        return "indoor"
    return "outdoor"

# Detect people in a frame using YOLO
def detect_people(frame, net, output_layers, conf_threshold=0.5):
    """Return whether the network detects at least one person in ``frame``.

    Args:
        frame: Image handed to ``cv2.dnn.blobFromImage``. The blob is built
            with channel swapping enabled, so a BGR frame is presumably
            expected -- confirm against the caller.
        net: Network object providing ``setInput`` and ``forward``.
        output_layers: Layer names passed to ``net.forward``.
        conf_threshold: Minimum class score for a detection to count.
            Defaults to 0.5.

    Returns:
        ``True`` if any detection row has class 0 as its highest-scoring
        class with a score above ``conf_threshold``; ``False`` otherwise.
    """
    # 0.00392 is approximately 1/255; the frame is resized to 416x416 with
    # no mean subtraction, red/blue channels swapped, and no cropping.
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    person_class_id = 0  # 0 is the class ID for 'person' in COCO dataset

    for out in outs:
        # Columns from index 5 onward are treated as per-class scores
        # (the leading five columns are not used here).
        class_scores = out[:, 5:]
        if class_scores.size == 0:
            continue
        best_is_person = class_scores.argmax(axis=1) == person_class_id
        confident = class_scores[:, person_class_id] > conf_threshold
        # Stop at the first qualifying detection; only presence matters.
        if np.any(best_is_person & confident):
            return True
    return False

# Process the video and segment it based on categories
def segment_video(video_path, output_dir):
    """Split a video into clips by scene type and presence of people.

    Each frame is labelled with ``classify_scene`` and ``detect_people``.
    Consecutive frames sharing the same ``(scene, people_present)`` label
    are written to one clip in ``output_dir``; a new clip starts whenever
    the label changes. The per-frame label is also printed.

    Clips are named ``segment_<index>_<scene>_<people|no_people>.mp4``.
    Because labels are evaluated per frame with no smoothing, a label that
    flickers between frames produces correspondingly short clips.

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory that receives the clips; created if missing.

    Returns:
        None. If the video cannot be opened an error message is printed
        and nothing is written.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error opening video file.")
        return

    writer = None
    try:
        net, output_layers = load_yolo()
        os.makedirs(output_dir, exist_ok=True)

        # Keep the fractional frame rate (e.g. 29.97) rather than truncating
        # it; fall back to 30 when the container reports nothing usable.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")

        current_label = None
        segment_index = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Classify scene and detect people
            scene = classify_scene(frame)
            people_present = detect_people(frame, net, output_layers)
            print(f"Scene: {scene}, People Present: {people_present}")

            label = (scene, people_present)
            if label != current_label:
                # Label changed: close the running clip and open a new one.
                if writer is not None:
                    writer.release()
                people_tag = "people" if people_present else "no_people"
                segment_name = f"segment_{segment_index:04d}_{scene}_{people_tag}.mp4"
                height, width = frame.shape[:2]
                writer = cv2.VideoWriter(
                    os.path.join(output_dir, segment_name),
                    fourcc,
                    fps,
                    (width, height),
                )
                current_label = label
                segment_index += 1

            writer.write(frame)
    finally:
        # Release handles even if model loading or frame processing raises.
        if writer is not None:
            writer.release()
        cap.release()

# Example usage. Guarded so that importing this module does not start
# processing a video; running it as a script or notebook cell still does.
if __name__ == "__main__":
    segment_video('task people with outdoor.mp4', 'output_segments')


Unconnected Out Layers: [200 227 254]
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True


Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
Scene: outdoor, People Present: True
S