In [22]:
import cv2
import torch
from ultralytics import YOLO
import numpy as np
import copy

# Load YOLO model (modify this for your model path)
model = YOLO("/home/stevenyang/Desktop/cmput469/YOLO/runs/detect/crowd_counting_v11/weights/best.pt")

# Open input video
input_video = "/home/stevenyang/Desktop/cmput469/YOLO/Moose_Jaw/MJ1.mov"
cap = cv2.VideoCapture(input_video)

fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define output video writer
output_video = "output.mov"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(output_video, fourcc, fps, (width, height))

# Process each frame
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    org_frame = copy.copy(frame)
    
    # Separate top and bottom halves
    top_half = frame[:height//2, :]
    bottom_half = frame[height//2:, :]
    
    # Apply different contrast for top and bottom
    alpha_top = 0.8  # Contrast control for top
    alpha_bottom = 1.2  # Contrast control for bottom
    beta = 100  # Brightness control
    
    top_half = cv2.convertScaleAbs(top_half, alpha=alpha_top, beta=beta)
    bottom_half = cv2.convertScaleAbs(bottom_half, alpha=alpha_bottom, beta=beta)
    
    # Merge adjusted halves back
    frame[:height//2, :] = top_half
    frame[height//2:, :] = bottom_half
    
    # Run YOLO detection
    results = model(frame)
    
    # Count the number of detections
    total_detections = sum(len(result.boxes) for result in results)
    
    # Draw bounding boxes in blue with brightness reflecting confidence
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf[0])
            brightness = int(255 * conf)  # Adjust brightness based on confidence
            color = (brightness, brightness, 255)  # Blue tone
            cv2.rectangle(org_frame, (x1 + 2, y1 + 2), (x2 - 2, y2 - 2), color, 1)  # Narrow box
    
    # Display count on the top left corner
    cv2.putText(org_frame, f"Count: {total_detections}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    
    # Write frame to output video
    out.write(org_frame)

# Release resources
cap.release()
out.release()
cv2.destroyAllWindows()



0: 736x1280 8 pps, 51.7ms
Speed: 6.0ms preprocess, 51.7ms inference, 0.9ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 9 pps, 45.7ms
Speed: 4.5ms preprocess, 45.7ms inference, 1.7ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 9 pps, 37.6ms
Speed: 5.8ms preprocess, 37.6ms inference, 1.2ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 9 pps, 37.8ms
Speed: 4.5ms preprocess, 37.8ms inference, 2.1ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 7 pps, 37.6ms
Speed: 5.7ms preprocess, 37.6ms inference, 2.1ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 7 pps, 37.4ms
Speed: 4.2ms preprocess, 37.4ms inference, 1.0ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 10 pps, 38.4ms
Speed: 4.9ms preprocess, 38.4ms inference, 1.0ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 10 pps, 39.2ms
Speed: 4.3ms preprocess, 39.2ms inference, 1.0ms postprocess per image at shape (1, 3, 736, 1280

KeyboardInterrupt: 

In [2]:
import cv2
import torch
from ultralytics import YOLO
import numpy as np
import copy

# Load YOLO model (modify this for your model path)
# model = YOLO("/home/stevenyang/Desktop/cmput469/YOLO/runs/detect/crowd_counting_v11/weights/best.pt")
model = YOLO("yolo11s.pt")
# Open input video
input_video = "/home/stevenyang/Desktop/cmput469/YOLO/Moose_Jaw/MJ1.mov"
cap = cv2.VideoCapture(input_video)

# Get video properties
fps = int(cap.get(cv2.CAP_PROP_FPS))
print(fps)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define output video writer
output_video = "output.mov"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(output_video, fourcc, fps, (width, height))

# Process each frame
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    org_frame = copy.copy(frame)

    alpha = 1.3  # Contrast control for top
    beta = 100  # Brightness control
    
    # Apply Histogram Equalization
    frame_yuv = cv2.cvtColor(frame, cv2.COLOR_BGR2YUV)
    frame_yuv[:, :, 0] = cv2.equalizeHist(frame_yuv[:, :, 0])
    
    # Apply Sharpening
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])

    # Apply Gamma Correction
    gamma = 1.2  # Adjust gamma value as needed
    look_up_table = np.array([((i / 255.0) ** (1.0 / gamma)) * 255 for i in range(256)]).astype("uint8")
    

    # frame = cv2.cvtColor(frame_yuv, cv2.COLOR_YUV2BGR)
    frame = cv2.convertScaleAbs(frame, alpha=alpha, beta=beta)
    frame = cv2.filter2D(frame, -1, kernel)
    # frame = cv2.LUT(frame, look_up_table)


    
    # Run YOLO detection
    results = model(frame)
    
    # Count the number of detections
    total_detections = sum(len(result.boxes) for result in results)
    
    # Draw bounding boxes in blue with brightness reflecting confidence
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf[0])
            if conf > 0.3:
                brightness = int(255 * conf)  # Adjust brightness based on confidence
                color = (0,0, conf*255)  # Blue tone
                cv2.rectangle(org_frame, (x1 + 2, y1 + 2), (x2 - 2, y2 - 2), color, 1)  # Narrow box
    
    # Display count on the top left corner
    cv2.putText(org_frame, f"Count: {total_detections}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    
    # Write frame to output video
    out.write(org_frame)

# Release resources
cap.release()
out.release()
cv2.destroyAllWindows()


30

0: 384x640 7 persons, 26.4ms
Speed: 3.4ms preprocess, 26.4ms inference, 352.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 17.1ms
Speed: 1.9ms preprocess, 17.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 17.1ms
Speed: 1.7ms preprocess, 17.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 1 tv, 17.1ms
Speed: 1.6ms preprocess, 17.1ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 17.3ms
Speed: 2.6ms preprocess, 17.3ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 17.2ms
Speed: 3.0ms preprocess, 17.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 1 sink, 17.0ms
Speed: 1.6ms preprocess, 17.0ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 1 tv, 17.1ms
Speed: 1.6ms preprocess, 17.1ms inference, 1.5ms post