# In [2]:
import cv2
import json
import torch
import torchvision
import torchvision.transforms as T
from PIL import Image
import numpy as np

# -----------------
# STEP 1: Select ROI from first video frame
# -----------------
# ROI polygon vertices as (x, y) pixel tuples; the draw_points mouse callback
# appends one entry per left-click.
roi_points = []

def draw_points(event, x, y, flags, param):
    """Mouse callback: record a left-click as an ROI vertex and mark it on screen.

    Args:
        event: OpenCV mouse event code; only ``cv2.EVENT_LBUTTONDOWN`` is handled.
        x: Column of the click, in pixels.
        y: Row of the click, in pixels.
        flags: Unused; present because OpenCV passes it to every mouse callback.
        param: Unused; present because OpenCV passes it to every mouse callback.
    """
    # Every event other than a left-button press is ignored.
    if event != cv2.EVENT_LBUTTONDOWN:
        return

    clicked = (x, y)
    # Both module-level objects are mutated in place, never rebound here.
    roi_points.append(clicked)

    # Filled red dot (BGR) of radius 5 at the clicked location, then refresh.
    cv2.circle(frame_copy, clicked, 5, (0, 0, 255), -1)
    cv2.imshow("ROI Selector", frame_copy)

# Load video and capture first frame
video_path = "/Users/farahalhanaya/computer-vision-project-mawqif/models_training/istockphoto-845341376-640_adpp_is.mp4"  # change this
cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()
cap.release()

# cap.read() reports failure through `ret` (and yields no frame) when the file
# is missing or cannot be decoded; stop with a clear message instead of
# crashing on frame.copy() below.
if not ret or frame is None:
    raise SystemExit(f"Could not read the first frame from: {video_path}")

# frame_copy is the canvas the draw_points callback paints click markers on;
# `frame` itself stays untouched so the preview below starts from a clean image.
frame_copy = frame.copy()
cv2.imshow("ROI Selector", frame_copy)
cv2.setMouseCallback("ROI Selector", draw_points)

print("👉 Left-click to add ROI points, ENTER to save, ESC to cancel")

while True:
    # Keep only the low byte: some HighGUI backends set extra bits above it,
    # which would make the comparisons below never match.
    key = cv2.waitKey(0) & 0xFF
    if key in (13, 10):  # ENTER key (carriage return, or line feed on some backends)
        break
    if key == 27:  # ESC key
        roi_points = []
        print("❌ ROI selection cancelled")
        break

cv2.destroyAllWindows()

# Save ROI points
if roi_points:
    with open("roi_points.json", "w") as f:
        json.dump(roi_points, f)
    print("✅ ROI points saved:", roi_points)

    # Show ROI drawn as a closed red polygon for two seconds
    roi_preview = frame.copy()
    cv2.polylines(roi_preview, [np.array(roi_points, np.int32)], True, (0, 0, 255), 3)
    cv2.imshow("ROI Preview", roi_preview)
    cv2.waitKey(2000)
    cv2.destroyAllWindows()
else:
    print("No ROI saved, exiting...")
    # SystemExit with no argument ends the script with a success status, like
    # exit(), but does not depend on the interactive `site` helper being present.
    raise SystemExit

# -----------------
# STEP 2: Load trained model
# -----------------

def load_model(weights_path="/Users/farahalhanaya/computer-vision-project-mawqif/faster_rcnn_car (1).pth"):
    """Build a Faster R-CNN (ResNet-50 FPN) detector and load saved weights.

    The box-predictor head is replaced with one that has two output classes
    before the state dict is loaded, and the model is switched to eval mode.

    Args:
        weights_path: Path to a file readable by ``torch.load`` that holds a
            state dict matching this architecture.

    Returns:
        The detector, with tensors mapped to the CPU, in evaluation mode.
    """
    detection = torchvision.models.detection
    detector = detection.fasterrcnn_resnet50_fpn(weights=None)

    # Replace the stock classification head with a two-class one
    # (presumably background + car -- confirm against the training script).
    head_inputs = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(
        head_inputs, num_classes=2
    )

    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source.
    state_dict = torch.load(weights_path, map_location="cpu")
    detector.load_state_dict(state_dict)
    detector.eval()
    return detector

# Module-level detector instance used by the per-frame loop in STEP 3.
model = load_model("/Users/farahalhanaya/computer-vision-project-mawqif/faster_rcnn_car (1).pth")

# -----------------
# Helper: ROI violation check
# -----------------
def check_roi_overlap(roi_points, car_box, H, W, threshold=0.2):
    """Return True when more than ``threshold`` of a box lies inside the ROI.

    Args:
        roi_points: Polygon vertices as an (N, 2) sequence of (x, y) pixel
            coordinates; converted to int32 for ``cv2.fillPoly``.
        car_box: Box as (x1, y1, x2, y2); each value is truncated to int.
        H: Frame height in pixels.
        W: Frame width in pixels.
        threshold: Minimum fraction of the box area that must fall inside
            the ROI polygon for the function to return True.

    Returns:
        bool: True if (box pixels inside the ROI) / (box area) > threshold.
        A box with no area after truncation returns False.
    """
    x1, y1, x2, y2 = map(int, car_box)

    # A box that collapses to zero width or height after int truncation has no
    # area; report "no overlap" instead of dividing by zero.
    if x2 <= x1 or y2 <= y1:
        return False
    car_area = (x2 - x1) * (y2 - y1)

    roi_mask = np.zeros((H, W), dtype=np.uint8)
    cv2.fillPoly(roi_mask, [np.asarray(roi_points, dtype=np.int32)], 255)

    # Clamp the box to the frame so the slice below can never wrap around via
    # negative indices; a box fully outside the frame yields an empty slice.
    left, right = (min(max(v, 0), W) for v in (x1, x2))
    top, bottom = (min(max(v, 0), H) for v in (y1, y2))

    # Count ROI pixels inside the half-open box [x1, x2) x [y1, y2), the same
    # convention car_area uses, so the ratio cannot exceed 1.
    inter_area = int(np.count_nonzero(roi_mask[top:bottom, left:right]))
    return inter_area / car_area > threshold

# -----------------
# STEP 3: Process video with ROI
# -----------------
with open("roi_points.json") as f:
    roi_points = np.array(json.load(f), dtype=np.int32)

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise SystemExit(f"Could not open video: {video_path}")

# Write the output at the source frame rate so playback speed matches the
# input; fall back to 20 fps when the container does not report a usable value.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 20.0
frame_size = (
    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)
out = cv2.VideoWriter("output.mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, frame_size)

transform = T.ToTensor()

# try/finally guarantees the capture, the writer and any windows are released
# even if detection or drawing raises part-way through the video.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        H, W = frame.shape[:2]

        # Run detection: OpenCV frames are BGR, the model input is built from RGB
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img_tensor = transform(img)

        with torch.no_grad():
            outputs = model([img_tensor])

        boxes = outputs[0]["boxes"].numpy()
        scores = outputs[0]["scores"].numpy()
        labels = outputs[0]["labels"].numpy()

        # Draw ROI
        cv2.polylines(frame, [roi_points], True, (0, 0, 255), 3)

        # Draw detections: keep label-1 boxes scoring above 0.5
        for box, score, label in zip(boxes, scores, labels):
            if score > 0.5 and label == 1:
                x1, y1, x2, y2 = map(int, box)
                violation = check_roi_overlap(roi_points, box, H, W)
                # Red for boxes overlapping the ROI, green otherwise (BGR)
                color = (0, 0, 255) if violation else (0, 255, 0)
                status = "Violation" if violation else "OK"
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, f"{status} {score:.2f}", (x1, y1 - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        out.write(frame)
        cv2.imshow("Video Detection", frame)

        # 'q' stops processing early
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print("✅ Finished! Video saved as output.mp4")


# Cell output:
# 👉 Left-click to add ROI points, ENTER to save, ESC to cancel
# ✅ ROI points saved: [(435, 65), (483, 65), (486, 162), (434, 160)]
# ✅ Finished! Video saved as output.mp4
