In [2]:
import cv2
from ultralytics import YOLO
import numpy as np

# Load the custom-trained YOLO weights (best.pt from the `train2` run).
# The literal is a raw string, so each path separator is written as a single
# backslash; doubling them inside r'' would embed two backslashes in the path.
# NOTE(review): this is a machine-specific absolute path - adjust it (or make it
# relative to the project) when running elsewhere. A stock checkpoint such as
# YOLO('yolov5su.pt') can be loaded instead.
model = YOLO(r'C:\Users\17038\a_Fall_2024\vision\final_project\runs\detect\train2\weights\best.pt')


In [3]:
# Video file path
video_path = "sample3.mp4"

# Initialize video capture
cap = cv2.VideoCapture(video_path)

# Fail loudly if the video cannot be opened. Raising (instead of exit())
# stops this cell without shutting down the notebook kernel.
if not cap.isOpened():
    cap.release()
    raise RuntimeError(f"Error: Cannot open video {video_path}")

# One entry per sampled frame: {"timestamp": seconds, "detections": [...]}
results_matrix = []
frame_idx = 0

try:
    # Get video frame rate and total frames
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Timestamps are derived from frame_idx / fps, so a missing or invalid
    # FPS value would make every timestamp meaningless (or divide by zero).
    if not fps > 0:
        raise RuntimeError(f"Error: Video {video_path} reports an invalid frame rate ({fps})")

    # Sample roughly one frame per second of video. Clamp to at least 1 so
    # that a frame rate below 1 FPS cannot produce a modulo-by-zero.
    frame_interval = max(1, int(fps))

    # Process the video
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # Exit loop if no more frames

        # Only run detection on every `frame_interval`-th frame
        if frame_idx % frame_interval == 0:
            # Pass the OpenCV frame as-is: Ultralytics treats numpy arrays as
            # BGR images and performs the channel conversion itself, so
            # converting to RGB here would swap the red and blue channels.
            results = model(frame)

            # Position of this frame in the video, in seconds
            timestamp = frame_idx / fps

            # Extract detected objects and save to the results matrix
            frame_results = {"timestamp": timestamp, "detections": []}
            for detection in results[0].boxes:
                frame_results["detections"].append({
                    "label": model.names[int(detection.cls.item())],  # Object class label
                    "confidence": detection.conf.item(),  # Confidence score
                    "box": detection.xyxy[0].cpu().numpy().tolist(),  # [x1, y1, x2, y2]
                })

            # Append frame results to the matrix
            results_matrix.append(frame_results)

            # Optional: Print progress
            print(f"Processed frame {frame_idx}/{total_frames}, timestamp: {timestamp:.2f}s")

        # Increment frame index
        frame_idx += 1
finally:
    # Release the capture even if detection raised part-way through
    cap.release()

# Print results (or save to file)
for frame_result in results_matrix:
    print(frame_result)

# Optionally save results to a file
import json
with open("results.json", "w") as f:
    json.dump(results_matrix, f, indent=4)



0: 352x640 (no detections), 77.4ms
Speed: 4.0ms preprocess, 77.4ms inference, 0.0ms postprocess per image at shape (1, 3, 352, 640)
Processed frame 0/3513, timestamp: 0.00s

0: 352x640 (no detections), 74.3ms
Speed: 0.0ms preprocess, 74.3ms inference, 1.0ms postprocess per image at shape (1, 3, 352, 640)
Processed frame 30/3513, timestamp: 1.00s

0: 352x640 (no detections), 57.8ms
Speed: 1.0ms preprocess, 57.8ms inference, 0.0ms postprocess per image at shape (1, 3, 352, 640)
Processed frame 60/3513, timestamp: 2.00s

0: 352x640 (no detections), 53.2ms
Speed: 2.3ms preprocess, 53.2ms inference, 0.0ms postprocess per image at shape (1, 3, 352, 640)
Processed frame 90/3513, timestamp: 3.00s

0: 352x640 (no detections), 64.1ms
Speed: 0.0ms preprocess, 64.1ms inference, 0.0ms postprocess per image at shape (1, 3, 352, 640)
Processed frame 120/3513, timestamp: 4.00s

0: 352x640 (no detections), 94.5ms
Speed: 0.0ms preprocess, 94.5ms inference, 0.0ms postprocess per image at shape (1, 3, 35

In [None]:
import cv2

# Prompt user for an animal
animal = input("Enter the name of the animal to find (e.g., 'dog', 'cat'): ").strip()

# Collect the timestamp of every sampled frame in which the requested label
# was detected (case-insensitive); each frame contributes at most one entry.
target_label = animal.lower()
timestamps = [
    frame_result["timestamp"]
    for frame_result in results_matrix
    if any(detection["label"].lower() == target_label
           for detection in frame_result["detections"])
]

if timestamps:
    print(f"{animal.capitalize()} found at the following timestamps: {', '.join(f'{t:.2f}' for t in timestamps)} seconds.")

    # Start video playback at the first timestamp
    cap = cv2.VideoCapture(video_path)
    current_index = 0  # Index to track which timestamp we're at

    keep_playing = cap.isOpened()
    if not keep_playing:
        print(f"Error: Cannot open video {video_path}")

    try:
        while keep_playing:
            # Jump to the timestamp currently selected
            cap.set(cv2.CAP_PROP_POS_MSEC, timestamps[current_index] * 1000)

            while True:
                ret, frame = cap.read()
                if not ret:
                    # End of video (or failed seek/read): stop instead of
                    # re-seeking forever to the same position.
                    print("Reached the end of the video.")
                    keep_playing = False
                    break

                # Display the frame
                cv2.imshow("Video Playback", frame)

                # Key handling: 'q' to quit, 'n' to skip to the next timestamp
                key = cv2.waitKey(20) & 0xFF
                if key == ord('q'):  # Quit playback
                    keep_playing = False
                    break
                if key == ord('n'):  # Skip to the next instance of the animal
                    current_index += 1
                    if current_index >= len(timestamps):
                        print("No more instances of the animal found.")
                        keep_playing = False
                    else:
                        print(f"Skipping to next {animal} at {timestamps[current_index]:.2f} seconds.")
                    break  # Leave the inner loop to re-seek (or finish)
    finally:
        # Always free the capture and close the window; unlike exit(), this
        # ends playback without shutting down the notebook kernel.
        cap.release()
        cv2.destroyAllWindows()
else:
    print(f"{animal.capitalize()} not found in the video.")


: 

In [None]:
# Load the stock YOLOv5 "u" small checkpoint by file name.
# NOTE(review): this rebinds `model`, replacing the custom-trained weights
# loaded in the first cell; re-run that cell before running detection if the
# custom model is the one that should be used.
model = YOLO('yolov5su.pt')
# The "u" models are available for all sizes:
# yolov5su.pt (small)
# yolov5mu.pt (medium)
# yolov5lu.pt (large)
# yolov5xu.pt (extra-large)