In [None]:
%pip install ultralytics

In [None]:

%pip install git+https://github.com/facebookresearch/segment-anything-2.git

# Just using YOLO (opening webcam)

In [31]:
import cv2
import time

# Run inference on webcam
# Note: This will open a separate window to display the video feed
cap = cv2.VideoCapture(0)

prev_frame_time = time.perf_counter()

if not cap.isOpened():
    print("Error: Could not open webcam")
else:
    print("Webcam opened successfully!")
    print("A separate window will open showing the detection results.")
    print("Press 'q' in the video window to quit, or interrupt the kernel to stop")
    
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            new_frame_time = time.perf_counter()
            fps = 1 / (new_frame_time - prev_frame_time)
            prev_frame_time = new_frame_time
            
            # Run inference
            results = yolo_model(frame)

            cv2.putText(frame, f"FPS: {fps}", (10, 30), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            # Draw results on frame
            annotated_frame = results[0].plot()
            
            # Display the frame in a separate window
            cv2.imshow('Tennis Ball Detection', annotated_frame)
            cv2.plot
            
            # Break loop on 'q' key press (make sure the video window is focused)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("Stopping...")
                break
    except KeyboardInterrupt:
        print("\nInterrupted by user")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Always clean up, even if interrupted
        cap.release()
        cv2.destroyAllWindows()
        print("Webcam released and windows closed")


Webcam opened successfully!
A separate window will open showing the detection results.
Press 'q' in the video window to quit, or interrupt the kernel to stop
Stopping...
Webcam released and windows closed


# Import sam2 (not ousing anymore)

In [None]:
import torch
import os
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

# 1. Define paths
current_dir = os.getcwd()
local_config_path = os.path.join(current_dir, "sam2", "sam2_hiera_t.yaml")
checkpoint_path = os.path.join(current_dir, "sam2", "sam2_hiera_tiny.pt")

# 2. Verify files exist
if not os.path.exists(local_config_path):
    raise FileNotFoundError(f"Config not found at: {local_config_path}")
if not os.path.exists(checkpoint_path):
    raise FileNotFoundError(f"Checkpoint not found at: {checkpoint_path}")

print("Loading SAM2 model...")

# 3. Build video predictor in one step (builds model + creates predictor)
# This is simpler than: build_sam2() then SAM2VideoPredictor()
sam2_model = build_sam2(
    config_file=local_config_path,
    ckpt_path=checkpoint_path,
    device="cuda" if torch.cuda.is_available() else "cpu"
)

sam2_predictor = SAM2ImagePredictor(sam2_model)

print("SAM2 model loaded successfully!")




In [26]:
from ultralytics import YOLO
import cv2
import time
import numpy as np

yolo_model = YOLO("weights/best (1).pt")
TRACKER_TYPE = "MOSSE"          # CSRT is best for accuracy on CPU

# Open webcam and run tracking

In [32]:
# Setup Camera
from time import perf_counter


# --- TENNIS BALL COLOR DEFINITION (HSV) ---
# You might need to tune these for your specific lighting!
# "Optic Yellow" is usually around Hue 30-50
LOWER_GREEN = np.array([17, 35, 6])
UPPER_GREEN = np.array([64, 255, 255])

def is_track_good(frame, bbox):
    """
    Verifies if the tracker's bbox likely contains a tennis ball.
    Returns: True if good, False if bad.
    """
    x, y, w, h = [int(v) for v in bbox]


    # COLOR CHECK (The most important one)
    # Extract the image inside the box
    roi = frame[y:y+h, x:x+w]
    if roi.size == 0: return False
    
    # Convert to HSV and create a mask for green/yellow
    hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv_roi, LOWER_GREEN, UPPER_GREEN)
    
    # Count how many pixels are "ball colored"
    ball_pixels = cv2.countNonZero(mask)
    total_pixels = w * h
    
    # If less than 50% of the box is green, we lost it.
    confidence_proxy = ball_pixels / total_pixels
    
    if confidence_proxy < 0.05: 
        return False
        
    return True


cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise IOError("Cannot open webcam")

# State variables
tracker = None
tracking_active = False
prev_frame_time = time.perf_counter()

print("üéæ Tennis Tracker Started. Press 'q' to quit.")

while True:
    ret, frame = cap.read()
    if not ret:
        break




    new_frame_time = time.perf_counter()
    fps = 1 / (new_frame_time - prev_frame_time)
    prev_frame_time = new_frame_time

    cv2.putText(frame, f"FPS: {fps}", (10, 30), 
        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # We use a flag to decide if we need to run YOLO this frame
    # By default, if we are tracking, we assume we don't need YOLO yet
    run_yolo = not tracking_active

    # === PHASE 1: TRY TRACKING ===
    if tracking_active:

        start_time = time.perf_counter()

        success, box = tracker.update(frame)
        
        end_time = time.perf_counter()

        print(f"CSRT tracking took {end_time - start_time}ms")

        if success:
            if is_track_good(frame, box):
                # Tracker is happy
                x, y, w, h = [int(v) for v in box]
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, "CSRT TRACKER", (x, y - 10), 
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            else:
                # Tracker says "True", but our check says "That's not a ball!"
                print("‚ö†Ô∏è Tracker drifted (Color/Shape mismatch). Resetting...")
                tracking_active = False
                tracker = None
                run_yolo = True 
        else:
            # Tracker FAILED this frame (ball moved too fast or occlusion)
            print("Tracking failed! Switching to YOLO immediate recovery...")
            tracking_active = False
            tracker = None
            run_yolo = True # Force YOLO to run on THIS frame

    # === PHASE 2: SEARCHING (YOLO) ===
    # This runs if we weren't tracking, OR if tracking just failed above
    if run_yolo:

        start_time = time.perf_counter()

        results = yolo_model(frame, verbose=False)

        end_time = time.perf_counter()

        print(f"YOLO took: {end_time - start_time} ms")
        
        best_box = None
        max_conf = 0.0
        
        for r in results:
            boxes = r.boxes
            for box in boxes:
                # Change class_id to 0 if using your custom trained model
                # Change to 32 if using standard YOLOv8n (sports ball)
                class_id = int(box.cls[0])
                conf = float(box.conf[0])
                
                # Filter for tennis ball (Class 0 usually for custom)
                if class_id == 0 and conf > 0.5:
                    if conf > max_conf:
                        max_conf = conf
                        best_box = box.xyxy[0].cpu().numpy()

        if best_box is not None:
            # Ball found! Initialize tracker for next frame
            x1, y1, x2, y2 = best_box
            w = x2 - x1
            h = y2 - y1
            
            # Create a new tracker instance
            if TRACKER_TYPE == "CSRT":
                tracker = cv2.legacy.TrackerCSRT_create()
            elif TRACKER_TYPE == "MOSSE":
                tracker = cv2.legacy.TrackerMOSSE_create()
            elif TRACKER_TYPE == "KCF":
                tracker = cv2.legacy.TrackerKCF_create()
            else:
                raise IOError("Unrecognized tracker type")  
            
            tracker.init(frame, (int(x1), int(y1), int(w), int(h)))
            tracking_active = True
            
            # Visual feedback for detection
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
            cv2.putText(frame, f"YOLO DETECT ({max_conf:.2f})", (int(x1), int(y1) - 10), 
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
        else:
            cv2.putText(frame, "Searching...", (20, 50), 
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 165, 255), 2)

    cv2.imshow("Tennis Tracker (Auto-Recovery)", frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

üéæ Tennis Tracker Started. Press 'q' to quit.
YOLO took: 0.06600510003045201 ms
CSRT tracking took 0.0006688000285066664ms
CSRT tracking took 0.0009870999492704868ms
CSRT tracking took 0.0006343999993987381ms
CSRT tracking took 0.0006212000153027475ms
CSRT tracking took 0.0009681999799795449ms
CSRT tracking took 0.001018799957819283ms
CSRT tracking took 0.0005769000272266567ms
CSRT tracking took 0.000561599968932569ms
CSRT tracking took 0.0005091000348329544ms
CSRT tracking took 0.0007527000270783901ms
CSRT tracking took 0.0009466999908909202ms
CSRT tracking took 0.0009480000007897615ms
CSRT tracking took 0.0006864999886602163ms
CSRT tracking took 0.0005799999926239252ms
CSRT tracking took 0.0006210000137798488ms
CSRT tracking took 0.0007166999857872725ms
CSRT tracking took 0.0007542999810539186ms
CSRT tracking took 0.0005905000143684447ms
CSRT tracking took 0.0007351000094786286ms
CSRT tracking took 0.0008869000012055039ms
CSRT tracking took 0.0008700999896973372ms
CSRT tracking too

# HSV Upper and Lower bound calibration

In [None]:
import cv2
import numpy as np

def nothing(x):
    pass

# Create a window
cv2.namedWindow('HSV Tuner')

# Create trackbars for color change
cv2.createTrackbar('H Min', 'HSV Tuner', 17, 179, nothing)
cv2.createTrackbar('S Min', 'HSV Tuner', 35, 255, nothing)
cv2.createTrackbar('V Min', 'HSV Tuner', 6, 255, nothing)
cv2.createTrackbar('H Max', 'HSV Tuner', 64, 179, nothing)
cv2.createTrackbar('S Max', 'HSV Tuner', 255, 255, nothing)
cv2.createTrackbar('V Max', 'HSV Tuner', 255, 255, nothing)

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret: break

    # Convert to HSV
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    # Get current positions of trackbars
    hMin = cv2.getTrackbarPos('H Min', 'HSV Tuner')
    sMin = cv2.getTrackbarPos('S Min', 'HSV Tuner')
    vMin = cv2.getTrackbarPos('V Min', 'HSV Tuner')
    hMax = cv2.getTrackbarPos('H Max', 'HSV Tuner')
    sMax = cv2.getTrackbarPos('S Max', 'HSV Tuner')
    vMax = cv2.getTrackbarPos('V Max', 'HSV Tuner')

    lower = np.array([hMin, sMin, vMin])
    upper = np.array([hMax, sMax, vMax])

    # Create Mask
    mask = cv2.inRange(hsv, lower, upper)
    result = cv2.bitwise_and(frame, frame, mask=mask)

    cv2.imshow('HSV Tuner', result)
    
    print(f"LOWER: [{hMin},{sMin},{vMin}]  UPPER: [{hMax},{sMax},{vMax}]", end='\r')
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()