#### Extracting frames

In [None]:
import argparse
import queue
import pandas as pd 
import pickle
import imutils
import os
from PIL import Image, ImageDraw
import cv2 
import numpy as np
import torch
import sys
import time
from bisect import bisect_right
from sktime.datatypes._panel._convert import from_2d_array_to_nested
from court_detector import CourtDetector
from TrackPlayers.trackplayers import *
from utils import get_video_properties, get_dtype
from detection import *  # provides diff_xy, remove_outliers, interpolation
from pickle import load

# -------------------------------
# Command-line options (parsed from an empty argument list, so the
# defaults below are always the values in effect inside the notebook)
# -------------------------------
_OPTION_SPECS = (
    ("--input_video_path", str, "/home/akash/ws/personal/sportsAI/src/dataset_prep/tennis-tracking/VideoInput/video_input2.mp4"),
    ("--output_video_path", str, ""),
    ("--minimap", int, 0),
    ("--bounce", int, 0),
)

parser = argparse.ArgumentParser()
for _flag, _caster, _fallback in _OPTION_SPECS:
    parser.add_argument(_flag, type=_caster, default=_fallback)

# An empty string contributes no tokens, so every option keeps its default.
args = parser.parse_args("")

input_video_path, output_video_path = args.input_video_path, args.output_video_path
minimap, bounce = args.minimap, args.bounce

# -------------------------------
# File paths and YOLO parameters
# -------------------------------
yolo_classes = 'Yolov3/yolov3.txt'
yolo_weights = 'Yolov3/yolov3.weights'
yolo_config  = 'Yolov3/yolov3.cfg'
CONF_THRESHOLD = 0.5  # minimum class score for a detection to be accepted

if output_video_path == "":
    # Derive the output location from the input path.  os.path.splitext only
    # strips the final extension, whereas split('.')[0] cut the path at the
    # first dot anywhere in it (e.g. "./clip.mp4" produced an empty stem).
    input_stem = os.path.splitext(input_video_path)[0]
    output_video_path = input_stem + "VideoOutput/video_output.mp4"

# Make sure the target directory exists before a writer is opened on it;
# otherwise the video writer cannot create the file and no output is saved.
output_dir = os.path.dirname(output_video_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# -------------------------------
# Open input video and get properties
# -------------------------------
video = cv2.VideoCapture(input_video_path)
# Fail fast when the capture could not be opened.
if not video.isOpened():
    raise FileNotFoundError(f"Could not open video: {input_video_path}")

# All properties are truncated to int; a fractional frame rate loses its
# fractional part here.
fps = int(video.get(cv2.CAP_PROP_FPS))
output_width  = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
output_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
total = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

# -------------------------------
# Set up output video writer
# -------------------------------
# The writer uses the input's frame rate and frame size.
# NOTE(review): the XVID fourcc is paired with whatever container the output
# path's extension implies — confirm this combination works on the target setup.
fourcc = cv2.VideoWriter_fourcc(*'XVID')
# NOTE(review): output_video.isOpened() is never checked after construction.
output_video = cv2.VideoWriter(output_video_path, fourcc, fps, (output_width, output_height))

# -------------------------------
# Load YOLO network and labels
# -------------------------------
# One class label per line of the classes file.
with open(yolo_classes) as f:
    LABELS = [entry.strip() for entry in f]

net = cv2.dnn.readNet(yolo_weights, yolo_config)
layer_names = net.getLayerNames()
print(net.getUnconnectedOutLayers())
unconnected = net.getUnconnectedOutLayers()
# The returned ids are offset by one relative to layer_names (hence the -1).
# They arrive either as a 2-D array of single-element rows or as a flat
# array (presumably depending on the OpenCV version), so unwrap when needed.
output_layers = [
    layer_names[(i[0] if len(unconnected.shape) == 2 else i) - 1]
    for i in unconnected
]

# -------------------------------
# Initialize court and player detectors
# -------------------------------
court_detector = CourtDetector()
dtype = get_dtype()  # passed straight through to the player-detection model
detection_model = DetectionModel(dtype=dtype)

# Get video properties for later resizing.
# NOTE: this rebinds `fps`, replacing the value read earlier from
# CAP_PROP_FPS; v_width / v_height are the resize target in the first pass.
fps, length, v_width, v_height = get_video_properties(video)

# -------------------------------
# First pass: locate the court and the players on every frame
# -------------------------------
frames = []
frame_i = 0
print(">>>>>>>>Extrating frames")
while True:
    ret, frame = video.read()
    # 1-based frame number; it is also bumped for the final, failed read.
    frame_i += 1
    if not ret:
        break

    # Full court detection on the first frame only; later frames are tracked.
    lines = (
        court_detector.detect(frame)
        if frame_i == 1
        else court_detector.track_court(frame)
    )
    detection_model.detect_player_1(frame, court_detector)
    detection_model.detect_top_persons(frame, court_detector, frame_i)

    # `lines` is consumed four values at a time: x1, y1, x2, y2 per segment.
    for i in range(0, len(lines), 4):
        x1, y1, x2, y2 = (lines[i + offset] for offset in range(4))
        segment_start = (int(x1), int(y1))
        segment_end = (int(x2), int(y2))
        cv2.line(frame, segment_start, segment_end, (0, 0, 255), 5)

    # Keep a resized copy of the annotated frame.
    new_frame = cv2.resize(frame, (v_width, v_height))
    frames.append(new_frame)

video.release()
print(">>>>>.frame extraction completed!!!")

# Player 2's boxes are resolved once all frames have been seen.
detection_model.find_player_2_box()
player1_boxes, player2_boxes = (
    detection_model.player_1_boxes,
    detection_model.player_2_boxes,
)

# -------------------------------
# Second pass: Detect ball using YOLO (without TrackNet)
# -------------------------------
video = cv2.VideoCapture(input_video_path)
currentFrame = 0
coords = []  # Ball center [x, y] per frame (None when no ball was detected)
t = []       # Video timestamp in seconds for each frame

# Fixed-length deque holding the most recent ball positions for the trail
q = queue.deque([None]*8, maxlen=8)

# Timestamps are derived from the frame index and the frame rate rather than
# from wall-clock time, so the velocities computed from `t` describe the ball
# in the video and do not vary with how fast this machine runs inference.
# If no frame rate is available, fall back to one time unit per frame.
seconds_per_frame = 1.0 / fps if fps else 1.0

# Class names (lower-cased) that count as the ball.
BALL_LABELS = ("sports ball", "tennis ball")

# Create the debug-frame directory up front so the per-frame images below
# actually get written.
os.makedirs("./saved_frames", exist_ok=True)

print(">>>>>>>deetction started")
try:
    while True:
        ret, frame = video.read()
        if not ret:
            break

        # Prepare blob and run YOLO forward pass
        blob = cv2.dnn.blobFromImage(frame, scalefactor=1/255.0, size=(416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        detections = net.forward(output_layers)

        ball_center = None
        best_confidence = CONF_THRESHOLD
        H, W = frame.shape[:2]
        # Scan every candidate from every output layer and keep the most
        # confident ball, instead of whichever candidate happens to come first.
        for output in detections:
            for detection in output:
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]
                if confidence > best_confidence and LABELS[classID].lower() in BALL_LABELS:
                    # The first four values are scaled by the frame size to
                    # get pixel units; only the center (x, y) is needed.
                    box = detection[0:4] * np.array([W, H, W, H])
                    centerX, centerY = box.astype("int")[:2]
                    ball_center = [centerX, centerY]
                    best_confidence = confidence

        # Append detection result and update deque for drawing
        coords.append(ball_center)
        q.appendleft(ball_center)
        t.append(currentFrame * seconds_per_frame)

        # Mark player boxes
        output_img = frame.copy()
        output_img = mark_player_box(output_img, player1_boxes, currentFrame)
        output_img = mark_player_box(output_img, player2_boxes, currentFrame)

        # Draw ball detection trail on the frame (one drawing context per frame)
        PIL_image = Image.fromarray(cv2.cvtColor(output_img, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(PIL_image)
        for pt in q:
            if pt is not None:
                draw.ellipse((pt[0] - 2, pt[1] - 2, pt[0] + 2, pt[1] + 2), outline='yellow')
        del draw

        opencvImage = cv2.cvtColor(np.array(PIL_image), cv2.COLOR_RGB2BGR)
        cv2.imwrite(f"./saved_frames/frame_{currentFrame:04d}.png" , opencvImage)
        output_video.write(opencvImage)
        currentFrame += 1
        print(f"Detecting frame: {currentFrame}")
finally:
    # Release the capture and finalize the output file even if a frame fails.
    video.release()
    output_video.release()



In [None]:
# Display the raw per-frame ball detections collected by the second pass.
coords

In [2]:
# Snapshot of the raw detections taken before post-processing rebinds `coords`.
# NOTE(review): list.copy() is shallow — the per-frame [x, y] entries are
# shared with `coords`, so in-place edits to them would show up here too.
coords2 = coords.copy()

#### Interpolating points to fill gaps caused by occlusion

In [3]:
# -------------------------------
# Post-processing: clean up the raw ball track
# -------------------------------
# diff_xy, remove_outliers and interpolation come from the detection module.
# presumably remove_outliers edits `coords` in place, since its return value
# is not used here — TODO confirm against the detection module.
x_vals, y_vals = diff_xy(coords)
remove_outliers(x_vals, y_vals, coords)
# `coords` is rebound to whatever interpolation() returns; the distance and
# velocity cells that follow read this new value.
coords = interpolation(coords)



#### Calculating distance

In [5]:

# Length of every frame-to-frame step of the ball (0.0 when either end of the
# step has no position).
step_distances = []
for here, there in zip(coords[:-1], coords[1:]):
    if here is None or there is None:
        step_distances.append(0.0)
        continue
    dx = there[0] - here[0]
    dy = there[1] - here[1]
    step_distances.append(np.sqrt(dx**2 + dy**2))

# Running total of the step lengths; prefix_sum[k] is the distance covered up
# to frame k, so prefix_sum has one more entry than step_distances.
prefix_sum = [0.0]
for dist in step_distances:
    prefix_sum.append(prefix_sum[-1] + dist)
current_sum = prefix_sum[-1]

# Velocity components and speed for every step, using the timestamps in `t`.
Vx, Vy, V = [], [], []
for i, (here, there) in enumerate(zip(coords[:-1], coords[1:])):
    if here is None or there is None:
        Vx.append(0.0)
        Vy.append(0.0)
        V.append(0.0)
        continue
    delta_time = t[i+1] - t[i]
    if delta_time == 0:
        # Avoid dividing by zero when two frames share a timestamp.
        delta_time = 1e-5
    vx = (there[0] - here[0]) / delta_time
    vy = (there[1] - here[1]) / delta_time
    Vx.append(vx)
    Vy.append(vy)
    V.append(np.sqrt(vx**2 + vy**2))

# A leading zero gives the velocity lists one entry per frame.
padded_Vx, padded_Vy, padded_V = ([0.0] + series for series in (Vx, Vy, V))

#### creating dataframe

In [None]:
def _box_center(box):
    """Return the (x, y) center of a box, or None if the box is missing or malformed.

    The box is read as box[0]..box[3]; presumably (x1, y1, x2, y2) corners —
    confirm against the player detection model.
    """
    if box is None:
        return None
    try:
        return ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)
    except (TypeError, IndexError):
        # e.g. a placeholder entry containing None values or too few items
        return None


def _player_centers(frame_idx):
    """Return (player 1 center, player 2 center) for a frame; either may be None."""
    p1_box = player1_boxes[frame_idx] if frame_idx < len(player1_boxes) else None
    p2_box = player2_boxes[frame_idx] if frame_idx < len(player2_boxes) else None
    return _box_center(p1_box), _box_center(p2_box)


def _distance(point, center):
    """Euclidean distance between two (x, y) points."""
    return np.sqrt((point[0] - center[0])**2 + (point[1] - center[1])**2)


# Determine direction of ball movement relative to the closest player.
# A frame is "unknown" when the ball or either player's box is unavailable;
# previously an unusable player-2 box silently reused the center from an
# earlier frame (or raised NameError on the first frame).
directions = []
for i, ball in enumerate(coords):
    if ball is None:
        directions.append("unknown")
        continue
    ball_x, ball_y = ball
    p1_center, p2_center = _player_centers(i)
    if p1_center is None or p2_center is None:
        directions.append("unknown")
        continue
    d1 = _distance((ball_x, ball_y), p1_center)
    d2 = _distance((ball_x, ball_y), p2_center)
    closest_center = p1_center if d1 < d2 else p2_center
    # Positive dot product: the ball is moving away from the closest player.
    vector_to_player = (ball_x - closest_center[0], ball_y - closest_center[1])
    dot_product = vector_to_player[0]*padded_Vx[i] + vector_to_player[1]*padded_Vy[i]
    directions.append('outgoing' if dot_product > 0 else 'incoming')

# -------------------------------
# Compute distance traveled since last hit using a heuristic based on player proximity
# -------------------------------
# A hit is an incoming -> outgoing transition that happens within
# `threshold` pixels of at least one player.
hits = []
threshold = 500  # threshold distance for a hit event (adjust as needed)
for i in range(1, len(directions)):
    if directions[i-1] != 'incoming' or directions[i] != 'outgoing' or coords[i] is None:
        continue
    p1_center, p2_center = _player_centers(i)
    if p1_center is None or p2_center is None:
        continue
    d1 = _distance(coords[i], p1_center)
    d2 = _distance(coords[i], p2_center)
    if d1 < threshold or d2 < threshold:
        hits.append({'frame': i, 'player': 1 if d1 < d2 else 2})

# For every frame, distance covered since the most recent hit at or before it
# (0.0 before the first hit), using the cumulative distances in prefix_sum.
hit_frames = sorted(hit['frame'] for hit in hits)
distance_traveled = []
for i in range(len(coords)):
    idx = bisect_right(hit_frames, i) - 1
    if idx >= 0:
        distance_traveled.append(prefix_sum[i] - prefix_sum[hit_frames[idx]])
    else:
        distance_traveled.append(0.0)

# -------------------------------
# Save results to CSV files
# -------------------------------
frame_data = []
for i, ball in enumerate(coords):
    # Missing detections are written as NaN coordinates.
    ball_pos = ball if ball is not None else [np.nan, np.nan]
    frame_data.append({
        'frame': i,
        'x': ball_pos[0],
        'y': ball_pos[1],
        'vx': padded_Vx[i],
        'vy': padded_Vy[i],
        'speed': padded_V[i],
        # NOTE(review): "unknown" is encoded as 1, the same as "outgoing".
        'direction': 0 if directions[i] == "incoming" else 1,
        'distance_since_last_hit': distance_traveled[i],
    })

hits_data = [{'frame': hit['frame'], 'player': hit['player']} for hit in hits]

pd.DataFrame(frame_data).to_csv('ball_data.csv', index=False)
# Explicit columns keep the header row in hits.csv even when no hit was found.
pd.DataFrame(hits_data, columns=['frame', 'player']).to_csv('hits.csv', index=False)

print("Processing complete. Ball data saved to 'ball_data.csv' and hit events to 'hits.csv'.")


#### Shot recognition

In [39]:

from argparse import ArgumentParser
import tensorflow as tf

import sys
import cv2
import numpy as np
import pandas as pd
from tensorflow import keras

sys.path.append("../tennis_shot_recognition")



In [40]:
# Look up the visible GPUs once and reuse the result for every report below.
physical_devices = tf.config.experimental.list_physical_devices("GPU")
print(physical_devices)

if not physical_devices:
    print("No GPU detected. Running on CPU.")
else:
    first_gpu = physical_devices[0]
    try:
        # Only the first GPU is switched to on-demand memory allocation.
        tf.config.experimental.set_memory_growth(first_gpu, True)
        print("Memory growth enabled for GPU:", first_gpu)
    except RuntimeError as e:
        # Report the failure and carry on with the default configuration.
        print(e)

print("Num GPUs Available: ", len(physical_devices))

from extract_human_pose import HumanPoseExtractor



[]
No GPU detected. Running on CPU.
Num GPUs Available:  0


In [41]:
from track_and_classify_with_rnn import GT, draw_probs

In [35]:
class ShotCounter:
    """Count tennis shots from a stream of per-frame class probabilities.

    Probabilities are averaged over the last ``nb_history`` frames.  A
    backhand, forehand or serve is registered when its averaged probability
    exceeds 50%, the previous state was "neutral", and more than
    MIN_FRAMES_BETWEEN_SHOTS frames have passed since the last registered shot.
    Every registered shot is appended to ``results`` as
    ``{"FrameID": frame_id, "Shot": name}``.
    """

    MIN_FRAMES_BETWEEN_SHOTS = 10

    BAR_WIDTH = 30
    BAR_HEIGHT = 170
    MARGIN_ABOVE_BAR = 30
    SPACE_BETWEEN_BARS = 55
    TEXT_ORIGIN_X = 1075
    BAR_ORIGIN_X = 1070

    # Meaning of each probability column, in the order update() checks them.
    _CLASS_ORDER = ("backhand", "forehand", "neutral", "serve")
    # Counter attribute incremented for each countable shot.
    _COUNTER_ATTR = {
        "backhand": "nb_backhands",
        "forehand": "nb_forehands",
        "serve": "nb_serves",
    }

    def __init__(self):
        self.nb_history = 10  # best history size IMO
        self.probs = np.zeros((self.nb_history, 4))

        self.nb_forehands = 0
        self.nb_backhands = 0
        self.nb_serves = 0

        self.last_shot = "neutral"
        self.frames_since_last_shot = self.MIN_FRAMES_BETWEEN_SHOTS

        self.results = []

    def update(self, probs, frame_id):
        """
        Feed the probabilities of one frame into the counter.

        The history window is shifted by one frame, the new probabilities are
        stored in its last row, and the column means decide the current state:
        the first class (backhand, forehand, neutral, serve) whose mean is
        above 50% wins.  "neutral" only resets the state; the other classes
        are counted when coming from "neutral" and enough frames have elapsed.
        """
        keep = self.nb_history - 1
        self.probs[0:keep, :] = self.probs[1:, :].copy()
        self.probs[-1, :] = probs

        self.frames_since_last_shot += 1

        means = np.mean(self.probs, axis=0)
        for column, label in enumerate(self._CLASS_ORDER):
            if means[column] > 0.5:
                break
        else:
            # No class is dominant: keep the current state untouched.
            return

        if label == "neutral":
            self.last_shot = "neutral"
            return

        coming_from_neutral = self.last_shot == "neutral"
        too_soon = self.frames_since_last_shot <= self.MIN_FRAMES_BETWEEN_SHOTS
        if not coming_from_neutral or too_soon:
            return

        counter = self._COUNTER_ATTR[label]
        setattr(self, counter, getattr(self, counter) + 1)
        self.last_shot = label
        self.frames_since_last_shot = 0
        self.results.append({"FrameID": frame_id, "Shot": self.last_shot})

    def display(self, frame):
        """
        Write the three shot counters in the bottom-left corner of ``frame``.

        The counter of the last shot is drawn in green while fewer than 30
        frames have passed since that shot; the others use (0, 0, 255).
        """
        rows = (
            ("Backhands", self.nb_backhands, "backhand", 100),
            ("Forehands", self.nb_forehands, "forehand", 60),
            ("Serves", self.nb_serves, "serve", 20),
        )
        for caption, count, shot_name, offset_from_bottom in rows:
            recent = self.last_shot == shot_name and self.frames_since_last_shot < 30
            cv2.putText(
                frame,
                f"{caption} = {count}",
                (20, frame.shape[0] - offset_from_bottom),
                cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=1,
                color=(0, 255, 0) if recent else (0, 0, 255),
                thickness=2,
            )

In [36]:
def compute_recall_precision(gt, shots):
    """
    Print recall, precision and a false-positive breakdown for detected shots.

    A detected shot matches a ground-truth shot when both have the same shot
    name and their frame ids differ by at most 30 frames.

    Parameters:
        gt: object with a ``to_numpy()`` method yielding rows whose first
            item is the shot name and whose second item is the frame id.
        shots: list of dicts with "Shot" and "FrameID" keys.

    Recall is matched ground-truth shots over all ground-truth shots;
    precision is matched ground-truth shots over matched plus false positives.
    Either ratio is reported as 0.0 when its denominator is zero (empty ground
    truth, or nothing matched and nothing detected) instead of raising
    ZeroDivisionError.
    """
    max_frame_gap = 30

    def is_match(shot, gt_shot):
        return (
            shot["Shot"] == gt_shot[0]
            and abs(shot["FrameID"] - gt_shot[1]) <= max_frame_gap
        )

    gt_numpy = gt.to_numpy()

    # Ground-truth shots that have (or lack) a corresponding detection.
    nb_match = sum(
        1 for gt_shot in gt_numpy if any(is_match(shot, gt_shot) for shot in shots)
    )
    nb_misses = len(gt_numpy) - nb_match

    # Detections with no corresponding ground-truth shot.
    false_positives = [
        shot
        for shot in shots
        if not any(is_match(shot, gt_shot) for gt_shot in gt_numpy)
    ]
    nb_fp = len(false_positives)
    fp_backhands = sum(1 for shot in false_positives if shot["Shot"] == "backhand")
    fp_forehands = sum(1 for shot in false_positives if shot["Shot"] == "forehand")
    fp_serves = sum(1 for shot in false_positives if shot["Shot"] == "serve")

    precision = nb_match / (nb_match + nb_fp) if (nb_match + nb_fp) else 0.0
    recall = nb_match / (nb_match + nb_misses) if (nb_match + nb_misses) else 0.0

    print(f"Recall {recall*100:.1f}%")
    print(f"Precision {precision*100:.1f}%")

    print(
        f"FP: backhands = {fp_backhands}, forehands = {fp_forehands}, serves = {fp_serves}"
    )

In [37]:
# ! wget -q -O movenet.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite

In [None]:
if __name__ == "__main__":
    # Track the player frame by frame, classify each pose with the loaded
    # model and print the list of detected shots.
    parser = ArgumentParser(
        description="Track tennis player and display shot probabilities"
    )
    # nargs="?" makes the positionals optional so that their defaults are
    # actually used; without it argparse requires both values and the
    # defaults are never applied.
    parser.add_argument("video", nargs="?", default="/home/akash/ws/personal/sportsAI/src/dataset_prep/tennis-tracking/VideoInput/video_input2.mp4")
    parser.add_argument("model", nargs="?", default="/home/akash/ws/personal/sportsAI/src/dataset_prep/tennis_shot_recognition/tennis_fully_connected.h5")
    parser.add_argument("--evaluate", help="Path to annotation file")
    parser.add_argument("-f", type=int, help="Forward to")
    # Arguments are passed explicitly because this runs inside a notebook.
    args = parser.parse_args(["/home/akash/ws/personal/sportsAI/src/dataset_prep/tennis-tracking/VideoInput/video_input2.mp4", "/home/akash/ws/personal/sportsAI/src/dataset_prep/tennis_shot_recognition/tennis_fully_connected.h5"])

    shot_counter = ShotCounter()

    if args.evaluate is not None:
        gt = GT(args.evaluate)

    m1 = keras.models.load_model(args.model)

    cap = cv2.VideoCapture(args.video)
    # Explicit errors instead of assert, which is stripped under `python -O`.
    if not cap.isOpened():
        raise IOError(f"Cannot open video: {args.video}")

    try:
        # The first frame is only used to size the pose extractor; it is not
        # classified, so FRAME_ID 1 is the second frame of the video.
        ret, frame = cap.read()
        if not ret or frame is None:
            raise RuntimeError("Unable to read the first frame of the video")

        human_pose_extractor = HumanPoseExtractor(frame.shape)

        FRAME_ID = 0

        while cap.isOpened():
            ret, frame = cap.read()

            if not ret or frame is None:
                break

            FRAME_ID += 1

            # Fast-forward: skip frames before the requested start frame.
            if args.f is not None and FRAME_ID < args.f:
                continue

            human_pose_extractor.extract(frame)

            # dont draw non-significant points/edges by setting probability to 0
            human_pose_extractor.discard(["left_eye", "right_eye", "left_ear", "right_ear"])

            # Keep the (first two) coordinates of the keypoints whose score is
            # still positive and flatten them into one feature row.
            # NOTE(review): the reshape requires exactly 13 such keypoints —
            # confirm the extractor never zeroes additional scores.
            features = human_pose_extractor.keypoints_with_scores.reshape(17, 3)
            features = features[features[:, 2] > 0][:, 0:2].reshape(1, 13 * 2)

            # Without a valid ROI the frame contributes all-zero probabilities.
            probs = (
                m1(features)[0] if human_pose_extractor.roi.valid else np.zeros(4)
            )

            shot_counter.update(probs, FRAME_ID)

            draw_probs(frame, np.mean(shot_counter.probs, axis=0))
            shot_counter.display(frame)

            if args.evaluate is not None:
                gt.display(frame, FRAME_ID)

            # Display results on original frame
            human_pose_extractor.draw_results_frame(frame)
            if (
                shot_counter.frames_since_last_shot < 30
                and shot_counter.last_shot != "neutral"
            ):
                human_pose_extractor.roi.draw_shot(frame, shot_counter.last_shot)

            # Display results on original frame
            human_pose_extractor.draw_results_frame(frame)
            # cv2.imshow("Frame", frame)
            human_pose_extractor.roi.update(human_pose_extractor.keypoints_pixels_frame)

            # cv2.imwrite(f"videos/image_{FRAME_ID:05d}.png", frame)
    finally:
        # Always give the capture back, even when a frame fails to process.
        cap.release()

    print(shot_counter.results)




{'FrameID': 26, 'Shot': 'backhand'}, {'FrameID': 175, 'Shot': 'forehand'}, {'FrameID': 339, 'Shot': 'forehand'}, {'FrameID': 426, 'Shot': 'forehand'}, {'FrameID': 527, 'Shot': 'forehand'}, {'FrameID': 812, 'Shot': 'forehand'}

In [20]:
def process_video(
    video_path="/home/akash/ws/personal/sportsAI/src/dataset_prep/tennis-tracking/VideoInput/video_input2.mp4",
    model_path="/home/akash/ws/personal/sportsAI/src/dataset_prep/tennis_shot_recognition/tennis_fully_connected.h5",
    evaluate_path=None,
    forward_frame=0
):
    """
    Process the video to track the tennis player and detect shots.

    The first frame is only used to size the pose extractor and is not
    classified, so frame number 1 refers to the second frame of the video.

    Parameters:
        video_path (str): Path to the input video.
        model_path (str): Path to the trained model.
        evaluate_path (str or None): Optional path to the annotation file.
            It is loaded through GT() but not otherwise used by this function.
        forward_frame (int): Frames numbered below this value are skipped.

    Returns:
        list: One dict per detected shot, as recorded by ShotCounter.update:
        ``{"FrameID": <frame number>, "Shot": <shot name>}``.

    Raises:
        IOError: If the video cannot be opened.
        RuntimeError: If the first frame cannot be read.
    """
    # Initialize the shot counter.
    shot_counter = ShotCounter()

    # Optional ground truth annotations (loaded only; no drawing is done here).
    if evaluate_path is not None:
        gt = GT(evaluate_path)

    # Load the model.
    m1 = keras.models.load_model(model_path)

    # Open the video.
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Cannot open video: {video_path}")

    try:
        # Read the first frame to initialize the human pose extractor.
        ret, frame = cap.read()
        if not ret or frame is None:
            raise RuntimeError("Unable to read the first frame of the video")
        human_pose_extractor = HumanPoseExtractor(frame.shape)

        FRAME_ID = 0

        # Process video frame by frame.
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            FRAME_ID += 1

            # Skip frames if a starting frame is specified.
            if FRAME_ID < forward_frame:
                continue

            # Extract pose and filter out less-significant keypoints.
            human_pose_extractor.extract(frame)
            human_pose_extractor.discard(["left_eye", "right_eye", "left_ear", "right_ear"])

            # Keep the (first two) coordinates of the keypoints whose score is
            # still positive and flatten them into one feature row.
            # NOTE(review): the reshape requires exactly 13 such keypoints —
            # confirm the extractor never zeroes additional scores.
            features = human_pose_extractor.keypoints_with_scores.reshape(17, 3)
            features = features[features[:, 2] > 0][:, 0:2].reshape(1, 13 * 2)

            # Get shot probabilities if the ROI is valid.
            probs = (
                m1(features)[0] if human_pose_extractor.roi.valid else np.zeros(4)
            )

            # Update shot counter with current probabilities.
            shot_counter.update(probs, FRAME_ID)

            # Update ROI based on current frame keypoints.
            human_pose_extractor.roi.update(human_pose_extractor.keypoints_pixels_frame)
    finally:
        # Release the capture even when a frame fails to process.
        cap.release()

    return shot_counter.results

In [None]:
# Example usage:
if __name__ == "__main__":
    results = process_video()
    # Each result is a dict with "FrameID" and "Shot" keys; tuple-unpacking it
    # would yield the key names instead of the values.
    for shot in results:
        print(f"Frame {shot['FrameID']}: {shot['Shot']}")
