# Importing Required Libraries


 
The cell below imports the libraries needed for object detection with the DETR (DEtection TRansformer) model and PyTorch Lightning. The key libraries include:  

- `numpy` and `pandas` for data handling  
- `torch` and `torchvision` for deep learning operations  
- `pytorch_lightning` for streamlined training and evaluation  
- `transformers` for utilizing the DETR model and its image processor  
- `torch.utils.data.DataLoader` for batching data  
- `requests` and `zipfile` for handling dataset downloads and extraction  
- `os` for file path management  


In [None]:

import numpy as np 
import pandas as pd 
import pytorch_lightning as pl
from transformers import DetrForObjectDetection
import torch
import os
import zipfile
import requests
import torchvision
from transformers import DetrImageProcessor
from torch.utils.data import DataLoader
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor
from transformers import DetrForObjectDetection, DetrImageProcessor
from PIL import Image
import torch
import matplotlib.pyplot as plt
import requests
import requests
import torch
from PIL import Image
import matplotlib.pyplot as plt
import json
import os
import torch
from transformers import DetrForObjectDetection, DetrImageProcessor
import cv2
import torch
import numpy as np
from tqdm import tqdm
from transformers import DetrForObjectDetection, DetrImageProcessor

# Downloading and Extracting the Golf Ball Dataset


This cell downloads the golf ball dataset from Roboflow and extracts it for further use.  

- The dataset is publicly available and downloaded using the `requests` library.  
- The dataset is saved as a zip file at `/content/golf_ball_dataset.zip`.  
- The zip file is then extracted to `/content/golf_ball_dataset` using the `zipfile` module.  

This ensures that the dataset is ready for preprocessing and model training.

In [3]:


# Public Roboflow export of the golf ball dataset (zip archive).
dataset_url = "https://universe.roboflow.com/ds/nQn3RG98UA?key=elxUi2Hqxa"
output_path = "/content/golf_ball_dataset.zip"

print("Downloading dataset...")

# Stream the archive to disk in chunks instead of buffering it all in memory.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() fails early on an HTTP error so that an error page is
# never written to disk and mistaken for the zip file.
with requests.get(dataset_url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(output_path, "wb") as file:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            file.write(chunk)

dataset_dir = "/content/golf_ball_dataset"
print("Extracting dataset...")
with zipfile.ZipFile(output_path, "r") as zip_ref:
    zip_ref.extractall(dataset_dir)


Downloading dataset...
Extracting dataset...


#  Custom COCO Dataset Class for Object Detection


This cell defines a custom dataset class, `CocoDetection`, which extends `torchvision.datasets.CocoDetection` to work with the DETR model.  

- The dataset loads images and annotations from the COCO format (`_annotations.coco.json`).  
- The `DetrImageProcessor` is used to preprocess images and annotations.  
- In the `__getitem__` method:  
  - The image and corresponding annotations are retrieved.  
  - The annotations are structured in a dictionary format (`image_id`, `annotations`).  
  - The processor encodes the image and annotations into tensors.  
  - The processed pixel values and labels are returned.  

This class is essential for loading and preparing data for training the DETR object detection model.

In [4]:


class CocoDetection(torchvision.datasets.CocoDetection):
    """COCO-format detection dataset whose samples are encoded for DETR.

    The annotation file is expected at ``<img_folder>/_annotations.coco.json``.
    Each sample is passed through ``processor`` so that ``__getitem__`` yields
    ready-to-batch tensors instead of a raw image and annotation list.

    Args:
    - img_folder (str): Directory holding the images and the annotation file.
    - processor: Image processor called as
      ``processor(images=..., annotations=..., return_tensors="pt")``.
    - train (bool): Accepted for interface compatibility; currently unused.
    """

    def __init__(self, img_folder, processor, train=True):
        ann_file = os.path.join(img_folder, "_annotations.coco.json")
        super().__init__(img_folder, ann_file)
        self.processor = processor

    def __getitem__(self, idx):
        """Return ``(pixel_values, labels)`` for the sample at position ``idx``."""
        img, target = super().__getitem__(idx)
        image_id = self.ids[idx]
        # The processor expects the annotations wrapped together with the image id.
        target = {"image_id": image_id, "annotations": target}
        encoding = self.processor(images=img, annotations=target, return_tensors="pt")
        # Drop only the leading batch axis added by the processor. A bare
        # squeeze() would also remove any other axis that happens to have size 1.
        pixel_values = encoding["pixel_values"].squeeze(0)
        labels = encoding["labels"][0]
        return pixel_values, labels


# Dataset Preparation and DataLoader Setup


This cell prepares the dataset and creates data loaders for training and validation.  

- The `DetrImageProcessor` from the pre-trained DETR model (`facebook/detr-resnet-50`) is initialized for image and annotation processing.  
- The dataset directories for training (`train_dir`) and validation (`val_dir`) are set based on the extracted dataset.  
- The `CocoDetection` class is used to load the training and validation datasets.  
- A custom `collate_fn` function:  
  - Pads pixel values using the processor for batch processing.  
  - Returns `pixel_values`, `pixel_mask`, and `labels` in a structured format.  
- DataLoaders (`train_loader` and `val_loader`) are created with a batch size of 32, shuffling enabled for training.  

This ensures efficient data loading for training and evaluating the object detection model.

In [5]:


# Image processor matching the pre-trained DETR checkpoint; it is shared by the
# datasets below and by collate_fn.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

# Dataset directories (according to the COCO dataset directory structure)
train_dir = os.path.join(dataset_dir, "train")
val_dir = os.path.join(dataset_dir, "valid")

# Load datasets; each folder must contain its own _annotations.coco.json
train_dataset = CocoDetection(img_folder=train_dir, processor=processor)
val_dataset = CocoDetection(img_folder=val_dir, processor=processor)

# Batch assembly for the DataLoaders
def collate_fn(batch):
    """Merge a list of ``(pixel_values, labels)`` samples into one batch dict.

    The images are padded together by the module-level ``processor``; the
    per-image label entries are passed through as a plain list.
    """
    images = []
    targets = []
    for sample in batch:
        images.append(sample[0])
        targets.append(sample[1])

    padded = processor.pad(images, return_tensors="pt")
    return dict(
        pixel_values=padded["pixel_values"],
        pixel_mask=padded["pixel_mask"],
        labels=targets,
    )

# Both loaders batch 32 samples through collate_fn; only the training loader shuffles.
train_loader = DataLoader(train_dataset, collate_fn=collate_fn, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, collate_fn=collate_fn, batch_size=32)


preprocessor_config.json:   0%|          | 0.00/290 [00:00<?, ?B/s]

loading annotations into memory...
Done (t=0.17s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!


# DETR Model Implementation with PyTorch Lightning


This cell defines a PyTorch Lightning module for training the DETR (DEtection TRansformer) model for object detection.  

### Key Components:  
- **`__init__` Method:**  
  - Initializes a pre-trained `DetrForObjectDetection` model (`facebook/detr-resnet-50`).  
  - The number of labels is set to `num_classes + 1` to account for the "no object" class.  
  - Learning rate (`lr`), backbone learning rate (`lr_backbone`), and weight decay (`weight_decay`) are set as hyperparameters.  

- **`forward` Method:**  
  - Passes `pixel_values` and `pixel_mask` through the DETR model to generate predictions.  

- **`training_step` Method:**  
  - Computes the loss for a batch of images using ground truth labels.  
  - Logs the training loss for monitoring.  

- **`configure_optimizers` Method:**  
  - Uses the AdamW optimizer with different learning rates for the backbone and other parameters.  
  - Parameters in the backbone have a separate learning rate (`lr_backbone`).  

This class provides an optimized framework for training DETR using PyTorch Lightning, enabling efficient model training and logging.

In [6]:


class Detr(pl.LightningModule):
    """Lightning wrapper around a pre-trained ``DetrForObjectDetection`` model.

    Args:
    - lr (float): Learning rate for every parameter outside the backbone.
    - lr_backbone (float): Learning rate for parameters whose name contains "backbone".
    - weight_decay (float): Weight decay passed to AdamW.
    - num_classes (int): Number of object classes in the dataset.
    """

    def __init__(self, lr, lr_backbone, weight_decay, num_classes):
        super().__init__()
        self.lr = lr
        self.lr_backbone = lr_backbone
        self.weight_decay = weight_decay
        # The classification head is re-created for the new label count, hence
        # ignore_mismatched_sizes. One extra label is requested on top of
        # num_classes (reserved by this notebook for the "no object" class).
        self.model = DetrForObjectDetection.from_pretrained(
            "facebook/detr-resnet-50",
            revision="no_timm",
            num_labels=num_classes + 1,
            ignore_mismatched_sizes=True,
        )

    def forward(self, pixel_values, pixel_mask):
        """Run the wrapped model on a padded image batch and its mask."""
        outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)
        return outputs

    def training_step(self, batch, batch_idx):
        """Compute the loss for one batch and log it as ``training_loss``."""
        loss = self.model(
            pixel_values=batch["pixel_values"],
            pixel_mask=batch["pixel_mask"],
            labels=batch["labels"],
        ).loss
        self.log("training_loss", loss)
        return loss

    def configure_optimizers(self):
        """Build AdamW with a separate learning rate for the backbone parameters."""
        head_params = []
        backbone_params = []
        for name, param in self.named_parameters():
            if not param.requires_grad:
                continue
            if "backbone" in name:
                backbone_params.append(param)
            else:
                head_params.append(param)

        groups = [
            {"params": head_params},
            {"params": backbone_params, "lr": self.lr_backbone},
        ]
        return torch.optim.AdamW(groups, lr=self.lr, weight_decay=self.weight_decay)


#  Model Training Configuration and Execution


This cell sets up and trains the DETR model using PyTorch Lightning.  

### **Key Steps:**
- **Hyperparameters Configuration:**  
  - `num_classes = 1`: The dataset has only one class (golf ball).  
  - `lr = 1e-4`: Learning rate for model parameters.  
  - `lr_backbone = 1e-5`: Lower learning rate for the backbone to prevent overfitting.  
  - `weight_decay = 1e-4`: Regularization term to prevent overfitting.  
  - `max_epochs = 10`: The model will be trained for 10 epochs.  

- **Model Initialization:**  
  - The `Detr` class (previously defined) is instantiated with the given hyperparameters.  

- **Callbacks for Training:**  
  - `ModelCheckpoint`: Saves the best model checkpoint based on `training_loss`.  
  - `LearningRateMonitor`: Logs the learning rate at each step for analysis.  

- **Trainer Setup:**  
  - Uses PyTorch Lightning’s `Trainer` for training.  
  - Runs on a GPU (`accelerator="gpu", devices=1`).  

- **Training Execution:**  
  - `trainer.fit(model, train_loader)`: Starts model training using the training data.  

This cell ensures an efficient and structured training process, with automatic checkpointing and learning rate monitoring.

In [None]:

# Hyperparameters
num_classes = 1  # in this case only one class: golf ball
lr = 1e-4
lr_backbone = 1e-5
weight_decay = 1e-4
max_epochs = 10

# Initializing the model
model = Detr(lr=lr, lr_backbone=lr_backbone, weight_decay=weight_decay, num_classes=num_classes)

# Callbacks: checkpointing monitors the "training_loss" value logged in
# Detr.training_step; the LR monitor logs the learning rate at every step.
checkpoint_callback = ModelCheckpoint(monitor="training_loss", dirpath="./checkpoints", filename="detr-{epoch:02d}-{training_loss:.2f}")
lr_monitor = LearningRateMonitor(logging_interval="step")

# Trainer instance (requires a GPU: accelerator="gpu", devices=1)
trainer = Trainer(max_epochs=max_epochs, callbacks=[checkpoint_callback, lr_monitor], accelerator="gpu", devices=1)

# Training the model (only the training loader is passed; no validation loop is run)
trainer.fit(model, train_loader)


config.json:   0%|          | 0.00/6.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/167M [00:00<?, ?B/s]

Some weights of DetrForObjectDetection were not initialized from the model checkpoint at facebook/detr-resnet-50 and are newly initialized because the shapes did not match:
- class_labels_classifier.weight: found shape torch.Size([92, 256]) in the checkpoint and torch.Size([3, 256]) in the model instantiated
- class_labels_classifier.bias: found shape torch.Size([92]) in the checkpoint and torch.Size([3]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


# Saving the Trained Model and Preprocessor Configuration


This class-based implementation ensures the **trained DETR model, processor, and preprocessor configuration** are saved correctly after training.

### **Key Features:**
1. **Class-Based Structure (`ModelSaver`)**:
   - Encapsulates logic for saving the **model, processor, and configuration**.
   - Ensures the **correct paths** are used for saving.

2. **Ensures Directories Exist**:
   - `os.makedirs(self.processor_dir, exist_ok=True)`: Prevents file path errors.

3. **Model and Processor Saving**:
   - `self.model.save_pretrained(self.model_dir)`: Saves the trained DETR model.
   - `self.processor.save_pretrained(self.processor_dir)`: Saves the processor.

4. **Explicit Preprocessor Configuration Saving**:
   - `self._save_processor_config()`: Saves the preprocessor settings (`preprocessor_config.json`) in **JSON format**.

5. **Modular and Reusable**:
   - Can be reused for multiple models without redundant code.



In [None]:


class ModelSaver:
    """Writes a model, its processor, and the processor's JSON config to disk."""

    def __init__(self, model, processor, model_dir="./detr_golf_ball", processor_dir="./processor_golf_ball"):
        """
        Stores the objects to save together with their destination paths.

        Args:
        - model: Object exposing `save_pretrained(path)` (the trained DETR model).
        - processor: Object exposing `save_pretrained(path)` and `to_json_string()`.
        - model_dir (str): Directory the model is saved to.
        - processor_dir (str): Directory the processor is saved to.
        """
        self.model, self.processor = model, processor
        self.model_dir, self.processor_dir = model_dir, processor_dir
        # The JSON config always lives inside the processor directory.
        self.config_save_path = os.path.join(processor_dir, "preprocessor_config.json")

    def save_model_and_processor(self):
        """
        Saves the model, then the processor, then the processor's JSON configuration,
        and prints where each one was written.
        """
        # The config file is opened directly below, so its directory must exist.
        os.makedirs(self.processor_dir, exist_ok=True)

        self.model.save_pretrained(self.model_dir)
        self.processor.save_pretrained(self.processor_dir)
        self._save_processor_config()

        for message in (
            f"Model saved to: {self.model_dir}",
            f"Processor saved to: {self.processor_dir}",
            f"Preprocessor configuration saved to: {self.config_save_path}",
        ):
            print(message)

    def _save_processor_config(self):
        """
        Writes the processor's JSON string to `self.config_save_path`.
        """
        config_json = self.processor.to_json_string()
        with open(self.config_save_path, "w") as handle:
            handle.write(config_json)


model_save_path = "./detr_golf_ball"
processor_save_path = "./processor_golf_ball"

# Export the fine-tuned weights. `model` is the Detr Lightning module trained
# above; the Hugging Face model that can be reloaded with from_pretrained() is
# its `.model` attribute. The processor is the one the training data was
# prepared with. (Loading both from the save directories first, as this cell
# used to do, fails on a fresh run because nothing has been written there yet
# and would never export the newly trained weights.)
saver = ModelSaver(model.model, processor, model_save_path, processor_save_path)
saver.save_model_and_processor()



# Inference Pipeline for Golf Ball Detection



This class-based implementation provides a modular approach for detecting golf balls in images using a trained DETR model.  

#### **Key Functionalities:**
1. **`__init__` Method**:
   - Loads the pre-trained DETR model and processor.
   - Moves the model to the specified device (`CPU` or `GPU`).
   - Defines colors for bounding box visualization.

2. **`load_image` Method**:
   - Downloads and opens an image from a URL.

3. **`preprocess_image` Method**:
   - Converts the image into a format suitable for DETR.
   - Moves tensors to the correct device.

4. **`infer` Method**:
   - Runs inference on the input image.

5. **`postprocess` Method**:
   - Filters and extracts bounding boxes, confidence scores, and labels.

6. **`plot_predictions` Method**:
   - Draws bounding boxes and class labels on the image.

7. **`detect` Method**:
   - Runs the full detection pipeline: image loading, preprocessing, inference, and visualization.
   - Prints raw detection results.

#### **Example Usage**:
- The model and processor are loaded from saved files.
- The image is fetched from a given URL.
- The detection pipeline is executed, and results are displayed.


In [None]:

class GolfBallDetector:
    """Single-image golf ball detection built on a saved DETR model and processor."""

    def __init__(self, model_path, processor_path, device="cuda" if torch.cuda.is_available() else "cpu"):
        """
        Initializes the Golf Ball Detector.
        
        Args:
        - model_path (str): Path to the saved DETR model.
        - processor_path (str): Path to the saved image processor.
        - device (str): Device to run inference on ("cuda" or "cpu").
        """
        self.device = device
        self.model = DetrForObjectDetection.from_pretrained(model_path).to(self.device)
        self.processor = DetrImageProcessor.from_pretrained(processor_path)
        # RGB triples (0-1 range) cycled through when drawing bounding boxes.
        self.COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125]]
        self.model.eval()

    def load_image(self, image_url, timeout=30):
        """
        Downloads and loads an image from a URL.
        
        Args:
        - image_url (str): URL of the image.
        - timeout (float): Seconds to wait for the server before giving up.
        
        Returns:
        - image (PIL Image): Loaded image in RGB format.

        Raises:
        - requests.HTTPError: If the server answers with an error status.
        """
        from io import BytesIO

        response = requests.get(image_url, timeout=timeout)
        # Fail on 4xx/5xx here instead of handing an error page to PIL.
        response.raise_for_status()
        # Decode from the fully downloaded body; this avoids reading the raw
        # socket stream and leaves no open connection behind.
        image = Image.open(BytesIO(response.content)).convert("RGB")
        return image

    def preprocess_image(self, image):
        """
        Prepares the image for DETR model inference.
        
        Args:
        - image (PIL Image): Input image.
        
        Returns:
        - inputs (dict): Processed input tensors.
        - target_sizes (torch.Tensor): Image size tensor for post-processing.
        """
        inputs = self.processor(images=image, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        # PIL reports (width, height); reversing gives (height, width).
        target_sizes = torch.tensor([image.size[::-1]]).to(self.device)
        return inputs, target_sizes

    def infer(self, inputs):
        """
        Performs inference using the DETR model.
        
        Args:
        - inputs (dict): Processed image tensors.
        
        Returns:
        - outputs: Raw model outputs.
        """
        with torch.no_grad():
            outputs = self.model(**inputs)
        return outputs

    def postprocess(self, outputs, target_sizes, threshold=0.5):
        """
        Extracts object detection results from model outputs.
        
        Args:
        - outputs: Raw model outputs.
        - target_sizes (torch.Tensor): Image sizes for post-processing.
        - threshold (float): Confidence threshold for filtering detections.
        
        Returns:
        - results (dict): Detection results for the first (only) image, indexed
          by "boxes", "scores", and "labels".
        """
        results = self.processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=threshold)[0]
        return results

    def plot_predictions(self, image, boxes, scores, labels):
        """
        Plots the detected bounding boxes on the image.
        
        Args:
        - image (PIL Image): Input image.
        - boxes (list): Bounding boxes as (xmin, ymin, xmax, ymax).
        - scores (list): Confidence scores of detected objects.
        - labels (list): Object class labels (not displayed; every box is
          captioned "Golf Ball").
        """
        plt.figure(figsize=(16, 10))
        plt.imshow(image)
        ax = plt.gca()
        colors = self.COLORS * 100  # Extend color palette

        for box, score, label, color in zip(boxes, scores, labels, colors):
            xmin, ymin, xmax, ymax = box
            ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, color=color, linewidth=3))
            text = f"Golf Ball: {score:.2f}"
            ax.text(xmin, ymin, text, fontsize=15, bbox=dict(facecolor='yellow', alpha=0.5))

        plt.axis("off")
        plt.show()

    def detect(self, image_url, threshold=0.5):
        """
        Runs the full detection pipeline: image loading, preprocessing, inference, post-processing, and visualization.
        
        Args:
        - image_url (str): URL of the image.
        - threshold (float): Confidence threshold for filtering detections.
        """
        print("Loading image...")
        image = self.load_image(image_url)

        print("Preprocessing image...")
        inputs, target_sizes = self.preprocess_image(image)

        print("Performing inference...")
        outputs = self.infer(inputs)

        print("Post-processing results...")
        results = self.postprocess(outputs, target_sizes, threshold)

        if len(results["boxes"]) == 0:
            print("No objects detected.")
        else:
            # getting bounding boxes, scores, and labels
            boxes = results["boxes"].tolist()
            scores = results["scores"].tolist()
            labels = results["labels"].tolist()

            print(f"Detected {len(boxes)} objects.")
            self.plot_predictions(image, boxes, scores, labels)

        # Raw detection results are printed whether or not anything was found.
        print("Boxes:", results["boxes"])
        print("Scores:", results["scores"])
        print("Labels:", results["labels"])



In [None]:
# Example Usage: run the detector on a single image fetched from a URL.
# The two paths are the directories the model and processor were saved to.
model_path = "./detr_golf_ball"
processor_path = "./processor_golf_ball"
image_url = "https://media.istockphoto.com/id/620963206/ru/%D1%84%D0%BE%D1%82%D0%BE/%D1%8F%D0%BF%D0%BE%D0%BD%D1%81%D0%BA%D0%B0%D1%8F-%D0%B6%D0%B5%D0%BD%D1%89%D0%B8%D0%BD%D0%B0-%D0%B8%D0%B3%D1%80%D0%B0%D1%8E%D1%89%D0%B0%D1%8F-%D0%B2-%D0%B3%D0%BE%D0%BB%D1%8C%D1%84.jpg?s=612x612&w=0&k=20&c=dIrWywIbD9YbCXyHm8N-kV532FJoAOj26Hu4zmTk21g"

detector = GolfBallDetector(model_path, processor_path)
detector.detect(image_url, threshold=0.5)


# Golf Ball Detection and Trajectory Tracking in Video


This implementation uses a **DETR (DEtection TRansformer) model** to detect a **golf ball** in a video, track its trajectory, and save the processed video with annotations. The pipeline is structured as a class-based implementation for **modularity** and **reusability**.

---

### **Key Components:**

#### **1. `GolfBallTracker` Class**
- **`__init__` (Initialization)**  
  - Loads the **trained DETR model** and **image processor**.
  - Sets up **input and output video paths**.
  - Defines **trajectory tracking parameters**, including a **confidence threshold** and **momentum for smoothing movement**.

- **`process_video()` (Main Processing Pipeline)**  
  - Reads frames from the input video.
  - Applies **golf ball detection and trajectory tracking** to each frame.
  - Saves the processed video with bounding boxes and trajectory lines.

- **`detect_and_track()` (Detection & Processing)**  
  - Converts each frame to **RGB format**.
  - Runs **inference using the DETR model**.
  - Extracts **bounding boxes and confidence scores**.
  - Calls `_handle_detection()` if a golf ball is found or `_handle_prediction()` if not.

- **`_handle_detection()` (Bounding Box Processing)**  
  - Selects the **best detection** based on confidence scores.
  - Extracts the **center coordinates** of the detected golf ball.
  - **Tracks movement** and updates trajectory using **velocity smoothing**.
  - Draws **bounding boxes and labels** on the frame.

- **`_handle_prediction()` (Extrapolation for Missing Detections)**  
  - If the golf ball is **not detected**, estimates its **next position** using the **previous velocity**.

- **`_draw_trajectory()` (Visualizing the Ball's Path)**  
  - Connects past detected points to **draw a trajectory line** on the frame.

---

### **2. Video Processing Pipeline**
1. **Load Video**  
   - Reads frames from the input video using OpenCV (`cv2.VideoCapture`).
   
2. **Perform Object Detection**  
   - Uses DETR to **detect the golf ball** in each frame.
   
3. **Trajectory Tracking**  
   - Maintains a **list of previous positions**.
   - Uses **velocity-based motion estimation** for tracking.
   
4. **Draw Bounding Boxes & Trajectory**  
   - Annotates the **golf ball’s position**.
   - Draws a **trailing trajectory** to visualize movement.
   
5. **Save Processed Video**  
   - Writes the annotated frames into an **output video file**.

---


In [None]:


class GolfBallTracker:
    """Detects a golf ball frame by frame in a video and draws its trajectory."""

    def __init__(self, model_path, processor_path, input_video, output_video, threshold=0.3, momentum=0.8, device=None):
        """
        Initializes the golf ball tracker for object detection and trajectory tracking.

        Args:
        - model_path (str): Path to the trained DETR model.
        - processor_path (str): Path to the pre-trained processor.
        - input_video (str): Path to the input video file.
        - output_video (str): Path to save the processed output video.
        - threshold (float): Confidence threshold for object detection.
        - momentum (float): Momentum factor for trajectory smoothing.
        - device (str, optional): Device to use ('cuda' or 'cpu'). If None, it is automatically detected.
        """
        self.device = device if device else ("cuda" if torch.cuda.is_available() else "cpu")
        self.threshold = threshold
        self.momentum = momentum

        # Load the DETR model and processor
        self.model = DetrForObjectDetection.from_pretrained(model_path).to(self.device)
        self.processor = DetrImageProcessor.from_pretrained(processor_path)
        self.model.eval()

        # Video paths
        self.input_video = input_video
        self.output_video = output_video

        # Trajectory tracking state
        self.trajectory_points = []  # (x, y) ball centres, detected or extrapolated
        self.last_position = None    # most recent (x, y), or None before the first detection
        self.velocity = None         # smoothed per-frame (dx, dy), or None before the first detection

    def process_video(self):
        """Processes the input video, detects the golf ball, tracks its trajectory, and saves the output."""
        # Open video file
        cap = cv2.VideoCapture(self.input_video)
        if not cap.isOpened():
            print("Error: Could not open video.")
            return

        # Video writer setup: same resolution and frame rate as the input
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(self.output_video, fourcc, fps, (frame_width, frame_height))

        # The reported frame count only sizes the progress bar. The loop itself
        # runs until the capture stops returning frames, so an inaccurate count
        # in the container metadata cannot truncate the output.
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        print("Processing video...")
        try:
            with tqdm(total=frame_count if frame_count > 0 else None, desc="Processing Video") as progress:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    # Detect golf ball (annotations are drawn onto `frame` in place)
                    self.detect_and_track(frame)

                    # Write the processed frame to output
                    out.write(frame)
                    progress.update(1)
        finally:
            # Release resources even if inference fails part-way through
            cap.release()
            out.release()
        print(f"Processed video saved as {self.output_video}.")

    def detect_and_track(self, frame):
        """
        Detects the golf ball in the given frame and updates trajectory tracking.

        Args:
        - frame (numpy array): The input frame in OpenCV BGR layout; it is annotated in place.
        """
        # Convert frame to RGB for model input
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        inputs = self.processor(images=frame_rgb, return_tensors="pt").to(self.device)

        # Run inference
        with torch.no_grad():
            outputs = self.model(**inputs)

        # Post-process predictions. Post-processing expects (height, width),
        # which is exactly the order of frame.shape[:2]; reversing it (as is
        # needed for PIL's (width, height) image.size) would mis-scale the
        # boxes on any non-square frame.
        target_sizes = torch.tensor([frame.shape[:2]], device=self.device)
        results = self.processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=self.threshold)[0]

        # Extract bounding boxes and confidence scores
        boxes = results["boxes"].cpu().numpy() if "boxes" in results else []
        scores = results["scores"].cpu().numpy() if "scores" in results else []

        # Handle detections
        if len(boxes) > 0:
            self._handle_detection(frame, boxes, scores)
        else:
            self._handle_prediction()

        # Draw trajectory on frame
        self._draw_trajectory(frame)

    def _handle_detection(self, frame, boxes, scores):
        """
        Handles detection logic when the model successfully detects a golf ball.

        Only the highest-scoring box is used; it updates the tracking state and
        is drawn on the frame.

        Args:
        - frame (numpy array): The frame where detection occurs.
        - boxes (numpy array): Array of bounding boxes as (x_min, y_min, x_max, y_max).
        - scores (numpy array): Array of confidence scores.
        """
        best_idx = np.argmax(scores)
        if scores[best_idx] < self.threshold:
            return

        x_min, y_min, x_max, y_max = map(int, boxes[best_idx])
        center_x, center_y = (x_min + x_max) // 2, (y_min + y_max) // 2

        # Update velocity for trajectory smoothing
        if self.last_position is None:
            # First sighting: no displacement is known yet.
            self.velocity = (0, 0)
        else:
            dx = center_x - self.last_position[0]
            dy = center_y - self.last_position[1]
            if self.velocity is None:
                self.velocity = (dx, dy)
            else:
                # Exponential smoothing of the per-frame displacement.
                self.velocity = (
                    int(self.momentum * self.velocity[0] + (1 - self.momentum) * dx),
                    int(self.momentum * self.velocity[1] + (1 - self.momentum) * dy),
                )

        self.last_position = (center_x, center_y)
        self.trajectory_points.append(self.last_position)

        # Draw bounding box and label
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
        cv2.putText(frame, f"Golf Ball: {scores[best_idx]:.2f}", (x_min, y_min - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    def _handle_prediction(self):
        """
        Handles missing detections by estimating the next golf ball position using velocity-based extrapolation.

        Does nothing until the ball has been detected at least once.
        """
        if self.velocity is not None and self.last_position is not None:
            predicted_x = self.last_position[0] + self.velocity[0]
            predicted_y = self.last_position[1] + self.velocity[1]
            self.last_position = (predicted_x, predicted_y)
            self.trajectory_points.append(self.last_position)

    def _draw_trajectory(self, frame):
        """
        Draws the trajectory of the golf ball on the frame as line segments
        joining consecutive recorded points.

        Args:
        - frame (numpy array): The frame where the trajectory is drawn.
        """
        for i in range(1, len(self.trajectory_points)):
            cv2.line(frame, self.trajectory_points[i - 1], self.trajectory_points[i], (0, 255, 255), 2)

# Example Usage: annotate a video with detections and the ball's trajectory.
# Note that processor_path here points at a preprocessor_config.json file, not a directory.
model_path = "/kaggle/input/transformerdata"
processor_path = "/kaggle/working/preprocessor_config.json"
input_video = "/kaggle/working/output45.mp4"
output_video = "/kaggle/working/output_with_trajectory.mp4"

tracker = GolfBallTracker(model_path, processor_path, input_video, output_video)
tracker.process_video()
