In [3]:
import supervision as sv
from ultralytics import YOLO
import cv2
import pandas as pd

In [None]:
# Person-detection pass over a video: runs YOLOv8 on every frame, draws the
# boxes of detected persons into an annotated copy of the video, and exports
# one CSV row per detection.

# Input and output paths
video_path = "../tennis_point.mp4"
output_path = "tennis_annotated.mp4"
csv_path = "detections.csv"

# Load the YOLOv8 model ("n" = nano variant, fast)
model = YOLO("yolov8n.pt")

# Video metadata, reused to configure the output writer
video_info = sv.VideoInfo.from_video_path(video_path)

# Class id kept by the filter in the loop (0 = "person")
PERSON_CLASS_ID = 0

# Column order of the exported CSV. Passing it explicitly to the DataFrame
# keeps the header row even when no person is detected in any frame.
csv_columns = ["frame", "class", "x1", "y1", "x2", "y2", "confidence"]

# Tool initialisation
results_data = []
box_annotator = sv.BoxAnnotator()

# Process the video frame by frame. Each annotated frame is written to the
# sink as soon as it is produced instead of being accumulated in a list, so
# memory use does not grow with the length of the video.
with sv.VideoSink(output_path, video_info) as sink:
    for frame_number, frame in enumerate(sv.get_video_frames_generator(video_path)):
        results = model(frame)[0]
        detections = sv.Detections.from_ultralytics(results)

        # Keep only the detected persons
        detections = detections[detections.class_id == PERSON_CLASS_ID]

        # Annotate a copy so the frame yielded by the generator is untouched
        annotated_frame = box_annotator.annotate(
            scene=frame.copy(),
            detections=detections
        )
        sink.write_frame(annotated_frame)

        # Record one row per detection, with box corners truncated to ints
        for xyxy, conf in zip(detections.xyxy, detections.confidence):
            x1, y1, x2, y2 = map(int, xyxy)
            results_data.append({
                "frame": frame_number,
                "class": "person",
                "x1": x1, "y1": y1, "x2": x2, "y2": y2,
                "confidence": float(conf)
            })

# Export the results as CSV
pd.DataFrame(results_data, columns=csv_columns).to_csv(csv_path, index=False)

# Report the actual output locations rather than hard-coded file names
print("✅ Analyse terminée.")
print(f"🎥 Vidéo annotée : {output_path}")
print(f"📄 Résultats CSV  : {csv_path}")


0: 384x640 5 persons, 1 toothbrush, 36.2ms
Speed: 2.0ms preprocess, 36.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 33.9ms
Speed: 1.8ms preprocess, 33.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 34.8ms
Speed: 1.4ms preprocess, 34.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 32.3ms
Speed: 1.4ms preprocess, 32.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 1 cell phone, 32.2ms
Speed: 1.5ms preprocess, 32.2ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 1 cell phone, 34.8ms
Speed: 1.5ms preprocess, 34.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 1 cell phone, 34.2ms
Speed: 1.3ms preprocess, 34.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 1 cell phone, 34.3ms
Speed: 1.2m