In [9]:
import time, json, shutil, subprocess, sys, platform


def safe_import(name):
    """Import a module by name, returning ``None`` instead of raising.

    Args:
        name: Absolute module name. Dotted names such as ``"pkg.sub"`` are
            supported and yield the submodule itself.

    Returns:
        The imported module, or ``None`` if importing it raised any
        ``Exception``.
    """
    # Local import so this function does not depend on the cell's import line.
    import importlib

    try:
        # import_module returns the module that was named, whereas
        # __import__("pkg.sub") returns the top-level package "pkg".
        return importlib.import_module(name)
    except Exception:
        # Broad on purpose: any import-time failure is treated as
        # "module not available".
        return None


def pick_torch_device():
    """Pick the first usable PyTorch GPU backend.

    Backends are probed in this order: CUDA, Apple MPS, DirectML
    (via the optional ``torch_directml`` package).

    Returns:
        On success, a dict with ``"available": True``, ``"backend"``
        (``"cuda"``, ``"mps"`` or ``"directml"``), ``"device"`` and
        ``"details"``.
        On failure, a dict with ``"available": False`` and ``"reason"``.
        If one or more probes raised, the failure dict also carries
        ``"probe_errors"``: a mapping of backend name to the exception text
        (truncated to 200 characters), so a crashing probe is not mistaken
        for a missing backend.
    """
    torch = safe_import("torch")
    if torch is None:
        return {"available": False, "reason": "torch not installed"}

    # Exceptions raised while probing; reported only if no backend is found.
    probe_errors = {}

    # CUDA (NVIDIA or ROCm builds)
    try:
        if torch.cuda.is_available():
            dev_index = torch.cuda.current_device() if torch.cuda.device_count() else 0
            name = torch.cuda.get_device_name(dev_index)
            cudnn = getattr(torch.backends, "cudnn", None)
            return {
                "available": True,
                "backend": "cuda",
                "device": torch.device(f"cuda:{dev_index}"),
                "details": {
                    "cuda_version": getattr(torch.version, "cuda", None),
                    "cudnn": cudnn.enabled if cudnn is not None else None,
                    "device_index": dev_index,
                    "device_name": name,
                },
            }
    except Exception as exc:
        probe_errors["cuda"] = str(exc)[:200]

    # Apple MPS
    try:
        if hasattr(torch.backends, "mps") and torch.backends.mps.is_built() and torch.backends.mps.is_available():
            return {
                "available": True,
                "backend": "mps",
                "device": torch.device("mps"),
                "details": {"note": "Apple Metal Performance Shaders"},
            }
    except Exception as exc:
        probe_errors["mps"] = str(exc)[:200]

    # Windows DirectML (AMD/Intel/NVIDIA)
    torch_directml = safe_import("torch_directml")
    try:
        if torch_directml is not None:
            dml_device = torch_directml.device()
            return {
                "available": True,
                "backend": "directml",
                "device": dml_device,
                "details": {"note": "Windows DirectML backend"},
            }
    except Exception as exc:
        probe_errors["directml"] = str(exc)[:200]

    result = {"available": False, "reason": "No supported GPU backend found (CUDA/MPS/DirectML)"}
    if probe_errors:
        result["probe_errors"] = probe_errors
    return result


def nvidia_smi_summary():
    """Query ``nvidia-smi`` for each GPU's name, driver version and total memory.

    Returns:
        ``None`` if ``nvidia-smi`` is not found on ``PATH``.
        A list of ``{"name", "driver", "mem_total_MB"}`` dicts (all values are
        stripped strings), one per non-empty output line, on success.
        ``{"error": <message, at most 200 characters>}`` if the command exits
        non-zero, times out, or its output cannot be parsed.
    """
    if not shutil.which("nvidia-smi"):
        return None
    try:
        res = subprocess.run(
            [
                "nvidia-smi",
                "--query-gpu=name,driver_version,memory.total",
                "--format=csv,noheader,nounits",
            ],
            capture_output=True,
            text=True,
            timeout=2,
        )
        if res.returncode != 0:
            # Fall back to stdout so the error is not empty when the tool
            # wrote nothing to stderr.
            message = res.stderr.strip() or res.stdout.strip()
            return {"error": message[:200]}

        gpus = []
        for line in res.stdout.splitlines():
            line = line.strip()
            if not line:
                continue
            # Split from the right: the last two fields are driver and memory,
            # so a comma inside the GPU name stays part of the name.
            name, driver, mem_total = (field.strip() for field in line.rsplit(",", 2))
            gpus.append({"name": name, "driver": driver, "mem_total_MB": mem_total})
        return gpus
    except Exception as e:
        return {"error": str(e)[:200]}


def gpu_self_test():
    """Print a JSON report on the GPU backend and a small compute check.

    The report holds the Python version, the platform string, the backend
    chosen by ``pick_torch_device()``, the ``nvidia_smi_summary()`` result and,
    when a device is available, the outcome and wall-clock time of one
    1024x1024 matrix multiplication on that device.

    Returns:
        None. The report is only printed.
    """
    torch = safe_import("torch")
    selection = pick_torch_device()

    report = {
        "python": sys.version.split()[0],
        "platform": platform.platform(),
        "device_available": selection["available"],
        "backend": selection.get("backend"),
        "details": selection.get("details"),
        "nvidia_smi": nvidia_smi_summary(),
        "compute_test": None,
        "error": None,
    }

    if selection["available"]:
        try:
            # Build both operands on the selected device before starting the clock.
            target = selection["device"]
            lhs = torch.randn((1024, 1024), device=target)
            rhs = torch.randn((1024, 1024), device=target)

            started = time.perf_counter()
            product = lhs @ rhs
            # Read one element back to force the result to be realized,
            # then synchronize explicitly on CUDA.
            float(product[0, 0].item())
            if report["backend"] == "cuda":
                torch.cuda.synchronize()
            elapsed_ms = (time.perf_counter() - started) * 1000

            report["compute_test"] = {
                "ok": True,
                "op": "1024x1024 matmul",
                "elapsed_ms": round(elapsed_ms, 2),
            }
        except Exception as exc:
            report["compute_test"] = {"ok": False}
            report["error"] = str(exc)
    else:
        # No usable backend: pass on the reason given by the device picker.
        report["error"] = selection.get("reason", "unknown")

    print(json.dumps(report, indent=2))


# Run the self-test when this cell executes; it prints its JSON report.
gpu_self_test()


{
  "python": "3.12.1",
  "platform": "Windows-11-10.0.26100-SP0",
  "device_available": false,
  "backend": null,
  "details": null,
  "nvidia_smi": null,
  "compute_test": null,
  "error": "No supported GPU backend found (CUDA/MPS/DirectML)"
}


https://app.roboflow.com/vio-vision/veovision-tnp3c


Load both API keys: Hugging Face and Roboflow

In [None]:
from dotenv import load_dotenv
from getpass import getpass
import os

# Load variables through python-dotenv before reading the environment.
load_dotenv()

# Use a value already present in the environment; otherwise prompt for it
# without echoing the input.
hf = os.getenv("HF_TOKEN")
if not hf:
    hf = getpass("HF_TOKEN (hidden): ")

rf = os.getenv("ROBOFLOW_API_KEY")
if not rf:
    rf = getpass("ROBOFLOW_API_KEY (hidden): ")

# Store the whitespace-trimmed values back in the process environment.
os.environ.update(
    {
        "HF_TOKEN": hf.strip(),
        "ROBOFLOW_API_KEY": rf.strip(),
    }
)

# Report only whether each key is non-empty; the values are never printed.
print("HF_TOKEN set:", bool(os.environ.get("HF_TOKEN")))
print("ROBOFLOW_API_KEY set:", bool(os.environ.get("ROBOFLOW_API_KEY")))

In [4]:
# Install gdown and inference-gpu into the kernel's environment (-q: quiet).
# NOTE(review): versions are unpinned, and the output recorded for this cell
# shows the install failing with an OSError — confirm both packages are
# actually installed before relying on them.
%pip install -q gdown inference-gpu

ERROR: Could not install packages due to an OSError: [WinError 2] The system cannot find the file specified

  You can safely remove it manually.
  You can safely remove it manually.


Note: you may need to restart the kernel to use updated packages.


In [3]:
# Install the roboflow/sports package directly from its GitHub repository.
# No branch, tag or commit is given, so the installed revision is not pinned.
%pip install -q git+https://github.com/roboflow/sports.git

In [5]:
# Download five .mp4 files from Google Drive with gdown; -O sets the local
# output filename for each download.
!gdown -O "0bfacc_0.mp4" "https://drive.google.com/uc?id=12TqauVZ9tLAv8kWxTTBFWtgt2hNQ4_ZF"
!gdown -O "2e57b9_0.mp4" "https://drive.google.com/uc?id=19PGw55V8aA6GZu5-Aac5_9mCy3fNxmEf"
!gdown -O "08fd33_0.mp4" "https://drive.google.com/uc?id=1OG8K6wqUw9t7lp9ms1M48DxRhwTYciK-"
!gdown -O "573e61_0.mp4" "https://drive.google.com/uc?id=1yYPKuXbHsCxqjA9G-S6aeR2Kcnos8RPU"
!gdown -O "121364_0.mp4" "https://drive.google.com/uc?id=1vVwjW1dE1drIdd4ZSILfbCGPD4weoNiu"

Downloading...
From: https://drive.google.com/uc?id=12TqauVZ9tLAv8kWxTTBFWtgt2hNQ4_ZF
To: c:\Users\mwghe\Desktop\VeoVision\0bfacc_0.mp4

  0%|          | 0.00/19.9M [00:00<?, ?B/s]
  3%|▎         | 524k/19.9M [00:00<00:08, 2.16MB/s]
  5%|▌         | 1.05M/19.9M [00:00<00:05, 3.30MB/s]
  8%|▊         | 1.57M/19.9M [00:00<00:04, 3.77MB/s]
 11%|█         | 2.10M/19.9M [00:00<00:05, 3.44MB/s]
 13%|█▎        | 2.62M/19.9M [00:00<00:04, 3.70MB/s]
 16%|█▌        | 3.15M/19.9M [00:00<00:04, 3.67MB/s]
 18%|█▊        | 3.67M/19.9M [00:01<00:04, 3.79MB/s]
 21%|██        | 4.19M/19.9M [00:01<00:04, 3.61MB/s]
 24%|██▎       | 4.72M/19.9M [00:01<00:03, 3.96MB/s]
 29%|██▉       | 5.77M/19.9M [00:01<00:03, 4.61MB/s]
 32%|███▏      | 6.29M/19.9M [00:01<00:03, 4.33MB/s]
 37%|███▋      | 7.34M/19.9M [00:01<00:02, 4.46MB/s]
 40%|███▉      | 7.86M/19.9M [00:02<00:03, 3.97MB/s]
 42%|████▏     | 8.39M/19.9M [00:02<00:03, 3.77MB/s]
 45%|████▍     | 8.91M/19.9M [00:02<00:02, 3.71MB/s]
 50%|█████     | 9.96M/19

In [6]:
import os
# Prefer CUDA but keep CPU as a fallback: the GPU self-test output recorded
# earlier in this notebook found no CUDA device, so a CUDA-only list leaves
# no usable provider on that machine. Providers are listed in priority order.
os.environ["ONNXRUNTIME_EXECUTION_PROVIDERS"] = "[CUDAExecutionProvider,CPUExecutionProvider]"

## Ball, player, goalkeeper and referee detection

In [None]:
# Identifier of the detection model used for this section. The slug matches
# the Roboflow project URL given earlier in this notebook; the trailing "/1"
# is presumably the model version — TODO confirm.
PLAYER_DETECTION_MODEL_ID = "veovision-tnp3c/1"

