In [None]:
!pip install SoccerNet

Collecting SoccerNet
  Downloading SoccerNet-0.1.62-py3-none-any.whl.metadata (13 kB)
Collecting scikit-video (from SoccerNet)
  Downloading scikit_video-1.1.11-py2.py3-none-any.whl.metadata (1.1 kB)
Collecting google-measurement-protocol (from SoccerNet)
  Downloading google_measurement_protocol-1.1.0-py2.py3-none-any.whl.metadata (845 bytes)
Collecting pycocoevalcap (from SoccerNet)
  Downloading pycocoevalcap-1.2-py3-none-any.whl.metadata (3.2 kB)
Collecting boto3 (from SoccerNet)
  Downloading boto3-1.40.64-py3-none-any.whl.metadata (6.6 kB)
Collecting botocore<1.41.0,>=1.40.64 (from boto3->SoccerNet)
  Downloading botocore-1.40.64-py3-none-any.whl.metadata (5.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3->SoccerNet)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.15.0,>=0.14.0 (from boto3->SoccerNet)
  Downloading s3transfer-0.14.0-py3-none-any.whl.metadata (1.7 kB)
Collecting prices>=1.0.0 (from google-measurement-protocol->SoccerNet)

In [None]:
from SoccerNet.Downloader import SoccerNetDownloader
import tensorflow as tf
import numpy as np
import cv2
import os
from tqdm import tqdm
import json
import pickle

In [None]:
LOCAL_DIR = "/content"  # dossier racine où SoccerNet stocke les matchs
# identifiant exact du match (tel que getListGames renverrait)
game_id = "england_epl/2014-2015/2015-02-21 - 18-00 Chelsea 1 - 1 Burnley"
video_filename = "1_224p.mkv"     # nom du fichier à télécharger / traiter
labels_filename = "Labels-v2.json"
output_features_name = "1_ResNET_TF2_custom.npy"  # où sauvegarder

# Extraction params
TARGET_FPS = 2      # matches SoccerNet TF2 behavior
BATCH_SIZE = 32     # batch pour predict()
RESIZE = (224, 224) # taille d'entrée ResNet
# -------------------------


In [None]:

# -------------------------
# 1) Téléchargement d'un seul match
# -------------------------
downloader = SoccerNetDownloader(LocalDirectory=LOCAL_DIR)
downloader.password = input("Password for videos? (press Enter if not needed):\n")

# build paths
game_dir = os.path.join(LOCAL_DIR, game_id)
os.makedirs(game_dir, exist_ok=True)  # ensure dir exists

print(f"Downloading only {video_filename} and {labels_filename} for game: {game_id}")
downloader.downloadGame(game=game_id, files=[video_filename, labels_filename])
print("Download finished (or already present).")

Password for videos? (press Enter if not needed):
s0cc3rn3t
Downloading only 1_224p.mkv and Labels-v2.json for game: england_epl/2014-2015/2015-02-21 - 18-00 Chelsea 1 - 1 Burnley
/content/england_epl/2014-2015/2015-02-21 - 18-00 Chelsea 1 - 1 Burnley/1_224p.mkv already exists
/content/england_epl/2014-2015/2015-02-21 - 18-00 Chelsea 1 - 1 Burnley/Labels-v2.json already exists
Download finished (or already present).


In [None]:
def build_resnet_tf2():
    """
    Construit un extracteur ResNet152 pré-entraîné ImageNet,
    identique à celui utilisé par SoccerNet (sortie 2048D).
    """
    from tensorflow.keras.applications import ResNet152
    from tensorflow.keras.applications.resnet import preprocess_input
    from tensorflow.keras.layers import Input, Lambda
    from tensorflow.keras.models import Model

    base_model = ResNet152(
        include_top=False,
        weights="imagenet",
        pooling="avg"
    )

    # ✅ Correction : encapsuler preprocess_input dans une couche Lambda
    inputs = Input(shape=(224, 224, 3), dtype=tf.float32)
    x = Lambda(lambda img: preprocess_input(img))(inputs)
    outputs = base_model(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model

# instantiate model once (kept on GPU)
print("Building TF2 ResNet152 model...")
tf_model = build_resnet_tf2()
print("Model ready.")

Building TF2 ResNet152 model...
Model ready.


In [None]:
# -------------------------
# 3) Extraction features (batch GPU, no dangerous threading)
# -------------------------
def extract_resnet_tf2_features(video_path, output_path, fps=2, batch_size=32, resize=(224,224)):
    """
    Lit la vidéo video_path, extrait 1 frame toutes les (1/fps) secondes,
    traite par batch sur le modèle TF2, et sauvegarde features (T,2048) en .npy
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(video_path)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError("Cannot open video: " + video_path)

    orig_fps = cap.get(cv2.CAP_PROP_FPS)
    if orig_fps <= 0 or np.isnan(orig_fps):
        orig_fps = 25.0  # fallback reasonable default
    frame_interval = max(1, int(round(orig_fps / fps)))

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print(f"Video FPS: {orig_fps:.2f}, frame_interval: {frame_interval}, total_frames: {total_frames}")

    frames_buffer = []
    features_list = []
    frame_idx = 0
    sampled_count = 0

    pbar = tqdm(total=total_frames, desc="Reading frames")
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_idx % frame_interval == 0:
            # convert BGR->RGB and resize
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, resize)
            frames_buffer.append(img.astype(np.uint8))
            sampled_count += 1

            # when buffer full, run predict
            if len(frames_buffer) >= batch_size:
                batch = np.stack(frames_buffer, axis=0)  # uint8 array [B,H,W,3]
                preds = tf_model.predict(batch, verbose=0)  # [B,2048]
                features_list.append(preds)
                frames_buffer = []
        frame_idx += 1
        pbar.update(1)

    # last partial batch
    if len(frames_buffer) > 0:
        batch = np.stack(frames_buffer, axis=0)
        preds = tf_model.predict(batch, verbose=0)
        features_list.append(preds)
        frames_buffer = []

    cap.release()
    pbar.close()

    if len(features_list) == 0:
        raise RuntimeError("No frames extracted - check FPS and video length.")

    features = np.concatenate(features_list, axis=0).astype(np.float32)  # [T,2048]
    np.save(output_path, features)
    print(f"Saved features to {output_path}, shape={features.shape}")
    return features

video_path = os.path.join(game_dir, video_filename)
output_path = os.path.join(game_dir, output_features_name)

print("Extracting features...")
features = extract_resnet_tf2_features(video_path, output_path, fps=TARGET_FPS, batch_size=BATCH_SIZE, resize=RESIZE)


Extracting features...
Video FPS: 25.00, frame_interval: 12, total_frames: 67500



Reading frames:   0%|          | 0/67500 [00:00<?, ?it/s][A
Reading frames:   0%|          | 159/67500 [00:00<00:42, 1580.29it/s][A
Reading frames:   1%|          | 345/67500 [00:00<00:38, 1743.55it/s][A
Reading frames:   1%|          | 520/67500 [00:10<29:56, 37.28it/s]  [A
Reading frames:   1%|          | 652/67500 [00:10<20:18, 54.87it/s][A
Reading frames:   1%|          | 808/67500 [00:10<13:55, 79.84it/s][A
Reading frames:   1%|▏         | 953/67500 [00:10<09:39, 114.75it/s][A
Reading frames:   2%|▏         | 1117/67500 [00:11<06:34, 168.28it/s][A
Reading frames:   2%|▏         | 1254/67500 [00:11<05:27, 202.38it/s][A
Reading frames:   2%|▏         | 1429/67500 [00:11<03:46, 291.59it/s][A
Reading frames:   2%|▏         | 1557/67500 [00:11<03:33, 308.35it/s][A
Reading frames:   3%|▎         | 1720/67500 [00:11<02:37, 418.96it/s][A
Reading frames:   3%|▎         | 1877/67500 [00:12<02:01, 541.91it/s][A
Reading frames:   3%|▎         | 2008/67500 [00:12<02:13, 491.51it/

Saved features to /content/england_epl/2014-2015/2015-02-21 - 18-00 Chelsea 1 - 1 Burnley/1_ResNET_TF2_custom.npy, shape=(5625, 2048)





In [None]:
# -------------------------
# 4) Loader universel pour .npy / .npz / pickles crude
# -------------------------
def load_soccernet_features(path, expected_dim=2048, verbose=True):
    """
    Robust loader: tries np.load, pickle, raw bytes fallback.
    Returns numpy array (T, expected_dim)
    """
    if not os.path.exists(path):
        raise FileNotFoundError(path)
    if verbose:
        print("Loading", path, "size(MB)=", os.path.getsize(path)/(1024*1024))

    # try numpy load
    try:
        obj = np.load(path, allow_pickle=True)
        if verbose:
            print("np.load type:", type(obj))
        if isinstance(obj, np.lib.npyio.NpzFile):
            # .npz archive
            keys = obj.files
            if verbose:
                print("npz keys:", keys)
            # try to pick a candidate array
            for k in keys:
                candidate = np.asarray(obj[k])
                if candidate.ndim == 2 and candidate.shape[1] == expected_dim:
                    return candidate.astype(np.float32)
            # else flatten candidate and try reshape
            first = np.asarray(obj[keys[0]])
            flat = first.ravel()
            if flat.size % expected_dim == 0:
                T = flat.size // expected_dim
                return flat.astype(np.float32).reshape(T, expected_dim)
        elif isinstance(obj, np.ndarray):
            arr = obj
            if arr.ndim == 2 and arr.shape[1] == expected_dim:
                return arr.astype(np.float32)
            # try to coerce from 1D/other to desired shape
            flat = arr.ravel()
            if flat.size % expected_dim == 0:
                T = flat.size // expected_dim
                return flat.astype(np.float32).reshape(T, expected_dim)
        else:
            # allowed pickled object in npy
            flat = np.asarray(obj).ravel()
            if flat.size % expected_dim == 0:
                T = flat.size // expected_dim
                return flat.astype(np.float32).reshape(T, expected_dim)
    except Exception as e:
        if verbose:
            print("np.load failed:", repr(e))

    # try pickle
    try:
        with open(path, "rb") as f:
            obj = pickle.load(f, encoding="latin1")
        if verbose:
            print("pickle type:", type(obj))
        if isinstance(obj, np.ndarray):
            if obj.ndim == 2 and obj.shape[1] == expected_dim:
                return obj.astype(np.float32)
            flat = obj.ravel()
            if flat.size % expected_dim == 0:
                T = flat.size // expected_dim
                return flat.astype(np.float32).reshape(T, expected_dim)
        if isinstance(obj, dict):
            for k, v in obj.items():
                a = np.asarray(v)
                if a.ndim == 2 and a.shape[1] == expected_dim:
                    return a.astype(np.float32)
                flat = a.ravel()
                if flat.size % expected_dim == 0:
                    T = flat.size // expected_dim
                    return flat.astype(np.float32).reshape(T, expected_dim)
        if isinstance(obj, list):
            try:
                arr = np.stack([np.asarray(x) for x in obj], axis=0)
                if arr.ndim == 2 and arr.shape[1] == expected_dim:
                    return arr.astype(np.float32)
            except Exception:
                pass
    except Exception as e:
        if verbose:
            print("pickle.load failed:", repr(e))

    # fallback: read raw bytes as float32
    try:
        with open(path, "rb") as f:
            raw = f.read()
        flat = np.frombuffer(raw, dtype=np.float32)
        if flat.size > 0 and flat.size % expected_dim == 0:
            T = flat.size // expected_dim
            if verbose:
                print("raw->float32 reshape possible:", (T, expected_dim))
            return flat.reshape(T, expected_dim)
    except Exception as e:
        if verbose:
            print("raw read failed:", repr(e))

    raise ValueError("Could not parse features file into shape (-1, {}).".format(expected_dim))


In [None]:
# -------------------------
# 5) Optionnel: Comparaison entre official (if present) and custom
# -------------------------
official_path = os.path.join(game_dir, "1_ResNET_TF2.npy")  # common official name
custom_path = output_path

print("\nTrying to load official features (if present) for comparison...")
try:
    official_feats = load_soccernet_features(official_path, expected_dim=2048, verbose=True)
    print("Official features shape:", official_feats.shape)
except Exception as e:
    print("Could not load official features:", e)
    official_feats = None

print("Custom features shape:", features.shape)

if official_feats is not None:
    # if shapes differ, try naive downsampling of custom to match length
    if official_feats.shape[0] != features.shape[0]:
        factor = int(round(features.shape[0] / official_feats.shape[0]))
        if factor <= 0: factor = 1
        print(f"Length mismatch: custom {features.shape[0]} vs official {official_feats.shape[0]}. Downsampling custom by factor ~{factor}.")
        down = features[::factor]
    else:
        down = features
    # compute simple correlation on a subset
    n = min(1000, official_feats.size, down.size)
    try:
        corr = np.corrcoef(official_feats.ravel()[:n], down.ravel()[:n])[0,1]
    except Exception:
        corr = float('nan')
    print(f"Correlation (first {n} values): {corr:.4f}")

print("\nAll done.")


Trying to load official features (if present) for comparison...
Loading /content/england_epl/2014-2015/2015-02-21 - 18-00 Chelsea 1 - 1 Burnley/1_ResNET_TF2.npy size(MB)= 15.0
np.load failed: ValueError('cannot reshape array of size 3932128 into shape (5400,2048)')
pickle.load failed: UnpicklingError('unpickling stack underflow')
raw->float32 reshape possible: (1920, 2048)
Official features shape: (1920, 2048)
Custom features shape: (5625, 2048)
Length mismatch: custom 5625 vs official 1920. Downsampling custom by factor ~3.
Correlation (first 1000 values): -0.0148

All done.
