In [1]:
!pip install -q gdown

import gdown

zip_url = 'https://drive.google.com/uc?id=1HCFLBO9fJutCKG11FtjKfdLvME6Qe_5L'
output_zip = '/kaggle/working/JAAD_clips.zip'

gdown.download(zip_url, output_zip, quiet=False)


Downloading...
From (original): https://drive.google.com/uc?id=1HCFLBO9fJutCKG11FtjKfdLvME6Qe_5L
From (redirected): https://drive.google.com/uc?id=1HCFLBO9fJutCKG11FtjKfdLvME6Qe_5L&confirm=t&uuid=54bc805e-f70a-4645-8fed-a4abb59fcd13
To: /kaggle/working/JAAD_clips.zip
100%|██████████| 3.08G/3.08G [00:24<00:00, 127MB/s] 


'/kaggle/working/JAAD_clips.zip'

In [2]:
# Crée la structure attendue
!mkdir -p /kaggle/working/JAAD_fixed/JAAD_clips

# Extraction AU BON ENDROIT
!unzip -q /kaggle/working/JAAD_clips.zip -d /kaggle/working/JAAD_fixed/JAAD_clips


In [3]:
!rm /kaggle/working/JAAD_clips.zip


In [4]:
!ls /kaggle/working/JAAD_fixed/JAAD_clips/JAAD_clips | head -5


video_0001.mp4
video_0002.mp4
video_0003.mp4
video_0004.mp4
video_0005.mp4


In [5]:
%cd /kaggle/working

# Cloner dans un dossier différent
!git clone https://github.com/ykotseruba/JAAD.git JAAD_full

# Vérification
!ls JAAD_full

/kaggle/working
Cloning into 'JAAD_full'...
remote: Enumerating objects: 6155, done.[K
remote: Counting objects: 100% (724/724), done.[K
remote: Compressing objects: 100% (72/72), done.[K
remote: Total 6155 (delta 672), reused 695 (delta 652), pack-reused 5431 (from 1)[K
Receiving objects: 100% (6155/6155), 42.16 MiB | 31.38 MiB/s, done.
Resolving deltas: 100% (5491/5491), done.
annotations		annotations_vehicle  LICENSE
annotations_appearance	behavior.png	     README.md
annotations_attributes	download_clips.sh    split_clips_to_frames.sh
annotations_traffic	jaad_data.py	     split_ids


In [6]:
!mkdir -p /kaggle/working/JAAD_fixed/annotations
!cp /kaggle/working/JAAD_full/annotations/* \
    /kaggle/working/JAAD_fixed/annotations/

In [7]:
!pip uninstall -y protobuf
!pip install -q protobuf==3.20.3

Found existing installation: protobuf 6.33.0
Uninstalling protobuf-6.33.0:
  Successfully uninstalled protobuf-6.33.0
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
opentelemetry-proto 1.37.0 requires protobuf<7.0,>=5.0, but you have protobuf 3.20.3 which is incompatible.
onnx 1.18.0 requires protobuf>=4.25.1, but you have protobuf 3.20.3 which is incompatible.
a2a-sdk 0.3.10 requires protobuf>=5.29.5, but you have protobuf 3.20.3 which is incompatible.
ray 2.51.1 requires click!=8.3.0,>=7.0, but you have click 8.3.0 which is incompatible.
bigframes 2.12.0 requires rich<14,>=12.4.4, but you have rich 14.2.0 which is inc

In [8]:
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam


2025-12-16 12:24:15.710421: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765887855.897734      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765887855.946996      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [9]:
def parse_jaad_annotations(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()

    frames = {}   #  IMPORTANT

    for track in root.findall(".//track[@label='pedestrian']"):
        for box in track.findall("box"):
            frame = int(box.attrib["frame"])

            bbox = (
                int(float(box.attrib["xtl"])),
                int(float(box.attrib["ytl"])),
                int(float(box.attrib["xbr"])),
                int(float(box.attrib["ybr"]))
            )

            labels = {
                "action": "standing",
                "cross": "not-crossing",
                "look": "not-looking"
            }

            for attr in box.findall("attribute"):
                if attr.attrib["name"] in labels:
                    labels[attr.attrib["name"]] = attr.text

            frames[frame] = {
                "bbox": bbox,
                "labels": labels
            }

    return frames   #  NE PAS OUBLIER


In [10]:
import numpy as np

def encode_labels(labels):
    return np.array([
        1 if labels["action"] == "walking" else 0,
        1 if labels["cross"] == "crossing" else 0,
        1 if labels["look"] == "looking" else 0
    ], dtype=np.float32)


In [11]:
import cv2

def crop_frame(frame, bbox, size=(224,224)):
    xtl, ytl, xbr, ybr = bbox
    h, w, _ = frame.shape

    xtl, ytl = max(0, xtl), max(0, ytl)
    xbr, ybr = min(w, xbr), min(h, ybr)

    crop = frame[ytl:ybr, xtl:xbr]
    if crop.size == 0:
        return None

    crop = cv2.resize(crop, size)
    crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
    return crop / 255.0


In [12]:
def load_cropped_clip(video_path, annotations, start_frame=0, num_frames=16):
    cap = cv2.VideoCapture(video_path)
    frames = []
    labels = []

    for i in range(num_frames):
        frame_id = start_frame + i
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_id)

        ret, frame = cap.read()
        if not ret or frame_id not in annotations:
            continue

        cropped = crop_frame(frame, annotations[frame_id]["bbox"])
        if cropped is not None:
            frames.append(cropped)
            labels.append(encode_labels(annotations[frame_id]["labels"]))

    cap.release()

    if len(frames) == 0:
        return None, None

    while len(frames) < num_frames:
        frames.append(frames[-1])
        labels.append(labels[-1])

    return np.array(frames), labels[-1]


In [13]:
from tensorflow.keras.utils import Sequence
import os
import numpy as np

class JAADI3DGenerator(Sequence):  
    def __init__(self, video_dir, ann_dir, video_list, batch_size=1, **kwargs):
        super().__init__(**kwargs)  #  IMPORTANT
        self.video_dir = video_dir
        self.ann_dir = ann_dir
        self.video_list = video_list
        self.batch_size = batch_size

    def __len__(self):
        return len(self.video_list) // self.batch_size

    def __getitem__(self, idx):
        X, Y = [], []

        i = idx * self.batch_size
        while len(X) < self.batch_size:
            if i >= len(self.video_list):
                i = 0  # boucle si problème

            name = self.video_list[i]
            i += 1

            video_path = os.path.join(self.video_dir, name + ".mp4")
            xml_path = os.path.join(self.ann_dir, name + ".xml")

            ann = parse_jaad_annotations(xml_path)
            clip, label = load_cropped_clip(video_path, ann)

            if clip is None:
                continue

            # GARANTIE DE FORME
            if clip.shape != (16,224,224,3):
                continue

            X.append(clip)
            Y.append(label)

        return np.stack(X), np.stack(Y)


In [14]:
videos = sorted([
    v.replace(".mp4","")
    for v in os.listdir("/kaggle/working/JAAD_fixed/JAAD_clips/JAAD_clips")
])[:100]

split = int(0.8 * len(videos))
train_videos = videos[:split]
val_videos = videos[split:]

train_gen = JAADI3DGenerator(
    "/kaggle/working/JAAD_fixed/JAAD_clips/JAAD_clips",
    "/kaggle/working/JAAD_fixed/annotations",
    train_videos
)

val_gen = JAADI3DGenerator(
    "/kaggle/working/JAAD_fixed/JAAD_clips/JAAD_clips",
    "/kaggle/working/JAAD_fixed/annotations",
    val_videos
)


In [15]:
x, y = train_gen[0]
print(x.shape)  # (1,16,224,224,3)
print(y.shape)  # (1,3)


(1, 16, 224, 224, 3)
(1, 3)


In [16]:
from tensorflow.keras.layers import Input
# ===============================
# Imports TensorFlow / Keras
# ===============================
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input,

    Conv3D,
    MaxPooling3D,
    AveragePooling3D,
    BatchNormalization,
    Dropout,
    Dense,
    Activation,
    Concatenate,
    Lambda,
    Reshape
)
from tensorflow.keras import backend as K
from tensorflow.keras.layers import GlobalAveragePooling1D


In [19]:
def I3DNet(freeze_conv_layers=False, weights=None, classes=1,
           dense_activation='softmax', dropout=0.5, num_channels=3, include_top=False):
    """
    I3D model implementation. Source: https://github.com/dlpbc/keras-kinetics-i3d
    Reference: Joao Carreira and Andrew Zisserman.  Quo vadis, action recognition?
    A new model and the kinetics dataset. CVPR, 2017.
    Args:
        freeze_conv_layers: Whether to freeze convolutional layers at the time of training
        weights: Pre-trained weights
        classes: Number of classes
        dense_activation: Activation of the last layer
        dropout: Dropout of dense layers
        include_top: Whether to add fc layers
    Returns:
        I3D model
    """
    def conv3d_bn(x,
                  filters,
                  num_frames,
                  num_row,
                  num_col,
                  padding='same',
                  strides=(1, 1, 1),
                  use_bias=False,
                  use_activation_fn=True,
                  use_bn=True,
                  name=None):
        """Utility function to apply conv3d + BN.

        # Arguments
            x: input tensor.
            filters: filters in `Conv3D`.
            num_frames: frames (time depth) of the convolution kernel.
            num_row: height of the convolution kernel.
            num_col: width of the convolution kernel.
            padding: padding mode in `Conv3D`.
            strides: strides in `Conv3D`.
            use_bias: use bias or not
            use_activation_fn: use an activation function or not.
            use_bn: use batch normalization or not.
            name: name of the ops; will become `name + '_conv'`
                for the convolution and `name + '_bn'` for the
                batch norm layer.

        # Returns
            Output tensor after applying `Conv3D` and `BatchNormalization`.
        """
        if name is not None:
            bn_name = name + '_bn'
            conv_name = name + '_conv'
        else:
            bn_name = None
            conv_name = None

        x = Conv3D(
            filters, (num_frames, num_row, num_col),
            strides=strides,
            padding=padding,
            use_bias=use_bias,
            name=conv_name)(x)

        if use_bn:
            bn_axis = 4
            x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x)

        if use_activation_fn:
            x = Activation('relu', name=name)(x)

        return x

    channel_axis = 4
    inputs = Input(shape=(16, 224, 224, num_channels))

    # Downsampling via convolution (spatial and temporal)
    x = conv3d_bn(inputs, 64, 7, 7, 7, strides=(2, 2, 2), padding='same', name='Conv3d_1a_7x7')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_2a_3x3')(x)
    x = conv3d_bn(x, 64, 1, 1, 1, strides=(1, 1, 1), padding='same', name='Conv3d_2b_1x1')
    x = conv3d_bn(x, 192, 3, 3, 3, strides=(1, 1, 1), padding='same', name='Conv3d_2c_3x3')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_3a_3x3')(x)

    # Mixed 3b
    branch_0 = conv3d_bn(x, 64, 1, 1, 1, padding='same', name='Conv3d_3b_0a_1x1')

    branch_1 = conv3d_bn(x, 96, 1, 1, 1, padding='same', name='Conv3d_3b_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 128, 3, 3, 3, padding='same', name='Conv3d_3b_1b_3x3')

    branch_2 = conv3d_bn(x, 16, 1, 1, 1, padding='same', name='Conv3d_3b_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 32, 3, 3, 3, padding='same', name='Conv3d_3b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_3b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 32, 1, 1, 1, padding='same', name='Conv3d_3b_3b_1x1')

    x = Concatenate(axis=channel_axis, name='Mixed_3b')([branch_0, branch_1, branch_2, branch_3])

    # Mixed 3c
    branch_0 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_3c_0a_1x1')

    branch_1 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_3c_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 192, 3, 3, 3, padding='same', name='Conv3d_3c_1b_3x3')

    branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_3c_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 96, 3, 3, 3, padding='same', name='Conv3d_3c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_3c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_3c_3b_1x1')

    x = Concatenate(axis=channel_axis, name='Mixed_3c')([branch_0, branch_1, branch_2, branch_3])

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((3, 3, 3), strides=(2, 2, 2), padding='same', name='MaxPool2d_4a_3x3')(x)

    # Mixed 4b
    branch_0 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_4b_0a_1x1')

    branch_1 = conv3d_bn(x, 96, 1, 1, 1, padding='same', name='Conv3d_4b_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 208, 3, 3, 3, padding='same', name='Conv3d_4b_1b_3x3')

    branch_2 = conv3d_bn(x, 16, 1, 1, 1, padding='same', name='Conv3d_4b_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 48, 3, 3, 3, padding='same', name='Conv3d_4b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4b_3b_1x1')

    x = Concatenate(axis=channel_axis, name='Mixed_4b')([branch_0, branch_1, branch_2, branch_3])

    # Mixed 4c
    branch_0 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_4c_0a_1x1')

    branch_1 = conv3d_bn(x, 112, 1, 1, 1, padding='same', name='Conv3d_4c_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 224, 3, 3, 3, padding='same', name='Conv3d_4c_1b_3x3')

    branch_2 = conv3d_bn(x, 24, 1, 1, 1, padding='same', name='Conv3d_4c_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4c_3b_1x1')

    x = Concatenate(axis=channel_axis, name='Mixed_4c')([branch_0, branch_1, branch_2, branch_3])

    # Mixed 4d
    branch_0 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_4d_0a_1x1')

    branch_1 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_4d_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 256, 3, 3, 3, padding='same', name='Conv3d_4d_1b_3x3')

    branch_2 = conv3d_bn(x, 24, 1, 1, 1, padding='same', name='Conv3d_4d_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4d_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4d_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4d_3b_1x1')

    x = Concatenate(axis=channel_axis, name='Mixed_4d')([branch_0, branch_1, branch_2, branch_3])

    # Mixed 4e
    branch_0 = conv3d_bn(x, 112, 1, 1, 1, padding='same', name='Conv3d_4e_0a_1x1')

    branch_1 = conv3d_bn(x, 144, 1, 1, 1, padding='same', name='Conv3d_4e_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 288, 3, 3, 3, padding='same', name='Conv3d_4e_1b_3x3')

    branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_4e_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4e_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4e_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4e_3b_1x1')

    x = Concatenate(axis=channel_axis, name='Mixed_4e')([branch_0, branch_1, branch_2, branch_3])

    # Mixed 4f
    branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='Conv3d_4f_0a_1x1')

    branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_4f_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_4f_1b_3x3')

    branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_4f_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_4f_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4f_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_4f_3b_1x1')

    x = Concatenate(axis=channel_axis, name='Mixed_4f')([branch_0, branch_1, branch_2, branch_3])

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same', name='MaxPool2d_5a_2x2')(x)

    # Mixed 5b
    branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='Conv3d_5b_0a_1x1')

    branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_5b_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_5b_1b_3x3')

    branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_5b_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5b_3b_1x1')

    x = Concatenate(axis=channel_axis, name='Mixed_5b')([branch_0, branch_1, branch_2, branch_3])

    # Mixed 5c
    branch_0 = conv3d_bn(x, 384, 1, 1, 1, padding='same', name='Conv3d_5c_0a_1x1')

    branch_1 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_5c_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 384, 3, 3, 3, padding='same', name='Conv3d_5c_1b_3x3')

    branch_2 = conv3d_bn(x, 48, 1, 1, 1, padding='same', name='Conv3d_5c_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5c_3b_1x1')

    x_concatenate = Concatenate(axis=channel_axis, name='Mixed_5c')([branch_0, branch_1, branch_2, branch_3])


    # create model
    if include_top:
        # Classification block
        x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid',
                             name='global_avg_pool')(x_concatenate)
        x = Dropout(dropout)(x)
        x = conv3d_bn(x, classes, 1, 1, 1, padding='same',
                      use_bias=True, use_activation_fn=False,
                      use_bn=False, name='Conv3d_6a_1x1_new')
        num_frames_remaining = int(x.shape[1])
        x = Reshape((num_frames_remaining, classes))(x)
        # logits (raw scores for each class)
        x = tf.keras.layers.GlobalAveragePooling1D()(x)
        # if not endpoint_logit:
        x = Activation(dense_activation, name='prediction')(x)
        net_model = Model(inputs, x, name='i3d_inception')
        if freeze_conv_layers:
            for layer in net_model.layers[:-5]:
                layer.trainable = False
            # for layer in net_model.layers:
            #     print(layer.name, layer.trainable)
    else:
        h = int(x.shape[2])
        w = int(x.shape[3])
        x = AveragePooling3D((2, h, w), strides=(1, 1, 1), padding='valid', name='global_avg_pool')(x_concatenate)
        net_model = Model(inputs, x, name='i3d_no_top')
        if freeze_conv_layers:
            for layer in net_model.layers[:-5]:
                layer.trainable = False
            # for layer in net_model.layers:
            #     print(layer.name, layer.trainable)

    if weights is not None:
        net_model.load_weights(weights, by_name=True)

    return net_model

In [20]:
#Modèle I3D
from tensorflow.keras.optimizers import Adam

model = I3DNet(
    classes=3,
    dense_activation='sigmoid',
    include_top=True
)

model.summary()

I0000 00:00:1765887951.757941      47 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1765887951.758582      47 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


In [21]:
model.compile(
    optimizer=Adam(1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)


In [22]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(
    monitor="val_loss",
    patience=2,
    restore_best_weights=True
)

model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=15,
    callbacks=[early_stop]
)
model.save("i3d_jaad_model_2.keras")

Epoch 1/15


I0000 00:00:1765887999.711969     154 service.cc:148] XLA service 0x7bc77c003650 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1765887999.712851     154 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1765887999.712879     154 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1765888003.445446     154 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 1/80[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:17:26[0m 59s/step - accuracy: 1.0000 - loss: 0.4067

I0000 00:00:1765888026.755723     154 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 2s/step - accuracy: 0.8190 - loss: 0.6746 - val_accuracy: 0.9500 - val_loss: 0.5008
Epoch 2/15
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 2s/step - accuracy: 0.9794 - loss: 0.5447 - val_accuracy: 0.9500 - val_loss: 0.5072
Epoch 3/15
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 2s/step - accuracy: 0.9314 - loss: 0.5169 - val_accuracy: 0.9500 - val_loss: 0.4986
Epoch 4/15
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 2s/step - accuracy: 0.9315 - loss: 0.5246 - val_accuracy: 0.9500 - val_loss: 0.4834
Epoch 5/15
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 2s/step - accuracy: 0.9543 - loss: 0.5105 - val_accuracy: 0.9500 - val_loss: 0.6984
Epoch 6/15
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 2s/step - accuracy: 0.9616 - loss: 0.4258 - val_accuracy: 0.9500 - val_loss: 1.2136


In [23]:
import tensorflow as tf


#  Charger le modèle
model = tf.keras.models.load_model(
    "i3d_jaad_model_2.keras",
    compile=False
)

In [24]:
import cv2
import numpy as np

def load_video_clip(video_path, num_frames=16, size=(224,224)):
    cap = cv2.VideoCapture(video_path)
    frames = []

    while len(frames) < num_frames:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.resize(frame, size)
        frame = frame / 255.0
        frames.append(frame)

    cap.release()

    while len(frames) < num_frames:
        frames.append(frames[-1])

    return np.array(frames, dtype=np.float32)


In [25]:
clip = load_video_clip("/kaggle/working/JAAD_fixed/JAAD_clips/JAAD_clips/video_0101.mp4")

# Ajouter la dimension batch
clip = np.expand_dims(clip, axis=0)   # (1,16,224,224,3)

pred = model.predict(clip)
print(pred)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[[0.97829586 0.733252   0.5784673 ]]


In [26]:
action = "walking" if pred[0][0] > 0.5 else "standing"
cross  = "crossing" if pred[0][1] > 0.5 else "not-crossing"
look   = "looking" if pred[0][2] > 0.5 else "not-looking"

print(action, cross, look)


walking crossing looking


In [61]:
import cv2
import numpy as np
import tensorflow as tf
import os
from PIL import Image, ImageDraw, ImageFont

# Charger le modèle
model = tf.keras.models.load_model(
    "/kaggle/working/i3d_jaad_model_2.keras",
    compile=False
)

def get_default_bbox(frame):
    """Définir une bounding box par défaut"""
    height, width = frame.shape[:2]
    bbox_width = width // 3
    bbox_height = height // 3
    x1 = (width - bbox_width) // 2
    y1 = (height - bbox_height) // 2
    x2 = x1 + bbox_width
    y2 = y1 + bbox_height
    return int(x1), int(y1), int(x2), int(y2)

def get_clip_from_buffer(frame_buffer, size=(224, 224)):
    """Préparer le clip pour I3D"""
    clip = []
    for frame in frame_buffer:
        frame = cv2.resize(frame, size)
        frame = frame / 255.0
        clip.append(frame)
    return np.array(clip)

def draw_results_on_frame(frame, action, cross, look, walking_prob, crossing_prob, looking_prob):
    """Dessiner les résultats sur la frame"""
    # Créer une copie
    overlay = frame.copy()
    h, w = frame.shape[:2]
    
    # Zone semi-transparente pour le texte
    box_height = 280
    box_width = 500
    x_start = 20
    y_start = 20
    
    # Fond semi-transparent
    cv2.rectangle(overlay, (x_start, y_start), 
                  (x_start + box_width, y_start + box_height), 
                  (0, 0, 0), -1)
    frame = cv2.addWeighted(overlay, 0.7, frame, 0.3, 0)
    
    # Bordure
    cv2.rectangle(frame, (x_start, y_start), 
                  (x_start + box_width, y_start + box_height), 
                  (255, 255, 255), 3)
    
    # Position du texte
    x_text = x_start + 15
    y_text = y_start + 40
    line_height = 35
    
    # Police et taille
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.7
    thickness = 2
    
    # Couleurs
    color_white = (255, 255, 255)
    color_green = (0, 255, 0)
    color_red = (0, 0, 255)
    color_orange = (0, 165, 255)
    color_gray = (180, 180, 180)
    
    # === ACTION ===
    action_emoji = "WALKING" if action == "WALKING" else "STANDING"
    action_color = color_green if action == "WALKING" else color_orange
    
    cv2.putText(frame, f"ACTION: {action_emoji}", 
                (x_text, y_text), font, font_scale, action_color, thickness)
    cv2.putText(frame, f"Probabilite: {walking_prob:.1%}", 
                (x_text + 20, y_text + line_height), font, 0.6, color_gray, 1)
    
    y_text += line_height * 2 + 10
    
    # === TRAVERSÉE ===
    cross_emoji = "CROSSING" if cross == "CROSSING" else "NOT-CROSSING"
    cross_color = color_green if cross == "CROSSING" else color_red
    
    cv2.putText(frame, f"TRAVERSEE: {cross_emoji}", 
                (x_text, y_text), font, font_scale, cross_color, thickness)
    cv2.putText(frame, f"Probabilite: {crossing_prob:.1%}", 
                (x_text + 20, y_text + line_height), font, 0.6, color_gray, 1)
    
    y_text += line_height * 2 + 10
    
    # === REGARD ===
    look_emoji = "LOOKING" if look == "LOOKING" else "NOT-LOOKING"
    look_color = color_green if look == "LOOKING" else color_red
    
    cv2.putText(frame, f"REGARD: {look_emoji}", 
                (x_text, y_text), font, font_scale, look_color, thickness)
    cv2.putText(frame, f"Probabilite: {looking_prob:.1%}", 
                (x_text + 20, y_text + line_height), font, 0.6, color_gray, 1)
    
    return frame

def predict_video_with_overlay(video_path, model, threshold=0.5, max_frames=100, 
                                output_video="output_annotated.mp4",
                                output_gif="output_annotated.gif"):
    """
    Analyser la vidéo et créer une version annotée + GIF
    """
    
    print("=" * 70)
    print(f" ANALYSE DE VIDÉO AVEC OVERLAY".center(70))
    print("=" * 70)
    print(f" Fichier: {os.path.basename(video_path)}")
    
    # Ouvrir la vidéo
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap = cv2.VideoCapture(video_path, cv2.CAP_FFMPEG)
        if not cap.isOpened():
            print(" Erreur: Impossible d'ouvrir la vidéo")
            return None
    
    # Propriétés vidéo
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
    print(f" Résolution: {width}x{height} @ {fps}fps")
    print("=" * 70)
    
    # Créer le writer pour la vidéo de sortie
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video, fourcc, fps, (width, height))
    
    # Buffer et variables
    buffer = []
    frame_count = 0
    clip_count = 0
    history = []
    
    # Dernières prédictions (pour affichage continu)
    last_action = "STANDING"
    last_cross = "NOT-CROSSING"
    last_look = "NOT-LOOKING"
    last_walking_prob = 0.0
    last_crossing_prob = 0.0
    last_looking_prob = 0.0
    
    # Frames pour le GIF
    gif_frames = []
    
    print("Traitement en cours...")
    
    while frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        
        # Bounding box
        x1, y1, x2, y2 = get_default_bbox(frame)
        crop = frame[y1:y2, x1:x2]
        
        # Ajouter au buffer (sans dessiner la bounding box)
        buffer.append(crop)
        if len(buffer) > 16:
            buffer.pop(0)
        
        # Faire la prédiction quand on a 16 frames
        if len(buffer) == 16:
            clip = get_clip_from_buffer(buffer)
            clip = np.expand_dims(clip, axis=0)
            
            # Prédiction
            pred = model.predict(clip, verbose=0)
            
            # Extraire les probabilités
            last_walking_prob = float(pred[0][0]) if pred.ndim == 2 else float(pred[0])
            last_crossing_prob = float(pred[0][1]) if pred.ndim == 2 else float(pred[1])
            last_looking_prob = float(pred[0][2]) if pred.ndim == 2 else float(pred[2])
            
            # Déterminer les actions
            last_action = "WALKING" if last_walking_prob > threshold else "STANDING"
            last_cross = "CROSSING" if last_crossing_prob > threshold else "NOT-CROSSING"
            last_look = "LOOKING" if last_looking_prob > threshold else "NOT-LOOKING"
            
            # Stocker l'historique
            history.append({
                'clip': clip_count,
                'action': last_action,
                'cross': last_cross,
                'look': last_look,
                'probs': [last_walking_prob, last_crossing_prob, last_looking_prob]
            })
            
            clip_count += 1
            
            print(f" Clip {clip_count}: {last_action} | {last_cross} | {last_look}")
        
        # Dessiner les résultats sur la frame
        annotated_frame = draw_results_on_frame(
            frame, last_action, last_cross, last_look,
            last_walking_prob, last_crossing_prob, last_looking_prob
        )
        
        # Écrire dans la vidéo de sortie
        out.write(annotated_frame)
        
        # Ajouter au GIF (une frame sur 3 pour réduire la taille)
        if frame_count % 3 == 0:
            # Convertir BGR à RGB pour PIL
            rgb_frame = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)
            # Redimensionner pour le GIF (plus petit)
            small_frame = cv2.resize(rgb_frame, (width//2, height//2))
            gif_frames.append(Image.fromarray(small_frame))
        
        frame_count += 1
    
    cap.release()
    out.release()
    
    print(f"\n Vidéo annotée sauvegardée: {output_video}")
    
    # Créer le GIF
    if gif_frames:
        print(f" Création du GIF...")
        gif_frames[0].save(
            output_gif,
            save_all=True,
            append_images=gif_frames[1:],
            duration=int(1000/fps)*3,  # durée en ms
            loop=0
        )
        print(f"GIF créé: {output_gif}")
    
    # Afficher le résumé
    print("\n" + "=" * 70)
    print(" RÉSUMÉ FINAL")
    print("=" * 70)
    
    if history:
        from collections import Counter
        actions = [h['action'] for h in history]
        crosses = [h['cross'] for h in history]
        looks = [h['look'] for h in history]
        
        total = len(history)
        
        print(f"\n ACTION ({total} clips):")
        print(f"   WALKING: {actions.count('WALKING')} ({actions.count('WALKING')/total*100:.1f}%)")
        print(f"   STANDING: {actions.count('STANDING')} ({actions.count('STANDING')/total*100:.1f}%)")
        
        print(f"\n TRAVERSÉE:")
        print(f"   CROSSING: {crosses.count('CROSSING')} ({crosses.count('CROSSING')/total*100:.1f}%)")
        print(f"   NOT-CROSSING: {crosses.count('NOT-CROSSING')} ({crosses.count('NOT-CROSSING')/total*100:.1f}%)")
        
        print(f"\n REGARD:")
        print(f"   LOOKING: {looks.count('LOOKING')} ({looks.count('LOOKING')/total*100:.1f}%)")
        print(f"   NOT-LOOKING: {looks.count('NOT-LOOKING')} ({looks.count('NOT-LOOKING')/total*100:.1f}%)")
        
        # Conclusion
        final_action = Counter(actions).most_common(1)[0][0]
        final_cross = Counter(crosses).most_common(1)[0][0]
        final_look = Counter(looks).most_common(1)[0][0]
        
        print(f"\n CONCLUSION:")
        print(f"   Action: {final_action}")
        print(f"   Traversée: {final_cross}")
        print(f"   Regard: {final_look}")
    
    print("=" * 70 + "\n")
    
    return history

# Exécuter
video_path = "/kaggle/working/JAAD_fixed/JAAD_clips/JAAD_clips/video_0186.mp4"
output_video = "/kaggle/working/output_annotated.mp4"
output_gif = "/kaggle/working/output_annotated.gif"

print(" Lancement de l'analyse avec overlay vidéo...")
history = predict_video_with_overlay(
    video_path, 
    model, 
    threshold=0.5, 
    max_frames=100,
    output_video=output_video,
    output_gif=output_gif
)

 Lancement de l'analyse avec overlay vidéo...
                     ANALYSE DE VIDÉO AVEC OVERLAY                    
 Fichier: video_0186.mp4
 Résolution: 1920x1080 @ 30fps
Traitement en cours...
 Clip 1: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 2: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 3: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 4: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 5: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 6: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 7: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 8: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 9: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 10: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 11: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 12: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 13: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 14: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 15: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 16: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 17: WALKING | NOT-CROSSING | NOT-LOOKING
 Clip 18: WALK

In [None]:
import cv2
import numpy as np
import tensorflow as tf
import os
from PIL import Image, ImageDraw, ImageFont

# Charger le modèle
model = tf.keras.models.load_model(
    "/kaggle/working/i3d_jaad_model_2.keras",
    compile=False
)

def get_default_bbox(frame):
    """Définir une bounding box par défaut"""
    height, width = frame.shape[:2]
    bbox_width = width // 3
    bbox_height = height // 3
    x1 = (width - bbox_width) // 2
    y1 = (height - bbox_height) // 2
    x2 = x1 + bbox_width
    y2 = y1 + bbox_height
    return int(x1), int(y1), int(x2), int(y2)

def get_clip_from_buffer(frame_buffer, size=(224, 224)):
    """Préparer le clip pour I3D"""
    clip = []
    for frame in frame_buffer:
        frame = cv2.resize(frame, size)
        frame = frame / 255.0
        clip.append(frame)
    return np.array(clip)

def draw_results_on_frame(frame, action, cross, look, walking_prob, crossing_prob, looking_prob):
    """Dessiner les résultats sur la frame"""
    # Créer une copie
    overlay = frame.copy()
    h, w = frame.shape[:2]
    
    # Zone semi-transparente pour le texte
    box_height = 280
    box_width = 500
    x_start = 20
    y_start = 20
    
    # Fond semi-transparent
    cv2.rectangle(overlay, (x_start, y_start), 
                  (x_start + box_width, y_start + box_height), 
                  (0, 0, 0), -1)
    frame = cv2.addWeighted(overlay, 0.7, frame, 0.3, 0)
    
    # Bordure
    cv2.rectangle(frame, (x_start, y_start), 
                  (x_start + box_width, y_start + box_height), 
                  (255, 255, 255), 3)
    
    # Position du texte
    x_text = x_start + 15
    y_text = y_start + 40
    line_height = 35
    
    # Police et taille
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.7
    thickness = 2
    
    # Couleurs
    color_white = (255, 255, 255)
    color_green = (0, 255, 0)
    color_red = (0, 0, 255)
    color_orange = (0, 165, 255)
    color_gray = (180, 180, 180)
    
    # === ACTION ===
    action_emoji = "WALKING" if action == "WALKING" else "STANDING"
    action_color = color_green if action == "WALKING" else color_orange
    
    cv2.putText(frame, f"ACTION: {action_emoji}", 
                (x_text, y_text), font, font_scale, action_color, thickness)
    cv2.putText(frame, f"Probabilite: {walking_prob:.1%}", 
                (x_text + 20, y_text + line_height), font, 0.6, color_gray, 1)
    
    y_text += line_height * 2 + 10
    
    # === TRAVERSÉE ===
    cross_emoji = "CROSSING" if cross == "CROSSING" else "NOT-CROSSING"
    cross_color = color_green if cross == "CROSSING" else color_red
    
    cv2.putText(frame, f"TRAVERSEE: {cross_emoji}", 
                (x_text, y_text), font, font_scale, cross_color, thickness)
    cv2.putText(frame, f"Probabilite: {crossing_prob:.1%}", 
                (x_text + 20, y_text + line_height), font, 0.6, color_gray, 1)
    
    y_text += line_height * 2 + 10
    
    # === REGARD ===
    look_emoji = "LOOKING" if look == "LOOKING" else "NOT-LOOKING"
    look_color = color_green if look == "LOOKING" else color_red
    
    cv2.putText(frame, f"REGARD: {look_emoji}", 
                (x_text, y_text), font, font_scale, look_color, thickness)
    cv2.putText(frame, f"Probabilite: {looking_prob:.1%}", 
                (x_text + 20, y_text + line_height), font, 0.6, color_gray, 1)
    
    return frame

def predict_video_with_overlay(video_path, model, threshold=0.5, max_frames=100, 
                                output_video="output_annotated.mp4",
                                output_gif="output_annotated.gif"):
    """
    Analyser la vidéo et créer une version annotée + GIF
    """
    
    print("=" * 70)
    print(f" ANALYSE DE VIDÉO AVEC OVERLAY".center(70))
    print("=" * 70)
    print(f"Fichier: {os.path.basename(video_path)}")
    
    # Ouvrir la vidéo
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap = cv2.VideoCapture(video_path, cv2.CAP_FFMPEG)
        if not cap.isOpened():
            print(" Erreur: Impossible d'ouvrir la vidéo")
            return None
    
    # Propriétés vidéo
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
    print(f" Résolution: {width}x{height} @ {fps}fps")
    print("=" * 70)
    
    # Créer le writer pour la vidéo de sortie
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video, fourcc, fps, (width, height))
    
    # Buffer et variables
    buffer = []
    frame_count = 0
    clip_count = 0
    history = []
    
    # Dernières prédictions (pour affichage continu)
    last_action = "STANDING"
    last_cross = "NOT-CROSSING"
    last_look = "NOT-LOOKING"
    last_walking_prob = 0.0
    last_crossing_prob = 0.0
    last_looking_prob = 0.0
    
    # Frames pour le GIF
    gif_frames = []
    
    print(" Traitement en cours...")
    
    while frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        
        # Bounding box
        x1, y1, x2, y2 = get_default_bbox(frame)
        crop = frame[y1:y2, x1:x2]
        
        # Ajouter au buffer (sans dessiner la bounding box)
        buffer.append(crop)
        if len(buffer) > 16:
            buffer.pop(0)
        
        # Faire la prédiction quand on a 16 frames
        if len(buffer) == 16:
            clip = get_clip_from_buffer(buffer)
            clip = np.expand_dims(clip, axis=0)
            
            # Prédiction
            pred = model.predict(clip, verbose=0)
            
            # Extraire les probabilités
            last_walking_prob = float(pred[0][0]) if pred.ndim == 2 else float(pred[0])
            last_crossing_prob = float(pred[0][1]) if pred.ndim == 2 else float(pred[1])
            last_looking_prob = float(pred[0][2]) if pred.ndim == 2 else float(pred[2])
            
            # Déterminer les actions
            last_action = "WALKING" if last_walking_prob > threshold else "STANDING"
            last_cross = "CROSSING" if last_crossing_prob > threshold else "NOT-CROSSING"
            last_look = "LOOKING" if last_looking_prob > threshold else "NOT-LOOKING"
            
            # Stocker l'historique
            history.append({
                'clip': clip_count,
                'action': last_action,
                'cross': last_cross,
                'look': last_look,
                'probs': [last_walking_prob, last_crossing_prob, last_looking_prob]
            })
            
            clip_count += 1
            
            print(f" Clip {clip_count}: {last_action} | {last_cross} | {last_look}")
        
        # Dessiner les résultats sur la frame
        annotated_frame = draw_results_on_frame(
            frame, last_action, last_cross, last_look,
            last_walking_prob, last_crossing_prob, last_looking_prob
        )
        
        # Écrire dans la vidéo de sortie
        out.write(annotated_frame)
        
        # Ajouter au GIF (une frame sur 3 pour réduire la taille)
        if frame_count % 3 == 0:
            # Convertir BGR à RGB pour PIL
            rgb_frame = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)
            # Redimensionner pour le GIF (plus petit)
            small_frame = cv2.resize(rgb_frame, (width//2, height//2))
            gif_frames.append(Image.fromarray(small_frame))
        
        frame_count += 1
    
    cap.release()
    out.release()
    
    print(f"\n Vidéo annotée sauvegardée: {output_video}")
    
    # Créer le GIF
    if gif_frames:
        print(f" Création du GIF...")
        gif_frames[0].save(
            output_gif,
            save_all=True,
            append_images=gif_frames[1:],
            duration=int(1000/fps)*3,  # durée en ms
            loop=0
        )
        print(f" GIF créé: {output_gif}")
    
    # Afficher le résumé
    print("\n" + "=" * 70)
    print(" RÉSUMÉ FINAL")
    print("=" * 70)
    
    if history:
        from collections import Counter
        actions = [h['action'] for h in history]
        crosses = [h['cross'] for h in history]
        looks = [h['look'] for h in history]
        
        total = len(history)
        
        print(f"\n ACTION ({total} clips):")
        print(f"   WALKING: {actions.count('WALKING')} ({actions.count('WALKING')/total*100:.1f}%)")
        print(f"   STANDING: {actions.count('STANDING')} ({actions.count('STANDING')/total*100:.1f}%)")
        
        print(f"\n TRAVERSÉE:")
        print(f"   CROSSING: {crosses.count('CROSSING')} ({crosses.count('CROSSING')/total*100:.1f}%)")
        print(f"   NOT-CROSSING: {crosses.count('NOT-CROSSING')} ({crosses.count('NOT-CROSSING')/total*100:.1f}%)")
        
        print(f"\n REGARD:")
        print(f"   LOOKING: {looks.count('LOOKING')} ({looks.count('LOOKING')/total*100:.1f}%)")
        print(f"   NOT-LOOKING: {looks.count('NOT-LOOKING')} ({looks.count('NOT-LOOKING')/total*100:.1f}%)")
        
        # Conclusion
        final_action = Counter(actions).most_common(1)[0][0]
        final_cross = Counter(crosses).most_common(1)[0][0]
        final_look = Counter(looks).most_common(1)[0][0]
        
        print(f"\n CONCLUSION:")
        print(f"   Action: {final_action}")
        print(f"   Traversée: {final_cross}")
        print(f"   Regard: {final_look}")
    
    print("=" * 70 + "\n")
    
    return history

def display_video_in_kaggle(video_path):
    """Afficher la vidéo dans Kaggle Notebook"""
    from IPython.display import Video, display
    
    print(" Affichage de la vidéo annotée...")
    display(Video(video_path, embed=True, width=800))

def display_gif_in_kaggle(gif_path):
    """Afficher le GIF dans Kaggle Notebook"""
    from IPython.display import Image as IPImage, display
    
    print(" Affichage du GIF...")
    display(IPImage(filename=gif_path))

# Exécuter
video_path = "/kaggle/working/JAAD_fixed/JAAD_clips/JAAD_clips/video_0186.mp4"
output_video = "/kaggle/working/output_annotated6.mp4"
output_gif = "/kaggle/working/output_annotated6.gif"

print(" Lancement de l'analyse avec overlay vidéo...")
history = predict_video_with_overlay(
    video_path, 
    model, 
    threshold=0.5, 
    max_frames=100,
    output_video=output_video,
    output_gif=output_gif
)

# Afficher la vidéo et le GIF dans le notebook
print("\n" + "=" * 70)
print("AFFICHAGE DES RÉSULTATS")
print("=" * 70)

# Afficher la vidéo annotée
display_video_in_kaggle(output_video)

print("\n")

# Afficher le GIF
display_gif_in_kaggle(output_gif)

In [None]:
import cv2
import numpy as np
import tensorflow as tf
import os
from PIL import Image, ImageDraw, ImageFont

# Charger le modèle
model = tf.keras.models.load_model(
    "/kaggle/working/i3d_jaad_model_2.keras",
    compile=False
)

def get_default_bbox(frame):
    """Définir une bounding box par défaut"""
    height, width = frame.shape[:2]
    bbox_width = width // 3
    bbox_height = height // 3
    x1 = (width - bbox_width) // 2
    y1 = (height - bbox_height) // 2
    x2 = x1 + bbox_width
    y2 = y1 + bbox_height
    return int(x1), int(y1), int(x2), int(y2)

def get_clip_from_buffer(frame_buffer, size=(224, 224)):
    """Préparer le clip pour I3D"""
    clip = []
    for frame in frame_buffer:
        frame = cv2.resize(frame, size)
        frame = frame / 255.0
        clip.append(frame)
    return np.array(clip)

def draw_results_on_frame(frame, action, cross, look, walking_prob, crossing_prob, looking_prob):
    """Dessiner les résultats sur la frame"""
    # Créer une copie
    overlay = frame.copy()
    h, w = frame.shape[:2]
    
    # Zone semi-transparente pour le texte
    box_height = 280
    box_width = 500
    x_start = 20
    y_start = 20
    
    # Fond semi-transparent
    cv2.rectangle(overlay, (x_start, y_start), 
                  (x_start + box_width, y_start + box_height), 
                  (0, 0, 0), -1)
    frame = cv2.addWeighted(overlay, 0.7, frame, 0.3, 0)
    
    # Bordure
    cv2.rectangle(frame, (x_start, y_start), 
                  (x_start + box_width, y_start + box_height), 
                  (255, 255, 255), 3)
    
    # Position du texte
    x_text = x_start + 15
    y_text = y_start + 40
    line_height = 35
    
    # Police et taille
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.7
    thickness = 2
    
    # Couleurs
    color_white = (255, 255, 255)
    color_green = (0, 255, 0)
    color_red = (0, 0, 255)
    color_orange = (0, 165, 255)
    color_gray = (180, 180, 180)
    
    # === ACTION ===
    action_emoji = "WALKING" if action == "WALKING" else "STANDING"
    action_color = color_green if action == "WALKING" else color_orange
    
    cv2.putText(frame, f"ACTION: {action_emoji}", 
                (x_text, y_text), font, font_scale, action_color, thickness)
    cv2.putText(frame, f"Probabilite: {walking_prob:.1%}", 
                (x_text + 20, y_text + line_height), font, 0.6, color_gray, 1)
    
    y_text += line_height * 2 + 10
    
    # === TRAVERSÉE ===
    cross_emoji = "CROSSING" if cross == "CROSSING" else "NOT-CROSSING"
    cross_color = color_green if cross == "CROSSING" else color_red
    
    cv2.putText(frame, f"TRAVERSEE: {cross_emoji}", 
                (x_text, y_text), font, font_scale, cross_color, thickness)
    cv2.putText(frame, f"Probabilite: {crossing_prob:.1%}", 
                (x_text + 20, y_text + line_height), font, 0.6, color_gray, 1)
    
    y_text += line_height * 2 + 10
    
    # === REGARD ===
    look_emoji = "LOOKING" if look == "LOOKING" else "NOT-LOOKING"
    look_color = color_green if look == "LOOKING" else color_red
    
    cv2.putText(frame, f"REGARD: {look_emoji}", 
                (x_text, y_text), font, font_scale, look_color, thickness)
    cv2.putText(frame, f"Probabilite: {looking_prob:.1%}", 
                (x_text + 20, y_text + line_height), font, 0.6, color_gray, 1)
    
    return frame

def predict_video_with_overlay(video_path, model, threshold=0.5, max_frames=100, 
                                output_video="output_annotated.mp4",
                                output_gif="output_annotated.gif"):
    """
    Analyser la vidéo et créer une version annotée + GIF
    """
    
    print("=" * 70)
    print(f" ANALYSE DE VIDÉO AVEC OVERLAY".center(70))
    print("=" * 70)
    print(f" Fichier: {os.path.basename(video_path)}")
    
    # Ouvrir la vidéo
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap = cv2.VideoCapture(video_path, cv2.CAP_FFMPEG)
        if not cap.isOpened():
            print(" Erreur: Impossible d'ouvrir la vidéo")
            return None
    
    # Propriétés vidéo
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
    print(f" Résolution: {width}x{height} @ {fps}fps")
    print("=" * 70)
    
    # Créer le writer pour la vidéo de sortie
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video, fourcc, fps, (width, height))
    
    # Buffer et variables
    buffer = []
    frame_count = 0
    clip_count = 0
    history = []
    
    # Dernières prédictions (pour affichage continu)
    last_action = "STANDING"
    last_cross = "NOT-CROSSING"
    last_look = "NOT-LOOKING"
    last_walking_prob = 0.0
    last_crossing_prob = 0.0
    last_looking_prob = 0.0
    
    # Frames pour le GIF
    gif_frames = []
    
    print(" Traitement en cours...")
    
    while frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        
        # Bounding box
        x1, y1, x2, y2 = get_default_bbox(frame)
        crop = frame[y1:y2, x1:x2]
        
        # Ajouter au buffer (sans dessiner la bounding box)
        buffer.append(crop)
        if len(buffer) > 16:
            buffer.pop(0)
        
        # Faire la prédiction quand on a 16 frames
        if len(buffer) == 16:
            clip = get_clip_from_buffer(buffer)
            clip = np.expand_dims(clip, axis=0)
            
            # Prédiction
            pred = model.predict(clip, verbose=0)
            
            # Extraire les probabilités
            last_walking_prob = float(pred[0][0]) if pred.ndim == 2 else float(pred[0])
            last_crossing_prob = float(pred[0][1]) if pred.ndim == 2 else float(pred[1])
            last_looking_prob = float(pred[0][2]) if pred.ndim == 2 else float(pred[2])
            
            # Déterminer les actions
            last_action = "WALKING" if last_walking_prob > threshold else "STANDING"
            last_cross = "CROSSING" if last_crossing_prob > threshold else "NOT-CROSSING"
            last_look = "LOOKING" if last_looking_prob > threshold else "NOT-LOOKING"
            
            # Stocker l'historique
            history.append({
                'clip': clip_count,
                'action': last_action,
                'cross': last_cross,
                'look': last_look,
                'probs': [last_walking_prob, last_crossing_prob, last_looking_prob]
            })
            
            clip_count += 1
            
            print(f" Clip {clip_count}: {last_action} | {last_cross} | {last_look}")
        
        # Dessiner les résultats sur la frame
        annotated_frame = draw_results_on_frame(
            frame, last_action, last_cross, last_look,
            last_walking_prob, last_crossing_prob, last_looking_prob
        )
        
        # Écrire dans la vidéo de sortie
        out.write(annotated_frame)
        
        # Ajouter au GIF (une frame sur 3 pour réduire la taille)
        if frame_count % 3 == 0:
            # Convertir BGR à RGB pour PIL
            rgb_frame = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)
            # Redimensionner pour le GIF (plus petit)
            small_frame = cv2.resize(rgb_frame, (width//2, height//2))
            gif_frames.append(Image.fromarray(small_frame))
        
        frame_count += 1
    
    cap.release()
    out.release()
    
    print(f"\nVidéo annotée sauvegardée: {output_video}")
    
    # Créer le GIF
    if gif_frames:
        print(f" Création du GIF avec {len(gif_frames)} frames...")
        try:
            gif_frames[0].save(
                output_gif,
                save_all=True,
                append_images=gif_frames[1:],
                duration=int(1000/fps)*3,  # durée en ms
                loop=0,
                optimize=False
            )
            print(f" GIF créé avec succès: {output_gif}")
            print(f"   Taille du fichier: {os.path.getsize(output_gif) / (1024*1024):.2f} MB")
        except Exception as e:
            print(f" Erreur lors de la création du GIF: {e}")
            output_gif = None
    else:
        print(" Aucune frame pour créer le GIF")
        output_gif = None
    
    # Afficher le résumé
    print("\n" + "=" * 70)
    print(" RÉSUMÉ FINAL")
    print("=" * 70)
    
    if history:
        from collections import Counter
        actions = [h['action'] for h in history]
        crosses = [h['cross'] for h in history]
        looks = [h['look'] for h in history]
        
        total = len(history)
        
        print(f"\n ACTION ({total} clips):")
        print(f"   WALKING: {actions.count('WALKING')} ({actions.count('WALKING')/total*100:.1f}%)")
        print(f"   STANDING: {actions.count('STANDING')} ({actions.count('STANDING')/total*100:.1f}%)")
        
        print(f"\n TRAVERSÉE:")
        print(f"   CROSSING: {crosses.count('CROSSING')} ({crosses.count('CROSSING')/total*100:.1f}%)")
        print(f"   NOT-CROSSING: {crosses.count('NOT-CROSSING')} ({crosses.count('NOT-CROSSING')/total*100:.1f}%)")
        
        print(f"\n REGARD:")
        print(f"   LOOKING: {looks.count('LOOKING')} ({looks.count('LOOKING')/total*100:.1f}%)")
        print(f"   NOT-LOOKING: {looks.count('NOT-LOOKING')} ({looks.count('NOT-LOOKING')/total*100:.1f}%)")
        
        # Conclusion
        final_action = Counter(actions).most_common(1)[0][0]
        final_cross = Counter(crosses).most_common(1)[0][0]
        final_look = Counter(looks).most_common(1)[0][0]
        
        print(f"\n CONCLUSION:")
        print(f"   Action: {final_action}")
        print(f"   Traversée: {final_cross}")
        print(f"   Regard: {final_look}")
    
    print("=" * 70 + "\n")
    
    return history

def display_video_in_kaggle(video_path):
    """Afficher la vidéo dans Kaggle Notebook"""
    from IPython.display import Video, display
    
    print("Affichage de la vidéo annotée...")
    display(Video(video_path, embed=True, width=800))

def display_gif_in_kaggle(gif_path):
    """Afficher le GIF dans Kaggle Notebook"""
    from IPython.display import Image as IPImage, display
    from IPython.display import FileLink
    
    print(" Affichage du GIF...")
    
    # Vérifier si le fichier existe
    if os.path.exists(gif_path):
        # Afficher le GIF
        display(IPImage(filename=gif_path))
        print("\n Télécharger le GIF:")
        display(FileLink(gif_path))
    else:
        print(f" Erreur: Le fichier {gif_path} n'existe pas")

# Exécuter
video_path = "/kaggle/working/JAAD_fixed/JAAD_clips/JAAD_clips/video_0186.mp4"
output_video = "/kaggle/working/output_annotated.mp4"
output_gif = "/kaggle/working/output_annotated.gif"

print(" Lancement de l'analyse avec overlay vidéo...")
history = predict_video_with_overlay(
    video_path, 
    model, 
    threshold=0.5, 
    max_frames=100,
    output_video=output_video,
    output_gif=output_gif
)

# Afficher la vidéo et le GIF dans le notebook
print("\n" + "=" * 70)
print(" AFFICHAGE DES RÉSULTATS")
print("=" * 70)

# Afficher la vidéo annotée
display_video_in_kaggle(output_video)

print("\n")

# Afficher le GIF seulement s'il a été créé
if output_gif and os.path.exists(output_gif):
    display_gif_in_kaggle(output_gif)
else:
    print("Le GIF n'a pas pu être créé")

In [None]:
import cv2
import numpy as np
import tensorflow as tf
import os
import imageio.v2 as imageio
from IPython.display import display, Image
import matplotlib.pyplot as plt

# Charger le modèle
model = tf.keras.models.load_model(
    "/kaggle/working/i3d_jaad_model_2.keras",
    compile=False
)
video_path='/kaggle/working/JAAD_fixed/JAAD_clips/JAAD_clips/video_0186.mp4'

# Fonctions auxiliaires
def get_default_bbox(frame):
    """Définir une bounding box par défaut"""
    height, width = frame.shape[:2]
    bbox_width = width // 3
    bbox_height = height // 3
    x1 = (width - bbox_width) // 2
    y1 = (height - bbox_height) // 2
    x2 = x1 + bbox_width
    y2 = y1 + bbox_height
    return int(x1), int(y1), int(x2), int(y2)

def get_clip_from_buffer(frame_buffer, size=(224, 224)):
    """Préparer le clip pour I3D"""
    clip = []
    for frame in frame_buffer:
        frame = cv2.resize(frame, size)
        frame = frame / 255.0
        clip.append(frame)
    return np.array(clip)  # (16, 224, 224, 3)

def create_prediction_gif(video_path, model, output_gif="prediction.gif", num_frames=50, threshold=0.5):
    """Créer un GIF avec les prédictions en temps réel"""
    
    print(f" Création du GIF des prédictions...")
    
    # Ouvrir la vidéo
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(" Erreur: Impossible d'ouvrir la vidéo pour le GIF")
        return None
    
    # Buffer pour les prédictions
    buffer = []
    frames_for_gif = []
    predictions_history = []
    
    frame_count = 0
    clip_count = 0
    
    while frame_count < num_frames:
        ret, frame = cap.read()
        if not ret:
            break
        
        # Copie pour annotation
        frame_annotated = frame.copy()
        
        # Bounding box
        x1, y1, x2, y2 = get_default_bbox(frame)
        crop = frame[y1:y2, x1:x2]
        
        # Dessiner la bounding box
        cv2.rectangle(frame_annotated, (x1, y1), (x2, y2), (0, 255, 0), 3)
        
        # Ajouter au buffer
        buffer.append(crop)
        if len(buffer) > 16:
            buffer.pop(0)
        
        # Faire la prédiction quand on a 16 frames
        if len(buffer) == 16:
            clip = get_clip_from_buffer(buffer)
            clip = np.expand_dims(clip, axis=0)  # (1, 16, 224, 224, 3)
            
            # Prédiction
            pred = model.predict(clip, verbose=0)
            
            # Extraire les probabilités
            if pred.ndim == 2:
                walking_prob = float(pred[0][0])
                crossing_prob = float(pred[0][1])
                looking_prob = float(pred[0][2])
            else:
                walking_prob = float(pred[0])
                crossing_prob = float(pred[1])
                looking_prob = float(pred[2])
            
            # Déterminer les actions
            action = "WALKING" if walking_prob > threshold else "STANDING"
            cross = "CROSSING" if crossing_prob > threshold else "NOT-CROSSING"
            look = "LOOKING" if looking_prob > threshold else "NOT-LOOKING"
            
            # Stocker l'historique
            predictions_history.append({
                'frame': frame_count,
                'action': action,
                'cross': cross,
                'look': look,
                'probs': [walking_prob, crossing_prob, looking_prob]
            })
            
            # Couleurs (vert pour oui, rouge pour non)
            action_color = (0, 255, 0) if action == "WALKING" else (0, 0, 255)
            cross_color = (0, 255, 0) if cross == "CROSSING" else (0, 0, 255)
            look_color = (0, 255, 0) if look == "LOOKING" else (0, 0, 255)
            
            # Ajouter un fond semi-transparent pour le texte
            overlay = frame_annotated.copy()
            cv2.rectangle(overlay, (10, 10), (350, 150), (0, 0, 0), -1)
            cv2.addWeighted(overlay, 0.6, frame_annotated, 0.4, 0, frame_annotated)
            
            # Ajouter les textes avec les prédictions
            y_pos = 40
            cv2.putText(frame_annotated, f"Clip #{clip_count + 1}", (20, y_pos), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
            y_pos += 35
            
            cv2.putText(frame_annotated, f"Action: {action}", (20, y_pos), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, action_color, 2)
            cv2.putText(frame_annotated, f"({walking_prob:.2f})", (180, y_pos), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)
            y_pos += 30
            
            cv2.putText(frame_annotated, f"Crossing: {cross}", (20, y_pos), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, cross_color, 2)
            cv2.putText(frame_annotated, f"({crossing_prob:.2f})", (180, y_pos), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)
            y_pos += 30
            
            cv2.putText(frame_annotated, f"Looking: {look}", (20, y_pos), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, look_color, 2)
            cv2.putText(frame_annotated, f"({looking_prob:.2f})", (180, y_pos), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)
            
            clip_count += 1
        
        # Ajouter le numéro de frame
        cv2.putText(frame_annotated, f"Frame: {frame_count}", (frame.shape[1] - 150, 40), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
        
        # Ajouter une barre de progression en bas
        progress_width = int((frame_count / num_frames) * frame.shape[1])
        cv2.rectangle(frame_annotated, (0, frame.shape[0] - 20), 
                     (progress_width, frame.shape[0]), (0, 255, 0), -1)
        
        # Redimensionner pour le GIF
        frame_resized = cv2.resize(frame_annotated, (640, 360))
        frame_rgb = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)
        frames_for_gif.append(frame_rgb)
        
        frame_count += 1
    
    cap.release()
    
    # Créer le GIF
    if frames_for_gif:
        gif_path = f"/kaggle/working/{output_gif}"
        imageio.mimsave(gif_path, frames_for_gif, fps=10)
        
        print(f"GIF créé: {gif_path}")
        print(f"{len(frames_for_gif)} frames dans le GIF")
        print(f"{clip_count} clips analysés")
        
        # Afficher le GIF
        display(Image(filename=gif_path))
        
        # Afficher un résumé des prédictions
        if predictions_history:
            print("\n RÉSUMÉ DES PRÉDICTIONS DANS LE GIF:")
            print("=" * 60)
            
            # Compter les occurrences
            actions = [p['action'] for p in predictions_history]
            crosses = [p['cross'] for p in predictions_history]
            looks = [p['look'] for p in predictions_history]
            
            from collections import Counter
            
            action_counts = Counter(actions)
            cross_counts = Counter(crosses)
            look_counts = Counter(looks)
            
            print(f"\n ACTION: {action_counts}")
            print(f"CROSSING: {cross_counts}")
            print(f" LOOKING: {look_counts}")
            
            # Afficher les dernières prédictions
            print(f"\n DERNIÈRES PRÉDICTIONS:")
            for pred in predictions_history[-3:]:
                print(f"  Frame {pred['frame']}: {pred['action']} | {pred['cross']} | {pred['look']}")
    
    return gif_path if frames_for_gif else None

def predict_video_binary(video_path, model, threshold=0.5, max_frames=100):
    """
    Version avec sortie binaire : walking/standing, crossing/not-crossing, looking/not-looking
    """
    
    print(f" Analyse de: {video_path}")
    
    # Ouvrir la vidéo
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap = cv2.VideoCapture(video_path, cv2.CAP_FFMPEG)
        if not cap.isOpened():
            print(" Erreur: Impossible d'ouvrir la vidéo")
            return None
    
    # Propriétés vidéo
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
    print(f" {width}x{height} @ {fps}fps")
    
    # Buffer et compteurs
    buffer = []
    frame_count = 0
    clip_count = 0
    
    # Historique des prédictions
    history = []
    
    print("\n DÉBUT DE L'ANALYSE:")
    print("=" * 50)
    
    # Traitement frame par frame
    while frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        
        # Bounding box
        x1, y1, x2, y2 = get_default_bbox(frame)
        crop = frame[y1:y2, x1:x2]
        
        # Ajouter au buffer
        buffer.append(crop)
        if len(buffer) > 16:
            buffer.pop(0)
        
        # Quand on a 16 frames, faire la prédiction
        if len(buffer) == 16:
            clip = get_clip_from_buffer(buffer)
            clip = np.expand_dims(clip, axis=0)  # (1, 16, 224, 224, 3)
            
            # Prédiction
            pred = model.predict(clip, verbose=0)  # Shape: (1, 3) ou (3,)
            
            # Convertir en sortie binaire
            walking_prob = float(pred[0][0]) if pred.ndim == 2 else float(pred[0])
            crossing_prob = float(pred[0][1]) if pred.ndim == 2 else float(pred[1])
            looking_prob = float(pred[0][2]) if pred.ndim == 2 else float(pred[2])
            
            # Déterminer les actions
            action = "WALKING" if walking_prob > threshold else "STANDING"
            cross = "CROSSING" if crossing_prob > threshold else "NOT-CROSSING"
            look = "LOOKING" if looking_prob > threshold else "NOT-LOOKING"
            
            # Stocker dans l'historique
            history.append({
                'clip': clip_count,
                'frame': frame_count,
                'action': action,
                'cross': cross,
                'look': look,
                'probs': [walking_prob, crossing_prob, looking_prob]
            })
            
            # Afficher la prédiction
            print(f"\n CLIP {clip_count + 1} (Frame {frame_count}):")
            print(f"   Action: {action} (confiance: {walking_prob:.3f})")
            print(f"   Traversée: {cross} (confiance: {crossing_prob:.3f})")
            print(f"   Regard: {look} (confiance: {looking_prob:.3f})")
            
            clip_count += 1
        
        frame_count += 1
    
    cap.release()
    
    # Afficher le résumé
    print("\n" + "=" * 50)
    print(" RÉSUMÉ FINAL:")
    print("=" * 50)
    
    if history:
        # Compter les occurrences
        actions = [h['action'] for h in history]
        crosses = [h['cross'] for h in history]
        looks = [h['look'] for h in history]
        
        walking_count = actions.count("WALKING")
        standing_count = actions.count("STANDING")
        crossing_count = crosses.count("CROSSING")
        not_crossing_count = crosses.count("NOT-CROSSING")
        looking_count = looks.count("LOOKING")
        not_looking_count = looks.count("NOT-LOOKING")
        
        print(f"\n STATISTIQUES sur {len(history)} clips:")
        print(f"\n ACTION:")
        print(f"   Walking: {walking_count} clips ({walking_count/len(history)*100:.1f}%)")
        print(f"   Standing: {standing_count} clips ({standing_count/len(history)*100:.1f}%)")
        
        print(f"\n TRAVERSÉE:")
        print(f"   Crossing: {crossing_count} clips ({crossing_count/len(history)*100:.1f}%)")
        print(f"   Not-crossing: {not_crossing_count} clips ({not_crossing_count/len(history)*100:.1f}%)")
        
        print(f"\n REGARD:")
        print(f"   Looking: {looking_count} clips ({looking_count/len(history)*100:.1f}%)")
        print(f"   Not-looking: {not_looking_count} clips ({not_looking_count/len(history)*100:.1f}%)")
        
        # Décision finale (mode)
        from collections import Counter
        final_action = Counter(actions).most_common(1)[0][0]
        final_cross = Counter(crosses).most_common(1)[0][0]
        final_look = Counter(looks).most_common(1)[0][0]
        
        print("\n" + "=" * 50)
        print("CONCLUSION GLOBALE:")
        print(f"   Action principale: {final_action}")
        print(f"   État de traversée: {final_cross}")
        print(f"   Direction du regard: {final_look}")
        print("=" * 50)
    
    return history

# Fonction pour visualiser les résultats
def visualize_predictions(history):
    """Visualiser les prédictions sous forme de graphiques"""
    
    if not history:
        print(" Aucune donnée à visualiser")
        return
    
    # Extraire les données
    clips = [h['clip'] for h in history]
    actions_binary = [1 if h['action'] == "WALKING" else 0 for h in history]
    crosses_binary = [1 if h['cross'] == "CROSSING" else 0 for h in history]
    looks_binary = [1 if h['look'] == "LOOKING" else 0 for h in history]
    
    walking_probs = [h['probs'][0] for h in history]
    crossing_probs = [h['probs'][1] for h in history]
    looking_probs = [h['probs'][2] for h in history]
    
    # Créer la visualisation
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    
    # Graphique 1: Probabilités brutes
    axes[0, 0].plot(clips, walking_probs, 'b-', label='Walking', linewidth=2, marker='o', markersize=4)
    axes[0, 0].plot(clips, crossing_probs, 'g-', label='Crossing', linewidth=2, marker='s', markersize=4)
    axes[0, 0].plot(clips, looking_probs, 'r-', label='Looking', linewidth=2, marker='^', markersize=4)
    axes[0, 0].axhline(y=0.5, color='k', linestyle='--', alpha=0.5, label='Seuil (0.5)')
    axes[0, 0].set_xlabel('Clip Index')
    axes[0, 0].set_ylabel('Probabilité')
    axes[0, 0].set_title('Probabilités brutes')
    axes[0, 0].legend()
    axes[0, 0].grid(True, alpha=0.3)
    axes[0, 0].set_ylim([0, 1])
    
    # Graphique 2: Évolution binaire
    axes[0, 1].step(clips, actions_binary, where='mid', label='Action', linewidth=3, color='blue')
    axes[0, 1].step(clips, crosses_binary, where='mid', label='Crossing', linewidth=3, color='green')
    axes[0, 1].step(clips, looks_binary, where='mid', label='Looking', linewidth=3, color='red')
    axes[0, 1].set_xlabel('Clip Index')
    axes[0, 1].set_ylabel('État (1=Oui, 0=Non)')
    axes[0, 1].set_title('Évolution binaire')
    axes[0, 1].legend()
    axes[0, 1].grid(True, alpha=0.3)
    axes[0, 1].set_yticks([0, 1])
    axes[0, 1].set_ylim([-0.1, 1.1])
    
    # Graphique 3: Heatmap des probabilités
    prob_matrix = np.array([walking_probs, crossing_probs, looking_probs]).T
    im1 = axes[0, 2].imshow(prob_matrix, aspect='auto', cmap='RdYlGn', 
                          vmin=0, vmax=1, interpolation='nearest')
    axes[0, 2].set_xlabel('Clip Index')
    axes[0, 2].set_ylabel('Catégorie')
    axes[0, 2].set_yticks(range(3))
    axes[0, 2].set_yticklabels(['Walking', 'Crossing', 'Looking'])
    axes[0, 2].set_title('Heatmap des probabilités')
    plt.colorbar(im1, ax=axes[0, 2], label='Probabilité')
    
    # Graphique 4: Distribution binaire
    categories = ['Walking', 'Crossing', 'Looking']
    yes_counts = [sum(actions_binary), sum(crosses_binary), sum(looks_binary)]
    no_counts = [len(actions_binary)-sum(actions_binary), 
                 len(crosses_binary)-sum(crosses_binary), 
                 len(looks_binary)-sum(looks_binary)]
    
    x = np.arange(len(categories))
    width = 0.35
    
    axes[1, 0].bar(x - width/2, yes_counts, width, label='Oui', color='green', alpha=0.7, edgecolor='black')
    axes[1, 0].bar(x + width/2, no_counts, width, label='Non', color='red', alpha=0.7, edgecolor='black')
    
    axes[1, 0].set_xlabel('Catégorie')
    axes[1, 0].set_ylabel('Nombre de clips')
    axes[1, 0].set_title('Distribution binaire')
    axes[1, 0].set_xticks(x)
    axes[1, 0].set_xticklabels(categories)
    axes[1, 0].legend()
    axes[1, 0].grid(True, alpha=0.3, axis='y')
    
    # Ajouter les pourcentages
    for i, (yes, no) in enumerate(zip(yes_counts, no_counts)):
        total = yes + no
        axes[1, 0].text(i - width/2, yes + 0.1, f'{yes}\n({yes/total*100:.0f}%)', 
                       ha='center', va='bottom', fontsize=9)
        axes[1, 0].text(i + width/2, no + 0.1, f'{no}\n({no/total*100:.0f}%)', 
                       ha='center', va='bottom', fontsize=9)
    
    # Graphique 5: Heatmap binaire
    binary_matrix = np.array([actions_binary, crosses_binary, looks_binary]).T
    im2 = axes[1, 1].imshow(binary_matrix, aspect='auto', cmap='RdYlGn', 
                          vmin=0, vmax=1, interpolation='nearest')
    axes[1, 1].set_xlabel('Clip Index')
    axes[1, 1].set_ylabel('Catégorie')
    axes[1, 1].set_yticks(range(3))
    axes[1, 1].set_yticklabels(['Walking', 'Crossing', 'Looking'])
    axes[1, 1].set_title('Heatmap binaire (Vert=Oui, Rouge=Non)')
    plt.colorbar(im2, ax=axes[1, 1], ticks=[0, 1])
    
    # Graphique 6: Tendances cumulatives
    cumulative_walking = np.cumsum(actions_binary) / (np.arange(len(actions_binary)) + 1)
    cumulative_crossing = np.cumsum(crosses_binary) / (np.arange(len(crosses_binary)) + 1)
    cumulative_looking = np.cumsum(looks_binary) / (np.arange(len(looks_binary)) + 1)
    
    axes[1, 2].plot(clips, cumulative_walking, label='Walking', linewidth=2, color='blue')
    axes[1, 2].plot(clips, cumulative_crossing, label='Crossing', linewidth=2, color='green')
    axes[1, 2].plot(clips, cumulative_looking, label='Looking', linewidth=2, color='red')
    axes[1, 2].axhline(y=0.5, color='k', linestyle='--', alpha=0.5, label='Seuil 50%')
    
    axes[1, 2].set_xlabel('Clip Index')
    axes[1, 2].set_ylabel('Proportion cumulative')
    axes[1, 2].set_title('Tendances cumulatives')
    axes[1, 2].legend()
    axes[1, 2].grid(True, alpha=0.3)
    axes[1, 2].set_ylim([0, 1])
    
    plt.suptitle('ANALYSE DES COMPORTEMENTS DE PIÉTON - JAAD DATASET', 
                fontsize=16, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.show()

# Exécuter l'analyse complète
video_path = "/kaggle/working/JAAD_fixed/JAAD_clips/JAAD_clips/video_0186.mp4"

print("=" * 70)
print(" LANCEMENT DE L'ANALYSE COMPLÈTE")
print("=" * 70)

# Étape 1: Analyse détaillée
print("\n ÉTAPE 1: Analyse détaillée des prédictions")
print("-" * 50)
history = predict_video_binary(video_path, model, threshold=0.5, max_frames=100)

# Étape 2: Création du GIF
print("\n ÉTAPE 2: Création du GIF des prédictions")
print("-" * 50)
gif_path = create_prediction_gif(video_path, model, "jaad_predictions.gif", num_frames=100, threshold=0.5)

# Étape 3: Visualisation graphique
if history:
    print("\n ÉTAPE 3: Visualisation graphique des résultats")
    print("-" * 50)
    visualize_predictions(history)
    
    # Étape 4: Sauvegarde des résultats
    print("\n ÉTAPE 4: Sauvegarde des résultats")
    print("-" * 50)
    
    # Sauvegarder l'historique en CSV
    import pandas as pd
    
    df = pd.DataFrame(history)
    csv_path = "/kaggle/working/jaad_predictions.csv"
    df.to_csv(csv_path, index=False)
    print(f" Historique sauvegardé: {csv_path}")
    
    # Afficher un aperçu des données
    print(f"\n Aperçu des données ({len(df)} enregistrements):")
    print(df.head())
    
    # Résumé statistique
    print("\n Statistiques descriptives:")
    print(df[['probs']].apply(lambda x: pd.Series(x[0]), axis=1).describe())
    
    # Lien de téléchargement
    from IPython.display import FileLink
    print(f"\n Télécharger les résultats:")
    display(FileLink(csv_path))
    if gif_path:
        display(FileLink(gif_path))

print("\n" + "=" * 70)
print(" ANALYSE TERMINÉE AVEC SUCCÈS!")
print("=" * 70)