# Gesture inference and ensemble demo

This notebook shows how to load the MIL model and a deeper per-window model, run both on a bag of windows extracted from an already-preprocessed clip, and combine their outputs into an ensemble prediction (the average of the two models' class probabilities).

In [None]:
import numpy as np
import tensorflow as tf
import json
import os
from Code.dataset.windowed_generator import generate_windows_from_csv

# Paths to the saved Keras models and their label-map JSON files.
MIL_MODEL = 'Models/gesture_wlasl_mil_finetuned_mil.keras'
DEEPER_MODEL = 'Models/gesture_wlasl_deeper_encinit_long.keras'
MIL_LABELS = 'Models/gesture_wlasl_mil_finetuned_mil_labels.json'
DEEPER_LABELS = 'Models/gesture_wlasl_deeper_encinit_long_labels.json'

# Load both models once up front; the bag-level (MIL) model and the
# per-window model are queried separately further down.
mil = tf.keras.models.load_model(MIL_MODEL)
deeper = tf.keras.models.load_model(DEEPER_MODEL)

# Label maps are used below as {label name: class index} dicts.
# JSON text is UTF-8 by specification, so open the files with an explicit
# encoding instead of relying on the platform default (which is not UTF-8
# on every system).
with open(MIL_LABELS, encoding='utf-8') as f:
    mil_labels = json.load(f)
with open(DEEPER_LABELS, encoding='utf-8') as f:
    deeper_labels = json.load(f)

def windows_for_clip(csv_path, clip_id, window=16, stride=4):
    """Collect every window belonging to one clip into a single array.

    Iterates over everything ``generate_windows_from_csv`` yields for
    ``csv_path`` (called with ``window_size=window``, ``stride=stride`` and
    ``pad=True``) and keeps the ``'window'`` entry of each item whose
    ``'clip_id'`` equals ``clip_id``.

    Args:
        csv_path: Path of the frames CSV handed to the window generator.
        clip_id: Clip identifier to match against each item's ``'clip_id'``.
        window: Window size forwarded to the generator.
        stride: Stride forwarded to the generator.

    Returns:
        The matching windows stacked along a new leading axis, or an empty
        array of shape ``(0, window, 0)`` when no item matches.
    """
    source = generate_windows_from_csv(
        csv_path, window_size=window, stride=stride, pad=True
    )
    matched = [entry['window'] for entry in source if entry['clip_id'] == clip_id]
    if not matched:
        # Nothing for this clip: hand back an empty placeholder array.
        return np.zeros((0, window, 0))
    return np.stack(matched, axis=0)

# Example: pick one clip from the dataset CSV.
CSV = 'Dataset/Generated_Data/wlasl_pipeline_frames.csv'

# Use the clip id of the first window the generator yields as the sample.
# next() with a default replaces the former for/break loop, which left
# `clip` unbound (NameError on the next line) when nothing was yielded.
item = next(
    iter(generate_windows_from_csv(CSV, window_size=16, stride=4, pad=True)),
    None,
)
if item is None:
    raise SystemExit(f'No windows found in {CSV}')
clip = item['clip_id']

# Gather every window of that clip; stop if the clip produced none.
windows = windows_for_clip(CSV, clip)
if windows.size == 0:
    raise SystemExit('No windows for clip')

# Build a bag of exactly BAG_SIZE windows: random subsample when the clip
# has enough windows, otherwise pad by repeating the last window.
BAG_SIZE = 32
n_windows = windows.shape[0]
shortfall = BAG_SIZE - n_windows
if shortfall > 0:
    # Too few windows: append `shortfall` copies of the final window.
    filler = np.repeat(windows[-1:], shortfall, axis=0)
    bag = np.concatenate((windows, filler), axis=0)
else:
    # Enough windows: draw BAG_SIZE distinct indices (no replacement).
    # Note the draw is unseeded, so the selection varies between runs.
    chosen = np.random.choice(n_windows, size=BAG_SIZE, replace=False)
    bag = windows[chosen]

# Per-window model: run it on the bag and average its output over the
# leading axis to get one clip-level vector.
# (presumably one row of class probabilities per window - TODO confirm)
deeper_probs = deeper.predict(bag, verbose=0)
deeper_clip_prob = deeper_probs.mean(axis=0)

# Re-index the per-window model's scores into the MIL label index space.
# Labels the MIL map does not contain are skipped; MIL slots with no
# counterpart in the per-window label map stay at 0.
mil_from_deeper = np.zeros(len(mil_labels))
for label, deeper_idx in deeper_labels.items():
    if label not in mil_labels:
        continue
    mil_idx = int(mil_labels[label])
    mil_from_deeper[mil_idx] = deeper_clip_prob[int(deeper_idx)]

# MIL model: add a leading axis of length 1 in front of the bag and take
# the first (only) row of the prediction.
mil_input = bag[np.newaxis, ...]
mil_probs = mil.predict(mil_input, verbose=0)[0]

# Ensemble: unweighted mean of the two score vectors, then arg-max.
avg_prob = (mil_probs + mil_from_deeper) / 2.0
pred_idx = int(avg_prob.argmax())

# Reverse lookup: first label whose index equals the winning index.
matching_labels = [
    label_name
    for label_name, label_idx in mil_labels.items()
    if int(label_idx) == pred_idx
]
pred_name = matching_labels[0]
print('Predicted label:', pred_name)