In [3]:
# --- CELL 1: SETUP ---
!pip install tensorflow h5py pandas
import os, cv2, numpy as np, pandas as pd, h5py, tensorflow as tf, glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
print("Setup Complete.")

Setup Complete.


In [10]:
# =================================================================
#      FINAL Cell: Resumable Frame Conversion with Debugging
# =================================================================
import os
import cv2

VIDEO_ROOT = '/kaggle/input/lsa64-cut-version/all'
FRAMES_ROOT = '/kaggle/working/lsa64_frames'
IMG_SIZE = 299

if not os.path.exists(FRAMES_ROOT): os.makedirs(FRAMES_ROOT)

print(f"Starting RESUMABLE video to frames conversion (Resizing to {IMG_SIZE}x{IMG_SIZE})...")

# Get a sorted list of all video files to process.
video_files_to_process = sorted(os.listdir(VIDEO_ROOT))
total_videos = len(video_files_to_process)

# Iterate through the list with an index for progress tracking.
for i, video_file in enumerate(video_files_to_process):
    video_path = os.path.join(VIDEO_ROOT, video_file)
    
    # --- Define the final output folder for this video's frames ---
    sign_class_folder_name = video_file[:7]
    video_name = video_file.split('.')[0]
    output_video_folder = os.path.join(FRAMES_ROOT, sign_class_folder_name, video_name)
    
    # --- THIS IS THE KEY OPTIMIZATION ---
    # Check if the output folder already exists and is not empty.
    # If it is, we have already processed this video, so we can skip it.
    if os.path.exists(output_video_folder) and len(os.listdir(output_video_folder)) > 0:
        print(f"({i+1}/{total_videos}) SKIPPING: {video_file} - Already processed.")
        continue # Go to the next video in the loop
        
    # If we are here, it means we need to process this video.
    # Our debugging print statement:
    print(f"({i+1}/{total_videos}) PROCESSING: {video_file}")

    # Create the output directories if they don't exist
    if not os.path.exists(output_video_folder):
        os.makedirs(output_video_folder)
            
    try:
        cap = cv2.VideoCapture(video_path)
        frame_count = 0
        while cap.isOpened():
            success, frame = cap.read()
            if not success: break
            
            resized_frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
            cv2.imwrite(os.path.join(output_video_folder, f"frame_{frame_count:04d}.jpg"), resized_frame)
            frame_count += 1
        cap.release()
    except Exception as e:
        # If any error occurs during processing (e.g., corrupted file),
        # print the error and the problematic filename, then continue.
        print(f"!!!!!!!! FAILED to process {video_file}. Error: {e}")
        continue # Skip to the next video
    
print("Frame conversion complete.")

Starting RESUMABLE video to frames conversion (Resizing to 299x299)...
(1/3200) SKIPPING: 001_001_001.mp4 - Already processed.
(2/3200) SKIPPING: 001_001_002.mp4 - Already processed.
(3/3200) SKIPPING: 001_001_003.mp4 - Already processed.
(4/3200) SKIPPING: 001_001_004.mp4 - Already processed.
(5/3200) SKIPPING: 001_001_005.mp4 - Already processed.
(6/3200) SKIPPING: 001_002_001.mp4 - Already processed.
(7/3200) SKIPPING: 001_002_002.mp4 - Already processed.
(8/3200) SKIPPING: 001_002_003.mp4 - Already processed.
(9/3200) SKIPPING: 001_002_004.mp4 - Already processed.
(10/3200) SKIPPING: 001_002_005.mp4 - Already processed.
(11/3200) SKIPPING: 001_003_001.mp4 - Already processed.
(12/3200) SKIPPING: 001_003_002.mp4 - Already processed.
(13/3200) SKIPPING: 001_003_003.mp4 - Already processed.
(14/3200) SKIPPING: 001_003_004.mp4 - Already processed.
(15/3200) SKIPPING: 001_003_005.mp4 - Already processed.
(16/3200) SKIPPING: 001_004_001.mp4 - Already processed.
(17/3200) SKIPPING: 001_00

In [14]:
# --- CELL 3: CREATE CSV INDEX ---
FRAMES_ROOT = '/kaggle/working/lsa64_frames'
OUTPUT_CSV = '/kaggle/working/lsa64_index.csv'
print("Creating CSV index file...")
data_list = []
for sign_class_folder in sorted(os.listdir(FRAMES_ROOT)):
    signer_id = sign_class_folder.split('_')[0]
    for video_folder in sorted(os.listdir(os.path.join(FRAMES_ROOT, sign_class_folder))):
        video_folder_path = os.path.join(FRAMES_ROOT, sign_class_folder, video_folder)
        data_list.append({
            'Sign/Class': sign_class_folder, 'fullPath': video_folder_path,
            'framesN': len(os.listdir(video_folder_path)), 'SignerID': signer_id
        })
df = pd.DataFrame(data_list)
df.to_csv(OUTPUT_CSV, index=False)
print(f"CSV index created at {OUTPUT_CSV}")
df.head()

Creating CSV index file...
CSV index created at /kaggle/working/lsa64_index.csv


Unnamed: 0,Sign/Class,fullPath,framesN,SignerID
0,001_001,/kaggle/working/lsa64_frames/001_001/001_001_001,88,1
1,001_001,/kaggle/working/lsa64_frames/001_001/001_001_002,148,1
2,001_001,/kaggle/working/lsa64_frames/001_001/001_001_003,118,1
3,001_001,/kaggle/working/lsa64_frames/001_001/001_001_004,118,1
4,001_001,/kaggle/working/lsa64_frames/001_001/001_001_005,88,1


In [21]:
# --- 1. Define Configuration ---
diVideoSet = {"nFramesNorm": 18}
diFeature = {
    "sName": "Xception",
    "tuInputShape": (299, 299, 3),
    "tuOutputShape": (2048,)
}
sFrameBaseDir = "/kaggle/working/lsa64_frames/"
sFeatureBaseDir = "/kaggle/working/lsa64_features_xception/"

# --- 2. Load the CNN Feature Extractor Model ---
def load_feature_extractor(diFeature_config):
    base_model = Xception(weights='imagenet', include_top=False, input_shape=diFeature_config["tuInputShape"])
    base_model.trainable = False
    cnn_out = GlobalAveragePooling2D()(base_model.output)
    model = Model(inputs=base_model.input, outputs=cnn_out)
    return model

feature_extractor_model = load_feature_extractor(diFeature)

# --- 3. Run the Feature Extraction Process ---
# This call will now work because features_2D_predict_generator is defined above.
features_2D_predict_generator(
    sFrameBaseDir,
    sFeatureBaseDir,
    feature_extractor_model,
    nFramesNorm=diVideoSet["nFramesNorm"]
)

print("\n--- STAGE 1 COMPLETE: All features extracted and saved. ---")

DataGenerator: Detected 3200 video samples in /kaggle/working/lsa64_frames/
Predicting features with functional_2...


I0000 00:00:1759089216.174608   37306 service.cc:148] XLA service 0x7cbe24008d50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1759089216.177417   37306 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1759089216.177436   37306 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1759089216.823110   37306 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-09-28 19:53:42.278266: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng3{k11=0} for conv (f32[18,128,147,147]{3,2,1,0}, u8[0]{0}) custom-call(f32[18,128,147,147]{3,2,1,0}, f32[128,128,1,1]{3,2,1,0}), window={size=1x1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convForward", backend_config={"cudnn_conv_backend_config":{"activation_mode":"kNone","conv_result_scale":1,"leakyrelu_alpha":0,"side_input_scale":0},"force_earliest_schedule":false,"operation_queue_id":"0

Video     0: Features (18, 2048) saved to /kaggle/working/lsa64_features_xception/001_001/001_001_001.npy
Video     1: Features (18, 2048) saved to /kaggle/working/lsa64_features_xception/001_001/001_001_002.npy
Video     2: Features (18, 2048) saved to /kaggle/working/lsa64_features_xception/001_001/001_001_003.npy
Video     3: Features (18, 2048) saved to /kaggle/working/lsa64_features_xception/001_001/001_001_004.npy
Video     4: Features (18, 2048) saved to /kaggle/working/lsa64_features_xception/001_001/001_001_005.npy
Video     5: Features (18, 2048) saved to /kaggle/working/lsa64_features_xception/001_002/001_002_001.npy
Video     6: Features (18, 2048) saved to /kaggle/working/lsa64_features_xception/001_002/001_002_002.npy
Video     7: Features (18, 2048) saved to /kaggle/working/lsa64_features_xception/001_002/001_002_003.npy
Video     8: Features (18, 2048) saved to /kaggle/working/lsa64_features_xception/001_002/001_002_004.npy
Video     9: Features (18, 2048) saved to /kag

In [16]:
# ==============================================================================
#                 Cell 5: Port of model_lstm.py
#
# Description:
# This cell contains the functions from the original repository's model_lstm.py
# file. It acts as a "model factory," providing helper functions to build the
# different neural network architectures required for the project.
#
# I have made the following corrections:
#   1. Removed the 'from transformer_model import...' line, as the required
#      file is not in the repository and this model is not used.
#   2. Cleaned up the Keras/TensorFlow imports to use modern, standard paths.
# ==============================================================================

# --- Standard Imports ---
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, LSTM, Dense, Dropout, Bidirectional, TimeDistributed,
    BatchNormalization, concatenate, Flatten, Conv1D
)
from tensorflow.keras.applications import MobileNet, InceptionV3
from tensorflow.keras.applications.mobilenet import preprocess_input

def lstm_build_multi_single(nFramesNorm:int, nFeatureLength_01:int, nClasses:int):
    """
    Builds the final SRN (Sign Recognition Network) model.

    This is a two-stream model that fuses features from the DMN and AMN.
    - Stream 1 (DMN): An LSTM network processes a sequence of features.
    - Stream 2 (AMN): A CNN (MobileNet) processes a single AVM image.
    The features from both streams are then concatenated and classified.
    
    Args:
        nFramesNorm (int): The number of frames in the input sequence (e.g., 16).
        nFeatureLength_01 (int): The length of the feature vector for each frame.
        nClasses (int): The total number of sign classes to predict.

    Returns:
        keras.Model: The compiled Keras model for the SRN.
    """
    print("Building the SRN (fused DMN+AMN) model...")

    ## --- DMN Stream (LSTM for sequential features) ---
    input_frames = Input(shape=(nFramesNorm, nFeatureLength_01), name='input_frames')
    x1 = LSTM(2048, return_sequences=True, dropout=0.7)(input_frames)    
    x1 = LSTM(2048, return_sequences=False, dropout=0.7)(x1) # return_sequences=False for the last LSTM
    x1 = Dense(1024, activation='relu')(x1)
    x1 = Dropout(0.6)(x1)

    ## --- AMN Stream (CNN for the AVM image) ---
    img_size = 224
    input_img = Input(shape=(img_size, img_size, 3), name='input_img')
    
    # We create a new MobileNet instance for the AMN stream.
    # We pass the input_img tensor to it.
    base_cnn = MobileNet(weights="imagenet", include_top=False, input_tensor=input_img)
    x2 = base_cnn.output # Get the output of the base CNN
    x2 = keras.layers.GlobalAveragePooling2D()(x2)
    x2 = Dropout(0.6)(x2)

    ## --- Fusion Stage ---
    # Concatenate the features from both streams side-by-side.
    x = concatenate([x1, x2])
    x = BatchNormalization()(x) # Normalize the combined features.
    
    # The original author reshapes and uses a 1D Convolution.
    # This can help find patterns in the combined feature vector.
    x = tf.expand_dims(x, axis=-1) # Add a dimension for Conv1D
    x = Conv1D(256, kernel_size=7, activation='relu')(x)
    x = Dropout(0.6)(x)
    x = Flatten()(x)

    # Final classification layer.
    fc = Dense(nClasses,  activation="softmax")(x)
    
    # Create the final Keras Model, defining its inputs and outputs.
    keModel = Model(inputs=[input_frames, input_img], outputs=fc)
    
    print("================SRN MODEL SUMMARY================")
    keModel.summary()
    print("=================================================")

    return keModel


def pretrainedModel(img_size, modelName, nClasses, retrainModel=False):
    """
    Builds the CNN feature extractor model (used by DMN).

    This function loads a pre-trained model (like MobileNet) and prepares it
    to be used as a feature extractor. It removes the final classification
    layer ('include_top=False') and adds a pooling layer.

    Args:
        img_size (int): The input image size (e.g., 224).
        modelName (str): The name of the model to load ('mobileNet' or 'InceptionV3').
        nClasses (int): Number of classes (not directly used here but good practice).
        retrainModel (bool): If True, some layers of the pre-trained model will be unfrozen.

    Returns:
        keras.Model: The CNN feature extractor model.
    """
    print(f"Building the '{modelName}' feature extractor...")
    
    input_tensor = Input(shape=(img_size, img_size, 3))
    
    # Preprocess the input according to the specific model's requirements.
    # This is a crucial step.
    x = preprocess_input(input_tensor)
    
    if modelName == 'mobileNet':
        base_model = MobileNet(weights="imagenet", include_top=False, input_tensor=x)
    elif modelName == 'InceptionV3':
        base_model = InceptionV3(weights="imagenet", include_top=False, input_tensor=x)
    else:
        raise ValueError(f"Unknown model name: {modelName}")
        
    if retrainModel:
        # This allows for fine-tuning the pre-trained model.
        for layer in base_model.layers[:-4]:
            layer.trainable = True

    # Get the output of the base model and add pooling and dropout layers.
    cnn_out = keras.layers.GlobalAveragePooling2D()(base_model.output)
    cnn_out = Dropout(0.6)(cnn_out)
    
    # Create the final model. The input is the original input_tensor.
    model = Model(inputs=input_tensor, outputs=cnn_out)
    
    return model


print("Helper script 'model_lstm.py' loaded.")

Helper script 'model_lstm.py' loaded.


In [17]:
# ==============================================================================
#                 Cell 5 (Continued): Port of feature.py
#
# Description:
# This cell contains the functions from the original repository's feature.py
# file. Its purpose is to perform offline feature extraction. This involves
# loading a pre-trained CNN, passing all the frame images through it, and
# saving the resulting feature vectors to disk as .npy files.
#
# I have made the following corrections:
#   1. Heavily simplified the features_2D_load_model function. Instead of the
#      complex and buggy original code, we can just call the much cleaner
#      pretrainedModel() function we already defined in this notebook.
#   2. Fixed all Keras imports to use the standard 'tensorflow.keras' path.
#   3. Removed unused functions to reduce clutter.
# ==============================================================================

# --- Imports specific to this section ---
from tensorflow.keras.applications import mobilenet
from tensorflow.keras.models import Sequential

def features_2D_load_model(diFeature: dict) -> keras.Model:
    """
    Loads or builds the 2D feature extraction model (the CNN).

    This function has been simplified to call our existing `pretrainedModel`
    helper function, which is cleaner and more reliable than the original code.

    Args:
        diFeature (dict): A dictionary containing model parameters.
                          Expected keys: 'sName', 'tuInputShape', 'tuOutputShape'.

    Returns:
        keras.Model: The CNN feature extractor model.
    """
    sModelName = diFeature["sName"]
    print("Load 2D feature extraction model %s ..." % sModelName)

    # Use our reliable helper function from model_lstm.py to build the CNN
    # We are ignoring the nClasses argument as it's not needed for the feature extractor.
    keModel = pretrainedModel(img_size=diFeature["tuInputShape"][0], modelName=sModelName, nClasses=0)
    
    # --- Verification Step ---
    # It's good practice to check if the created model has the expected shapes.
    tuInputShape = keModel.input_shape[1:]
    tuOutputShape = keModel.output_shape[1:]
    print("Model Input Shape: %s, Output Shape: %s" % (str(tuInputShape), str(tuOutputShape)))

    if tuInputShape != diFeature["tuInputShape"]:
        raise ValueError("Unexpected input shape for the feature extractor.")
    if tuOutputShape != diFeature["tuOutputShape"]:
        raise ValueError("Unexpected output shape for the feature extractor.")

    return keModel


def features_2D_predict_generator(sFrameBaseDir: str, sFeatureBaseDir: str, keModel: keras.Model,
                                  nFramesNorm: int = 40):
    """
    Extracts features from all frame images and saves them to disk.

    This function uses a DataGenerator to load the frame images in batches,
    passes them through the provided CNN model (keModel) to get the feature
    vectors, and then saves these vectors as individual .npy files.

    Args:
        sFrameBaseDir (str): Path to the root directory containing the frame images.
        sFeatureBaseDir (str): Path to the directory where the extracted features will be saved.
        keModel (keras.Model): The CNN feature extractor model.
        nFramesNorm (int): The number of frames to normalize each sequence to (not used for prediction).
    """
    # Check if the target directory already exists to avoid re-calculating.
    if os.path.exists(sFeatureBaseDir): 
        print(f"\nFeature folder {sFeatureBaseDir} already exists. Skipping feature extraction.") 
        return

    # Get the required image dimensions from the model itself.
    _, h, w, c = keModel.input_shape
    
    # The DataGenerator is the key component for loading the images.
    # We set bShuffle=False because the order of frames matters.
    # We set batch_size=1 to process one video's frames at a time.
    genFrames = FramesGenerator(sFrameBaseDir, 1, nFramesNorm, h, w, c, 
                                liClassesFull=None, bShuffle=False)

    print("Predicting features with %s..." % keModel.name)
    nCount = 0
    # Loop through all video samples provided by the generator.
    for _, seVideo in genFrames.dfVideos.iterrows():
        
        sVideoName = seVideo.sFrameDir.split("/")[-1]
        sLabel = seVideo.sLabel
        sFeaturePath = os.path.join(sFeatureBaseDir, sLabel, sVideoName + ".npy")

        # Check if features for this specific video have already been extracted.
        if os.path.exists(sFeaturePath):
            print(f"Video {nCount:5d}: Features already exist at {sFeaturePath}")
            nCount += 1
            continue

        # Get the batch of frames for the current video.
        arX, _ = genFrames.data_generation(seVideo)
        
        # This is the core feature extraction step.
        # We pass the batch of images to the CNN and get the feature vectors back.
        arFeature = keModel.predict(arX, verbose=0)

        # Save the resulting feature array to a .npy file.
        os.makedirs(os.path.join(sFeatureBaseDir, sLabel), exist_ok=True)
        np.save(sFeaturePath, arFeature)

        print(f"Video {nCount:5d}: Features {str(arFeature.shape)} saved to {sFeaturePath}")
        nCount += 1

    print(f"{nCount} feature sets saved in {sFeatureBaseDir}")
    return


print("Helper script 'feature.py' loaded.")

Helper script 'feature.py' loaded.


In [18]:
# ==============================================================================
#      Cell 5 (Continued): Port of frame.py and datagenerator.py
#
# Description:
# This cell contains the code from TWO helper files: frame.py and
# datagenerator.py. They are placed together because the datagenerator
# depends heavily on the functions defined in frame.py.
#
# I have made the following corrections:
#   1. Corrected all TensorFlow/Keras import paths.
#   2. In frame.py, modified files2frames to read '.jpg' files to match our
#      preprocessing output.
#   3. In datagenerator.py, simplified to only include the FramesGenerator class,
#      which is the only one needed for the feature extraction step.
#   4. Ensured the label extraction logic works with our Kaggle file paths.
# ==============================================================================
import glob

# --- Imports for this section ---
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# ------------------------------------------------------------------------------
#               CONTENT OF frame.py (Corrected)
# ------------------------------------------------------------------------------

def files2frames(sPath:str) -> np.array:
    """
    Reads all image files from a directory and loads them into a NumPy array.
    """
    # Use glob to find all .jpg files in the directory. The '*' is a wildcard.
    # We sort the files to ensure they are in the correct temporal order.
    liFiles = sorted(glob.glob(os.path.join(sPath, "*.jpg")))
    if len(liFiles) == 0:
        # Check for .png as a fallback, just in case.
        liFiles = sorted(glob.glob(os.path.join(sPath, "*.png")))
        if len(liFiles) == 0:
            raise ValueError("No frames found in " + sPath)

    liFrames = []
    for sFramePath in liFiles:
        arFrame = cv2.imread(sFramePath)
        liFrames.append(arFrame)

    return np.array(liFrames)

def frames_downsample(arFrames:np.array, nFramesTarget:int) -> np.array:
    """
    Adjusts the number of frames in a sequence to a target number.
    This is crucial for creating fixed-size inputs for the LSTM.
    It uses linear interpolation to select frames.
    """
    # Get the number of frames in the input array.
    nFramesExisting = arFrames.shape[0]
    if nFramesExisting == nFramesTarget:
        return arFrames

    # Generate a list of indices to select from the original array.
    # For example, to get 10 frames from 20, it will select indices [0, 2, 4, ...].
    # To get 10 frames from 5, it will select indices [0, 0, 1, 1, 2, ...].
    indices = np.linspace(0, nFramesExisting - 1, nFramesTarget, dtype=int)
    
    # Select the frames at the calculated indices.
    arFramesDownsampled = arFrames[indices, ...]
    
    return arFramesDownsampled

def images_rescale(arFrames:np.array) -> np.array(float):
    """ Rescales pixel values from the 0-255 range to the -1.0 to 1.0 range. """
    return (arFrames.astype(np.float32) / 127.5) - 1.0

def images_normalize(arFrames:np.array, nFrames:int, nHeight:int, nWidth:int) -> np.array(float):
    """
    A wrapper function that applies a sequence of preprocessing steps:
    1. Downsamples the number of frames to a fixed length.
    2. Rescales pixel values to the [-1.0, 1.0] range.
    """
    # Note: The original repo had a cropping function here, but since we already
    # resized all frames to the target size (e.g., 224x224), cropping is not needed.
    arFrames = frames_downsample(arFrames, nFrames)
    arFrames = images_rescale(arFrames)
    return arFrames

# ------------------------------------------------------------------------------
#               CONTENT OF datagenerator.py (Corrected)
# ------------------------------------------------------------------------------

class FramesGenerator(tf.keras.utils.Sequence):
    """
    A Keras Sequence generator for loading and preprocessing video frames.
    This is used to feed data to the model during feature extraction.
    """
    def __init__(self, sPath:str, nBatchSize:int, nFrames:int, nHeight:int, nWidth:int, nChannels:int,
                 liClassesFull:list = None, bShuffle:bool = True):
        'Initialization'
        self.nBatchSize = nBatchSize
        self.nFrames = nFrames
        self.nHeight = nHeight
        self.nWidth = nWidth
        self.nChannels = nChannels
        self.tuXshape = (nFrames, nHeight, nWidth, nChannels)
        self.bShuffle = bShuffle
        		
        # Find all the individual video frame directories.
        self.dfVideos = pd.DataFrame(sorted(glob.glob(os.path.join(sPath, "*", "*"))), columns=["sFrameDir"])
        self.nSamples = len(self.dfVideos)
        if self.nSamples == 0:
            raise ValueError("Found no frame directories in " + sPath)
        print(f"DataGenerator: Detected {self.nSamples} video samples in {sPath}")

        # Extract the class label from the path (e.g., '/.../001_001/video_name')
        self.dfVideos["sLabel"] = self.dfVideos["sFrameDir"].apply(lambda p: os.path.basename(os.path.dirname(p)))
            
        # Get a sorted list of unique class labels found in the dataset.
        self.liClasses = sorted(self.dfVideos["sLabel"].unique())
        self.nClasses = len(self.liClasses)

        # Encode the string labels into numerical indices (e.g., '001_001' -> 0)
        trLabelEncoder = LabelEncoder()
        self.dfVideos["nLabel"] = trLabelEncoder.fit_transform(self.dfVideos["sLabel"])
        
        # Prepare the indices for the first epoch.
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.ceil(self.nSamples / self.nBatchSize))

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(self.nSamples)
        if self.bShuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, nStep):
        'Generate one batch of data'
        # Get the indices for the current batch
        indexes = self.indexes[nStep*self.nBatchSize:(nStep+1)*self.nBatchSize]
        dfVideosBatch = self.dfVideos.iloc[indexes]

        # Initialize empty arrays for the batch data
        arX = np.empty((len(dfVideosBatch),) + self.tuXshape, dtype=np.float32)
        arY = np.empty((len(dfVideosBatch),), dtype=int)

        # Generate data for each sample in the batch
        for i, (_, seVideo) in enumerate(dfVideosBatch.iterrows()):
            arX[i,], arY[i] = self.data_generation(seVideo)
        
        # Convert the numerical labels to one-hot encoded vectors
        return arX, to_categorical(arY, num_classes=self.nClasses)

    def data_generation(self, seVideo:pd.Series):
        "Loads and preprocesses the frames for a single video."
        # Load all frame images from the directory.
        ar_nFrames = files2frames(seVideo.sFrameDir)
        
        # Ensure we only use the specified number of channels (typically 3 for RGB).
        ar_nFrames = ar_nFrames[..., 0:self.nChannels]
        
        # Apply normalization (downsampling and rescaling).
        ar_fFrames = images_normalize(ar_nFrames, self.nFrames, self.nHeight, self.nWidth)
        
        return ar_fFrames, seVideo.nLabel

print("Helper scripts 'frame.py' and 'datagenerator.py' loaded.")

Helper scripts 'frame.py' and 'datagenerator.py' loaded.


In [19]:
# ==============================================================================
#                 Cell 6: Stage 1 - Feature Extraction
#
# Description:
# This cell performs the offline feature extraction process. It loads a
# pre-trained MobileNetV2 model to act as a powerful feature extractor.
# It then iterates through every single frame image we created in Cell 3,
# passes them through the CNN, and saves the resulting feature vectors
# (as .npy files) to a new directory. This is a time-consuming but one-off
# process that dramatically speeds up the final LSTM training.
# ==============================================================================

print("--- STAGE 1: FEATURE EXTRACTION ---")

# --- 1. Define Configuration ---
# This dictionary defines the parameters for our video dataset.
diVideoSet = {
    "sName": "LSA64",
    "nFramesNorm": 40,    # We will normalize each video to have 40 frames.
    "nMinDim": 224,       # The image size our model expects.
    "tuShape": (224, 224)
}

# This dictionary defines the parameters for our CNN feature extractor.
diFeature = {
    "sName": "mobileNet", # We'll use MobileNet as described in the paper.
    "tuInputShape": (224, 224, 3),
    "tuOutputShape": (1024,) # MobileNetV2 with global pooling outputs a 1024-length vector.
}

# Define the input and output paths for this stage.
sFrameBaseDir = "/kaggle/working/lsa64_frames/"
sFeatureBaseDir = "/kaggle/working/lsa64_features/"

# --- 2. Load the CNN Feature Extractor Model ---
# We use the helper function we defined in Cell 5.
feature_extractor_model = features_2D_load_model(diFeature)

# --- 3. Run the Feature Extraction Process ---
# This function will loop through all the frames in sFrameBaseDir,
# generate feature vectors using the model, and save them to sFeatureBaseDir.
features_2D_predict_generator(
    sFrameBaseDir,
    sFeatureBaseDir,
    feature_extractor_model,
    nFramesNorm=diVideoSet["nFramesNorm"]
)

print("\n--- STAGE 1 COMPLETE: All features extracted and saved. ---")

--- STAGE 1: FEATURE EXTRACTION ---
Load 2D feature extraction model mobileNet ...
Building the 'mobileNet' feature extractor...


  base_model = MobileNet(weights="imagenet", include_top=False, input_tensor=x)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5
[1m17225924/17225924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Model Input Shape: (224, 224, 3), Output Shape: (1024,)

Feature folder /kaggle/working/lsa64_features/ already exists. Skipping feature extraction.

--- STAGE 1 COMPLETE: All features extracted and saved. ---


In [23]:
# ==============================================================================
#           FINAL CELL (Corrected): Stage 2 - DMN (GRU) Training
#
# Description:
# This is the final, corrected script for the DMN task. It now includes the
# definition for the required Data Generator class directly in this cell to
# resolve the NameError.
# ==============================================================================

print("--- STAGE 2 (Corrected): DMN (GRU) TRAINING ---")

# --- 1. Imports for this Stage ---
import pandas as pd
import numpy as np
import glob
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import tensorflow as tf
from tensorflow.keras.layers import GRU, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# --- THE FIX IS HERE: Paste the Data Generator class definition at the top ---
class FeaturesGenerator_withSplitting(tf.keras.utils.Sequence):
    def __init__(self, sPath:str, nBatchSize:int, tuXshape,
                 liClassesFull:list = None, bShuffle:bool = True, diVideoSet=None, diFeature = None):
        self.nBatchSize = nBatchSize
        self.tuXshape = tuXshape
        self.diFeature = diFeature
        self.bShuffle = bShuffle
        self.diVideoSet = diVideoSet
        self.dfSamples = sPath.copy() # sPath is now a DataFrame
        self.nSamples = len(self.dfSamples)
        if self.nSamples == 0: raise ValueError("Found no feature files in DataFrame")

        seLabels = self.dfSamples.sPath.apply(lambda s: s.split("/")[-2])
        self.dfSamples["sLabel"] = seLabels
        self.liClasses = sorted(list(self.dfSamples.sLabel.unique()))
        if liClassesFull is not None:
            self.liClasses = sorted(liClassesFull)
        self.nClasses = len(self.liClasses)
        self.label_encoder = LabelEncoder().fit(self.liClasses)
        self.dfSamples["nLabel"] = self.label_encoder.transform(self.dfSamples.sLabel)
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(self.nSamples / self.nBatchSize))

    def on_epoch_end(self):
        self.indexes = np.arange(self.nSamples)
        if self.bShuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, nStep):
        indexes = self.indexes[nStep*self.nBatchSize:(nStep+1)*self.nBatchSize]
        dfSamplesBatch = self.dfSamples.iloc[indexes]
        
        arX = np.empty((len(dfSamplesBatch),) + self.tuXshape, dtype=np.float32)
        arY = np.empty((len(dfSamplesBatch),), dtype=int)

        for i, row in enumerate(dfSamplesBatch.itertuples()):
            features = np.load(row.sPath)
            # The original generator uses this downsampling for features too.
            arX[i,] = frames_downsample(features, self.tuXshape[0])
            arY[i,] = row.nLabel
            
        return arX, to_categorical(arY, num_classes=self.nClasses)


# --- 2. Load and Prepare DataFrames ---
sFeatureDir = '/kaggle/working/lsa64_features_xception/'
print(f"Loading features from: {sFeatureDir}")
all_features_paths = sorted(glob.glob(os.path.join(sFeatureDir, "*", "*.npy")))
dfSamples = pd.DataFrame(all_features_paths, columns=["sPath"])
dfSamples["sLabel"] = dfSamples["sPath"].apply(lambda p: os.path.basename(os.path.dirname(p)))
num_classes = dfSamples['sLabel'].nunique()
all_classes_str = sorted(dfSamples['sLabel'].unique())
print(f"Found {len(dfSamples)} samples belonging to {num_classes} classes.")
train_df, val_df = train_test_split(dfSamples, test_size=0.20, random_state=42, stratify=dfSamples['sLabel'])
train_df.reset_index(drop=True, inplace=True)
val_df.reset_index(drop=True, inplace=True)
print(f"Training samples: {len(train_df)}, Validation samples: {len(val_df)}")

# --- 3. Define Configuration Dictionaries (Matching Stage 1) ---
diVideoSet = {"nFramesNorm": 18, "reshape_input": False}
diFeature = {"sName": "Xception", "tuOutputShape": (2048,)}
lstm_input_shape = (diVideoSet["nFramesNorm"], diFeature["tuOutputShape"][0])

# --- 4. Create Data Generators ---
train_generator = FeaturesGenerator_withSplitting(train_df, 16, lstm_input_shape, all_classes_str, True, diVideoSet, diFeature)
val_generator = FeaturesGenerator_withSplitting(val_df, 16, lstm_input_shape, all_classes_str, False, diVideoSet, diFeature)

# --- 5. Use the Author's Model Architecture ---
def build_original_dmn_model(nFramesNorm, nFeatureLength, nClasses, fDropout=0.7):
    model = tf.keras.models.Sequential()
    model.add(GRU(2048, return_sequences=True,
                  input_shape=(nFramesNorm, nFeatureLength),
                  dropout=fDropout))
    model.add(GRU(2048, return_sequences=False, dropout=fDropout))
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(0.6))
    model.add(Dense(nClasses, activation='softmax'))
    return model

model = build_original_dmn_model(lstm_input_shape[0], lstm_input_shape[1], num_classes)

# --- 6. Compile and Train the Model ---
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()
checkpoint = ModelCheckpoint('/kaggle/working/dmn_model.h5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
early_stopping = EarlyStopping(monitor='val_loss', patience=15, verbose=1, restore_best_weights=True)

print("\nStarting model training with the corrected pipeline...")
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=100,
    callbacks=[checkpoint, early_stopping]
)

print("\n--- STAGE 2 COMPLETE: DMN model training finished. ---")
print("Best model saved to /kaggle/working/dmn_model.h5")

--- STAGE 2 (Corrected): DMN (GRU) TRAINING ---
Loading features from: /kaggle/working/lsa64_features_xception/
Found 3200 samples belonging to 640 classes.
Training samples: 2560, Validation samples: 640


  super().__init__(**kwargs)



Starting model training with the corrected pipeline...
Epoch 1/100


  self._warn_if_super_not_called()


[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - accuracy: 5.2730e-04 - loss: 6.5876
Epoch 1: val_accuracy improved from -inf to 0.00156, saving model to /kaggle/working/dmn_model.h5
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 120ms/step - accuracy: 5.2888e-04 - loss: 6.5876 - val_accuracy: 0.0016 - val_loss: 6.4552
Epoch 2/100
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - accuracy: 0.0019 - loss: 6.5069
Epoch 2: val_accuracy improved from 0.00156 to 0.00469, saving model to /kaggle/working/dmn_model.h5
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 109ms/step - accuracy: 0.0019 - loss: 6.5069 - val_accuracy: 0.0047 - val_loss: 6.3475
Epoch 3/100
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - accuracy: 0.0065 - loss: 6.4097
Epoch 3: val_accuracy improved from 0.00469 to 0.03438, saving model to /kaggle/working/dmn_model.h5
[1m160/160[0m [32m━━━━━━━━━

In [27]:
# ==============================================================================
#           AMN PREPROCESSING (Corrected): Generate AVM Image Dataset
#
# Description:
# This corrected script uses df.iterrows() to reliably access columns with
# special characters in their names, fixing the previous errors.
# ==============================================================================

# --- THE FIX IS HERE: Paste the generate_avm function definition at the top ---
def generate_avm(key_frames: list[np.ndarray]):
    """
    Generates Accumulative Video Motion (AVM) images from a list of key frames.
    """
    if not key_frames:
        # Return three Nones to match the expected output format
        return None, None, None
        
    frames_array = np.array(key_frames, dtype=np.float32)
    summed_image = np.sum(frames_array, axis=0)
    normalized_image = cv2.normalize(summed_image, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    
    # --- THIS IS THE CORRECTED PART ---
    # We must return three values to match how the function is called.
    # Since summation is commutative, they are all the same image.
    forward_avm = normalized_image
    backward_avm = normalized_image
    bidirectional_avm = normalized_image
    
    return forward_avm, backward_avm, bidirectional_avm

print("--- AMN PREPROCESSING: Starting AVM Image Generation ---")

# --- 1. Configuration ---
FRAMES_ROOT = '/kaggle/working/lsa64_frames'
AVM_ROOT = '/kaggle/working/lsa64_avm_images'
CSV_PATH = '/kaggle/working/lsa64_index.csv'

if not os.path.exists(AVM_ROOT):
    os.makedirs(AVM_ROOT)

df = pd.read_csv(CSV_PATH)
total_videos = len(df)
print(f"Found {total_videos} videos to process.")

# --- 2. The Generation Loop (Using iterrows for robustness) ---
for index, row in df.iterrows():
    frame_folder_path = row['fullPath']
    # This now correctly accesses the column and gets the string value
    sign_class = row['Sign/Class'] 
    
    # Define the output path for the AVM image
    output_class_folder = os.path.join(AVM_ROOT, sign_class)
    if not os.path.exists(output_class_folder):
        os.makedirs(output_class_folder)
        
    video_name = os.path.basename(frame_folder_path)
    output_avm_path = os.path.join(output_class_folder, f"{video_name}.jpg")

    if os.path.exists(output_avm_path):
        continue

    try:
        frames = files2frames(frame_folder_path)
    except ValueError as e:
        print(f"Warning: Could not load frames for {frame_folder_path}. Skipping. Error: {e}")
        continue
    
    if len(frames) > 0:
        _, _, bidirectional_avm = generate_avm(list(frames))
        
        if bidirectional_avm is not None:
            cv2.imwrite(output_avm_path, bidirectional_avm)
            if (index + 1) % 100 == 0:
                print(f"({index + 1}/{total_videos}) Saved AVM for {video_name}")

print("\n--- AMN PREPROCESSING COMPLETE ---")
print(f"All AVM images saved to: {AVM_ROOT}")

--- AMN PREPROCESSING: Starting AVM Image Generation ---
Found 3200 videos to process.
(100/3200) Saved AVM for 002_010_005
(200/3200) Saved AVM for 004_010_005
(300/3200) Saved AVM for 006_010_005
(400/3200) Saved AVM for 008_010_005
(500/3200) Saved AVM for 010_010_005
(600/3200) Saved AVM for 012_010_005
(700/3200) Saved AVM for 014_010_005
(800/3200) Saved AVM for 016_010_005
(900/3200) Saved AVM for 018_010_005
(1400/3200) Saved AVM for 028_010_005
(1500/3200) Saved AVM for 030_010_005
(1600/3200) Saved AVM for 032_010_005
(1700/3200) Saved AVM for 034_010_005
(1800/3200) Saved AVM for 036_010_005
(1900/3200) Saved AVM for 038_010_005
(2000/3200) Saved AVM for 040_010_005
(2100/3200) Saved AVM for 042_010_005
(2200/3200) Saved AVM for 044_010_005
(2300/3200) Saved AVM for 046_010_005
(2400/3200) Saved AVM for 048_010_005
(2500/3200) Saved AVM for 050_010_005
(2600/3200) Saved AVM for 052_010_005
(2700/3200) Saved AVM for 054_010_005
(2800/3200) Saved AVM for 056_010_005
(2900/3200

In [28]:
# ==============================================================================
#                 FINAL CELL: AMN Model Training
#
# Description:
# This script adapts the original AMN.py to train a CNN on our generated
# AVM images. It uses Keras's ImageDataGenerator to load the AVM images
# on-the-fly, builds a MobileNetV2-based classifier, and trains it.
# ==============================================================================

print("--- AMN TRAINING SCRIPT (Adapted from AMN.py) ---")

# --- 1. Imports ---
import pandas as pd
import numpy as np
import glob
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
import tensorflow as tf

# --- 2. Build the AMN Model Architecture ---
# This is a simplified and corrected version of the `pretrainedModel` function.
def amn_build(nClasses):
    """Builds the AMN model using MobileNetV2 as a base."""
    base_model = MobileNetV2(
        weights='imagenet',
        include_top=False, # We don't need the original top layer
        input_shape=(224, 224, 3)
    )
    # Freeze the pre-trained layers so we only train our new classifier.
    base_model.trainable = False
    
    # Add our custom classification head
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.5)(x) # Add dropout for regularization
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(nClasses, activation='softmax')(x)
    
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

# --- 3. Load and Prepare DataFrames ---
AVM_ROOT = '/kaggle/working/lsa64_avm_images/'
print(f"Loading AVM images from: {AVM_ROOT}")

# Find all the .jpg AVM image files.
all_avm_paths = sorted(glob.glob(os.path.join(AVM_ROOT, "*", "*.jpg")))
dfSamples = pd.DataFrame(all_avm_paths, columns=["fullPath"]) # Changed column name
dfSamples["sLabel"] = dfSamples["fullPath"].apply(lambda p: os.path.basename(os.path.dirname(p)))
num_classes = dfSamples['sLabel'].nunique()
print(f"Found {len(dfSamples)} AVM samples belonging to {num_classes} classes.")

# Split data into training (80%) and validation (20%) sets.
train_df, val_df = train_test_split(dfSamples, test_size=0.20, random_state=42, stratify=dfSamples['sLabel'])
print(f"Training samples: {len(train_df)}, Validation samples: {len(val_df)}")

# --- 4. Set up Keras Image Data Generators ---
IMG_SIZE = 224
BATCH_SIZE = 32 # A batch size of 32 is standard for image models

# Create a generator for training data with augmentation
train_datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input # Crucial preprocessing step
)

# Create a generator for validation data (NO augmentation, only preprocessing)
val_datagen = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input)

# Create the generator objects from our DataFrames
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='fullPath',
    y_col='sLabel',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

validation_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='fullPath',
    y_col='sLabel',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# --- 5. Build, Compile, and Train the AMN Model ---
model = amn_build(num_classes)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3) # A slightly higher LR can work for CNNs
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()

# Define callbacks
checkpoint = ModelCheckpoint('/kaggle/working/amn_model.h5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)

print("\nStarting AMN model training...")
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=50,
    callbacks=[checkpoint, early_stopping]
)

print("\n--- AMN TRAINING COMPLETE ---")
print("Best model saved to /kaggle/working/amn_model.h5")

--- AMN TRAINING SCRIPT (Adapted from AMN.py) ---
Loading AVM images from: /kaggle/working/lsa64_avm_images/
Found 3200 AVM samples belonging to 640 classes.
Training samples: 2560, Validation samples: 640
Found 2560 validated image filenames belonging to 640 classes.
Found 640 validated image filenames belonging to 640 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step



Starting AMN model training...


  self._warn_if_super_not_called()


Epoch 1/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 303ms/step - accuracy: 0.0022 - loss: 6.7844
Epoch 1: val_accuracy improved from -inf to 0.02344, saving model to /kaggle/working/amn_model.h5
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 372ms/step - accuracy: 0.0022 - loss: 6.7819 - val_accuracy: 0.0234 - val_loss: 6.1609
Epoch 2/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 313ms/step - accuracy: 0.0200 - loss: 5.8266
Epoch 2: val_accuracy improved from 0.02344 to 0.21719, saving model to /kaggle/working/amn_model.h5
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 331ms/step - accuracy: 0.0202 - loss: 5.8225 - val_accuracy: 0.2172 - val_loss: 4.0852
Epoch 3/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 320ms/step - accuracy: 0.1774 - loss: 3.9386
Epoch 3: val_accuracy improved from 0.21719 to 0.40625, saving model to /kaggle/working/amn_model.h5
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━

In [4]:
# ==============================================================================
#                 MASTER HELPER CODE CELL
#
# Description:
# This cell contains all the corrected and necessary helper functions and
# classes from the entire original repository. By placing them all here,
# we ensure that all subsequent cells can access them without NameErrors.
# ==============================================================================

# --- 1. Imports ---
import os
import glob
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, LSTM, Dense, Dropout, TimeDistributed, BatchNormalization,
    concatenate, Flatten, Conv1D, GlobalAveragePooling2D
)
from tensorflow.keras.applications import MobileNetV2, Xception
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenet_preprocess
from tensorflow.keras.applications.xception import preprocess_input as xception_preprocess

# --- 2. Code from frame.py ---
def files2frames(sPath:str) -> np.array:
    liFiles = sorted(glob.glob(os.path.join(sPath, "*.jpg")))
    if not liFiles: liFiles = sorted(glob.glob(os.path.join(sPath, "*.png")))
    if not liFiles: raise ValueError("No frames found in " + sPath)
    return np.array([cv2.imread(f) for f in liFiles])

def frames_downsample(arFrames:np.array, nFramesTarget:int) -> np.array:
    nFramesExisting = arFrames.shape[0]
    if nFramesExisting == nFramesTarget: return arFrames
    indices = np.linspace(0, nFramesExisting - 1, nFramesTarget, dtype=int)
    return arFrames[indices, ...]

# --- 3. Code from model_lstm.py ---
def srn_build(nFramesNorm, nFeatureLength, nClasses):
    print("Building the SRN (fused DMN+AMN) model...")
    # DMN Stream
    input_frames = Input(shape=(nFramesNorm, nFeatureLength), name='input_dmn_features')
    x1 = LSTM(2048, return_sequences=True, dropout=0.5)(input_frames)
    x1 = LSTM(2048, return_sequences=False, dropout=0.5)(x1)
    x1 = Dense(1024, activation='relu')(x1)
    x1 = Dropout(0.5)(x1)
    # AMN Stream
    img_size = 224
    input_img = Input(shape=(img_size, img_size, 3), name='input_amn_image')
    x2 = mobilenet_preprocess(input_img)
    base_cnn = MobileNetV2(weights="imagenet", include_top=False, input_shape=(img_size, img_size, 3))
    base_cnn.trainable = False
    x2 = base_cnn(x2, training=False)
    x2 = GlobalAveragePooling2D()(x2)
    # Fusion
    x = concatenate([x1, x2])
    x = BatchNormalization()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    fc = Dense(nClasses, activation="softmax")(x)
    model = Model(inputs=[input_frames, input_img], outputs=fc)
    return model

# --- 4. Code from datagenerator.py (for SRN) ---
class FeaturesGenerator_multiInput(tf.keras.utils.Sequence):
    def __init__(self, df, avm_root_path, batch_size, dmn_shape, amn_shape, num_classes, class_list, shuffle=True):
        self.df = df.copy()
        self.avm_root_path = avm_root_path
        self.batch_size = batch_size
        self.dmn_shape = dmn_shape
        self.amn_shape = amn_shape
        self.num_classes = num_classes
        self.class_list = class_list
        self.shuffle = shuffle
        self.label_encoder = LabelEncoder().fit(self.class_list)
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.df) / self.batch_size))

    def on_epoch_end(self):
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)

    def __getitem__(self, index):
        start_idx = index * self.batch_size
        end_idx = (index + 1) * self.batch_size
        batch_df = self.df.iloc[start_idx:end_idx]
        
        X_dmn = np.zeros((len(batch_df),) + self.dmn_shape, dtype=np.float32)
        X_amn = np.zeros((len(batch_df),) + self.amn_shape, dtype=np.float32)
        y = np.zeros((len(batch_df),), dtype=int)
        
        for i, row in enumerate(batch_df.itertuples()):
            # Load DMN features
            features = np.load(row.sPath)
            X_dmn[i,] = frames_downsample(features, self.dmn_shape[0])
            
            # Load corresponding AMN image
            video_name = os.path.basename(row.sPath).replace('.npy', '.jpg')
            sign_class = os.path.basename(os.path.dirname(row.sPath))
            avm_path = os.path.join(self.avm_root_path, sign_class, video_name)
            img = cv2.imread(avm_path)
            img = cv2.resize(img, (self.amn_shape[0], self.amn_shape[1]))
            X_amn[i,] = mobilenet_preprocess(img) # Preprocess for MobileNet
            
            y[i,] = self.label_encoder.transform([sign_class])[0]
            
        return (X_dmn, X_amn), to_categorical(y, num_classes=self.num_classes)

print("--- MASTER HELPER CELL LOADED ---")

--- MASTER HELPER CELL LOADED ---


In [5]:
# ==============================================================================
#                 FINAL CELL: SRN Model Training
#
# Description:
# This is the final training script. It loads the DMN features and the AMN
# images, feeds them into the two-stream SRN model, and trains the final
# classifier.
# ==============================================================================

print("--- SRN (FUSED MODEL) TRAINING SCRIPT ---")

# --- 1. Imports ---
import pandas as pd
import numpy as np
import glob
import os
# --- THE FIX IS HERE ---
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import tensorflow as tf

# --- 2. Load and Prepare DataFrames ---
DMN_FEATURES_ROOT = '/kaggle/working/lsa64_features_xception/'
AMN_IMAGES_ROOT = '/kaggle/working/lsa64_avm_images/'
BATCH_SIZE = 32
EPOCHS = 100

all_features_paths = sorted(glob.glob(os.path.join(DMN_FEATURES_ROOT, "*", "*.npy")))
dfSamples = pd.DataFrame(all_features_paths, columns=["sPath"])
dfSamples["sLabel"] = dfSamples["sPath"].apply(lambda p: os.path.basename(os.path.dirname(p)))
num_classes = dfSamples['sLabel'].nunique()
all_classes_str = sorted(dfSamples['sLabel'].unique())
print(f"Found {len(dfSamples)} samples for {num_classes} classes.")

train_df, val_df = train_test_split(dfSamples, test_size=0.20, random_state=42, stratify=dfSamples['sLabel'])
print(f"Training samples: {len(train_df)}, Validation samples: {len(val_df)}")

# --- 3. Define Model Input Shapes ---
DMN_INPUT_SHAPE = (18, 2048)
AMN_INPUT_SHAPE = (224, 224, 3)

# --- 4. Create Data Generators ---
train_generator = FeaturesGenerator_multiInput(train_df, AMN_IMAGES_ROOT, BATCH_SIZE, DMN_INPUT_SHAPE, AMN_INPUT_SHAPE, num_classes, all_classes_str)
val_generator = FeaturesGenerator_multiInput(val_df, AMN_IMAGES_ROOT, BATCH_SIZE, DMN_INPUT_SHAPE, AMN_INPUT_SHAPE, num_classes, all_classes_str, shuffle=False)

# --- 5. Build, Compile, and Train the SRN Model ---
model = srn_build(DMN_INPUT_SHAPE[0], DMN_INPUT_SHAPE[1], num_classes)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()

checkpoint = ModelCheckpoint('/kaggle/working/srn_model.h5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
early_stopping = EarlyStopping(monitor='val_loss', patience=20, verbose=1, restore_best_weights=True)

print("\nStarting SRN model training...")
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    callbacks=[checkpoint, early_stopping]
)

print("\n--- SRN TRAINING COMPLETE ---")
print("Best fused model saved to /kaggle/working/srn_model.h5")

--- SRN (FUSED MODEL) TRAINING SCRIPT ---
Found 3200 samples for 640 classes.
Training samples: 2560, Validation samples: 640
Building the SRN (fused DMN+AMN) model...



Starting SRN model training...
Epoch 1/100


  self._warn_if_super_not_called()
I0000 00:00:1759124776.069253     101 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step - accuracy: 9.1645e-04 - loss: 6.7612

  self._warn_if_super_not_called()



Epoch 1: val_accuracy improved from -inf to 0.00156, saving model to /kaggle/working/srn_model.h5
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 262ms/step - accuracy: 9.1961e-04 - loss: 6.7603 - val_accuracy: 0.0016 - val_loss: 6.4906
Epoch 2/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step - accuracy: 0.0045 - loss: 6.4259
Epoch 2: val_accuracy did not improve from 0.00156
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 198ms/step - accuracy: 0.0046 - loss: 6.4249 - val_accuracy: 0.0016 - val_loss: 6.4211
Epoch 3/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step - accuracy: 0.0159 - loss: 6.0764
Epoch 3: val_accuracy improved from 0.00156 to 0.00469, saving model to /kaggle/working/srn_model.h5
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 238ms/step - accuracy: 0.0159 - loss: 6.0761 - val_accuracy: 0.0047 - val_loss: 6.3104
Epoch 4/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━