In [None]:
!pip install tensorflow opencv-python mediapipe scikit-learn

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
INFO: pip is looking at multiple versions of mediapipe to determine which version is compatible with other requirements. This could take a while.
  Downloading mediapipe-0.10.20-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
  Downloading mediapipe-0.10.18-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
  Downloading mediapipe-0.10.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
  Downloading mediapipe-0.10.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.3 (from tensorflow)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.2-py3-none-any.whl.metadata (1.6 kB)
Downloading mediapip

In [None]:
# Mount Google Drive at /content/drive; the training videos are read from a
# folder under this mount point (see DATA_PATH in the training cell).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense
from tensorflow.keras.callbacks import TensorBoard
import tensorflow as tf
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import warnings

# Suppress Protobuf deprecation warning from MediaPipe
warnings.filterwarnings("ignore", category=UserWarning, module='google.protobuf.symbol_database')

# --- Part 1: Configuration & Setup ---

# Root folder that holds one sub-directory of .mp4 clips per action label.
DATA_PATH = "/content/drive/MyDrive/samples"
# Class labels; the position of a label in this array is its integer class id.
actions = np.array(['obrigado', 'null'])
# Number of frames every clip is resampled or padded to.
sequence_length = 100

# MediaPipe Tasks model bundles, looked up relative to the working directory.
pose_model_path = 'pose_landmarker_lite.task'
hand_model_path = 'hand_landmarker.task'
missing_models = [path for path in (pose_model_path, hand_model_path) if not os.path.exists(path)]
if missing_models:
    print("="*80)
    print("ERROR: Please download the required MediaPipe models and place them in this directory.")
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down, which discards all state; an exception just stops this cell
    # and names the files that are missing.
    raise FileNotFoundError(f"Missing MediaPipe model file(s): {', '.join(missing_models)}")

# Short aliases for the MediaPipe Tasks classes used below.
base_options = python.BaseOptions
PoseLandmarker = vision.PoseLandmarker
PoseLandmarkerOptions = vision.PoseLandmarkerOptions
HandLandmarker = vision.HandLandmarker
HandLandmarkerOptions = vision.HandLandmarkerOptions
VisionRunningMode = vision.RunningMode

# Both landmarkers run in IMAGE mode, so every frame is handled by a plain
# detect() call; the hand landmarker reports up to two hands.
pose_options = PoseLandmarkerOptions(
    base_options=base_options(model_asset_path=pose_model_path), running_mode=VisionRunningMode.IMAGE)
hand_options = HandLandmarkerOptions(
    base_options=base_options(model_asset_path=hand_model_path), running_mode=VisionRunningMode.IMAGE, num_hands=2)


# --- Part 2: Keypoint Extraction Function ---
def extract_keypoints(pose_result, hand_result):
    """Flatten one frame's pose and hand landmarks into a single 1-D vector.

    Args:
        pose_result: Object whose ``pose_landmarks`` is a list of detected
            poses; only the first pose is used, contributing
            ``x, y, z, visibility`` per landmark.
        hand_result: Object with parallel ``hand_landmarks`` and
            ``handedness`` lists; each hand contributes ``x, y, z`` per
            landmark and is routed by its ``category_name``.

    Returns:
        numpy.ndarray: ``pose + left hand + right hand`` concatenated. A
        missing pose is replaced by ``33 * 4`` zeros and a missing hand by
        ``21 * 3`` zeros.
    """
    if pose_result.pose_landmarks:
        body_rows = [[pt.x, pt.y, pt.z, pt.visibility] for pt in pose_result.pose_landmarks[0]]
        pose = np.array(body_rows).flatten()
    else:
        pose = np.zeros(33 * 4)

    # Start from zero vectors and overwrite only the sides that were detected.
    hands = {"Left": np.zeros(21 * 3), "Right": np.zeros(21 * 3)}
    for idx, points in enumerate(hand_result.hand_landmarks or []):
        side = hand_result.handedness[idx][0].category_name
        if side in hands:
            hands[side] = np.array([[pt.x, pt.y, pt.z] for pt in points]).flatten()

    return np.concatenate([pose, hands["Left"], hands["Right"]])

# --- Part 3: Video Processing and Data Loading ---
def process_videos_and_load_data():
    """Turn every training video into a fixed-length landmark sequence.

    For each label in ``actions`` the ``.mp4`` files in ``DATA_PATH/<label>``
    are decoded frame by frame, each frame is passed through the MediaPipe
    pose and hand landmarkers, and the per-frame keypoint vectors are either
    evenly subsampled (long clips) or padded with the last frame (short
    clips) so that every clip has exactly ``sequence_length`` frames.

    Returns:
        tuple: ``(X, y)`` where ``X`` is the array built from all sequences
        and ``y`` is the matching one-hot integer label matrix with
        ``len(actions)`` columns. When no video could be processed both
        arrays are empty (``X.shape[0] == 0``).
    """
    print("Starting video processing and data loading...")
    label_map = {label: num for num, label in enumerate(actions)}
    sequences, labels = [], []

    with PoseLandmarker.create_from_options(pose_options) as pose_landmarker, \
         HandLandmarker.create_from_options(hand_options) as hand_landmarker:

        for action in actions:
            action_path = os.path.join(DATA_PATH, action)
            if not os.path.isdir(action_path):
                print(f"Warning: Directory not found for action '{action}': {action_path}")
                continue

            print(f"Processing videos for action: '{action}'")
            for video_file in os.listdir(action_path):
                if not video_file.lower().endswith('.mp4'):
                    continue

                video_path = os.path.join(action_path, video_file)
                cap = cv2.VideoCapture(video_path)

                frame_landmarks = []
                try:
                    while True:
                        ret, frame = cap.read()
                        if not ret:
                            break

                        # OpenCV decodes to BGR; MediaPipe expects RGB.
                        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

                        # IMAGE running mode: plain detect() per frame.
                        pose_result = pose_landmarker.detect(mp_image)
                        hand_result = hand_landmarker.detect(mp_image)

                        frame_landmarks.append(extract_keypoints(pose_result, hand_result))
                finally:
                    # Release the decoder even if a frame fails to process.
                    cap.release()

                if not frame_landmarks:
                    print(f"Warning: No frames could be read from '{video_path}', skipping.")
                    continue

                if len(frame_landmarks) >= sequence_length:
                    # Evenly spaced frame indices across the whole clip.
                    indices = np.linspace(0, len(frame_landmarks) - 1, sequence_length, dtype=int)
                    sampled_landmarks = [frame_landmarks[i] for i in indices]
                else:
                    # Repeat the last frame; build a new list instead of
                    # extending (and thereby mutating) frame_landmarks.
                    padding = [frame_landmarks[-1]] * (sequence_length - len(frame_landmarks))
                    sampled_landmarks = frame_landmarks + padding

                sequences.append(sampled_landmarks)
                labels.append(label_map[action])

    # Pass num_classes explicitly: without it to_categorical infers the width
    # from max(labels), which raises on an empty list (so the caller's
    # "no data" check could never run) and yields too few columns when only
    # the first class has videos.
    one_hot = to_categorical(labels, num_classes=len(actions)).astype(int)
    return np.array(sequences), one_hot

# --- Part 4: Training the Model ---
def train_model():
    """Train the GRU action classifier and export it as Keras .h5 and TFLite.

    Loads the landmark sequences via ``process_videos_and_load_data``, holds
    out 15% of them for validation, trains a three-layer GRU network, then
    writes ``asl_action_recognizer.h5`` and ``asl_model.tflite`` to the
    working directory. TensorBoard logs are written to ``Logs``.

    Returns:
        None. Returns early (after printing an error) when no data was
        loaded.
    """
    X, y = process_videos_and_load_data()

    if X.shape[0] == 0:
        print("Error: No data was loaded. Please check your DATA_PATH and video files.")
        return

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

    print(f"\nData successfully loaded and processed.")
    print(f"Training data shape: {X_train.shape}")
    print(f"Test data shape: {X_test.shape}")

    log_dir = os.path.join('Logs')
    tb_callback = TensorBoard(log_dir=log_dir)
    # Take the per-frame feature width from the data instead of hard-coding
    # it, so the model always matches what extract_keypoints produces.
    num_features = X.shape[2]

    model = Sequential([
        # Explicit Input layer: passing input_shape= to the first GRU makes
        # Keras emit a UserWarning and is discouraged for Sequential models.
        tf.keras.Input(shape=(sequence_length, num_features)),
        GRU(64, return_sequences=True),
        GRU(128, return_sequences=True),
        GRU(64, return_sequences=False),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(actions.shape[0], activation='softmax')
    ])

    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    model.summary()

    print("\nStarting model training...")
    model.fit(X_train, y_train, epochs=150, callbacks=[tb_callback], validation_data=(X_test, y_test))
    print("Model training complete.")

    model.save('asl_action_recognizer.h5')

    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    # The GRU layers lower to TensorList ops that the builtin-only converter
    # cannot legalize ("'tf.TensorListReserve' op requires element_shape to be
    # static"). Allow Select TF ops and keep the tensor-list ops unlowered,
    # as the converter error message itself recommends. The resulting model
    # needs the Select TF ops runtime on the device that runs it.
    converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS,
        tf.lite.OpsSet.SELECT_TF_OPS,
    ]
    converter._experimental_lower_tensor_list_ops = False
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    tflite_model = converter.convert()
    with open('asl_model.tflite', 'wb') as f:
        f.write(tflite_model)
    print("Models saved successfully.")

# --- Main Execution ---
# Runs the whole pipeline (landmark extraction, training, export) when the
# cell is executed. Note that train_model() returns None both on success and
# when it bailed out because no data was loaded, so the banner below is
# printed in either case.
if __name__ == "__main__":
    train_model()
    print("\n✅ --- PROCESS COMPLETE --- ✅")

Starting video processing and data loading...
Processing videos for action: 'obrigado'
Processing videos for action: 'null'

Data successfully loaded and processed.
Training data shape: (334, 100, 258)
Test data shape: (60, 100, 258)


  super().__init__(**kwargs)



Starting model training...
Epoch 1/150
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 360ms/step - categorical_accuracy: 0.5662 - loss: 0.6977 - val_categorical_accuracy: 0.6000 - val_loss: 0.6628
Epoch 2/150
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 410ms/step - categorical_accuracy: 0.5752 - loss: 0.6725 - val_categorical_accuracy: 0.5667 - val_loss: 0.7106
Epoch 3/150
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 289ms/step - categorical_accuracy: 0.6538 - loss: 0.6227 - val_categorical_accuracy: 0.6833 - val_loss: 0.5753
Epoch 4/150
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 436ms/step - categorical_accuracy: 0.7019 - loss: 0.5734 - val_categorical_accuracy: 0.8000 - val_loss: 0.4861
Epoch 5/150
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 350ms/step - categorical_accuracy: 0.7959 - loss: 0.4473 - val_categorical_accuracy: 0.8667 - val_loss: 0.3405
Epoch 6/150
[1m11/11[0m [32m━━━━━



Model training complete.
Saved artifact at '/tmp/tmp99sa3o1f'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 100, 258), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 2), dtype=tf.float32, name=None)
Captures:
  137839947021008: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839947019088: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839947020240: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839947021392: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839947018896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839947020048: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839947021200: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839947018128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839947019664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839947021776: TensorSpec(shape=(), dtype=tf.resource, name=

ConverterError: <unknown>:0: error: loc(callsite(callsite(fused["TensorListReserve:", "sequential_1/gru_1/TensorArrayV2_1@__inference_function_64307"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper_64378"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): 'tf.TensorListReserve' op requires element_shape to be static during TF Lite transformation pass
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"]): called from
<unknown>:0: error: loc(callsite(callsite(fused["TensorListReserve:", "sequential_1/gru_1/TensorArrayV2_1@__inference_function_64307"] at fused["StatefulPartitionedCall:", "StatefulPartitionedCall@__inference_signature_wrapper_64378"]) at fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"])): failed to legalize operation 'tf.TensorListReserve' that was explicitly marked illegal
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall_1"]): called from
<unknown>:0: error: Lowering tensor list ops is failed. Please consider using Select TF ops and disabling `_experimental_lower_tensor_list_ops` flag in the TFLite converter object. For example, converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n converter._experimental_lower_tensor_list_ops = False


In [None]:
import tensorflow as tf

# --- Configuration ---
# The path to your saved Keras .h5 model file
H5_MODEL_PATH = '/content/asl_action_recognizer.h5'

# The desired path for the output .tflite model file
TFLITE_MODEL_PATH = '/content/asl_model.tflite'

print(f"Loading Keras model from: {H5_MODEL_PATH}")

# 1. Load the Keras model
try:
    model = tf.keras.models.load_model(H5_MODEL_PATH)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    # Re-raise instead of calling exit(): inside a notebook exit() asks the
    # kernel to shut down, while re-raising stops only this cell and keeps
    # the full traceback.
    raise

# 2. Create a TFLite converter from the Keras model
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# 3. Work around the GRU/LSTM conversion error
# ('tf.TensorListReserve' op requires element_shape to be static):
# allow the converter to fall back to regular TensorFlow ops and keep the
# tensor-list ops unlowered.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # Enable TFLite builtin ops
    tf.lite.OpsSet.SELECT_TF_OPS    # Enable TensorFlow ops
]
converter._experimental_lower_tensor_list_ops = False

# 4. (Optional) Apply optimizations for mobile deployment
converter.optimizations = [tf.lite.Optimize.DEFAULT]

print("\nStarting TFLite conversion...")

# 5. Perform the conversion
try:
    tflite_model = converter.convert()
    print("Conversion successful.")
except Exception as e:
    print(f"Error during conversion: {e}")
    # Same reasoning as above: surface the failure without killing the kernel.
    raise

# 6. Save the converted model to a file
with open(TFLITE_MODEL_PATH, 'wb') as f:
    f.write(tflite_model)

print(f"\nTFLite model saved successfully to: {TFLITE_MODEL_PATH}")
print("You can now use this '.tflite' file in your Android Studio project.")



Loading Keras model from: /content/asl_action_recognizer.h5
Model loaded successfully.

Starting TFLite conversion...
Saved artifact at '/tmp/tmpl0rfn94b'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 100, 258), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 2), dtype=tf.float32, name=None)
Captures:
  137839049594000: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839049595728: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839049595536: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839049591888: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839049594576: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839049594192: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839049591120: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839049594768: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137839049593040: TensorSpec(shape=(), dt

In [None]:
import cv2
import numpy as np
import os
import tensorflow as tf
import mediapipe as mp
import warnings

# Suppress Protobuf deprecation warning from MediaPipe
warnings.filterwarnings("ignore", category=UserWarning, module='google.protobuf.symbol_database')

# --- Part 1: Configuration & Setup ---

# *** IMPORTANT: SET THIS TO THE FOLDER WITH YOUR TEST VIDEOS ***
VIDEO_TEST_FOLDER = '/content/testes' # <-- CHANGE THIS

# --- Paths to your models ---
H5_MODEL_PATH = 'asl_action_recognizer.h5'
POSE_MODEL_PATH = 'pose_landmarker_lite.task'
HAND_MODEL_PATH = 'hand_landmarker.task'

# --- Model & Detection Parameters ---
actions = np.array(['obrigado', 'null']) # Must be the same as in training
sequence_length = 100 # Must be the same as in training
CONFIDENCE_THRESHOLD = 0.7 # Only show predictions with >= 70% confidence

# Check for model files
missing_files = [path for path in [H5_MODEL_PATH, POSE_MODEL_PATH, HAND_MODEL_PATH] if not os.path.exists(path)]
if missing_files:
    print("Error: Make sure your .h5 model and the MediaPipe .task files are in the correct directory.")
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down, which discards all state; an exception just stops this cell
    # and names the files that are missing.
    raise FileNotFoundError(f"Missing required file(s): {', '.join(missing_files)}")

# --- Part 2: MediaPipe Setup and Helper Functions ---

from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# Short aliases for the MediaPipe Tasks classes used below.
base_options = python.BaseOptions
PoseLandmarker = vision.PoseLandmarker
PoseLandmarkerOptions = vision.PoseLandmarkerOptions
HandLandmarker = vision.HandLandmarker
HandLandmarkerOptions = vision.HandLandmarkerOptions
VisionRunningMode = vision.RunningMode

# Both landmarkers run in IMAGE mode, so every video frame is handled by a
# plain detect() call; the hand landmarker reports up to two hands.
pose_options = PoseLandmarkerOptions(
    base_options=base_options(model_asset_path=POSE_MODEL_PATH), running_mode=VisionRunningMode.IMAGE)
hand_options = HandLandmarkerOptions(
    base_options=base_options(model_asset_path=HAND_MODEL_PATH), running_mode=VisionRunningMode.IMAGE, num_hands=2)

def extract_keypoints(pose_result, hand_result):
    """Build one flat feature vector from a frame's pose and hand detections.

    The vector is laid out as ``pose | left hand | right hand``. The first
    detected pose contributes ``x, y, z, visibility`` per landmark; each hand
    contributes ``x, y, z`` per landmark and is placed according to the
    ``category_name`` of its first handedness entry. Anything not detected is
    zero-filled (``33 * 4`` values for the pose, ``21 * 3`` per hand).
    """
    def _xyz(points):
        # Flatten a landmark list into [x0, y0, z0, x1, y1, z1, ...].
        return np.array([[pt.x, pt.y, pt.z] for pt in points]).flatten()

    if pose_result.pose_landmarks:
        first_pose = pose_result.pose_landmarks[0]
        pose = np.array([[pt.x, pt.y, pt.z, pt.visibility] for pt in first_pose]).flatten()
    else:
        pose = np.zeros(33 * 4)

    left_hand = np.zeros(21 * 3)
    right_hand = np.zeros(21 * 3)
    detected_hands = hand_result.hand_landmarks
    if detected_hands:
        for position in range(len(detected_hands)):
            label = hand_result.handedness[position][0].category_name
            if label == "Left":
                left_hand = _xyz(detected_hands[position])
            elif label == "Right":
                right_hand = _xyz(detected_hands[position])

    return np.concatenate([pose, left_hand, right_hand])

# --- Part 3: Main Inference Logic ---

def analyze_videos():
    """Classify every ``.mp4`` in ``VIDEO_TEST_FOLDER`` and print the verdicts.

    Loads the Keras model from ``H5_MODEL_PATH``, extracts pose/hand keypoints
    from every frame of each video (in sorted filename order), resamples or
    pads the frames to ``sequence_length``, and prints the predicted action.
    Predictions below ``CONFIDENCE_THRESHOLD`` are reported as a best guess
    only. Videos that yield no frames are reported and skipped.
    """
    # Load the trained Keras model
    model = tf.keras.models.load_model(H5_MODEL_PATH)
    print("Model loaded successfully.")

    # Create the MediaPipe landmarker objects
    with PoseLandmarker.create_from_options(pose_options) as pose_landmarker, \
         HandLandmarker.create_from_options(hand_options) as hand_landmarker:

        # Iterate through all video files in the specified folder
        for video_file in sorted(os.listdir(VIDEO_TEST_FOLDER)):
            if not video_file.lower().endswith('.mp4'):
                continue

            video_path = os.path.join(VIDEO_TEST_FOLDER, video_file)
            cap = cv2.VideoCapture(video_path)

            frame_landmarks = []
            try:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    # OpenCV decodes to BGR; MediaPipe expects RGB.
                    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                    pose_result = pose_landmarker.detect(mp_image)
                    hand_result = hand_landmarker.detect(mp_image)

                    frame_landmarks.append(extract_keypoints(pose_result, hand_result))
            finally:
                # Release the decoder even if a frame fails to process.
                cap.release()

            # --- Prepare sequence for the model ---
            if not frame_landmarks:
                print(f"\nVideo: {video_file}")
                print("Result: Could not process video, no frames found.")
                continue

            # Pad or sample the sequence to match the required length
            if len(frame_landmarks) >= sequence_length:
                # Evenly spaced frame indices across the whole clip.
                indices = np.linspace(0, len(frame_landmarks) - 1, sequence_length, dtype=int)
                final_sequence = [frame_landmarks[i] for i in indices]
            else:
                # Repeat the last frame; build a new list instead of
                # extending (and thereby mutating) frame_landmarks.
                padding = [frame_landmarks[-1]] * (sequence_length - len(frame_landmarks))
                final_sequence = frame_landmarks + padding

            # --- Make Prediction ---
            # The model expects a batch axis, hence expand_dims; [0] takes the
            # single row of class probabilities back out.
            prediction = model.predict(np.expand_dims(final_sequence, axis=0))[0]
            best_index = np.argmax(prediction)
            predicted_action = actions[best_index]
            confidence = prediction[best_index]

            # --- Print the Result ---
            print(f"\nVideo: {video_file}")
            if confidence >= CONFIDENCE_THRESHOLD:
                print(f"Result: Found sign '{predicted_action}' with {confidence:.2%} confidence.")
            else:
                print(f"Result: No sign detected with high confidence. (Best guess: '{predicted_action}' at {confidence:.2%})")

# --- Main Execution ---
# Only start the analysis when the configured test folder actually exists;
# otherwise report the bad path and do nothing.
if __name__ == "__main__":
    if not os.path.isdir(VIDEO_TEST_FOLDER):
        print(f"Error: The folder '{VIDEO_TEST_FOLDER}' does not exist. Please check the path.")
    else:
        analyze_videos()



Model loaded successfully.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 611ms/step

Video: WhatsApp Video 2025-09-25 at 7.24.29 PM.mp4
Result: Found sign 'obrigado' with 100.00% confidence.
