In [2]:
import os
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences

# ============================
# 1Ô∏è‚É£ Load and Preprocess Data
# ============================

def load_keypoints(json_folder):
    data = []
    labels = []
    for file in os.listdir(json_folder):
        if file.endswith('.json'):
            phrase = file.replace('_keypoints.json', '').replace('_', ' ')
            with open(os.path.join(json_folder, file), 'r') as f:
                keypoints = json.load(f)
                all_frames = []
                for frame in keypoints.values():
                    frame_data = []
                    for part in ['pose', 'left_hand', 'right_hand', 'face']:
                        if part in frame:
                            frame_data.extend([coord for point in frame[part] for coord in point])
                        else:
                            frame_data.extend([0]*99)  # Assuming 33 keypoints * 3 (x, y, z)
                    all_frames.append(frame_data)
                data.append(all_frames)
                labels.append(phrase)
    return np.array(data, dtype=object), np.array(labels)

# Path to keypoints
json_folder = r'./keypoints_output'  # Update with actual path

data, labels = load_keypoints(json_folder)

# Encode text labels
le = LabelEncoder()
encoded_labels = le.fit_transform(labels)

# ============================
# 2Ô∏è‚É£ Pad Sequences
# ============================

# Find max frame length for padding
max_frames = max([len(seq) for seq in data])

# Standardize and pad sequences
data_padded = pad_sequences(data.tolist(), maxlen=max_frames, dtype='float32', padding='post', value=0.0)

# Split Data
X_train, X_test, y_train, y_test = train_test_split(encoded_labels, data_padded, test_size=0.2, random_state=42)

# ============================
# 3Ô∏è‚É£ Build LSTM Model
# ============================

model = Sequential([
    Embedding(input_dim=len(le.classes_), output_dim=64, input_length=1),
    LSTM(128, return_sequences=True),
    LSTM(64, return_sequences=True),
    Dense(data_padded.shape[2], activation='linear')
])

# Compile Model
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

# ============================
# 4Ô∏è‚É£ Train Model
# ============================

model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test))

# ============================
# 5Ô∏è‚É£ Save Trained Model
# ============================

model.save('text_to_sign_model.h5')

# ============================
# 6Ô∏è‚É£ Dynamic Frame-by-Frame Prediction
# ============================

def dynamic_predict_sign(phrase):
    encoded = le.transform([phrase])
    prediction = model.predict(encoded)

    # Stream keypoints frame by frame
    for frame_keypoints in prediction[0]:
        yield frame_keypoints  # Integrate with 3D avatar for real-time rendering

# Example Usage
new_phrase = "Thank you"
for frame in dynamic_predict_sign(new_phrase):
    print("Frame keypoints:", frame)

# ============================
# üéâ Now Supports Dynamic Frame-by-Frame Predictions!
# ============================


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (50,) + inhomogeneous part.