In [1]:
!pip install XlsxWriter


  pid, fd = os.forkpty()


Collecting XlsxWriter
  Downloading XlsxWriter-3.2.0-py3-none-any.whl.metadata (2.6 kB)
Downloading XlsxWriter-3.2.0-py3-none-any.whl (159 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.9/159.9 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: XlsxWriter
Successfully installed XlsxWriter-3.2.0


In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import random
import tensorflow as tf
from tensorflow.keras import layers, models
import ast
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import os
from datetime import datetime
import joblib

In [3]:
# Load the combined landmark dataset (columns shown by the preview cell: `class`, `landmarks`).
# NOTE(review): the unusual spelling in the directory name is presumably the real Kaggle
# dataset slug, so it must stay as-is for the path to resolve.
df = pd.read_csv('/kaggle/input/sign-langauge-translater/combined_dataset.csv')
# Disabled legacy path: the data used to be assembled from separate CSV parts.
# df2 = pd.read_csv('hand_landmarks_from_4500_to_5000.csv')

# df = pd.concat([df1, df2], ignore_index=True)
# df

In [4]:
# Preview the first rows to confirm the `class` and `landmarks` columns loaded as expected.
df.head()

Unnamed: 0,class,landmarks
0,a,"[[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0],..."
1,a,"[[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0],..."
2,a,"[[[[0.5544530153274536, 0.8488561511039734, -6..."
3,a,"[[[[0.558426558971405, 0.804563581943512, -5.1..."
4,a,"[[[[0.5819562673568726, 0.8947145342826843, -6..."


In [5]:
def preprocess_data(df, landmarks_column="landmarks", class_column="class"):
    """
    Preprocess the dataset for model training.

    The input frame is left untouched: all work happens on a copy, so the
    function can be re-run on the same raw frame and always gives the same
    result.

    Args:
        df (pd.DataFrame): DataFrame with class labels and landmarks columns.
        landmarks_column (str): Name of the column containing landmarks, either
            as the string repr of a nested list or as an array-like.
        class_column (str): Name of the column containing class labels.

    Returns:
        pd.DataFrame: Copy of ``df`` whose landmarks are NumPy arrays, with an
            extra integer ``encoded_class`` column.
        LabelEncoder: Fitted encoder for decoding class labels.
    """
    def _to_array(value):
        # CSV round-trips store each landmark list as its repr string; anything
        # else is assumed to already be array-like.
        if isinstance(value, str):
            return np.array(ast.literal_eval(value))
        return np.array(value)

    # Copy first so the caller's DataFrame is not mutated as a side effect.
    processed = df.copy()

    # Ensure landmarks are in numeric format
    processed[landmarks_column] = processed[landmarks_column].apply(_to_array)

    # Encode class labels
    le = LabelEncoder()
    processed["encoded_class"] = le.fit_transform(processed[class_column])

    return processed, le

# Apply the preprocessing: parse the landmark strings into arrays and add `encoded_class`.
# `label_encoder` is kept so encoded class ids can be decoded back to class names.
df, label_encoder = preprocess_data(df)


In [6]:
# Number of distinct gesture classes; later cells use it as the one-hot width
# and as the size of the model's output layer.
num_classes = len(label_encoder.classes_)
print(label_encoder.classes_, num_classes)

['a' 'about' 'aim' 'all' 'and' 'audio' 'b' 'barrier' 'break' 'c' 'can'
 'communication' 'creative' 'd' 'detect' 'developed' 'e' 'f' 'g' 'h'
 'have' 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'our' 'p' 'project' 'q' 'r' 's'
 'sign language' 'solution' 't' 'team' 'text' 'that' 'to' 'translate' 'u'
 'v' 'w' 'what' 'x' 'y' 'you' 'z'] 50


In [7]:
def augment_landmarks(landmarks, max_noise=0.01, scale_range=(0.9, 1.1)):
    """
    Return a randomly jittered and rescaled copy of a landmarks array.

    Uniform noise in ``[-max_noise, max_noise]`` is added independently to every
    coordinate (drawn from ``np.random``), then the whole array is multiplied by
    one scale factor drawn from ``scale_range`` (via the ``random`` module).
    The input array itself is not modified.

    Args:
        landmarks (np.ndarray): Original landmarks array.
        max_noise (float): Maximum absolute noise added to each coordinate.
        scale_range (tuple): ``(low, high)`` bounds for the random scale factor.

    Returns:
        np.ndarray: Augmented landmarks as float64, same shape as the input.
    """
    # astype() yields a fresh float64 array, so the caller's data stays intact.
    base = landmarks.astype(np.float64)

    # Per-coordinate jitter first, then a single global scale factor
    # (this draw order is what determines the result for a given seed).
    jitter = np.random.uniform(-max_noise, max_noise, size=base.shape)
    factor = random.uniform(*scale_range)

    jittered = base + jitter
    return jittered * factor

In [8]:
def augment_dataset_in_batches(df, num_augmentations=5, batch_size=1000):
    """
    Lazily generate augmented rows and hand them out in fixed-size chunks.

    Every row of ``df`` produces ``num_augmentations`` augmented rows. Rows are
    buffered and emitted as a DataFrame each time the buffer reaches
    ``batch_size``; a final, possibly smaller, DataFrame carries the leftovers.

    Args:
        df (pd.DataFrame): DataFrame with 'class' and 'landmarks' columns
            (and optionally 'encoded_class').
        num_augmentations (int): Number of augmented rows to create per original row.
        batch_size (int): Number of augmented rows per yielded DataFrame.

    Yields:
        pd.DataFrame: Batch with 'class', 'landmarks' (nested lists) and
            'encoded_class' (None when the source row has no such field).
    """
    def _augmented_records():
        # One record per (source row, augmentation) pair, produced on demand.
        for _, source_row in df.iterrows():
            base = np.array(source_row["landmarks"])
            for _copy in range(num_augmentations):
                jittered = augment_landmarks(base)
                yield {
                    "class": source_row["class"],
                    "landmarks": jittered.tolist(),
                    "encoded_class": source_row.get("encoded_class", None),
                }

    pending = []
    for record in _augmented_records():
        pending.append(record)
        # Flush as soon as the buffer is full.
        if len(pending) >= batch_size:
            yield pd.DataFrame(pending)
            pending = []

    # Flush whatever is left over after the last full batch.
    if pending:
        yield pd.DataFrame(pending)


In [9]:
def save_augmented_data(df, num_augmentations=5, batch_size=1000, output_file="augmented_data.csv"):
    """
    Stream augmented rows for ``df`` into a CSV file, one batch at a time.

    Args:
        df (pd.DataFrame): Source rows passed to ``augment_dataset_in_batches``.
        num_augmentations (int): Augmented rows to generate per source row.
        batch_size (int): Rows per batch written to disk.
        output_file (str): Destination CSV path; an existing file is overwritten.
    """
    with open(output_file, 'w', newline='') as sink:
        needs_header = True
        for chunk in augment_dataset_in_batches(df, num_augmentations, batch_size):
            # Only the first chunk carries the column header.
            chunk.to_csv(sink, mode='a', header=needs_header, index=False)
            needs_header = False

In [None]:
# Seed both RNGs used by `augment_landmarks` (NumPy for the per-coordinate noise,
# `random` for the scale factor) so the generated CSV is reproducible across runs.
random.seed(42)
np.random.seed(42)
save_augmented_data(df, num_augmentations=50, batch_size=1000, output_file="augmented_data.csv")

In [None]:
# Define timesteps, frames, and features
# NOTE(review): later cells hard-code reshape(50, 21, 6) instead of using these names,
# `frames` is not referenced again in the visible cells, and `features` is reassigned
# to 21 * 6 before the model is built.
timesteps = 50  # Time steps per sample (matches the leading 50 in the later reshapes)
frames = 21  # presumably landmarks per hand rather than video frames — TODO confirm
features = 3  # presumably (x, y, z) per landmark — TODO confirm

In [None]:
# Build the combined frame that the split below expects. No visible cell defines
# `df_org_augmented`, so on a fresh kernel this cell failed with a NameError.
# Assumption (from the name): it is the original rows plus the augmented rows written
# to "augmented_data.csv" by `save_augmented_data` — TODO confirm with the author.
df_augmented = pd.read_csv("augmented_data.csv")
# The CSV stores each landmarks list as its repr string; parse it back to nested lists.
df_augmented["landmarks"] = df_augmented["landmarks"].apply(ast.literal_eval)
df_org_augmented = pd.concat(
    [df[["class", "landmarks", "encoded_class"]], df_augmented],
    ignore_index=True,
)

# NOTE(review): the split happens after augmentation, so augmented copies of one
# original sample can land in both sets and inflate validation scores. Consider
# splitting the original rows first and augmenting only the training part.
# Split the dataset into training and validation sets
df_train, df_val = train_test_split(df_org_augmented, test_size=0.2, random_state=42)

In [None]:
# Every sample is reshaped to this fixed layout before being stacked.
sample_shape = (50, 21, 6)

# Features and labels for the training set
X_train = np.array(
    [np.array(sample).reshape(sample_shape) for sample in df_train["landmarks"]]
)
y_train = df_train["encoded_class"].values

# Features and labels for the validation set
X_val = np.array(
    [np.array(sample).reshape(sample_shape) for sample in df_val["landmarks"]]
)
y_val = df_val["encoded_class"].values

# Sanity-check the split sizes and array shapes
print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")
for array_name, array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_val", X_val),
    ("y_val", y_val),
):
    print(f"{array_name} shape: {array.shape}")

In [None]:
# StandardScaler works on 2D input, so collapse each sample to a single row first.
n_train, n_val = X_train.shape[0], X_val.shape[0]
X_train_flattened = X_train.reshape(n_train, -1)
X_val_flattened = X_val.reshape(n_val, -1)

# Fit the scaler on the training rows only, then reuse its statistics for validation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_flattened)
X_val_scaled = scaler.transform(X_val_flattened)

# Restore the per-sample (50, 21, 6) layout.
X_train = X_train_scaled.reshape(n_train, 50, 21, 6)
X_val = X_val_scaled.reshape(n_val, 50, 21, 6)

In [None]:
# Check the shapes after scaling and reshaping back to (samples, 50, 21, 6)
print(f"X_train shape after reshaping: {X_train.shape}")
print(f"X_val shape after reshaping: {X_val.shape}")

In [None]:
# Convert integer class ids to one-hot vectors (required by the model's
# 'categorical_crossentropy' loss). Uses the already-imported `tf.keras` rather than a
# separate mid-notebook `keras` import, so the whole notebook relies on one Keras namespace.
y_train_one_hot = tf.keras.utils.to_categorical(y_train, num_classes)
y_val_one_hot = tf.keras.utils.to_categorical(y_val, num_classes)

In [None]:
# Sequence layout fed to the model: each sample becomes (timesteps, features).
# Note: this reassigns `features`, replacing the earlier value of 3.
timesteps = 50  # Number of timesteps (frames)
features = 21 * 6  # Flattened values per timestep (the trailing 21 x 6 of each sample)

In [None]:
# Merge the last two axes so every sample is a (timesteps, features) sequence.
X_train_vit = X_train.reshape(-1, timesteps, features)
X_val_vit = X_val.reshape(-1, timesteps, features)

In [None]:
# Define Vision Transformer (ViT) Model
def build_vit_model(input_shape, num_classes, num_heads=8, key_dim=64,
                    dropout_rate=0.1, ff_units=(512, 256), head_units=128):
    """
    Build a single-block Transformer-style classifier for gesture sequences.

    Architecture: LayerNorm -> self-attention -> dropout -> residual add with the
    input -> LayerNorm -> feed-forward Dense stack -> global average pooling over
    the sequence axis -> Dense head -> softmax. With the default arguments the
    network is identical to the previously hard-coded one.

    NOTE(review): no positional encoding is added before attention and the
    sequence is average-pooled afterwards, so the model likely cannot make use
    of the order of the timesteps — confirm whether that is intended.

    Args:
        input_shape (tuple): Shape of the input (timesteps, features).
        num_classes (int): Number of gesture classes.
        num_heads (int): Number of attention heads.
        key_dim (int): Size of each attention head for query and key.
        dropout_rate (float): Dropout rate used after attention and between
            feed-forward layers.
        ff_units (tuple): Units of the per-timestep feed-forward Dense layers,
            in order. Dropout is applied between consecutive layers.
        head_units (int): Units of the Dense layer applied after pooling.

    Returns:
        keras.Model: Model compiled with Adam and categorical cross-entropy
            (expects one-hot labels), reporting accuracy.
    """
    input_layer = layers.Input(shape=input_shape)

    # Pre-norm self-attention: the normalized sequence attends to itself.
    x = layers.LayerNormalization()(input_layer)
    x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(x, x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Add()([input_layer, x])  # Skip connection around the attention block

    # Feed-forward network applied to every timestep.
    x = layers.LayerNormalization()(x)
    for position, units in enumerate(ff_units):
        if position > 0:
            # Dropout sits between consecutive Dense layers, not after the last one.
            x = layers.Dropout(dropout_rate)(x)
        x = layers.Dense(units, activation='relu')(x)

    # Collapse the sequence axis, then classify.
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(head_units, activation='relu')(x)
    output = layers.Dense(num_classes, activation='softmax')(x)

    # Compile the model
    model = models.Model(inputs=input_layer, outputs=output)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

# Define input shape and build the ViT model.
# Each sample is a (timesteps, features) sequence, matching X_train_vit / X_val_vit.
input_shape = (timesteps, features)
model = build_vit_model(input_shape, num_classes)

# Display model summary (layers, output shapes, parameter counts)
model.summary()

In [None]:
# Set up the log directory for TensorBoard.
# A timestamped sub-directory keeps each training run's logs separate.
log_dir = os.path.join("logs", "fit", datetime.now().strftime("%Y%m%d-%H%M%S"))
# histogram_freq=1 asks the callback to compute histograms every epoch.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# Stop once validation loss has not improved for 5 consecutive epochs and roll the
# model back to the weights of its best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train on the (timesteps, features) sequences with one-hot labels; `history`
# keeps the per-epoch metrics for the plots further down.
history = model.fit(
    X_train_vit,
    y_train_one_hot,
    validation_data=(X_val_vit, y_val_one_hot),
    epochs=50,
    batch_size=8,
    callbacks=[early_stopping, tensorboard_callback],
    verbose=1,
)

In [None]:
# Plot the training curves recorded by `model.fit`, one figure per metric.
# `plt` comes from the notebook's import cell, so it is not re-imported here, and a
# single loop replaces the two copy-pasted plotting sections.
for metric_key, metric_label in (("loss", "Loss"), ("accuracy", "Accuracy")):
    fig, ax = plt.subplots()
    ax.plot(history.history[metric_key], label=f"Training {metric_label}")
    ax.plot(history.history[f"val_{metric_key}"], label=f"Validation {metric_label}")
    ax.set(
        xlabel="Epochs",
        ylabel=metric_label,
        title=f"Training and Validation {metric_label}",
    )
    ax.legend()
    plt.show()


In [None]:

# Persist the fitted StandardScaler so the same normalization can be re-applied
# wherever the saved model is used.
scaler_save_path = "scaler.pkl"
joblib.dump(scaler, scaler_save_path)
print(f"Scaler saved to: {scaler_save_path}")


In [None]:
# NOTE(review): the ".h5" suffix selects the HDF5 format; newer Keras releases favour
# the native ".keras" format — confirm what downstream loaders expect before changing.
model_save_path = "my_vit_model.h5"
# Save the entire model
model.save(model_save_path)
print(f"Model saved to: {model_save_path}")

In [None]:
# Load the saved model back from disk as a round-trip check that the file is usable
loaded_model = tf.keras.models.load_model(model_save_path)

# Verify the model structure by printing the reloaded model's summary
loaded_model.summary()
