In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# Authenticates this session with Kaggle before the dataset downloads in the
# next cell.
# NOTE(review): presumably `kagglehub.login()` asks for credentials
# interactively — confirm before running this notebook unattended.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# Download the two datasets used by this notebook (raw videos and
# pre-extracted keypoints).
# NOTE(review): the returned paths are not referenced by the later cells,
# which hard-code '/kaggle/input/...' locations — verify that those locations
# exist when running outside Kaggle.
swethas658768767_datasetone_path = kagglehub.dataset_download('swethas658768767/datasetone')
swethas658768767_newkeypoints_path = kagglehub.dataset_download('swethas658768767/newkeypoints')

print('Data source import complete.')


In [None]:
pip install mediapipe

In [None]:
pip install opencv-python

In [None]:
pip install SpeechRecognition

In [None]:
import os
import zipfile

def zip_folder(folder_path, output_path):
    """Zip a folder and its contents into a single archive.

    Entries are stored relative to the parent of ``folder_path``, so the
    folder's own name is the top-level directory inside the archive. The
    archive itself is never added, even when ``output_path`` lies inside
    ``folder_path``.

    Args:
        folder_path: The path to the folder to zip.
        output_path: The path to the output zip file.
    """
    # The archive is created before os.walk runs, so an archive located inside
    # folder_path would otherwise be listed and copied into itself.
    archive_abspath = os.path.abspath(output_path)
    archive_root = os.path.join(folder_path, '..')

    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                if os.path.abspath(file_path) == archive_abspath:
                    continue
                zipf.write(file_path, os.path.relpath(file_path, archive_root))

# Example usage:
folder_to_zip = '/kaggle/working/keypoints'  # Replace with your folder path
output_zip_file = '/kaggle/working/keypoints.zip'  # Provide the full path including file name

# On a fresh kernel this cell runs before the cell that creates the keypoints
# folder. Zipping a missing folder would silently produce an empty archive and
# still report success, so only zip (and report) when the folder exists.
if os.path.isdir(folder_to_zip):
    zip_folder(folder_to_zip, output_zip_file)
    print(f"Folder '{folder_to_zip}' zipped to '{output_zip_file}'")
else:
    print(f"Folder '{folder_to_zip}' does not exist yet; nothing was zipped.")


In [None]:
import cv2
import numpy as np
import os
from mediapipe import solutions

# Initialize Mediapipe Holistic
# A single Holistic instance is shared by every call to extract_keypoints().
# NOTE(review): with static_image_mode=False MediaPipe presumably treats
# successive frames as one continuous stream, so tracking state may carry over
# from one video to the next — confirm whether that is acceptable.
mp_holistic = solutions.holistic
holistic = mp_holistic.Holistic(static_image_mode=False, model_complexity=2)

# Path to new dataset directory
dataset_path = '/kaggle/input/datasetone/videosone'
output_path = '/kaggle/working/keypoints/'  # Directory to save keypoint .npy files
os.makedirs(output_path, exist_ok=True)     # Ensure output directory exists

# Target length to pad sequences to (e.g., 30 frames)
# process_videos() zero-pads shorter videos and truncates longer ones to this
# many frames.
target_length = 30

# Function to extract keypoints from a frame
def extract_keypoints(frame):
    """Run the shared Holistic model on one BGR frame and flatten the landmarks.

    The result concatenates, in this order, the (x, y, z) triples of the face,
    left-hand, right-hand and pose landmarks. A group that was not detected is
    replaced by all-zero triples (468, 21, 21 and 33 of them respectively).

    Args:
        frame: Image in OpenCV BGR channel order.

    Returns:
        1-D NumPy array holding every (x, y, z) triple back to back.
    """
    detection = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # (attribute on the Holistic result, number of zero rows used when missing)
    landmark_groups = (
        ("face_landmarks", 468),
        ("left_hand_landmarks", 21),
        ("right_hand_landmarks", 21),
        ("pose_landmarks", 33),
    )

    rows = []
    for attribute, fallback_count in landmark_groups:
        group = getattr(detection, attribute)
        if group:
            rows.extend(np.array([point.x, point.y, point.z]) for point in group.landmark)
        else:
            rows.extend(np.zeros(3) for _ in range(fallback_count))

    # Collapse the (num_landmarks, 3) table into a single feature vector.
    return np.array(rows).flatten()

# Function to process each video and save its features as a .npy file
def process_videos():
    """Extract keypoints from every video in ``dataset_path`` and save them.

    For each file the first ``target_length`` frames are passed through
    ``extract_keypoints``; shorter videos are padded with all-zero frames. The
    resulting ``(target_length, num_features)`` array is written to
    ``output_path`` as ``<video name>_features.npy``. Files that yield no
    frames are reported and skipped.
    """
    # Sorted so that runs are reproducible; os.listdir order is arbitrary.
    for video_file in sorted(os.listdir(dataset_path)):
        video_path = os.path.join(dataset_path, video_file)
        cap = cv2.VideoCapture(video_path)
        frames = []

        try:
            # Only the first target_length frames are kept, so stop decoding
            # (and running the model) as soon as enough have been collected.
            while cap.isOpened() and len(frames) < target_length:
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(extract_keypoints(frame))
        finally:
            # Release the capture even if keypoint extraction raises.
            cap.release()

        if not frames:
            print(f"Warning: No frames found in video {video_file}")
            continue

        # Pad short sequences with zero frames, then cut to the target length.
        if len(frames) < target_length:
            frames += [np.zeros_like(frames[0])] * (target_length - len(frames))
        frames = np.array(frames[:target_length])

        # Save features for the current video
        video_name = os.path.splitext(video_file)[0]  # Remove file extension
        npy_save_path = os.path.join(output_path, f"{video_name}_features.npy")
        np.save(npy_save_path, frames)
        print(f'Saved features for video {video_file} at {npy_save_path}')

# Run the feature extraction
process_videos()
# Close the shared Holistic instance once every video has been processed.
holistic.close()


In [None]:
import os
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow.keras import layers, Input, Model, Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import pickle  # For saving and loading the tokenizer

# Paths to data folders and save path
keypoints_folder = '/kaggle/input/newkeypoints/keypoints/keypoints'
metadata_path = '/kaggle/input/newkeypoints/metadata_with_transcriptions.csv'
save_path = '/kaggle/working/'

# GAN Parameters
latent_dim = 100
sequence_length = 30
# 543 landmarks (468 face + 21 left hand + 21 right hand + 33 pose, the layout
# produced by extract_keypoints) times 3 coordinates each.
num_keypoints = 543 * 3
batch_size = 8
epochs = 350

# Load metadata and perform train-test split
metadata = pd.read_csv(metadata_path)
train_metadata, test_metadata = train_test_split(metadata, test_size=0.2, random_state=42)

# Initialize the Tokenizer and fit it on the training split only
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")  # Adjust num_words to fit vocabulary size
tokenizer.fit_on_texts(train_metadata['phrase'])  # Text is read from the 'phrase' column

# Save the tokenizer to use in deployment
# NOTE(review): pickle files can execute code when loaded — only unpickle this
# file from a trusted location.
with open(os.path.join(save_path, 'tokenizer.pickle'), 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Load keypoints function
def load_keypoints(data_folder, metadata):
    """Load the saved keypoint sequences listed in ``metadata``.

    For each row, the file ``<filename without extension>_features.npy`` is
    looked up in ``data_folder``. Missing files and arrays whose shape is not
    ``(sequence_length, num_keypoints)`` are reported and skipped.

    Args:
        data_folder: Directory containing the ``*_features.npy`` files.
        metadata: DataFrame with a ``'filename'`` column.

    Returns:
        Array of shape ``(n_loaded, sequence_length, num_keypoints)``. The
        shape stays three-dimensional even when nothing could be loaded.
    """
    expected_shape = (sequence_length, num_keypoints)
    features = []
    for _, row in metadata.iterrows():
        # Mirror how the extraction step names its outputs (extension removed
        # with os.path.splitext), so videos that are not ".mp4" are found too.
        stem = os.path.splitext(row['filename'])[0]
        file_path = os.path.join(data_folder, f"{stem}_features.npy")
        if not os.path.exists(file_path):
            print(f"Warning: {file_path} not found.")
            continue

        feature = np.load(file_path)
        if feature.shape != expected_shape:
            print(f"Warning: Skipping {row['filename']} due to shape mismatch {feature.shape}")
            continue
        features.append(feature)

    if not features:
        # np.array([]) would have shape (0,), which breaks downstream slicing.
        return np.empty((0,) + expected_shape)
    return np.array(features)

# Load datasets
train_features = load_keypoints(keypoints_folder, train_metadata)
test_features = load_keypoints(keypoints_folder, test_metadata)
# Only the training set is shuffled. batch() is called without drop_remainder,
# so the final batch of each pass may hold fewer than batch_size sequences.
train_dataset = tf.data.Dataset.from_tensor_slices(train_features).shuffle(buffer_size=1000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices(test_features).batch(batch_size)

# Text-to-Feature Mapping Model
def build_text_to_feature_mapping():
    """Build the model that maps a token-id sequence to a latent vector.

    Variable-length int32 token ids are embedded (vocabulary size 5000,
    ``latent_dim`` dimensions), averaged over the sequence axis and projected
    by a linear Dense layer to ``latent_dim`` values.

    Returns:
        Keras ``Model`` named ``"text_to_feature_mapping"``.
    """
    token_ids = Input(shape=(None,), dtype="int32")
    embedded = layers.Embedding(input_dim=5000, output_dim=latent_dim)(token_ids)
    pooled = layers.GlobalAveragePooling1D()(embedded)
    projection = layers.Dense(latent_dim)
    return Model(token_ids, projection(pooled), name="text_to_feature_mapping")

# Generator Model
def build_generator():
    """Build the generator: latent vector -> keypoint sequence.

    A ``latent_dim`` vector is expanded to a
    ``(sequence_length, num_keypoints)`` grid, passed through stacked
    (bidirectional) LSTMs with one residual self-attention step, and mapped per
    time step to ``num_keypoints`` tanh-activated outputs.

    Returns:
        Keras ``Model`` taking ``(latent_dim,)`` inputs and producing
        ``(sequence_length, num_keypoints)`` outputs.
    """
    latent = Input(shape=(latent_dim,))

    # Layers applied one after another before the attention step.
    pre_attention = [
        layers.Dense(sequence_length * num_keypoints, activation="relu"),
        layers.BatchNormalization(),
        layers.Reshape((sequence_length, num_keypoints)),
        layers.Bidirectional(layers.LSTM(512, return_sequences=True, activation="tanh")),
        layers.Dropout(0.3),
        layers.Bidirectional(layers.LSTM(256, return_sequences=True, activation="relu")),
    ]
    hidden = latent
    for layer in pre_attention:
        hidden = layer(hidden)

    # Self-attention over the sequence, added back as a residual.
    attended = layers.MultiHeadAttention(num_heads=4, key_dim=128)(hidden, hidden)
    hidden = layers.Add()([hidden, attended])

    # Layers applied one after another after the attention step.
    post_attention = [
        layers.Bidirectional(layers.LSTM(128, return_sequences=True, activation="relu")),
        layers.LSTM(256, return_sequences=True, activation="tanh"),
        layers.TimeDistributed(layers.Dense(num_keypoints, activation="tanh")),
    ]
    for layer in post_attention:
        hidden = layer(hidden)

    return Model(latent, hidden)

# Discriminator Model
def build_discriminator():
    """Build the discriminator: keypoint sequence -> probability of being real.

    Input noise is followed by five strided Conv1D + LeakyReLU stages (dropout
    after the third and fifth), then a flatten and a single sigmoid unit.

    Returns:
        Keras ``Sequential`` model taking ``(sequence_length, num_keypoints)``
        inputs.
    """
    # (filters, whether a Dropout(0.4) follows the activation) per conv stage.
    conv_stages = ((64, False), (128, False), (256, True), (512, False), (256, True))

    stack = [
        layers.Input(shape=(sequence_length, num_keypoints)),
        layers.GaussianNoise(0.1),
    ]
    for filters, followed_by_dropout in conv_stages:
        stack.append(layers.Conv1D(filters, kernel_size=3, strides=2, padding="same"))
        stack.append(layers.LeakyReLU(0.2))
        if followed_by_dropout:
            stack.append(layers.Dropout(0.4))
    stack += [layers.Flatten(), layers.Dense(1, activation="sigmoid")]

    return Sequential(stack)

# Instantiate models
generator = build_generator()
discriminator = build_discriminator()
# NOTE(review): text_to_feature_mapping is built here and saved by train(),
# but train_step() never updates it — confirm it is trained elsewhere.
text_to_feature_mapping = build_text_to_feature_mapping()

# Losses and Optimizers
# The discriminator ends in a sigmoid, so the loss receives probabilities;
# BinaryCrossentropy is left at its defaults.
cross_entropy = tf.keras.losses.BinaryCrossentropy()
generator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5)


# Training Step Function
@tf.function
def train_step(video_batch):
    """Run one simultaneous generator/discriminator update.

    Args:
        video_batch: Batch of real keypoint sequences, shaped
            ``(batch, sequence_length, num_keypoints)``.

    Returns:
        Tuple ``(disc_loss, gen_loss)`` of scalar loss tensors for this batch.
    """
    # The dataset is batched without drop_remainder, so the last batch of an
    # epoch can be smaller than batch_size. Generate exactly as many fake
    # sequences as there are real ones so both halves of the discriminator
    # loss are computed over the same number of samples.
    current_batch_size = tf.shape(video_batch)[0]
    noise = tf.random.normal([current_batch_size, latent_dim])

    with tf.GradientTape() as disc_tape, tf.GradientTape() as gen_tape:
        generated_videos = generator(noise, training=True)

        real_output = discriminator(video_batch, training=True)
        fake_output = discriminator(generated_videos, training=True)

        # Discriminator: real -> 1, fake -> 0.
        disc_loss = cross_entropy(tf.ones_like(real_output), real_output) + \
                    cross_entropy(tf.zeros_like(fake_output), fake_output)

        # Generator: make the discriminator label fakes as real.
        gen_loss = cross_entropy(tf.ones_like(fake_output), fake_output)

    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)

    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))

    return disc_loss, gen_loss

# Save generated frames after training
def save_generated_frames(generator, num_samples, save_dir):
    """Sample sequences from ``generator`` and write every frame to disk.

    Creates ``save_dir/generated_video_<i>/frame_<j>.npy`` for each generated
    sequence ``i`` and each of its frames ``j``.

    Args:
        generator: Model exposing ``predict`` that maps latent noise to
            keypoint sequences.
        num_samples: Number of sequences to generate.
        save_dir: Directory that receives one sub-folder per sequence.
    """
    os.makedirs(save_dir, exist_ok=True)
    generated_videos = generator.predict(tf.random.normal([num_samples, latent_dim]))

    for i in range(len(generated_videos)):
        video = generated_videos[i]
        video_path = os.path.join(save_dir, f"generated_video_{i}")
        os.makedirs(video_path, exist_ok=True)

        # One .npy file per frame of the sequence.
        for j in range(len(video)):
            np.save(os.path.join(video_path, f"frame_{j}.npy"), video[j])

        print(f"Saved generated video {i} to {video_path}")

# Training Loop with frame-saving at the end
def train(train_dataset, test_dataset, epochs, save_dir):
    """Train the GAN, then save the models and a few generated samples.

    Args:
        train_dataset: Iterable of real keypoint batches fed to ``train_step``.
        test_dataset: Iterable of held-out batches; one generator sample batch
            is produced (and its shape printed) per test batch.
        epochs: Number of passes over ``train_dataset``.
        save_dir: Directory that receives the saved models and the
            ``generated_frames`` folder.

    Raises:
        ValueError: If ``train_dataset`` yields no batches.
    """
    for epoch in range(epochs):
        disc_loss = gen_loss = None
        for video_batch in train_dataset:
            disc_loss, gen_loss = train_step(video_batch)

        if disc_loss is None:
            # Without this check the print below fails with an unrelated
            # "referenced before assignment" error.
            raise ValueError("train_dataset produced no batches; nothing to train on.")

        # Reported losses are those of the last batch of the epoch.
        print(f"Epoch {epoch + 1}/{epochs}, Disc Loss: {float(disc_loss):.4f}, Gen Loss: {float(gen_loss):.4f}")

        # Evaluate generator on test set at each epoch
        # NOTE(review): the test batch contents are not used; this only checks
        # that the generator produces output of the expected shape.
        for test_video_batch in test_dataset:
            test_generated = generator(tf.random.normal([batch_size, latent_dim]), training=False)
            print(f"Test Batch Generated Shape: {test_generated.shape}")

    # Honour the save_dir argument instead of the module-level save_path.
    os.makedirs(save_dir, exist_ok=True)

    # Save models after training
    generator.save(os.path.join(save_dir, "asr_generator.keras"))
    discriminator.save(os.path.join(save_dir, "asr_discriminator.keras"))
    text_to_feature_mapping.save(os.path.join(save_dir, "final_text_to_feature_mapping.keras"))

    # Save generated frames to a folder
    save_generated_frames(generator, num_samples=10, save_dir=os.path.join(save_dir, "generated_frames"))

# Start training
# Runs the full training loop; models and sample frames are saved once all
# epochs have finished.
train(train_dataset, test_dataset, epochs, save_dir=save_path)


In [None]:
import os
import numpy as np
import cv2

# Directory containing the generated video folders with .npy files
# (one sub-folder per generated sequence, one .npy file per frame).
generated_frames_dir = "/kaggle/working/generated_frames"  # Adjust this path to where your frames are stored
output_png_dir = "/kaggle/working/color_color_png"  # Output folder for the PNG files

# Define the connections between keypoints for skeletal representation (only relevant body parts)
# Indices refer to rows of the `relevant_keypoints` array built in
# npy_to_colored_gesture_png(): rows 0-20 are the right hand, rows 21-41 the
# left hand, and rows 42-47 are left shoulder, left elbow, left wrist,
# right shoulder, right elbow, right wrist.
_HAND_CONNECTIONS = [
    (0, 1), (1, 2), (2, 3), (3, 4),   # Thumb
    (0, 5), (5, 6), (6, 7), (7, 8),   # Index
    (0, 9), (9, 10), (10, 11), (11, 12),  # Middle
    (0, 13), (13, 14), (14, 15), (15, 16),  # Ring
    (0, 17), (17, 18), (18, 19), (19, 20),  # Pinky
]
_SECOND_HAND_OFFSET = 21  # first row of the left hand

connections = (
    _HAND_CONNECTIONS  # Right hand (kept first so existing colour keys still apply)
    + [(start + _SECOND_HAND_OFFSET, end + _SECOND_HAND_OFFSET)
       for start, end in _HAND_CONNECTIONS]  # Left hand
    + [
        (42, 43), (43, 44),  # Left arm: shoulder-elbow, elbow-wrist
        (45, 46), (46, 47),  # Right arm: shoulder-elbow, elbow-wrist
        (42, 45),  # Shoulders
    ]
)

# Function to draw skeletal gestures with colors and annotations
def draw_colored_skeleton(image, keypoints, connections, colors, annotations, radius=5, thickness=2):
    """Draw keypoints, their connections and optional labels onto ``image``.

    Args:
        image: Image array that OpenCV draws on in place.
        keypoints: Array with one row per keypoint; the first two columns are
            the x and y pixel coordinates.
        connections: Sequence of ``(start, end)`` row-index pairs to join with
            a line. Pairs that point past the last keypoint are skipped.
        colors: Mapping from position in ``connections`` to a line colour;
            connections without an entry are drawn white.
        annotations: Mapping from keypoint row index to a text label.
        radius: Radius of each keypoint dot.
        thickness: Line thickness of each connection.
    """
    def to_pixel(point):
        # OpenCV drawing functions want plain Python ints; NumPy integer
        # scalars are rejected by some OpenCV builds.
        return (int(point[0]), int(point[1]))

    for idx, (start, end) in enumerate(connections):
        if start < len(keypoints) and end < len(keypoints):
            color = colors.get(idx, (255, 255, 255))  # Default to white if no color specified
            cv2.line(image, to_pixel(keypoints[start]), to_pixel(keypoints[end]), color, thickness)

    for idx, keypoint in enumerate(keypoints):
        pt = to_pixel(keypoint)
        cv2.circle(image, pt, radius, (0, 255, 0), -1)  # Default point color is green

        # Annotate keypoints
        if idx in annotations:
            cv2.putText(
                image, annotations[idx], pt, cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1, cv2.LINE_AA
            )

# Example of color mapping and annotations
# Keys are positions in `connections` (draw_colored_skeleton looks colours up
# by connection index). The first hand lists four segments per finger, thumb
# first, so finger f owns keys 4*f .. 4*f + 3.
# Colours are in OpenCV's BGR channel order.
_FINGER_COLORS_BGR = [
    (0, 0, 255),  # Red for thumb
    (0, 255, 0),  # Green for index finger
    (255, 0, 0),  # Blue for middle finger
    # Add more colors for other fingers if needed
]

color_map = {
    finger * 4 + segment: color
    for finger, color in enumerate(_FINGER_COLORS_BGR)
    for segment in range(4)
}

# Labels keyed by row index of the `relevant_keypoints` array built in
# npy_to_colored_gesture_png(): rows 0-20 right hand, rows 21-41 left hand,
# rows 42-47 left shoulder/elbow/wrist then right shoulder/elbow/wrist.
annotations = {
    0: "Right-Hand Start",
    21: "Left-Hand Start",
    42: "L-Shoulder",
    43: "L-Elbow",
    44: "L-Wrist",
    45: "R-Shoulder",
    46: "R-Elbow",
    47: "R-Wrist",
}

# Function to convert keypoints .npy files to gesture-like .png images with colors and annotations
def npy_to_colored_gesture_png(input_dir, output_dir, image_size=(512, 512)):
    """Render every saved keypoint frame under ``input_dir`` as a PNG skeleton.

    ``input_dir`` is expected to contain one sub-folder per sequence, each
    holding ``.npy`` frame files. For every frame the hand and arm keypoints
    are selected, scaled to pixel coordinates and drawn with
    ``draw_colored_skeleton``; the image is written to the mirrored location
    under ``output_dir`` with a ``.png`` extension.

    Args:
        input_dir: Directory with one folder of ``.npy`` frames per sequence.
        output_dir: Directory that receives the rendered PNG files.
        image_size: ``(height, width)`` of the rendered images.
    """
    # Layout of one frame as written by the extraction step:
    # 468 face, 21 left-hand, 21 right-hand and 33 pose landmarks, in that order.
    face_count, hand_count, pose_count = 468, 21, 33
    left_hand_start = face_count
    right_hand_start = left_hand_start + hand_count
    pose_start = right_hand_start + hand_count
    total_points = pose_start + pose_count

    # MediaPipe pose landmark numbers: left shoulder/elbow/wrist are 11/13/15,
    # right shoulder/elbow/wrist are 12/14/16.
    arm_joints = np.array([11, 13, 15, 12, 14, 16])

    # Row order of the selection: right hand, left hand, then
    # L-shoulder, L-elbow, L-wrist, R-shoulder, R-elbow, R-wrist.
    selected_rows = np.concatenate([
        np.arange(right_hand_start, pose_start),
        np.arange(left_hand_start, right_hand_start),
        pose_start + arm_joints,
    ])

    os.makedirs(output_dir, exist_ok=True)

    # Iterate through each video folder
    for video_folder in os.listdir(input_dir):
        video_path = os.path.join(input_dir, video_folder)
        if not os.path.isdir(video_path):
            continue

        output_video_path = os.path.join(output_dir, video_folder)
        os.makedirs(output_video_path, exist_ok=True)

        # Iterate through all .npy files in the folder
        for frame_file in sorted(os.listdir(video_path)):
            if not frame_file.endswith(".npy"):
                continue

            # Load the flattened frame and restore one (x, y, z) row per landmark
            keypoints = np.load(os.path.join(video_path, frame_file)).reshape(-1, 3)
            if keypoints.shape[0] != total_points:
                print(f"Warning: Skipping {frame_file}: expected {total_points} keypoints, got {keypoints.shape[0]}")
                continue

            # Fancy indexing copies, so the scaling below leaves `keypoints` intact.
            relevant_keypoints = keypoints[selected_rows]

            # Create a blank image
            image = np.zeros((image_size[0], image_size[1], 3), dtype=np.uint8)

            # Normalize keypoints to image coordinates, mapping [-1, 1] onto the image.
            # NOTE(review): MediaPipe coordinates are presumably in [0, 1]; if the
            # generator reproduces that range the skeleton will sit in the
            # lower-right quadrant — confirm the intended scaling.
            relevant_keypoints[:, 0] = (relevant_keypoints[:, 0] + 1) * (image_size[1] / 2)  # X-axis
            relevant_keypoints[:, 1] = (relevant_keypoints[:, 1] + 1) * (image_size[0] / 2)  # Y-axis

            # Draw the skeleton with relevant keypoints
            draw_colored_skeleton(image, relevant_keypoints, connections, color_map, annotations)

            # Save as PNG
            frame_png_path = os.path.join(output_video_path, frame_file.replace(".npy", ".png"))
            cv2.imwrite(frame_png_path, image)

            print(f"Converted {frame_file} to {frame_png_path}")

# Convert .npy to gesture PNGs with colors and annotations
# Reads the frames saved under generated_frames_dir and writes one PNG per
# frame under output_png_dir.
npy_to_colored_gesture_png(generated_frames_dir, output_png_dir)


In [None]:
import shutil
import os

def zip_folder(folder_path, output_zip_path):
    """
    Pack the contents of a folder into a zip archive.

    The archive holds the contents of ``folder_path`` directly, without a
    top-level entry for the folder itself. This definition replaces the
    ``zip_folder`` defined in an earlier cell.

    Args:
        folder_path (str): Folder whose contents are archived.
        output_zip_path (str): Destination of the archive, with or without a
            trailing ".zip".
    """
    # shutil.make_archive appends ".zip" on its own, so hand it the bare base name.
    has_zip_suffix = output_zip_path.endswith(".zip")
    output_zip_path = output_zip_path[:-4] if has_zip_suffix else output_zip_path

    shutil.make_archive(base_name=output_zip_path, format='zip', root_dir=folder_path)
    print(f"Folder '{folder_path}' compressed into '{output_zip_path}.zip'.")

# Example usage
# Bundle the rendered PNG frames into a single archive for download.
folder_to_compress = "/kaggle/working/color_color_png"  # Replace with your folder path
output_zip_file = "/kaggle/working/colorframes"  # Replace with the desired zip file name (without extension)

# Call the function
zip_folder(folder_to_compress, output_zip_file)
