In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, LSTM, TimeDistributed, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

def load_video_dataset(data_path, sequence_length=10, frame_size=(64, 64)):
    """Load every video under ``data_path`` as fixed-length frame sequences.

    The directory tree is walked recursively. A folder whose name is a single
    upper-case letter ``A``-``Z`` is treated as a class folder. Every video in
    such a folder is decoded frame by frame, each frame is resized to
    ``frame_size``, and the frames are grouped into consecutive,
    non-overlapping chunks of ``sequence_length`` frames. Trailing frames that
    do not fill a whole chunk are discarded.

    Args:
        data_path: Root directory to scan.
        sequence_length: Number of frames in each returned sequence.
        frame_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays. ``data`` holds one entry
        per sequence with the raw (un-normalised) pixel values, ``labels`` the
        matching integer class index (``'A'`` -> 0 ... ``'Z'`` -> 25). When no
        sequence is found, ``data`` is an empty array of shape
        ``(0, sequence_length, height, width, 3)`` so downstream shape checks
        still see five axes.
    """
    # Every suffix carries its leading dot; a bare 'webm' would also match
    # unrelated names such as "clipwebm".
    video_extensions = ('.mp4', '.avi', '.mov', '.webm')
    label_map = {chr(i): idx for idx, i in enumerate(range(ord('A'), ord('Z')+1))}

    print(f"Label map: {label_map}")

    data = []
    labels = []
    for root, _, files in os.walk(data_path):
        root = root.replace('\\', '/')
        print(f"Reading folder: {root}")
        folder_name = os.path.basename(root)
        if folder_name not in label_map:
            continue
        label = label_map[folder_name]
        for file_name in files:
            if not file_name.lower().endswith(video_extensions):
                continue
            cap = cv2.VideoCapture(os.path.join(root, file_name))
            try:
                frames = []
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frames.append(cv2.resize(frame, frame_size))
                    if len(frames) == sequence_length:
                        # A full chunk is complete: store it and start a new one.
                        data.append(frames)
                        labels.append(label)
                        frames = []
            finally:
                # Release the decoder even if reading or resizing raised.
                cap.release()

    if not data:
        width, height = frame_size
        # uint8 mirrors the 8-bit frames OpenCV decodes by default.
        empty_data = np.empty((0, sequence_length, height, width, 3), dtype=np.uint8)
        return empty_data, np.empty((0,), dtype=int)

    return np.array(data), np.array(labels)

# Load dataset
data_path = 'E:/Kuliah/Semester 6/MBKM/Bangkit/Capstone/Data Mentah'
sequence_length = 10
X, y = load_video_dataset(data_path, sequence_length)

# Debugging to ensure data is loaded correctly
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")

# Stop here with a readable message instead of failing later inside
# train_test_split when the folder is missing or contains no usable videos.
if len(X) == 0:
    raise ValueError(f"No video sequences were loaded from {data_path!r}")

# Normalize images. Casting to float32 first keeps the result in float32;
# dividing the raw 8-bit array by 255.0 directly would promote the whole
# dataset to float64 and double its memory footprint.
X = X.astype(np.float32) / 255.0

# Convert labels to one-hot encoding (26 classes, one per letter A-Z)
y = to_categorical(y, num_classes=26)

# Debugging to ensure normalization and one-hot encoding succeeded
print(f"First 5 labels (one-hot encoded):\n {y[:5]}")

# Split dataset into train and test (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create model
def create_model(input_shape, num_classes):
    """Build the per-frame CNN + LSTM sequence classifier.

    Every frame passes through two Conv2D/MaxPooling2D stages (applied to each
    time step via ``TimeDistributed``) and is flattened; the resulting
    sequence of feature vectors is summarised by an LSTM and classified by two
    dense layers.

    Args:
        input_shape: Shape of one sample without the batch axis,
            ``(frames, height, width, channels)``.
        num_classes: Width of the final softmax layer.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = Sequential()
    # Declare the input with an explicit Input object instead of passing
    # `input_shape` to the first wrapped layer, which newer Keras versions
    # warn about.
    model.add(tf.keras.Input(shape=input_shape))
    model.add(TimeDistributed(Conv2D(32, (3, 3), activation='relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(Conv2D(64, (3, 3), activation='relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    # Collapse each frame's feature maps into one vector per time step.
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(128))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    return model

# One sample is `sequence_length` frames of 64x64 pixels with 3 channels.
input_shape = (sequence_length, 64, 64, 3)
num_classes = 26

# Build the network and attach optimizer, loss and metric.
model = create_model(input_shape=input_shape, num_classes=num_classes)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train model; 20% of the training arrays are held back for validation.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

# Evaluate on the test split, then save the trained model
model.evaluate(X_test, y_test)
model.save('gesture_to_text_model.h5')


In [2]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, LSTM, TimeDistributed, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import Sequence

class VideoDataGenerator(Sequence):
    """Keras ``Sequence`` that reads labelled videos from disk batch by batch.

    ``data_path`` is walked recursively; every folder whose name is a key of
    ``label_map`` contributes its video files, labelled with the mapped
    integer. Indexing the generator returns a batch ``(X, y)`` where ``X`` has
    shape ``(n, sequence_length, 64, 64, 3)`` in float32 with pixel values
    divided by 255, and ``y`` is a one-hot matrix of width ``num_classes``.
    Only the first ``sequence_length`` frames of each video are used; videos
    that yield fewer frames are padded with all-zero frames.

    Args:
        data_path: Root directory to scan for class folders.
        label_map: Mapping from folder name to integer class index.
        sequence_length: Number of frames taken from each video.
        batch_size: Maximum number of videos per batch.
        is_train: Stored on the instance; not consulted by this class.
        shuffle: Whether to reshuffle the video order at construction time
            and after every epoch.
        num_classes: Width of the one-hot label vectors.
        **kwargs: Forwarded unchanged to the base-class constructor.
    """

    # Lower-case suffixes treated as videos; each one includes the dot so that
    # names merely ending in "webm" are not picked up.
    VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov', '.webm')
    # (width, height) handed to cv2.resize.
    FRAME_SIZE = (64, 64)

    def __init__(self, data_path, label_map, sequence_length=10, batch_size=32,
                 is_train=True, shuffle=True, num_classes=26, **kwargs):
        # Run the base constructor so the Keras dataset machinery is set up.
        super().__init__(**kwargs)
        self.data_path = data_path
        self.label_map = label_map
        self.sequence_length = sequence_length
        self.batch_size = batch_size
        self.is_train = is_train
        self.shuffle = shuffle
        self.num_classes = num_classes
        self.video_files, self.labels = self._load_video_paths_and_labels()
        # Apply the initial shuffle (no-op when shuffle is False).
        self.on_epoch_end()

    def _load_video_paths_and_labels(self):
        """Return parallel arrays of video paths and their integer labels."""
        video_files = []
        labels = []
        for root, _, files in os.walk(self.data_path):
            root = root.replace('\\', '/')
            folder_name = os.path.basename(root)
            if folder_name not in self.label_map:
                continue
            label = self.label_map[folder_name]
            for file_name in files:
                if file_name.lower().endswith(self.VIDEO_EXTENSIONS):
                    video_files.append(os.path.join(root, file_name))
                    labels.append(label)
        return np.array(video_files), np.array(labels)

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(len(self.video_files) / self.batch_size))

    def __getitem__(self, index):
        """Decode and return batch number ``index`` as ``(X, y)``."""
        start = index * self.batch_size
        stop = start + self.batch_size
        batch_files = self.video_files[start:stop]
        batch_labels = self.labels[start:stop]
        return self.__data_generation(batch_files, batch_labels)

    def on_epoch_end(self):
        """Reshuffle paths and labels together when shuffling is enabled."""
        if self.shuffle:
            indices = np.arange(len(self.video_files))
            np.random.shuffle(indices)
            self.video_files = self.video_files[indices]
            self.labels = self.labels[indices]

    def _read_frames(self, file_path):
        """Return up to ``sequence_length`` resized, scaled frames of a video."""
        frames = []
        cap = cv2.VideoCapture(str(file_path))
        try:
            while cap.isOpened() and len(frames) < self.sequence_length:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.resize(frame, self.FRAME_SIZE)
                frames.append(frame / 255.0)  # Normalize
        finally:
            # Release the decoder even if reading or resizing raised.
            cap.release()
        return frames

    def __data_generation(self, batch_files, batch_labels):
        """Build the ``(X, y)`` arrays for the given paths and labels."""
        n_items = len(batch_files)
        width, height = self.FRAME_SIZE
        # Size the arrays by the actual batch so a short final batch has no
        # uninitialised rows; the zero fill doubles as padding for short videos.
        X = np.zeros((n_items, self.sequence_length, height, width, 3), dtype=np.float32)
        y = np.empty(n_items, dtype=int)
        for i, (file_path, label) in enumerate(zip(batch_files, batch_labels)):
            frames = self._read_frames(file_path)
            if frames:
                X[i, :len(frames)] = np.asarray(frames, dtype=np.float32)
            y[i] = label
        y = to_categorical(y, num_classes=self.num_classes)
        return X, y

In [3]:
# Folder name -> class index lookup: 'A' -> 0, 'B' -> 1, ..., 'Z' -> 25.
label_map = {
    letter: position
    for position, letter in enumerate(map(chr, range(ord('A'), ord('Z') + 1)))
}
# Root folder that is scanned for the per-letter video folders.
data_path = 'E:/Kuliah/Semester 6/MBKM/Bangkit/Capstone/Data Mentah'

In [4]:
# Create data generators
# NOTE(review): both generators scan the same `data_path`, and the generator
# class in this notebook stores `is_train` without reading it, so the
# validation generator appears to serve the same videos as the training one.
# Confirm whether a separate validation folder or split was intended.
train_generator = VideoDataGenerator(
    data_path=data_path,
    label_map=label_map,
    sequence_length=10,
    batch_size=32,
    is_train=True,
)
validation_generator = VideoDataGenerator(
    data_path=data_path,
    label_map=label_map,
    sequence_length=10,
    batch_size=32,
    is_train=False,
)

In [5]:
# Create model
def create_model(input_shape, num_classes):
    """Build the per-frame CNN + LSTM sequence classifier.

    Every frame passes through two Conv2D/MaxPooling2D stages (applied to each
    time step via ``TimeDistributed``) and is flattened; the resulting
    sequence of feature vectors is summarised by an LSTM and classified by two
    dense layers.

    Args:
        input_shape: Shape of one sample without the batch axis,
            ``(frames, height, width, channels)``.
        num_classes: Width of the final softmax layer.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = Sequential()
    # Declare the input with an explicit Input object instead of passing
    # `input_shape` to the first wrapped layer, which newer Keras versions
    # warn about.
    model.add(tf.keras.Input(shape=input_shape))
    model.add(TimeDistributed(Conv2D(32, (3, 3), activation='relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(Conv2D(64, (3, 3), activation='relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    # Collapse each frame's feature maps into one vector per time step.
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(128))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    return model

# One sample is 10 frames of 64x64 pixels with 3 channels; 26 output classes.
input_shape = (10, 64, 64, 3)
num_classes = 26

# Build the network and attach optimizer, loss and metric.
model = create_model(input_shape=input_shape, num_classes=num_classes)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

  super().__init__(**kwargs)


In [None]:
# Train model
# Feed the model from the generators built earlier in this notebook so videos
# are decoded one batch at a time instead of requiring the in-memory
# X_train / y_train arrays. The batch size is fixed by the generators, so it
# is not passed here, and validation data comes from the second generator
# because `validation_split` is not available for generator input.
# NOTE(review): validation_generator is constructed over the same folder as
# train_generator, so the validation metrics are not from held-out videos.
model.fit(train_generator, validation_data=validation_generator, epochs=10)

In [None]:
# Score the model on the test arrays, then write it to disk in HDF5 format.
model.evaluate(x=X_test, y=y_test)
model.save(filepath='gesture_to_text_model.h5')