In [7]:
import os
# os.chdir("../")
%pwd

'/Users/niralpatel/Desktop/Projects/speaking_silence'

In [16]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingConfig:
    root_dir: Path
    trained_model_path: Path
    base_model_path: Path

    training_data: Path
    epochs: int
    batch_size: int
    is_augmentation: bool
    input_shape: list

In [3]:
from Speaking_Silence.constants import *
from Speaking_Silence.utils.common import read_yaml, create_directories
import tensorflow as tf



In [17]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_training_config(self) -> TrainingConfig:
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        params = self.params
        training_data = "artifacts/data_ingestion"
        create_directories([
            Path(training.root_dir)
        ])

        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            base_model_path=Path(prepare_base_model.base_model_path),
            training_data=Path(training_data),
            epochs=params.EPOCHS,
            batch_size=params.BATCH_SIZE,
            is_augmentation=params.AUGMENTATION,
            input_shape=params.INPUT_SHAPE,
        )

        return training_config

In [6]:
import os
from pathlib import Path
import tensorflow as tf
import cv2

In [None]:
class Training:
    def __init__(self, config: TrainingConfig):
        self.config = config

    
    def get_base_model(self):
        self.model = tf.keras.models.load_model(
            self.config.base_model_path
        )

    def train_valid_generator(self):
        datagenerator_kwargs = dict(
            rescale=1./255,
            validation_split=0.20
        )

        dataflow_kwargs = dict(
            target_size=self.config.input_shape[:-1],
            batch_size=self.config.batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

        if self.config.is_augmentation:
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                **datagenerator_kwargs
            )
        else:
            train_datagenerator = valid_datagenerator

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
        )

    def save_model(self, model_path):
        self.model.save(model_path)

    def train(self):
        self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size
        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size

        self.model.fit(
            self.train_generator,
            epochs=self.config.epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator
        )

        self.save_model(self.config.trained_model_path)

In [22]:
class Training:
    def __init__(self, config):
        self.config = config

    def get_base_model(self):
        self.model = tf.keras.models.load_model(self.config.base_model_path)

    def preprocess_video(self, video_path):
        # Open the video file
        cap = cv2.VideoCapture(video_path)
        frames = []

        # Read frames from the video
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Preprocess frame (resize, normalize, etc.)
            # Add any preprocessing steps needed here
            frame = cv2.resize(frame, (224, 224))  # Example resizing to fit VGG16 input shape
            frame = frame / 255.0  # Example normalization
            frames.append(frame)

        cap.release()
        return frames

    def train(self):
        self.get_base_model()

        # Iterate over video files in the training directory
        for video_folder in os.listdir(self.config.training_data):
            folder = f"{self.config.training_data}/{video_folder}"
            for video_file in os.listdir(folder):
                video_path = os.path.join(self.config.training_data, video_file)
                frames = self.preprocess_video(video_path)

            # Assuming 'frames' is a list of preprocessed frames from the video
            # Perform any necessary additional preprocessing, such as padding frames or converting to a numpy array
            # Then feed the frames into the model for training

            # Example:
            # frames = np.array(frames)
            # self.model.fit(frames, ...)  # Example training step

        # Save the trained model
        self.model.save(self.config.trained_model_path)

In [24]:
try:
    config = ConfigurationManager()
    training_config = config.get_training_config()
    training = Training(config=training_config)
    training.get_base_model()
    training.train()
    
except Exception as e:
    raise e

[32m[2024-03-23 09:36:28: INFO: common][0m yaml file: config/config.yaml loaded successfully[0m


INFO:Speaking_Silence:yaml file: config/config.yaml loaded successfully


[32m[2024-03-23 09:36:28: INFO: common][0m yaml file: params.yaml loaded successfully[0m


INFO:Speaking_Silence:yaml file: params.yaml loaded successfully


[32m[2024-03-23 09:36:28: INFO: common][0m created directory at: artifacts[0m


INFO:Speaking_Silence:created directory at: artifacts


[32m[2024-03-23 09:36:28: INFO: common][0m created directory at: artifacts/training[0m


INFO:Speaking_Silence:created directory at: artifacts/training
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/man_34746.mp4"
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/man_34744.mp4"
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/man_34741.mp4"
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/man_67872.mp4"
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/man_34742.mp4"
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/man_34743.mp4"
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/man_34733.mp4"
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/man_34732.mp4"
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/man_34685.mp4"
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/man_34736.mp4"
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/man_69

NotADirectoryError: [Errno 20] Not a directory: 'artifacts/data_ingestion/.DS_Store'

In [None]:
def create_LRCN_model():

    model = Sequential()

    model.add(TimeDistributed(Conv2D(16, (3, 3), padding='same',activation = 'relu'),
                              input_shape = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)))
    
    model.add(TimeDistributed(MaxPooling2D((4, 4)))) 
    model.add(TimeDistributed(Dropout(0.25)))
    
    model.add(TimeDistributed(Conv2D(32, (3, 3), padding='same',activation = 'relu')))
    model.add(TimeDistributed(MaxPooling2D((4, 4))))
    model.add(TimeDistributed(Dropout(0.25)))
    
    model.add(TimeDistributed(Conv2D(64, (3, 3), padding='same',activation = 'relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(Dropout(0.25)))
    
    model.add(TimeDistributed(Conv2D(64, (3, 3), padding='same',activation = 'relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    #model.add(TimeDistributed(Dropout(0.25)))
                                      
    model.add(TimeDistributed(Flatten()))
                                      
    model.add(LSTM(32))
                                      
    model.add(Dense(len(CLASSES_LIST), activation = 'softmax'))

    model.summary()
    return model

In [None]:
import pymongo
import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

class DataIngestion:
    def __init__(self, config):
        self.config = config
        self.client = pymongo.MongoClient(self.config.db_host)
        self.db = self.client[self.config.db_name]

    def load_video_data(self):
        # Implement logic to load video data from MongoDB
        pass

class BaseModelBuilder:
    def __init__(self, config):
        self.config = config

    def build_base_model(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=self.config.input_shape),
            tf.keras.layers.MaxPooling2D((2, 2)),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
            tf.keras.layers.MaxPooling2D((2, 2)),
            tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
            tf.keras.layers.MaxPooling2D((2, 2)),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(self.config.num_classes, activation='softmax')
        ])
        return model

class DataPreprocessor:
    def __init__(self, config):
        self.config = config

    def preprocess_video(self, video_data):
        frames = []
        for frame_data in video_data:
            # Load frame from video_data
            frame = cv2.imread(frame_data['frame_path'])
            # Resize frame to match model input shape
            frame = cv2.resize(frame, (self.config.input_shape[0], self.config.input_shape[1]))
            # Perform normalization or other preprocessing steps if necessary
            frame = frame / 255.0  # Example normalization
            frames.append(frame)
        return frames

class VideoTrainer:
    def __init__(self, config):
        self.config = config

    def train(self, train_data, validation_data, model):
        # Implement training logic using train_data and validation_data
        pass

    def evaluate(self, test_data, model):
        # Implement evaluation logic using test_data
        pass

    def get_model_summary(self, model):
        return model.summary()

def main():
    # Initialize components
    config = load_config()
    data_ingestion = DataIngestion(config)
    base_model_builder = BaseModelBuilder(config)
    data_preprocessor = DataPreprocessor(config)
    video_trainer = VideoTrainer(config)

    # Load video data from MongoDB
    video_data = data_ingestion.load_video_data()

    # Preprocess video data
    preprocessed_data = data_preprocessor.preprocess_video(video_data)

    # Split data into training and validation sets
    train_data, val_data = train_test_split(preprocessed_data, test_size=0.2, random_state=42)

    # Build base model
    model = base_model_builder.build_base_model()

    # Train model
    video_trainer.train(train_data, val_data, model)

    # Evaluate model
    test_data = load_test_data()  # Load test data from somewhere
    video_trainer.evaluate(test_data, model)

    # Get model summary
    model_summary = video_trainer.get_model_summary(model)
    print(model_summary)

if __name__ == "__main__":
    main()


In [55]:
DB_NAME = "college-project"
DB_HOST = "mongodb+srv://niral0901:d3gAL01dwlOnqmLQ@college-project.iwlsqpp.mongodb.net"

In [56]:
from mongoengine import disconnect, connect, Document, StringField, IntField

# Define MongoDB connection
disconnect()
connect(DB_NAME, host=DB_HOST)
# Define MongoDB Document schema
class LabelMapping(Document):
    gloss = StringField(unique=True)
    value = IntField()

# Function to populate MongoDB with label mappings
def populate_label_mappings(train_dir):
    for index, gloss_label in enumerate(os.listdir(train_dir)):
        LabelMapping(gloss=gloss_label, value=index+1).save()

In [82]:
import keras
import os
import cv2
import numpy as np
from keras.utils import to_categorical


def preprocess_video(video_path, target_size=(224, 224)):
    frames = []
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Couldn't open video file: {video_path}")
        return None
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, target_size)
        frame = frame / 255.0  # Normalize pixel values
        frames.append(frame)
    cap.release()
    
    if not frames:
        print(f"Error: No frames found in video file: {video_path}")
        return None
    
    # Find the maximum dimensions among all frames
    max_height = max(frame.shape[0] for frame in frames)
    max_width = max(frame.shape[1] for frame in frames)
    
    # Process frames to have the same dimensions
    frames_processed = []
    for frame in frames:
        if frame.shape[0] < max_height:
            pad_top = (max_height - frame.shape[0]) // 2
            pad_bottom = max_height - frame.shape[0] - pad_top
            frame = cv2.copyMakeBorder(frame, pad_top, pad_bottom, 0, 0, cv2.BORDER_CONSTANT, value=0)
        elif frame.shape[0] > max_height:
            crop_top = (frame.shape[0] - max_height) // 2
            crop_bottom = crop_top + max_height
            frame = frame[crop_top:crop_bottom, :]
        
        if frame.shape[1] < max_width:
            pad_left = (max_width - frame.shape[1]) // 2
            pad_right = max_width - frame.shape[1] - pad_left
            frame = cv2.copyMakeBorder(frame, 0, 0, pad_left, pad_right, cv2.BORDER_CONSTANT, value=0)
        elif frame.shape[1] > max_width:
            crop_left = (frame.shape[1] - max_width) // 2
            crop_right = crop_left + max_width
            frame = frame[:, crop_left:crop_right]
        
        frames_processed.append(frame)
    
    # Pad or crop frames to a fixed number of frames
    num_frames = 16
    if len(frames_processed) < num_frames:
        padding = [np.zeros((max_height, max_width, 3))] * (num_frames - len(frames_processed))
        frames_processed.extend(padding)
    elif len(frames_processed) > num_frames:
        frames_processed = frames_processed[:num_frames]
    
    video = np.stack(frames_processed)
    return video


# Function to generate batches of video data
def video_data_generator(train_dir, batch_size):
    # Query MongoDB for label mappings
    label_mappings = {mapping.gloss: mapping.value for mapping in LabelMapping.objects}

    while True:
        video_paths = [os.path.join(train_dir, gloss_label, video_name) 
               for gloss_label in os.listdir(train_dir) if gloss_label != '.DS_Store'
               for video_name in os.listdir(os.path.join(train_dir, gloss_label))]
        np.random.shuffle(video_paths)
        for i in range(0, len(video_paths), batch_size):
            batch_video_paths = video_paths[i:i+batch_size]
            batch_videos = [preprocess_video(video_path) for video_path in batch_video_paths]
            batch_videos = [video for video in batch_videos if video is not None]  # Remove None values
            if not batch_videos:
                continue
            batch_labels = [os.path.basename(video_path).split('_')[0] for video_path in batch_video_paths]  # Extract gloss labels
            batch_labels = [label_mappings.get(label, 0) for label in batch_labels]  # Map gloss labels to numerical values
            batch_labels = to_categorical(batch_labels, num_classes=num_classes)
            yield np.array(batch_videos), np.array(batch_labels)

In [85]:
train_dir = 'artifacts/data_ingestion'
val_dir = 'artifacts/data_ingestion'

# Define batch size and number of classes
batch_size = 4
num_classes = 51  # Number of sign classes

# Create generators for training and validation data
train_generator = video_data_generator(train_dir, batch_size)
test_generator = video_data_generator(val_dir, batch_size)

In [86]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define the model architecture
def create_sign_language_model(num_classes):
    # CNN backbone
    cnn_model = tf.keras.applications.ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    
    # Temporal processing for each frame
    video_input = layers.Input(shape=(None, 224, 224, 3))  # Input shape: (batch_size, num_frames, height, width, channels)
    frame_features = layers.TimeDistributed(cnn_model)(video_input)
    flattened_features = layers.TimeDistributed(layers.Flatten())(frame_features)
    
    # LSTM layer for temporal processing
    lstm_output = layers.LSTM(128)(flattened_features)
    
    # Classification head
    output = layers.Dense(num_classes, activation='softmax')(lstm_output)
    
    model = models.Model(inputs=video_input, outputs=output)
    return model

# Instantiate the model
num_classes = 51  # Number of sign classes
sign_language_model = create_sign_language_model(num_classes)

# Compile the model
sign_language_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = sign_language_model.fit(train_generator, epochs=10)

# Evaluate the model
test_loss, test_acc = sign_language_model.evaluate(test_generator)
print("Test Accuracy:", test_acc)


Error: Couldn't open video file: artifacts/data_ingestion/white/white_63211.mp4
Epoch 1/10


[mov,mp4,m4a,3gp,3g2,mj2 @ 0x3edfd92f0] moov atom not found
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/white/white_63211.mp4"
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x39838ab50] moov atom not found
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/yes/yes_64295.mp4"
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x3465f0ac0] moov atom not found
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/hearing/hearing_26978.mp4"
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x346b5c580] moov atom not found
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/fine/fine_65717.mp4"


Error: Couldn't open video file: artifacts/data_ingestion/yes/yes_64295.mp4
Error: Couldn't open video file: artifacts/data_ingestion/hearing/hearing_26978.mp4
Error: Couldn't open video file: artifacts/data_ingestion/fine/fine_65717.mp4


[mov,mp4,m4a,3gp,3g2,mj2 @ 0x345d05a60] moov atom not found
OpenCV: Couldn't read video stream from file "artifacts/data_ingestion/like/like_33281.mp4"


Error: Couldn't open video file: artifacts/data_ingestion/like/like_33281.mp4


InvalidArgumentError: Graph execution error:

Detected at node categorical_crossentropy/softmax_cross_entropy_with_logits defined at (most recent call last):
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 197, in _run_module_as_main

  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 87, in _run_code

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/asyncio/base_events.py", line 596, in run_forever

  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/asyncio/base_events.py", line 1890, in _run_once

  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/asyncio/events.py", line 80, in _run

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3048, in run_cell

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3103, in _run_cell

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3308, in run_cell_async

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3490, in run_ast_nodes

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code

  File "/var/folders/cn/59_k8cws6g95xx26rdq842z80000gn/T/ipykernel_39002/1566937283.py", line 31, in <module>

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1807, in fit

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1151, in train_step

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/engine/training.py", line 1209, in compute_loss

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/engine/compile_utils.py", line 277, in __call__

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/losses.py", line 143, in __call__

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/losses.py", line 270, in call

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/losses.py", line 2221, in categorical_crossentropy

  File "/Users/niralpatel/Desktop/Projects/speaking_silence/env/lib/python3.9/site-packages/keras/src/backend.py", line 5579, in categorical_crossentropy

logits and labels must be broadcastable: logits_size=[3,51] labels_size=[4,51]
	 [[{{node categorical_crossentropy/softmax_cross_entropy_with_logits}}]] [Op:__inference_train_function_174834]