In [1]:
# Import the required libraries.
import os
import cv2
import random
import pandas as pd
import numpy as np
import datetime as dt
import tensorflow as tf 
import matplotlib.pyplot as plt

#from moviepy.editor import *
%matplotlib inline

from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import *
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import BatchNormalization, Conv2D, Layer
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, LSTM, TimeDistributed
from tensorflow.keras.models import load_model



In [2]:
label_map = {"cousin":0,"go":1}

In [3]:
import cv2
import os

def frames_extraction(video_path, IMAGE_HEIGHT=300, IMAGE_WIDTH=300):
    '''
    This function will extract all frames from a directory after resizing and normalizing them.
    Args:
        video_path: The path to the directory where the frames are stored.
        IMAGE_HEIGHT: The height to which frames will be resized.
        IMAGE_WIDTH: The width to which frames will be resized.
    Returns:
        frames_list: A list containing the resized and normalized frames.
    '''
    # video path->  D:\\Datasets\\minor2_dataset2\\3_word_aug_dataset//cousin//13630.mp4//_0
    
    # Declare a list to store video frames.
    frames_list = []
    
    # Get a list of all frame files in the directory.
    frame_files = os.listdir(video_path)
    # video path->  D:\\Datasets\\minor2_dataset2\\3_word_aug_dataset//cousin//13630.mp4//_0//frame_0.jpg

    # Iterate through the frame files.
    for frame_file in frame_files:
        # Path of the frame
        frame_path = os.path.join(video_path, frame_file)
        
        # Read the frame from the file.
        frame = cv2.imread(frame_path)
        cv2.imshow('Frame',frame)
        
        # Check if the frame is not None
        if frame is not None:
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            resized_frame = cv2.resize(gray_frame, (IMAGE_HEIGHT, IMAGE_WIDTH))
            normalized_frame = resized_frame / 255
            frames_list.append(normalized_frame)
        else:
            print(f"Failed to read frame from {frame_path}. Skipping.")
            break
    
        if cv2.waitKey(10) & 0xFF == ord('q'):      # apply this logic for interrupt:
            break
    
    cv2.destroyAllWindows()
    # Return the frames list.
    return frames_list

# Now, frames_list contains all frames from the video, resized and normalized

In [4]:
features = []
labels = []
video_files_paths = []
count = 0
dataset_path = "D:\\Datasets\\minor2_dataset2\\2_word_aug_dataset"
data_dir = ["cousin", "go"]

In [5]:
for word_folder in data_dir:
    word_folder_path = os.path.join(dataset_path, word_folder)
    # D:\\Datasets\\minor2_dataset2\\3_word_aug_dataset//cousin
    video_dir = os.listdir(word_folder_path)
    for video in video_dir:
        video_file_path = os.path.join(word_folder_path, video)
        # D:\\Datasets\\minor2_dataset2\\3_word_aug_dataset//cousin//13630.mp4
        aug_vid_dir = os.listdir(video_file_path)
        for aug_vid in aug_vid_dir:
            aug_vid_path = os.path.join(video_file_path, aug_vid)
            # D:\\Datasets\\minor2_dataset2\\3_word_aug_dataset//cousin//13630.mp4//_0
            frames = frames_extraction(aug_vid_path)
            features.append(frames)
            labels.append(count)
            video_files_paths.append(video_file_path)
    count += 1

In [6]:
# Converting the list to numpy arrays
features

[[array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]]),
  array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]]),
  array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]]),
  array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 

In [7]:
# Convert each element to a NumPy array if not already
features_arrays = [np.array(feature) for feature in features]

# Determine the maximum shape
max_shape = np.max([arr.shape for arr in features_arrays], axis=0)

# Pad each array
padded_features = []
for arr in features_arrays:
    padding = [(0, max_dim - arr.shape[axis]) for axis, max_dim in enumerate(max_shape)]
    padded_arr = np.pad(arr, pad_width=padding, mode='constant', constant_values=0)
    padded_features.append(padded_arr)

# Convert to NumPy array
features_array = np.array(padded_features)


In [8]:
features_array.shape

(145, 60, 300, 300)

In [9]:
labels

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1]

In [10]:
# Using Keras's to_categorical method to convert labels into one-hot-encoded (ohe) vectors
ohe_labels = to_categorical(labels)
ohe_labels

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.

In [11]:
# Split the Data into Train ( 75% ) and Test Set ( 25% ).
seed_constant = 42
X_train, X_test, y_train, y_test = train_test_split(features_array, ohe_labels,
                                                                            test_size = 0.25, shuffle = True,
                                                                            random_state = seed_constant)

In [12]:
X_train.shape

(108, 60, 300, 300)

In [13]:
y_train.shape

(108, 2)

In [14]:
X_test.shape

(37, 60, 300, 300)

In [15]:
y_test.shape

(37, 2)

# 300x300

In [98]:
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, LSTM, Dropout, BatchNormalization

# Input layer for the CNN
input_layer = Input(shape=(60, 300, 300))

# First set of CNN layers
x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_layer)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Second set of CNN layers
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Third set of CNN layers
x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Flatten the output
x = Flatten()(x)

# Output layer for the CNN
cnn_output = Dense(128, activation='relu')(x)

# Create the CNN model
#cnn_model = Model(inputs=input_layer, outputs=cnn_output)


In [99]:
from keras.layers import Reshape

# Reshape the CNN output to be compatible with LSTM
reshaped_output = Reshape((1, 128))(cnn_output)

# LSTM layer
lstm_output = LSTM(128)(reshaped_output)

# Output layer for the LSTM
output_layer = Dense(2, activation='softmax')(lstm_output)

# Create the full model
model = Model(inputs=input_layer, outputs=output_layer)


In [113]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs= 30, batch_size=32)


Epoch 1/30
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 6s/step - accuracy: 1.0000 - loss: 4.7757e-05 - val_accuracy: 0.9459 - val_loss: 0.2035
Epoch 2/30
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step - accuracy: 1.0000 - loss: 2.3779e-05 - val_accuracy: 0.9459 - val_loss: 0.5771
Epoch 3/30
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 1.0000 - loss: 2.1970e-05 - val_accuracy: 0.9459 - val_loss: 0.3418
Epoch 4/30
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 1.0000 - loss: 1.2094e-05 - val_accuracy: 0.9459 - val_loss: 0.1977
Epoch 5/30
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 1.0000 - loss: 6.0368e-06 - val_accuracy: 0.8919 - val_loss: 0.4322
Epoch 6/30
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 1.0000 - loss: 2.6743e-05 - val_accuracy: 0.9189 - val_loss: 0.6502
Epoch 7/30
[1m4/4[0m [32m━━━━━

<keras.src.callbacks.history.History at 0x1699fce9250>

In [114]:
# Assuming 'model' is your compiled Keras model and 'X_test' and 'y_test' are your test data
score = model.evaluate(X_test, y_test, verbose=0)

# Print the loss
print('Test loss:', score[0])

# If you have specified metrics during compilation, print them as well
# For example, if you have specified accuracy as a metric
print('Test accuracy:', score[1])

Test loss: 0.636292576789856
Test accuracy: 0.9729729890823364


In [115]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [116]:
ypred = model.predict(X_test)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 949ms/step


In [117]:
ytrue = np.argmax(y_test, axis=1).tolist()
ypred = np.argmax(ypred, axis=1).tolist()

In [118]:
multilabel_confusion_matrix(ytrue, ypred)

array([[[19,  0],
        [ 1, 17]],

       [[17,  1],
        [ 0, 19]]], dtype=int64)

In [119]:
accuracy_score(ytrue, ypred)

0.972972972972973

In [120]:
import h5py

In [121]:
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
model.save('my_model.keras')

In [109]:
#model.save_weights('my_model.h5')

In [110]:
# import cv2
# import numpy as np
# from keras.models import load_model
# import time

# # Load the model
# model = load_model('my_model.keras')

# # Open the video capture
# cap = cv2.VideoCapture(0) # Use 0 for webcam, or replace with video file path

# # Set the frame rate
# cap.set(cv2.CAP_PROP_FPS, 13)

# # Initialize an empty list to hold the frames
# frames = []

# # Start time for capturing frames
# start_time = time.time()
# frame_count = 0

# while True:
#     # Capture frame-by-frame
#     ret, frame = cap.read()
    
#     if not ret:
#         break
    
#     #to gray scale
#     frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
#     # Resize the frame to 300x300
#     frame_resized = cv2.resize(frame_gray, (300, 300))
    
#     # Preprocess the frame if necessary (e.g., normalization)
#     # frame_preprocessed = preprocess(frame_resized)
#     frame_norm = frame_resized / 255
#     # Expand dimensions for model prediction
    
#     # Add the frame to the list
#     frames.append(frame_norm)
#     # Convert the list of frames to a 4D numpy array
#     frames_array = np.array(frames)

#     # Reshape the array to match the expected input shape (batch size, number of frames, height, width, channels)
#     # Since we're working with grayscale images, we only have 1 channel
#     frames_array = np.expand_dims(frames_array, axis=0)
    
#     # Predict using the model
#     prediction = model.predict(frames_array)
    
#     # Assuming the model outputs probabilities for each class
#     # and you have a list of class names
#     class_names = ['cousin', 'go', 'help'] # Replace with your actual class names
#     predicted_class = np.argmax(prediction)
#     predicted_word = class_names[predicted_class]
#     accuracy = np.max(prediction)
    
#     # Display the predicted word and its accuracy on the frame
#     cv2.putText(frame, f"Predicted: {predicted_word}, Accuracy: {accuracy:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    
#     # Show the resulting frame
#     cv2.imshow('Video', frame)
    
#     frame_count += 1
#     if frame_count >= 60:
#         break

#     # Break the loop on 'q' key press
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# # When everything done, release the capture
# cap.release()
# cv2.destroyAllWindows()

In [111]:
import cv2
import numpy as np
from keras.models import load_model
import time

# Load the model
model = load_model('my_model.keras')

# Open the video capture
cap = cv2.VideoCapture(0) # Use 0 for webcam, or replace with video file path

# Set the frame rate for 5 seconds
cap.set(cv2.CAP_PROP_FPS, 63)

# Initialize an empty list to hold the frames
frames = []

# Start time for capturing frames
start_time = time.time()

# Mapping of model output indices to class names
class_names = ['cousin', 'go', 'help'] # Replace with your actual class names

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    
    if not ret:
        break
    
    # Convert the frame to grayscale
    frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
    # Resize the grayscale frame to 300x300
    frame_resized = cv2.resize(frame_gray, (300, 300))
    
    #Normalization
    frame_norm = frame_resized/255
    
    # Add the frame to the list
    frames.append(frame_norm)
    
    # Check if we have captured 63 frames or if 5 seconds have passed
    if len(frames) >= 60 or time.time() - start_time >= 5:
        break

# Convert the list of frames to a 4D numpy array
frames_array = np.array(frames)

# Reshape the array to match the expected input shape (batch size, number of frames, height, width, channels)
frames_array = np.expand_dims(frames_array, axis=0)

# Predict using the model
prediction = model.predict(frames_array)

# Convert the model's output to the class name
predicted_class_index = np.argmax(prediction, axis=1)
predicted_class_name = class_names[predicted_class_index[0]]

# Display the class name on the video frame
frame_with_text = cv2.putText(frame, predicted_class_name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)

# Display the frame with the class name
cv2.imshow('Prediction', frame_with_text)

# Wait for a key press and close the window
cv2.waitKey(0)
cv2.destroyAllWindows()

ValueError: Input 0 of layer "functional_27" is incompatible with the layer: expected shape=(None, 60, 300, 300), found shape=(1, 0)