# 1. Import and Install Dependencies

In [1]:
%pip install tensorflow opencv-python numpy mediapipe scikit-learn matplotlib

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\bolte\SignLanguage\.venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# 2. Keypoints using MP Holistic

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
mp_selfie_segmentation = mp.solutions.selfie_segmentation # Segmentation masking

In [3]:
# potential TODOs: 
# -> add hand specific segmentation for better detections
# -> apply joint bilateral filter to results.segmentation_mask w/ image

def mediapipe_segmentation(image):
    bg_image = None                                             # Can set color or image as bg if desired
    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                               # Image is no longer writeable
    results = selfie_segmentation.process(image)                # Apply segmentation mask
    image.flags.writeable = True                                # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)              # COLOR COVERSION RGB 2 BGR
    
    # referenced nicolai nielsen segmentation tutorial #
    # Draw segmentation on background of video
    condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1 #was 0.15
    
    # Filter background
    # Can apply an image or flat color instead of blur, but would need implimentation atm
    if bg_image is None:
        bg_image = cv2.GaussianBlur(image, (55,55),0)

    output_image = np.where(condition, image, bg_image)
    return output_image

In [4]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [5]:
def draw_landmarks(image, results):
    #mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections

In [6]:
def draw_styled_landmarks(image, results):
    # Draw face connections
    #mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
    #                          mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
    #                          mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
    #                          ) 
     # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                              mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                              ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [7]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
# \ for newline wrap on with statement
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=2) as holistic, \
    mp_selfie_segmentation.SelfieSegmentation(model_selection=0) as selfie_segmentation:

    '''
    Test and see on gestures that have more movement if we should add the parameter model_complexity =2
    0 it will be faster, but less accurate and if it is 2 it will be more accurate, but also slower.

    '''
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Segment video background
        frame = mediapipe_segmentation(frame)

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)

        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

Downloading model to c:\Users\LUKE\SignLanguage\.venv\lib\site-packages\mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediap

In [12]:
draw_landmarks(frame, results)

AttributeError: module 'mediapipe.python.solutions.holistic' has no attribute 'FACE_CONNECTIONS'

In [None]:
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

# 3. Extract Keypoint Values

In [8]:
# collect landmark info for error handling
pose = []
for res in results.pose_landmarks.landmark:
    test = np.array([res.x, res.y, res.z, res.visibility])
    pose.append(test)

In [9]:
# for checking individual landmark arrays

pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)

In [10]:
# get all keypoints and return
def extract_keypoints(results):
   # flatten detections for consistent output to lstm
   # fill pose arrays with zeros if/when not detected
   pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
   face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
   lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
   rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
   return np.concatenate([pose, face, lh, rh])

In [11]:
result_test = extract_keypoints(results)

In [12]:
result_test

array([ 0.45565993,  0.37364346, -1.48601162, ...,  0.        ,
        0.        ,  0.        ])

# 4. Setup Folders for Collection

In [11]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('Test Data') 

# Actions that we try to detect
actions = np.array(['A', 'B', 'C', 'hello', 'ilu', 'none', 'thanks'])

# old models
# Actions that we try to detect
#alphabets = np.array(['A', 'B', 'C'])
# Actions that we try to detect
#actions = np.array(['hello', 'more', 'iloveyou', 'neutral'])  # Add 'neutral' to the list

num_sequences = 30  # Videos
sequence_length = 30  # Frames as .npy files

# Folder start
start_folder = 30

In [12]:
for action in actions:
    for sequence in range(num_sequences):
        try:
            # make directories for training data
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            # if data folders exist, skip
            pass

In [16]:
# Actions that we try to detect
actions = np.array(['hello', 'more', 'iloveyou', 'neutral'])  # Include 'neutral'

# Update folders_and_items to include the new action
folders_and_items = [('action', actions)]

def create_folders(folder_name, items):
    folder_path = os.path.join(DATA_PATH, folder_name)
    # Ensure the base folder path exists
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
        print(f"Created directory: {folder_path}")

    for item in items:
        item_folder_path = os.path.join(folder_path, item)
        # Check and create item-specific folder if it doesn't exist
        if not os.path.exists(item_folder_path):
            os.makedirs(item_folder_path)
            print(f"Created directory: {item_folder_path}")

        # Check and create sequence folders within the item folder
        for sequence in range(1, no_sequences + 1):
            sequence_path = os.path.join(item_folder_path, str(sequence))
            if not os.path.exists(sequence_path):
                os.makedirs(sequence_path)
                print(f"Created directory: {sequence_path}")

# Create folders for actions including the new 'neutral' category
for folder_name, items in folders_and_items:
    create_folders(folder_name, items)


# 5. Collect Keypoint Values for Training and Testing

In [18]:
# !DON'T RUN THIS! #
# unless you want to re-take landmark training data, then go for it


# collect training data with face/pose/hand landmark detections
cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    #loop thru actions
    for action in actions:
        #loop thru videos
        for sequence in range(num_sequences):
            #loop thru video length
            for frame_num in range(sequence_length):

                # Read feed
                ret, frame = cap.read()

                # Make detections
                image, results = mediapipe_detection(frame, holistic)
                print(results)

                # Draw landmarks
                draw_styled_landmarks(image, results)

                # Apply wait logic
                if frame_num == 0: 
                    cv2.putText(image, 'STARTING COLLECTION', (120, 200), 
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15, 12), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    cv2.waitKey(2000)
                else: 
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15, 12), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                # Extract keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                # Show to screen
                cv2.imshow('OpenCV Feed',image)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break

    cap.release()
    cv2.destroyAllWindows

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

: 

In [10]:
# force exit camera if needed
cap.release()
cv2.destroyAllWindows()

In [49]:
# alternate training input code
'''
import cv2


# Set up video capture
cap = cv2.VideoCapture(0)
export_complete = False  # Flag to track completion status

# Set mediapipe model 
with mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=2) as holistic:

    paused = False  # Initialize the pause state
    while True:  # Infinite loop for continuous processing    
        if not paused and not export_complete: 
            # Loop through the list of tuples
            for folder_name, items in folders_and_items:
                # Loop through items (actions or alphabets)
                for item in items:
                    # Show message for the next action or alphabet
                    cv2.putText(image, f'Next {folder_name}: {item}', (120, 200), 
                                cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 4, cv2.LINE_AA)
                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(3000)  # Wait for 3 seconds

                    # Loop through sequences aka videos
                    for sequence in range(1, no_sequences + 1):
                        # Loop through video length aka sequence length
                        for frame_num in range(sequence_length):
                            # Read feed
                            ret, frame = cap.read()

                            # Make detections
                            image, results = mediapipe_detection(frame, holistic)

                            # Draw landmarks
                            draw_styled_landmarks(image, results)

                            # Apply wait logic
                            if frame_num == 0: 
                                cv2.putText(image, 'STARTING COLLECTION', (120, 200), 
                                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                                cv2.putText(image, f'Collecting frames for {item} Video Number {sequence}', (15, 12), 
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                                # Show to screen
                                cv2.imshow('OpenCV Feed', image)
                                cv2.waitKey(4000)
                            else: 
                                cv2.putText(image, f'Collecting frames for {item} Video Number {sequence}', (15, 12), 
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                                # Show to screen
                                cv2.imshow('OpenCV Feed', image)

                            # Export keypoints
                            if sequence in range(1, 41):  # Ensure saving keypoints only for frames within the sequence length
                                keypoints = extract_keypoints(results)
                                npy_path = os.path.join(DATA_PATH, folder_name, item, str(sequence), str(frame_num + 1)) + ".npy"
                                np.save(npy_path, keypoints)

                            # Check if keypoints export is complete
                            if sequence == no_sequences and frame_num == sequence_length - 1:
                                export_complete = True

                            # Break gracefully
                            if cv2.waitKey(10) & 0xFF == ord('q'):
                                raise KeyboardInterrupt

                            # Check for pause key (spacebar)
                            key = cv2.waitKey(1)
                            if key == ord(' '):  # Spacebar pressed
                                paused = not paused  # Toggle the paused state

                                # If paused, wait for a key press to resume
                                if paused:
                                    cv2.waitKey(-1)  # Wait indefinitely for a key press

        # If export is complete, break the loop
        if export_complete:
            break

cap.release()
cv2.destroyAllWindows()
'''

'\nimport cv2\n\n\n# Set up video capture\ncap = cv2.VideoCapture(0)\nexport_complete = False  # Flag to track completion status\n\n# Set mediapipe model \nwith mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=2) as holistic:\n\n    paused = False  # Initialize the pause state\n    while True:  # Infinite loop for continuous processing    \n        if not paused and not export_complete: \n            # Loop through the list of tuples\n            for folder_name, items in folders_and_items:\n                # Loop through items (actions or alphabets)\n                for item in items:\n                    # Show message for the next action or alphabet\n                    cv2.putText(image, f\'Next {folder_name}: {item}\', (120, 200), \n                                cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 4, cv2.LINE_AA)\n                    cv2.imshow(\'OpenCV Feed\', image)\n                    cv2.waitKey(3000)  # Wait f

# Update Model with more gestures

1. This only adds to already created models

In [None]:
import cv2
import mediapipe as mp

# Set up video capture
cap = cv2.VideoCapture(0)
export_complete = False  # Flag to track completion status

# Initialize MediaPipe holistic model
with mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=2) as holistic:
    paused = False
    while not export_complete:
        for folder_name, items in folders_and_items:
            for item in items:
                if item == 'neutral':  # Check if the current item is 'neutral'
                    for sequence in range(1, no_sequences + 1):
                        for frame_num in range(sequence_length):
                            if not paused:
                                ret, frame = cap.read()
                                if not ret:
                                    continue

                                # Processing frame
                                image, results = mediapipe_detection(frame, holistic)
                                draw_styled_landmarks(image, results)

                                # Display status for collecting frames
                                cv2.putText(image, f'Collecting frames for {item}, Sequence {sequence}', 
                                            (15, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                                cv2.imshow('OpenCV Feed', image)

                                # Save keypoints
                                keypoints = extract_keypoints(results)
                                npy_path = os.path.join(DATA_PATH, folder_name, item, str(sequence), f"{frame_num + 1}.npy")
                                np.save(npy_path, keypoints)

                                # Check for exit or pause conditions
                                if cv2.waitKey(10) & 0xFF == ord('q'):
                                    export_complete = True
                                    break
                                if cv2.waitKey(1) == ord(' '):  # Pause on spacebar
                                    cv2.waitKey(-1)  # Wait indefinitely until another key is pressed

                            if export_complete:
                                break
                        if export_complete:
                            break
                if export_complete:
                    break
            if export_complete:
                break

cap.release()
cv2.destroyAllWindows()

# 6. Preprocess Data and Create Labels and Features

In [13]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import os
import numpy as np

In [14]:
# set up labels and process data for predictions, 7 action version

label_map = {label:num for num, label in enumerate(actions)}

sequences, labels = [], []
for action in actions:
    for sequence in range (num_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

X = np.array(sequences)
y = to_categorical(labels).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

In [None]:
alphabet_map = {label:num for num, label in enumerate(alphabets)}

In [None]:
action_map = {label:num for num, label in enumerate(actions)}

In [None]:
import os
import numpy as np

alphabet_folder_path = os.path.join(DATA_PATH, 'alphabet') 


# Initialize the sequences and labels lists
act_sequences, act_list_labels = [], []

# Loop through the alphabets
for alphabet in alphabets:
    # Get the sequence directories for the current alphabet
    sequence_dirs = os.listdir(os.path.join(alphabet_folder_path, alphabet))
    
    # Loop through the sequence directories
    for sequence_dir in sequence_dirs:
        # Initialize the window list for the current sequence
        window = []
        
        # Loop through the .npy files in the current sequence directory
        for frame_num in range(sequence_length):
            # Load the .npy file
            res = np.load(os.path.join(DATA_PATH, 'alphabet', alphabet, sequence_dir, "{}.npy".format(frame_num + 1)))
            
            # Append the loaded .npy file to the window list
            window.append(res)
        
        # Append the window list to the sequences list
        act_sequences.append(window)
        
        # Append the label for the current alphabet to the labels list
        act_list_labels.append(alphabet_map[alphabet]) # Use alphabet as the label


        "Number of sequences and labels should be the number of items inside the gesture folder"

print("Number of sequences:", len(act_sequences))
print("Number of labels:", len(act_list_labels))


In [None]:
# Check if each sequence has the same amount of sequence and keypoints
for i, seq in enumerate(act_sequences):
    print(f"Sequence {i} shape: {np.array(seq).shape}")
    if np.array(seq).shape != (sequence_length, 258):  # keypoints
        print(f"Error in sequence {i}")

In [None]:
for i, seq in enumerate(act_sequences):
    seq_array = np.array(seq)
    print(f"Sequence {i} shape: {seq_array.shape}, dtype: {seq_array.dtype}")
    if seq_array.shape != (sequence_length, 258):  # Assuming each frame represented by 258 keypoints
        print(f"Error in sequence {i} with shape {seq_array.shape}")


In [None]:
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

# Convert labels to categorical
y = to_categorical(act_list_labels).astype(int)

# Ensure shapes are consistent
print("Shapes of sequences and labels:")
print("Sequences shape:", np.array(act_sequences).shape)
print("Labels shape:", np.array(act_list_labels).shape)

# Split data into training and testing sets
X = np.array(act_sequences)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

# Verify shapes after splitting
print("Shapes after splitting:")
print("X_train shape:", X_train.shape)  # Labels of train shape
print("X_test shape:", X_test.shape)   # Labels of 5 random test 
print("y_train shape:", y_train.shape)      #(train shape, classes)
print("y_test shape:", y_test.shape)       

# Inspect testing labels
print("Testing labels:")
print(y_test)

In [None]:
# Iterate through the testing set and print labels and sequences
for i in range(len(y_test)):
    label = y_test[i]
    sequence = X_test[i]
    print(f"Label: {label}, Sequence Frame: {sequence}")


In [None]:
import os
import pickle

# Model names with gestures

#alphabets = np.array(['A', 'B', 'C'])
actions = np.array(['hello', 'more', 'iloveyou', 'neutral'])  # Add 'neutral' to the list


# Define the folder path
folder_path = 'Model Labels'

# Check if the folder exists, if not, create it
if not os.path.exists(folder_path):
    os.makedirs(folder_path)

# Save the variables to a file using pickle
data = {
    'actions': actions,
    #'alphabets': alphabets,
}

file_path = os.path.join(folder_path, 'model_data.pkl')

with open(file_path, 'wb') as f:
    pickle.dump(data, f)


# 7. Build and Train LSTM Neural Network

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [16]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

# Graph

Epoch at 20000 is a little high for low data. STOP training early if accuracy is acceptable and loss has stopped cosistently decreashing

tb_callback: should all fit in our memory for now

while the model is running and being trained. You can look at the tensorboard call back
* Check the Logs folder/events.out
* go to cmd
* cd into the log/train
* type: tensorboard --logdir=.
* it will give you a localhost url that will give you graph of the model:  http://localhost:6006/



In [17]:
# set up LSTM with actions data
model = Sequential()
model.add(LSTM(128, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(256, return_sequences=True, activation='relu'))
model.add(LSTM(128, return_sequences=False, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

  super().__init__(**kwargs)


In [9]:
model.compile(optimizer='Adadelta', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [10]:
# train the model
model.fit(X_train, y_train, epochs=1000, callbacks=[tb_callback])

Epoch 1/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 102ms/step - categorical_accuracy: 0.1177 - loss: 1.9487
Epoch 2/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 110ms/step - categorical_accuracy: 0.1675 - loss: 1.9442
Epoch 3/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 93ms/step - categorical_accuracy: 0.1574 - loss: 1.9473
Epoch 4/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 87ms/step - categorical_accuracy: 0.1574 - loss: 1.9433
Epoch 5/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 84ms/step - categorical_accuracy: 0.1281 - loss: 1.9468
Epoch 6/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 86ms/step - categorical_accuracy: 0.1651 - loss: 1.9421
Epoch 7/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 87ms/step - categorical_accuracy: 0.1263 - loss: 1.9496
Epoch 8/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 87ms/s

KeyboardInterrupt: 

In [11]:
model.summary()

Old Alphabet/Action models

In [None]:
# for alphabets: input_shape=(30,126)))
action_model = Sequential()
action_model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,258)))
action_model.add(LSTM(128, return_sequences=True, activation='relu'))
action_model.add(LSTM(64, return_sequences=False, activation='relu'))
action_model.add(Dense(64, activation='relu'))
action_model.add(Dense(32, activation='relu'))
action_model.add(Dense(actions.shape[0], activation='softmax'))  # actions.shape number of actions

In [None]:
action_model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [None]:
action_model.fit(X_train, y_train, epochs=500, callbacks=[tb_callback])

In [None]:
action_model.summary()

# 8. Make Predictions

In [18]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

# Get predictions for the test set
res = model.predict(X_test)

# Print the predicted action for the 5th sample in the test set
print("Predicted action:", actions[np.argmax(res[6])])

# Print the actual action for the 5th sample in the test set
print("Actual action:", actions[np.argmax(y_test[6])])

# Convert predictions and true labels to indices
yhat = np.argmax(res, axis=1)
ytrue = np.argmax(y_test, axis=1)

# Calculate the multilabel confusion matrix
confusion_matrix = multilabel_confusion_matrix(ytrue, yhat)
print("Multilabel Confusion Matrix:")
print(confusion_matrix)

# Calculate the accuracy score
accuracy = accuracy_score(ytrue, yhat)
print("Accuracy Score:", accuracy)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 635ms/step
Predicted action: B
Actual action: C
Multilabel Confusion Matrix:
[[[19  0]
  [ 2  0]]

 [[ 0 16]
  [ 0  5]]

 [[19  0]
  [ 2  0]]

 [[19  0]
  [ 2  0]]

 [[19  0]
  [ 2  0]]

 [[17  0]
  [ 4  0]]

 [[17  0]
  [ 4  0]]]
Accuracy Score: 0.23809523809523808


# 9. Save Weights & Export Models

In [20]:
# Save the model

models_folder = 'models'

model.save(os.path.join(models_folder,'action.h5'))  # change this per model



In [None]:
# export tflite from keras model

import tensorflow as tf

# Load the alphabet model if not already loaded
alphabet_model_filename = 'alphabet.h5'
alphabet_model_filepath = os.path.join(models_folder, alphabet_model_filename)
try:
    alphabet_model = load_model(alphabet_model_filepath)
    print(f"Alphabet model {alphabet_model_filename} loaded successfully.")
except Exception as e:
    print(f"Error loading alphabet model {alphabet_model_filename}: {str(e)}")

# Convert the model
converter = tf.lite.TFLiteConverter.from_keras_model(alphabet_model)

# shoutout user Jae sung Chung: https://stackoverflow.com/questions/67251401/tflite-converter-error-operation-not-supported
converter.target_spec.supported_ops = [
  tf.lite.OpsSet.TFLITE_BUILTINS, # enable TensorFlow Lite ops.
  tf.lite.OpsSet.SELECT_TF_OPS # enable TensorFlow ops.
]
tflite_model = converter.convert()

# Save the model
with open('Tensorflow_Lite/alphabet.tflite', 'wb') as f:
    f.write(tflite_model)

In [19]:
# fix unsupported datatype in android studio
# this is a WIP because I still don't understand the nature of the error entirely
# https://github.com/tensorflow/tensorflow/issues/53279
# via usermohantym ^^^^^^^

# https://www.tensorflow.org/lite/guide/python

# tflite in android studio returning unsupported data type 14

import tflite_runtime.interpreter as tflite
interpreter = tflite.Interpreter(model_path="Tensorflow_Lite/alphabet.tflite")
interpreter.allocate_tensors()
interpreter.get_input.details()[0]

ModuleNotFoundError: No module named 'tflite_runtime'

# 10. Load Models and Labels to Run in Real Time if they are not stored in the Kernal

To run the models in Real Time

1. Run all the cell blocks in section 1-3 
  - This will load all the libraries and methods to extract landmarks


2. Load the labels and the models in this section 10. 

3 Now you should be able to test the cell blocks in 
  -Test One Model 
  -Test Two Models 

In [20]:
# Labels for each models

actions = (['A', 'B', 'C', 'hello', 'ilu', 'none', 'thanks'])

# old model labels
#alphabets = (['A', 'B', 'C'])
#actions = np.array(['hello', 'more', 'iloveyou', 'neutral'])

In [22]:
"Import the previous models after restart or when we train a new model to load the previous model"

from keras.models import load_model
import os

def load_model_safely(model_path):
    try:
        return load_model(model_path), None
    except Exception as e:
        return None, str(e)

models_folder = 'models'

action_model, err = load_model_safely(os.path.join(models_folder, 'action.h5'))
if action_model:
    print("Action model loaded successfully.")
else:
    print(f"Error loading action model: {err}")

# uncomment if using alphabet model
#alphabet_model, err = load_model_safely(os.path.join(models_folder, 'alphabet.h5'))
#if alphabet_model:
#    print("Alphabet model loaded successfully.")
#else:
#    print(f"Error loading alphabet model: {err}")





Action model loaded successfully.


# 11. Test in Real Time

In [32]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

# Load the model from the actions.h5 file
model = load_model('C:\\Users\\LUKE\\SignLanguage\\Model Trainer\\models\\action.h5')

# Define the prob_viz function
def prob_viz(prediction_labels, input_frame):
    output_frame = input_frame.copy()
    y_offset = 60
    x_offset = 5  # Set x-coordinate for the text
    for i, label in enumerate(prediction_labels):
        # Draw text with stroke
        stroke_color = (0, 0, 0)  # Black for the text stroke effect
        text_color = (255, 255, 255)  # White color for the text
        thickness = 2
        stroke_thickness = 4

        cv2.putText(output_frame, label, (x_offset + i * 120, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 1.5, stroke_color, stroke_thickness, cv2.LINE_AA)
        cv2.putText(output_frame, label, (x_offset + i * 120, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 1.5, text_color, thickness, cv2.LINE_AA)
    return output_frame


sequence = []
#sentence = []
predictions = []
threshold = 0.4

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=2) as holistic, \
    mp_selfie_segmentation.SelfieSegmentation(model_selection=0) as selfie_segmentation:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Segment video background
        frame = mediapipe_segmentation(frame)

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            predicted_label = actions[np.argmax(res)]
            predictions.append(predicted_label)
            
            #if np.unique(predictions[-10:])[0] == predicted_label and res[np.argmax(res)] > threshold: 
            #    if len(sentence) > 0: 
            #        if predicted_label != sentence[-1]:
            #            sentence.append(predicted_label)
            #    else:
            #        sentence.append(predicted_label)

            #if len(sentence) > 5: 
            #    sentence = sentence[-5:]


            # Display the label text only
            image = prob_viz(predicted_label, image)

            # Show the image with the label text
            cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

# Release the video capture and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()




<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

Old Model load/usage below

In [None]:
                                            # Testing One Model Only
# Define the prob_viz function
def prob_viz(prediction_labels, input_frame):
    output_frame = input_frame.copy()
    y_offset = 60
    for i, label in enumerate(prediction_labels):
        cv2.putText(output_frame, label, (20, y_offset + i * 40), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame
sequence = []
sentence = []
predictions = []
threshold = 1

cap = cv2.VideoCapture(0)
# Set mediapipe model 
#with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=2) as holistic, \
    mp_selfie_segmentation.SelfieSegmentation(model_selection=0) as selfie_segmentation:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

              # Segment video background
        frame = mediapipe_segmentation(frame)

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = alphabet_model.predict(np.expand_dims(sequence, axis=0))[0]
            predicted_label = alphabets[np.argmax(res)]
            predictions.append(predicted_label)
            
            if np.unique(predictions[-5:])[0] == predicted_label and res[np.argmax(res)] > threshold: 
                if len(sentence) > 0: 
                    if predicted_label != sentence[-1]:
                        sentence.append(predicted_label)
                else:
                    sentence.append(predicted_label)

            if len(sentence) > 5: 
                sentence = sentence[-5:]


            # Display the label text only
            
            image = prob_viz(predicted_label, image)


            # Show the image with the label text
            cv2.imshow('OpenCV Feed', image)


        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

# Release the video capture and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()


In [None]:
                                  #Test two models
# Define the prob_viz function to handle both alphabet and action labels
def prob_viz(prediction_label, input_frame):
    output_frame = input_frame.copy()
    y_offset = 60
    cv2.putText(output_frame, prediction_label, (20, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame

# Testing Both Models
sequence = []
sentence = []
threshold = 1
current_prediction = ""

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # Initialize predictions list for each iteration
        predictions = []

        # Prediction logic for the alphabet model
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            # Prediction logic for the alphabet model
            res_alphabet = alphabet_model.predict(np.expand_dims(sequence, axis=0))[0]
            predicted_label_alphabet = alphabets[np.argmax(res_alphabet)]
            predictions.append(predicted_label_alphabet)
            
            # Logic for sentence formation
            if np.unique(predictions[-5:])[0] == predicted_label_alphabet and res_alphabet[np.argmax(res_alphabet)] > threshold: 
                if len(sentence) > 0: 
                    if predicted_label_alphabet != sentence[-1]:
                        sentence.append(predicted_label_alphabet)
                else:
                    sentence.append(predicted_label_alphabet)

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Predict using the action model
            res_action = action_model.predict(np.expand_dims(sequence, axis=0))[0]
            predicted_label_action = actions[np.argmax(res_action)]
            predictions.append(predicted_label_action)

            # Display the label text for both alphabet and action
            current_prediction = " ".join(predictions)
            image = prob_viz(current_prediction, image)

        # Show the image with the label text
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

# Release the video capture and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()

In [24]:
cap.release()
cv2.destroyAllWindows()

# 12.  Improved Testing with "neutral" model trained

In [None]:
import cv2
import numpy as np
import mediapipe as mp

# Constants
NEUTRAL_ACTION_INDEX = 4  # Update this based on your model's 'neutral' index
CONFIDENCE_THRESHOLD = 0.97  # Confidence threshold to accept predictions

# Initialize video capture and MediaPipe model
cap = cv2.VideoCapture(0)
mp_holistic = mp.solutions.holistic

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    sequence_action = []
    sequence_alphabet = []
    label_action = ''
    label_alphabet = ''
    display_text = ''

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)

        keypoints_action = extract_keypoints(results, "action")
        keypoints_alphabet = extract_keypoints(results, "alphabet")
        sequence_action.append(keypoints_action)
        sequence_alphabet.append(keypoints_alphabet)

        if len(sequence_action) == 30:
            pred_action = action_model.predict(np.expand_dims(sequence_action, axis=0), verbose=0)[0]
            action_confidence = np.max(pred_action)
            action_index = np.argmax(pred_action)
            sequence_action.pop(0)  # Remove the oldest frame

            if action_index != NEUTRAL_ACTION_INDEX and action_confidence > CONFIDENCE_THRESHOLD:
                label_action = actions[action_index]
                # Now check the alphabet model
                if len(sequence_alphabet) == 30:
                    pred_alphabet = alphabet_model.predict(np.expand_dims(sequence_alphabet, axis=0), verbose=0)[0]
                    alphabet_confidence = np.max(pred_alphabet)
                    if alphabet_confidence > CONFIDENCE_THRESHOLD:
                        label_alphabet = alphabets[np.argmax(pred_alphabet)]
                        display_text = f"Action: {label_action}, Alphabet: {label_alphabet}"
                    sequence_alphabet.pop(0)  # Remove the oldest frame
            else:
                label_action = 'Neutral'  # Or just use an empty string ''
                display_text = ''

        image = prob_viz([display_text], image)
        cv2.imshow('OpenCV Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()


In [None]:
cap.release()
cv2.destroyAllWindows()

# 13. Web

In [None]:
                                            # Web
import os
import cv2
import numpy as np

from matplotlib import pyplot as plt
import time
import mediapipe as mp
from keras.models import load_model

models_folder = 'models'

mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
mp_selfie_segmentation = mp.solutions.selfie_segmentation # Segmentation masking

# potential TODOs: 
# -> add hand specific segmentation for better detections
# -> apply joint bilateral filter to results.segmentation_mask w/ image

def mediapipe_segmentation(image):
    bg_image = None                                             # Can set color or image as bg if desired
    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                               # Image is no longer writeable
    results = selfie_segmentation.process(image)                # Apply segmentation mask
    image.flags.writeable = True                                # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)              # COLOR COVERSION RGB 2 BGR
    
    # referenced nicolai nielsen segmentation tutorial #
    # Draw segmentation on background of video
    condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1 #was 0.15
    
    # Filter background
    # Can apply an image or flat color instead of blur, but would need implimentation atm
    if bg_image is None:
        bg_image = cv2.GaussianBlur(image, (55,55),0)

    output_image = np.where(condition, image, bg_image)
    return output_image

def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

# Define the prob_viz function
def prob_viz(prediction_labels, input_frame):
    output_frame = input_frame.copy()
    y_offset = 60
    for i, label in enumerate(prediction_labels):
        cv2.putText(output_frame, label, (20, y_offset + i * 40), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame

def extract_keypoints(results):
   # pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
   # face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([lh, rh])



# Load the alphabet model
alphabet_model_filename = 'alphabet.h5'
alphabet_model_filepath = os.path.join(models_folder, alphabet_model_filename)
try:
    alphabet_model = load_model(alphabet_model_filepath)
    print(f"Alphabet model {alphabet_model_filename} loaded successfully.")
except Exception as e:
    print(f"Error loading alphabet model {alphabet_model_filename}: {str(e)}")
    
alphabets = (['A', 'B', 'C'])

                                            # Testing One Model Only
                                            # Testing One Model Only
# Define the prob_viz function
def prob_viz(prediction_labels, input_frame):
    output_frame = input_frame.copy()
    y_offset = 60
    for i, label in enumerate(prediction_labels):
        cv2.putText(output_frame, label, (20, y_offset + i * 40), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame
sequence = []
sentence = []
predictions = []
threshold = 1

cap = cv2.VideoCapture(0)
# Set mediapipe model 
#with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=2) as holistic, \
    mp_selfie_segmentation.SelfieSegmentation(model_selection=0) as selfie_segmentation:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

              # Segment video background
        frame = mediapipe_segmentation(frame)

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        #draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = alphabet_model.predict(np.expand_dims(sequence, axis=0))[0]
            predicted_label = alphabets[np.argmax(res)]
            predictions.append(predicted_label)
            
            if np.unique(predictions[-5:])[0] == predicted_label and res[np.argmax(res)] > threshold: 
                if len(sentence) > 0: 
                    if predicted_label != sentence[-1]:
                        sentence.append(predicted_label)
                else:
                    sentence.append(predicted_label)

            if len(sentence) > 5: 
                sentence = sentence[-5:]


            # Display the label text only
            
            image = prob_viz(predicted_label, image)


            # Show the image with the label text
            cv2.imshow('OpenCV Feed', image)


        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

# Release the video capture and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()