# 1.Libraries

In [3]:
import cv2 
import numpy as np 
import os 
import matplotlib.pyplot as plt 
import time 
import mediapipe as mp

# 2. Keypoints using MP Holistic

In [4]:
mp_holistic= mp.solutions.holistic # model
mp_drawing= mp.solutions.drawing_utils # drawing utils 

In [5]:
def mediapipe_detection(image,model):
    image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB) # Image color conversion
    image.flags.writeable = False#image now not  writeable
    results=model.process(image) #detection 
    image.flags.writeable = True #image now writeable
    image= cv2.cvtColor(image,cv2.COLOR_RGB2BGR)# Image color conversion 
    return image,results
    

In [6]:
def draw_landmarks(image,results):
    mp_drawing.draw_landmarks(image,results.face_landmarks,mp_holistic.FACEMESH_CONTOURS)
    mp_drawing.draw_landmarks(image,results.pose_landmarks,mp_holistic.POSE_CONNECTIONS)
    mp_drawing.draw_landmarks(image,results.left_hand_landmarks,mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(image,results.right_hand_landmarks,mp_holistic.HAND_CONNECTIONS)

In [7]:
def draw_styled_landmarks(image,results):
    mp_drawing.draw_landmarks(image,results.face_landmarks,mp_holistic.FACEMESH_CONTOURS,
                            mp_drawing.DrawingSpec(color=(80,110,10),thickness=1,circle_radius=1),
                            mp_drawing.DrawingSpec(color=(80,256,121),thickness=1,circle_radius=1))
    mp_drawing.draw_landmarks(image,results.pose_landmarks,mp_holistic.POSE_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(80,22,10),thickness=2,circle_radius=4),
                            mp_drawing.DrawingSpec(color=(80,44,121),thickness=2,circle_radius=2))
    mp_drawing.draw_landmarks(image,results.left_hand_landmarks,mp_holistic.HAND_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(121,22,76),thickness=2,circle_radius=4),
                            mp_drawing.DrawingSpec(color=(121,44,250),thickness=2,circle_radius=2))
    mp_drawing.draw_landmarks(image,results.right_hand_landmarks,mp_holistic.HAND_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(245,117,66),thickness=1,circle_radius=1),
                            mp_drawing.DrawingSpec(color=(245,66,23),thickness=1,circle_radius=1))


In [8]:
cap =cv2.VideoCapture(0) # webcam access
# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read Feed
        ret,frame=cap.read()
        #Make Detections
        image , results = mediapipe_detection(frame,holistic)
        print(results)
        #Draw Landmarks
        draw_styled_landmarks(image,results)
        #Show to the screen # "frame" is the image from my webcam
        cv2.imshow('OpenCV Feed',image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()



<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

# 3.Extract Keypoint Values

In [9]:
def extract_keypoints(results):
    pose = np.array([[res.x,res.y,res.z,res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x,res.y,res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x,res.y,res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x,res.y,res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose,face,lh,rh])

In [10]:
extract_keypoints(results).shape

(1662,)

# 4. Setup Folders for Collection

In [11]:
DATA_PATH= os.path.join('MP_Data')

actions=np.array(['hello', 'thanks','iloveyou'])

no_sequences=30

sequence_length=30

In [20]:
for action in actions:
    for sequence in range(no_sequences):
        try: 
            os.makedirs(os.path.join(DATA_PATH,action,str(sequence)))
        except:
            pass

# Collect Keypoint Values for Training and Testing

In [21]:
cap =cv2.VideoCapture(0) # webcam access
# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    #Loop through actions
    for action in actions:
        #loop through sequences
        for sequence in range(no_sequences):
            #loop through video_sequences
            for frame_num in range(sequence_length):


                # Read Feed
                ret,frame=cap.read()
                #Make Detections

                image , results = mediapipe_detection(frame,holistic)
                print(results)

                #Draw Landmarks
                draw_styled_landmarks(image,results)

                # Apply Wait Logic
                if frame_num==0:
                    cv2.putText(image,'STARTING COLLECTION',(120,200),
                                cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0),4,cv2.LINE_AA)
                    cv2.putText(image,'Collecting frames for {} Video Number {}'.format(action,sequence),(15,12),
                                cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1,cv2.LINE_AA)

                    cv2.waitKey(2000)

                else:
                    cv2.putText(image,'Collecting frames for {} Video Number {}'.format(action,sequence),(15,12),
                                cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1,cv2.LINE_AA)
                        
                # NEW Export Keypoints
                keypoints= extract_keypoints(results)
                npy_path=os.path.join(DATA_PATH,action,str(sequence),str(frame_num))
                np.save(npy_path,keypoints)


                #Show to the screen # "frame" is the image from my webcam
                cv2.imshow('OpenCV Feed',image)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break

    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

# 6. Preprocess Data and Create Labels and Features

In [12]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [13]:
label_map={label: num for num, label in enumerate(actions)}

In [14]:
label_map

{'hello': 0, 'thanks': 1, 'iloveyou': 2}

In [15]:
actions

array(['hello', 'thanks', 'iloveyou'], dtype='<U8')

In [16]:
sequences,labels= [],[]
for action in actions:
    for sequence in range(no_sequences):
        window=[]
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH,action,str(sequence),"{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [17]:
np.array(sequences).shape

(90, 30, 1662)

In [18]:
np.array(labels).shape

(90,)

In [19]:
X = np.array(sequences)
X.shape

(90, 30, 1662)

In [20]:
y=to_categorical(labels).astype(int)

In [21]:
y

array([[1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0,

In [22]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.05)

# 7. Build and Train LSTM Network


In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense
from tensorflow.keras.callbacks import TensorBoard

In [24]:
home_dir = os.path.expanduser("~")
log_dir = os.path.join(home_dir, 'tensorboard_logs', 'fit')

In [25]:
log_dir

'C:\\Users\\enesm\\tensorboard_logs\\fit'

In [26]:
# Check if the directory exists, and create it if it doesn't
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
# Define the TensorBoard callback
tb_callback = TensorBoard(log_dir=log_dir)


In [27]:
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping

# Model definition
model = tf.keras.Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 1662)))
model.add(Dropout(0.2))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(Dropout(0.2))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# # Callbacks
# def scheduler(epoch, lr):
#     if epoch < 10:
#         return lr
#     else:
#         return float(lr * tf.math.exp(-0.1))

# lr_scheduler = LearningRateScheduler(scheduler)
# early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Fit the model
history = model.fit(X_train, y_train, epochs=170, callbacks=[tb_callback])


  super().__init__(**kwargs)


Epoch 1/170
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - categorical_accuracy: 0.3464 - loss: 1.1758
Epoch 2/170
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - categorical_accuracy: 0.3327 - loss: 1.0921
Epoch 3/170
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - categorical_accuracy: 0.3835 - loss: 1.1246
Epoch 4/170
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - categorical_accuracy: 0.2584 - loss: 3.9791
Epoch 5/170
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - categorical_accuracy: 0.4012 - loss: 1.1596
Epoch 6/170
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - categorical_accuracy: 0.3171 - loss: 1.5860
Epoch 7/170
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - categorical_accuracy: 0.4051 - loss: 1.0952
Epoch 8/170
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - cate

In [27]:
model.summary()

# 8. Make Predictions

In [28]:
res = model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 190ms/step


In [29]:
res

array([[2.41105650e-02, 1.74842414e-03, 9.74141002e-01],
       [2.19286419e-03, 1.64112949e-04, 9.97642934e-01],
       [1.29754224e-14, 1.00000000e+00, 5.09901843e-10],
       [2.82716680e-25, 1.00000000e+00, 2.76232649e-19],
       [3.27104628e-02, 1.94144750e-03, 9.65348065e-01]], dtype=float32)

In [30]:
actions[np.argmax(res,axis=1)]

array(['iloveyou', 'iloveyou', 'thanks', 'thanks', 'iloveyou'],
      dtype='<U8')

In [31]:
actions[np.argmax(y_test,axis=1)]

array(['iloveyou', 'iloveyou', 'thanks', 'thanks', 'iloveyou'],
      dtype='<U8')

# 9. Save Weights

In [32]:
model.save('action.keras')

In [32]:
model.load_weights(r'C:\Users\enesm\OneDrive\Masaüstü\Computer Vision\Sign Language Detection\Model weights\action.keras')

  saveable.load_own_variables(weights_store.get(inner_path))


In [33]:
model.summary()

In [53]:
del model

# 10. Evaluation using Confusion Matrix and Accuracy

In [33]:
from sklearn.metrics import multilabel_confusion_matrix , accuracy_score

In [34]:
yhat=model.predict(X_train)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step 


In [35]:
ytrue= np.argmax(y_train,axis=1).tolist()
yhat=np.argmax(yhat,axis=1).tolist()

In [36]:
multilabel_confusion_matrix(ytrue,yhat)

array([[[55,  0],
        [15, 15]],

       [[57,  0],
        [ 1, 27]],

       [[42, 16],
        [ 0, 27]]], dtype=int64)

In [37]:
accuracy_score(ytrue,yhat)

0.8117647058823529

# 11.Test in Real Time

In [38]:
colors= [(245,117,16),(117,245,16),(16,117,245)]

def prob_viz(res,actions,input_frame,colors):
    output_frame=input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame,(0,60+num*40), (int(prob*100), 90+num*40), colors[num]-1)

        cv2.putText(output_frame,actions[num],(0,85+num*40), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),2,cv2.LINE_AA)
    
    return output_frame

In [None]:
# 1. New Detection Variables
sequence=[]
sentence=[]
threshold= 0.4


cap =cv2.VideoCapture(0) # webcam access
# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read Feed
        ret,frame=cap.read()
        #Make Detections
        image , results = mediapipe_detection(frame,holistic)
        print(results)
        #Draw Landmarks
        draw_styled_landmarks(image,results)
        
        # 2. Prediction Logic
        keypoints=extract_keypoints(results)
        sequence.insert(0,keypoints)
        sequence=sequence[:30]

        if len(sequence)==30:
            res = model.predict(np.expand_dims(sequence,axis=0))[0]
            print(actions[np.argmax(res)])

        # 3. Visualization Logic
        if res[np.argmax(res)]> threshold:
            if len(sentence) > 0 :
                if actions[np.argmax(res)] != sentence[-1]:
                    sentence.append(actions[np.argmax(res)])
            else:
                sentence.append(actions[np.argmax(res)])
        
        if len(sentence) > 5:
            sentence = sentence [-5:]
        
        # Viz probabilities
        image=prob_viz(res,actions,image,colors)
        cv2.rectangle(image,(0,0), (640,40), (245,117,16),-1)
        cv2.putText(image,' '.join(sentence), (3,30),
                    cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),2,cv2.LINE_AA)

        #Show to the screen # "frame" is the image from my webcam
        cv2.imshow('OpenCV Feed',image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

In [41]:
res

array([[2.41105650e-02, 1.74842414e-03, 9.74141002e-01],
       [2.19286419e-03, 1.64112949e-04, 9.97642934e-01],
       [1.29754224e-14, 1.00000000e+00, 5.09901843e-10],
       [2.82716680e-25, 1.00000000e+00, 2.76232649e-19],
       [3.27104628e-02, 1.94144750e-03, 9.65348065e-01]], dtype=float32)