# 1. Dependencies

In [None]:
!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python mediapipe sklearn matplotlib djitellopy

# 2. Define functions

In [None]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from djitellopy import tello
from time import sleep 
from scipy import stats

# Short aliases for the MediaPipe solution modules used throughout the notebook.
mp_holistic = mp.solutions.holistic  # provides Holistic and the *_CONNECTIONS sets
mp_drawing = mp.solutions.drawing_utils  # provides draw_landmarks and DrawingSpec

def mediapipe_detection(image, model):
    """Run a MediaPipe model on one BGR frame.

    Args:
        image: Frame in BGR channel order, or ``None`` when the capture
            source had nothing to deliver.
        model: Object exposing ``process(rgb_image)``; the notebook passes a
            ``mp_holistic.Holistic`` instance.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR and
        whatever ``model.process`` returned. For an empty frame the function
        prints ``"empty frame"`` and returns ``(None, None)`` so callers that
        unpack two values do not fail with a ``TypeError``.
    """
    if image is None:
        print("empty frame")
        return None, None

    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    rgb.flags.writeable = False  # keep the buffer read-only while the model runs
    results = model.process(rgb)
    rgb.flags.writeable = True   # make it writable again so it can be drawn on
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR), results
    
def draw_landmarks(image, results):
    """Draw face, pose and hand landmarks on ``image`` with default styling.

    Args:
        image: Frame to draw on; handed straight to
            ``mp_drawing.draw_landmarks``.
        results: Holistic results exposing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    # MediaPipe releases that ship FACEMESH_TESSELATION (used by
    # draw_styled_landmarks) no longer export FACE_CONNECTIONS. Prefer the old
    # name when it exists and otherwise fall back to FACEMESH_CONTOURS so both
    # drawing helpers work against the same installed version.
    face_connections = getattr(mp_holistic, "FACE_CONNECTIONS", None)
    if face_connections is None:
        face_connections = mp_holistic.FACEMESH_CONTOURS

    layers = (
        (results.face_landmarks, face_connections),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmarks, connections in layers:
        mp_drawing.draw_landmarks(image, landmarks, connections)

def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks on ``image`` with per-part colours.

    Args:
        image: Frame to draw on; handed straight to
            ``mp_drawing.draw_landmarks``.
        results: Holistic results exposing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    spec = mp_drawing.DrawingSpec

    # One row per body part:
    # (landmarks, connection set, landmark style, connection style).
    # All colour channels are kept inside the valid 0-255 range.
    layers = (
        (
            results.face_landmarks,
            mp_holistic.FACEMESH_TESSELATION,
            spec(color=(80, 110, 10), thickness=1, circle_radius=1),
            spec(color=(80, 255, 121), thickness=1, circle_radius=1),
        ),
        (
            results.pose_landmarks,
            mp_holistic.POSE_CONNECTIONS,
            spec(color=(80, 22, 10), thickness=2, circle_radius=4),
            spec(color=(80, 44, 121), thickness=2, circle_radius=2),
        ),
        (
            results.left_hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            spec(color=(121, 22, 76), thickness=2, circle_radius=4),
            spec(color=(121, 44, 250), thickness=2, circle_radius=2),
        ),
        (
            results.right_hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            spec(color=(245, 117, 66), thickness=2, circle_radius=4),
            spec(color=(245, 66, 230), thickness=2, circle_radius=2),
        ),
    )
    for landmarks, connections, landmark_style, connection_style in layers:
        mp_drawing.draw_landmarks(
            image, landmarks, connections, landmark_style, connection_style
        )

def extract_keypoints_holistic(results):
    """Flatten every holistic landmark group of one frame into a single vector.

    The output is the concatenation, in this order, of pose
    (x, y, z, visibility per landmark), face, left hand and right hand
    (x, y, z per landmark). A group that was not detected is replaced by
    zeros sized for 33, 468, 21 and 21 landmarks respectively.
    """
    def flat(group, fields, count):
        # Missing group -> zero placeholder of the expected length.
        if not group:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    parts = [
        flat(results.pose_landmarks, xyz + ("visibility",), 33),
        flat(results.face_landmarks, xyz, 468),
        flat(results.left_hand_landmarks, xyz, 21),
        flat(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

def extract_face_keypoints(results):
    """Return the face landmarks of one frame as a flat (x, y, z, ...) vector.

    When ``results.face_landmarks`` is falsy, a zero vector of length
    ``468 * 3`` is returned instead.
    """
    face_landmarks = results.face_landmarks
    if not face_landmarks:
        return np.zeros(468 * 3)
    coords = [[point.x, point.y, point.z] for point in face_landmarks.landmark]
    return np.array(coords).flatten()

# Class labels of the holistic (gesture) model; the real-time cells index this
# array with the argmax of model_hol's output.
actions_holistic = np.array(['grpA','grpB','grpC','grpD','grpE'])
# NOTE(review): the two values below are not read anywhere in this notebook;
# presumably sequences-per-action and frames-per-sequence from data collection.
no_sequences_holistic = 15
sequence_length_holistic = 30

# Class labels of the face model; indexed with the argmax of model_f's output.
actions_face = np.array(['normal-face', 'elongated'])
# NOTE(review): also unused in this notebook; presumably the face-dataset
# counterparts of the holistic values above.
no_sequences_face = 15
sequence_length_face = 30

# 3. LSTM Model 1 

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

# Gesture classifier: three stacked LSTM layers followed by a dense head.
# Input is a window of 30 frames x 1662 holistic keypoint values; the output is
# a softmax with one unit per entry of actions_holistic.
model_hol = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 1662)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions_holistic.shape[0], activation='softmax'),
])

model_hol.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)


In [None]:
# Load the pretrained weights into model_hol. Download LSTM_hol.h5 first and
# replace the machine-specific path below with the location where you saved it.
model_hol.load_weights(r"C:\Users\tangk\LSTM_hol.h5") 

In [None]:
# Print the architecture of the holistic model as a sanity check.
model_hol.summary()

# 4. LSTM Model 2

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

# Face-shape classifier: two LSTM layers followed by a small dense head.
# Input is a window of 30 frames x 1404 face keypoint values; the output is a
# softmax with one unit per entry of actions_face.
model_f = Sequential([
    LSTM(32, return_sequences=True, activation='relu', input_shape=(30, 1404)),
    LSTM(32, return_sequences=False, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions_face.shape[0], activation='softmax'),
])

model_f.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)


In [None]:
# Load the pretrained weights into model_f. Download LSTM_face.h5 first and
# replace the machine-specific path below with the location where you saved it.
model_f.load_weights(r"C:\Users\tangk\LSTM_face.h5") 

In [None]:
# Print the architecture of the face model as a sanity check.
model_f.summary()

# 5. Test in Real Time 

## Output: [__face-status, word-group__]
## Meant to be tested with masks on. The position of each gesture relative to your body matters.
## Refer to the PPT for the gestures of groups A, B, C, D and E.

# DEMO 1: Prototype capture

## INSTRUCTIONS: Turn on the Tello and connect to the Tello Wi-Fi. Run the cell below (it may need several runs) until the battery level is displayed and the light turns green. Short-press to turn off.

In [None]:
# Create the Tello client, connect to the drone and print its battery level
# as a quick check that the connection works.
me = tello.Tello()
me.connect()
print(me.get_battery())

## Run the following cell to stream from Tello. Press 'q' to exit. 

In [None]:
# Real-time inference on the Tello video stream. Press 'q' in the window to exit.
WINDOW = 30             # frames per prediction; must match the models' input_shape

sequence_hol = []       # rolling window of holistic keypoint vectors
sequence_f = []         # rolling window of face-only keypoint vectors
predictions_hol = []    # latest holistic class index once a full window exists
predictions_f = []      # latest face class index once a full window exists
present = ''            # overlay text: "<face-status> <word-group>"

me.connect()
me.streamon()
frame_read = me.get_frame_read()  # fetch the frame reader once, not per frame

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while True:
            frame = frame_read.frame
            if frame is None:
                # No frame available (yet): wait briefly instead of crashing in
                # cv2.rotate or when unpacking the detection result.
                sleep(0.01)
                continue
            frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)

            image, results = mediapipe_detection(frame, holistic)
            draw_styled_landmarks(image, results)

            # Keep only the most recent WINDOW frames for each model.
            sequence_hol.append(extract_keypoints_holistic(results))
            sequence_f.append(extract_face_keypoints(results))
            sequence_hol = sequence_hol[-WINDOW:]
            sequence_f = sequence_f[-WINDOW:]

            if len(sequence_hol) == WINDOW:
                res_hol = model_hol.predict(np.expand_dims(sequence_hol, axis=0))[0]
                predictions_hol = np.argmax(res_hol)
                gesture = str(actions_holistic[predictions_hol])
                print(gesture)

                res_f = model_f.predict(np.expand_dims(sequence_f, axis=0))[0]
                predictions_f = np.argmax(res_f)
                elongation = str(actions_face[predictions_f])
                print(elongation)

                present = elongation + ' ' + gesture

            # Banner across the full frame width with the latest prediction.
            # `present` is already a string, so it is drawn as-is; joining it
            # with ' ' would insert a space between every character.
            cv2.rectangle(image, (0, 0), (image.shape[1], 40), (245, 117, 16), -1)
            cv2.putText(image, present, (3, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)

            cv2.imshow('OpenCV Feed', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always close the window and stop the video stream, even after an error.
    cv2.destroyAllWindows()
    me.streamoff()

# DEMO 2: Webcam capture

In [None]:
# Real-time inference on the default webcam. Press 'q' in the window to exit.
WINDOW = 30             # frames per prediction; must match the models' input_shape

sequence_hol = []       # rolling window of holistic keypoint vectors
sequence_f = []         # rolling window of face-only keypoint vectors
predictions_hol = []    # latest holistic class index once a full window exists
predictions_f = []      # latest face class index once a full window exists
present = ''            # overlay text: "<face-status> <word-group>"

cap = cv2.VideoCapture(0)

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret or frame is None:
                # The camera delivered nothing: stop cleanly instead of
                # crashing when unpacking the detection result.
                print("empty frame")
                break

            image, results = mediapipe_detection(frame, holistic)
            draw_styled_landmarks(image, results)

            # Keep only the most recent WINDOW frames for each model.
            sequence_hol.append(extract_keypoints_holistic(results))
            sequence_f.append(extract_face_keypoints(results))
            sequence_hol = sequence_hol[-WINDOW:]
            sequence_f = sequence_f[-WINDOW:]

            if len(sequence_hol) == WINDOW:
                res_hol = model_hol.predict(np.expand_dims(sequence_hol, axis=0))[0]
                predictions_hol = np.argmax(res_hol)
                gesture = str(actions_holistic[predictions_hol])
                print(gesture)

                res_f = model_f.predict(np.expand_dims(sequence_f, axis=0))[0]
                predictions_f = np.argmax(res_f)
                elongation = str(actions_face[predictions_f])
                print(elongation)

                present = elongation + ' ' + gesture

            # Banner across the full frame width with the latest prediction.
            # `present` is already a string, so it is drawn as-is; joining it
            # with ' ' would insert a space between every character.
            cv2.rectangle(image, (0, 0), (image.shape[1], 40), (245, 117, 16), -1)
            cv2.putText(image, present, (3, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow('OpenCV Feed', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()