# Text To Speech Translator

## Imports And Libraries

In [1]:
pip list # All The Pip installs have been made 

Package                      Version
---------------------------- ----------------
absl-py                      2.0.0
aiohttp                      3.8.6
aiosignal                    1.3.1
anyio                        4.0.0
apturl                       0.5.2
argon2-cffi                  23.1.0
argon2-cffi-bindings         21.2.0
arrow                        1.3.0
asttokens                    2.4.1
astunparse                   1.6.3
async-lru                    2.0.4
async-timeout                4.0.3
attrs                        23.1.0
Babel                        2.13.1
beautifulsoup4               4.12.2
bleach                       6.1.0
blinker                      1.4
Brlapi                       0.8.3
cachetools                   5.3.1
certifi                      2020.6.20
cffi                         1.16.0
chardet                      4.0.0
charset-normalizer           3.3.0
click                        8.0.3
colorama                     0.4.4
comm                         0.1.4

In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as pltlist_physical_devices
import time
import mediapipe as mp

## Keypoints and values for evaluation.

In [3]:
# Holistic solution module: supplies the Holistic tracker and the connection
# sets (face / pose / hands) used when drawing the skeleton.
mp_holistic = mp.solutions.holistic
# Drawing helpers (draw_landmarks, DrawingSpec) used to render the tracked points.
mp_drawing = mp.solutions.drawing_utils

In [4]:
def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and hand back a BGR frame plus the results.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing `process(rgb_image)`, e.g. the Holistic tracker.

    Returns:
        A `(bgr_image, results)` tuple: the frame converted back to BGR and
        whatever `model.process` returned for it.
    """
    # The model is fed an RGB copy of the frame.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Mark the buffer read-only for the duration of the prediction,
    # then make it writeable again.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Convert back to BGR so OpenCV can draw on and display the frame.
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results


In [5]:
def draw_landmarks(image, results):
    """Draw the face, pose and both hand landmark sets from `results` onto `image`.

    Args:
        image: Frame to draw on.
        results: Detection output exposing `face_landmarks`, `pose_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`.

    Returns:
        None.
    """
    # Only the face mesh gets a custom landmark style; the rest use the defaults.
    face_spec = mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1)

    # One entry per body part: the positional arguments that follow `image`.
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, face_spec),  # Face
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),                 # Upper body
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),            # Left hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),           # Right hand
    )
    for layer_args in layers:
        mp_drawing.draw_landmarks(image, *layer_args)

In [6]:
# Live preview: open the default webcam, track the holistic landmarks on every
# frame, draw them, and show the feed with an FPS counter until 'q' is pressed.
cap = cv2.VideoCapture(0)

# Request a 1920x1080 capture size (named constants instead of the raw ids 3 and 4).
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

# Timestamp of the previous frame; the FPS shown is 1 / (time between frames).
fps_time = time.time()

try:
    # Start the MediaPipe tracker and keep it alive for the whole preview.
    with mp_holistic.Holistic(min_detection_confidence=0.7, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read the next frame; `ret` is False when the camera delivered nothing.
            ret, frame = cap.read()
            if not ret:
                # Stop cleanly instead of passing an empty frame to the detector.
                print("Camera returned no frame; stopping preview.")
                break

            # Make detections and draw the skeleton on the frame.
            image, results = mediapipe_detection(frame, holistic)
            draw_landmarks(image, results)

            # Calculate FPS; guard against a zero interval between two timestamps.
            current_time = time.time()
            elapsed = current_time - fps_time
            fps = 1 / elapsed if elapsed > 0 else 0
            fps_time = current_time

            # Display the FPS on the video feed.
            cv2.putText(image, f"FPS: {int(fps)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Show the annotated frame.
            cv2.imshow('Feed', image)

            # Quit when 'q' is pressed.
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame failed to process.
    cap.release()
    cv2.destroyAllWindows()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


## Getting Values From MediaPipe to Train & Create Folders for data


In [7]:
# Face part of the feature vector: 468 landmarks, each stored as (x, y, z).
facesize = 3 * 468
print(facesize)

1404


In [8]:
# Pose part of the feature vector: 33 landmarks, each stored as
# (x, y, z, visibility) -- hence 4 values per landmark instead of 3.
posesize = 4 * 33
print(posesize)

132


In [9]:
# Left-hand part of the feature vector: 21 landmarks, each stored as (x, y, z).
lhsize = 3 * 21
print(lhsize)

63


In [10]:
# Right-hand part of the feature vector: 21 landmarks, each stored as (x, y, z).
rhsize = 3 * 21
print(rhsize)

63


In [11]:
def extract_keypoints(results):
    """Flatten every tracked landmark into one 1-D feature vector.

    The vector is laid out as pose, face, left hand, right hand. Pose landmarks
    contribute (x, y, z, visibility); the other parts contribute (x, y, z).
    A part that was not detected is replaced by zeros of the matching size
    (`posesize`, `facesize`, `lhsize`, `rhsize`).

    Args:
        results: Detection output exposing `pose_landmarks`, `face_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`.

    Returns:
        A 1-D numpy array holding the concatenated values.
    """
    def _flatten(landmark_set, with_visibility=False):
        # Collect one row per landmark, then collapse the rows into a flat array.
        rows = []
        for point in landmark_set.landmark:
            row = [point.x, point.y, point.z]
            if with_visibility:
                row.append(point.visibility)
            rows.append(row)
        return np.array(rows).flatten()

    if results.pose_landmarks:
        pose = _flatten(results.pose_landmarks, with_visibility=True)
    else:
        pose = np.zeros(posesize)

    if results.face_landmarks:
        face = _flatten(results.face_landmarks)
    else:
        face = np.zeros(facesize )

    if results.left_hand_landmarks:
        lh = _flatten(results.left_hand_landmarks)
    else:
        lh = np.zeros(lhsize)

    if results.right_hand_landmarks:
        rh = _flatten(results.right_hand_landmarks)
    else:
        rh = np.zeros(rhsize)

    return np.concatenate([pose, face, lh, rh])

In [12]:
# Sanity check: the flattened vector must be exactly as long as the four
# per-part sizes added together. Uses `results` from the last previewed frame.
expected_length = facesize + posesize + lhsize + rhsize
keypoint_length = len(extract_keypoints(results))
if keypoint_length != expected_length:
    print("Error")
else:
    print(keypoint_length)

1662


In [13]:
# Root folder for the exported keypoint arrays (written with np.save).
DATA_PATH = os.path.join('MP_Data')

# Labels to detect: ten words/phrases followed by the 26 letters A-Z.
actions = np.array(
    [
        'Hello', 'You', 'ME', 'Please', 'Are you Ok',
        'I Love You', 'Thank You', 'Yes', 'No', 'My Name is: ',
    ]
    + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
)

# Number of recordings ("videos") captured for each label.
no_sequences = 12

# Frames stored per recording (original note: 10 FPS video, 3 seconds each).
sequence_length = 30


In [14]:
# Make a folder for every label and recording index: DATA_PATH/<action>/<sequence>/
for action in actions:
    for sequence in range(no_sequences):
        sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
        try:
            # exist_ok=True makes re-running this cell harmless when the
            # folders are already there.
            os.makedirs(sequence_dir, exist_ok=True)
        except OSError as err:
            # Keep going with the remaining folders, but report the failure
            # instead of hiding it: a missing folder would otherwise only
            # show up later when saving frames into it.
            print(f"Could not create {sequence_dir}: {err}")

## Making Videos For Dataset 


In [15]:
# Record the training data: for each label, wait for SPACE, then capture
# `no_sequences` recordings of `sequence_length` frames each and save the
# keypoints of every frame to DATA_PATH/<action>/<sequence>/<frame_num>.
cap = cv2.VideoCapture(0)

# Request a 1920x1080 capture size (named constants instead of the raw ids 3 and 4).
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

# Index of the first label to record. 22 skips every label before 'M';
# set it to 0 to record the whole `actions` list.
first_action_index = 22

# Set as soon as the user presses 'q' or the camera stops delivering frames,
# so that ALL nested loops are left, not just the innermost one.
stop_requested = False

try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        # Loop through actions
        for action in actions[first_action_index:]:
            # Grab a fresh frame for the prompt so this cell does not depend on
            # an `image` left behind by an earlier cell.
            ret, prompt_frame = cap.read()
            if not ret:
                print("Camera returned no frame; stopping collection.")
                break

            # Wait for spacebar press to start the action collection
            cv2.putText(prompt_frame, 'Press SPACE to start collecting frames for {}'.format(action), (15, 12),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
            cv2.imshow('OpenCV Feed', prompt_frame)

            while True:
                key = cv2.waitKey(1) & 0xFF
                if key == ord(' '):  # Spacebar press
                    break
                if key == ord('q'):  # Allow quitting from the prompt as well
                    stop_requested = True
                    break
            if stop_requested:
                break

            # Loop through sequences aka videos
            for sequence in range(no_sequences):
                # Loop through video length aka sequence length
                for frame_num in range(sequence_length):

                    # Read feed; stop if the camera delivered nothing
                    ret, frame = cap.read()
                    if not ret:
                        print("Camera returned no frame; stopping collection.")
                        stop_requested = True
                        break

                    # Make detections
                    image, results = mediapipe_detection(frame, holistic)

                    # Draw landmarks
                    draw_landmarks(image, results)

                    # On the first frame of a recording, announce it and pause
                    # for two seconds so the signer can get into position.
                    if frame_num == 0:
                        cv2.putText(image, 'STARTING COLLECTION', (120, 200),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15, 12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                    if frame_num == 0:
                        cv2.waitKey(2000)

                    # Export keypoints
                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)

                    # Break gracefully
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_requested = True
                        break

                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Manual cleanup: release the webcam and close any OpenCV windows.
# Run this if a capture cell above was interrupted before its own cleanup ran.
cap.release()
cv2.destroyAllWindows()


# # Using Data Collected For Machine Learning