In [70]:
import pyautogui
import time

# media pipe dependencies
import cv2
import mediapipe as mp
from scipy.spatial import distance as dist
from datetime import datetime

# audio file dependencies
import speech_recognition as sr
import pyttsx3
import pyperclip

MediaPipe variables and functions


In [71]:
# Short aliases for the MediaPipe solution modules used by draw_frame() and the main loop.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh

# Drawing spec with thickness 1 and circle radius 1.
# NOTE(review): not referenced by any code visible in this notebook — confirm before removing.
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

In [72]:
# Lookup table from a Morse sequence ('.' = dot, '-' = dash) to the key name
# that type_and_remember() appends to the key combination.
morse = {
  # Letters
  ".-": 'a',
  "-...": 'b',
  "-.-.": 'c',
  "-..": 'd',
  ".": 'e',
  "..-.": 'f',
  "--.": 'g',
  "....": 'h',
  "..": 'i',
  ".---": 'j',
  "-.-": 'k',
  ".-..": 'l',
  "--": 'm',
  "-.": 'n',
  "---": 'o',
  ".--.": 'p',
  "--.-": 'q',
  ".-.": 'r',
  "...": 's',
  "-": 't',
  "..-": 'u',
  "...-": 'v',
  ".--": 'w',
  "-..-": 'x',
  "-.--": 'y',
  "--..": 'z',
  # Digits
  ".----": '1',
  "..---": '2',
  "...--": '3',
  "....-": '4',
  ".....": '5',
  "-....": '6',
  "--...": '7',
  "---..": '8',
  "----.": '9',
  "-----": '0',
  # Punctuation and special keys.
  # NOTE(review): several of these are not the International Morse assignments
  # (e.g. "---..." is ':' in International Morse but maps to ';' here) —
  # presumably chosen to match unshifted keyboard keys; confirm before changing.
  ".-.-.-": '.',
  "--..--": ',',
  "---...": ';',
  ".----.": "'",
  ".----.-": '`',
  "-....-": '-',
  "-...-": '=',
  "-..-.": '/',
  "-..-.-": '\\',
  "----.-": '[',
  "------": ']',
  ".-.-": 'tab',
  # Pass-through entries keyed by name rather than by dots/dashes.
  # NOTE(review): no visible code sets current_morse to these strings — verify usage.
  "space": "space",
  "enter": "enter"
}

In [84]:
# ---------------------------------------------------------------------------
# Configuration constants followed by the mutable detection state that the
# main loop and the helper functions read and update as module-level globals.
# ---------------------------------------------------------------------------

CAMERA = 0 # Usually 0, depends on input device(s)

# Optionally record the video feed to a timestamped AVI in the current directory
RECORDING = False
FPS = 10
# Timestamp with '.' and ':' stripped so it can be used as a file name.
RECORDING_FILENAME = str(datetime.now()).replace('.','').replace(':','') + '.avi'

# NOTE(review): FACE_TILT is not read by any code visible in this notebook.
FACE_TILT = .5

# Eye thresholds, compared against the height/width ratio from get_aspect_ratio().
EYE_BLINK_HEIGHT = .15   # a single eye below this counts as closed
EYE_SQUINT_HEIGHT = .18  # average of both eyes below this counts as a squint
EYE_OPEN_HEIGHT = .25    # the other eye must exceed this for a wink to register
EYE_BUGGED_HEIGHT = .7   # average of both eyes above this counts as "big eyes"

# Mouth inner aspect ratio above MOUTH_OPEN_HEIGHT counts as an open mouth.
MOUTH_OPEN_HEIGHT = .2
# NOTE(review): the three *_FRAMES constants below are not read by the visible code.
MOUTH_OPEN_SHORT_FRAMES = 1
MOUTH_OPEN_LONG_FRAMES = 4
MOUTH_CLOSED_FRAMES = 1

# NOTE(review): the mouth-shape and brow thresholds below are not read by the
# visible code; their units cannot be determined from this notebook.
MOUTH_FROWN = .006
MOUTH_NOSE_SCRUNCH = .09
MOUTH_SNARL = .1
MOUTH_DUCKFACE = 1.6

# NOTE(review): LEFT and RIGHT differ by roughly an order of magnitude — confirm this is intended.
BROW_RAISE_LEFT = .0028
BROW_RAISE_RIGHT = .025
BROWS_RAISE = .19

# Frame budget used by timeout_double() and by the squint / big-eyes / double-wink checks.
WAIT_FRAMES = 6


# --- Mutable detection state (updated every frame by the main loop) ---

blinking = False
blink_count = 0
blinking_frames = 0

squinting = False
squinting_frames = 0

bugeyed = False
bugeyed_frames = 0

winkedR = False
winkedR_frames = 0

winkedL = False
winkedL_frames = 0

mouth_open = False
mouth_open_frames = 0
mouth_closed_frames = 0

mouth_scrunched = False
mouth_scrunched_count = 0
mouth_scrunched_frames = 0

# Read by the double-blink BACKSPACE check; never set to True in the visible code.
duckfacing = False

brows_raised = False
brows_raised_count = 0
brows_raised_frames = 0

# Modifier flags consumed by type_and_remember().
command_on = False
control_on = False
shift_on = False

# Morse sequence entered so far, and the last key combination emitted.
current_morse = ''
last_typed = ''

In [74]:
def type_and_remember():
  """Turn the queued Morse sequence into a key combination and remember it.

  Reads the module-level modifier flags (command_on, control_on, shift_on),
  looks up current_morse in the morse table, and always clears current_morse.
  When the resulting combination is non-empty it is printed and stored in
  last_typed; the actual key press is currently commented out.
  """
  global current_morse, last_typed

  modifier_flags = (
    ('command', command_on),
    ('control', control_on),
    ('shift', shift_on),
  )
  combo = [key_name for key_name, active in modifier_flags if active]

  # Unknown or empty sequences resolve to '' and contribute no key.
  decoded = morse.get(current_morse, '')
  current_morse = ''
  if decoded:
    combo.append(decoded)

  if not combo:
    return

  keystring = '+'.join(combo)
  print("keys:", keystring)
  #keyboard.press_and_release(keystring)
  last_typed = keystring


In [75]:
def get_aspect_ratio(top, bottom, right, left):
  """Return the height/width ratio spanned by four landmark points.

  Each argument must expose `.x` and `.y` attributes. Height is the Euclidean
  distance between `top` and `bottom`; width is the distance between `right`
  and `left`. Only x and y are used.
  """
  vertical_span = dist.euclidean((top.x, top.y), (bottom.x, bottom.y))
  horizontal_span = dist.euclidean((right.x, right.y), (left.x, left.y))
  return vertical_span / horizontal_span

In [76]:
def timeout_double(state, frames):
  """Advance the frame counter of an active state and expire it after WAIT_FRAMES.

  While `state` is truthy the counter goes up by one per call. Once the counter
  exceeds WAIT_FRAMES the pair is reset to (False, 0); otherwise `state` is
  returned unchanged together with the updated counter.
  """
  elapsed = frames + 1 if state else frames
  if elapsed > WAIT_FRAMES:
    return False, 0
  return state, elapsed

In [77]:
def draw_frame(image, face_landmarks):
  """Draw the face-mesh overlays on `image` and show it mirrored in the 'face' window.

  Three connection sets (tesselation, contours, irises) are drawn with their
  default MediaPipe styles. The image is then flipped horizontally, the current
  Morse sequence is written onto the flipped copy, and the copy is displayed.
  """
  # Each layer pairs a connection set with the factory for its default style.
  overlay_layers = (
    (mp_face_mesh.FACEMESH_TESSELATION,
     mp_drawing_styles.get_default_face_mesh_tesselation_style),
    (mp_face_mesh.FACEMESH_CONTOURS,
     mp_drawing_styles.get_default_face_mesh_contours_style),
    (mp_face_mesh.FACEMESH_IRISES,
     mp_drawing_styles.get_default_face_mesh_iris_connections_style),
  )
  for layer_connections, make_style in overlay_layers:
    mp_drawing.draw_landmarks(
        image=image,
        landmark_list=face_landmarks,
        connections=layer_connections,
        landmark_drawing_spec=None,
        connection_drawing_spec=make_style())

  # Mirror the picture, then overlay the Morse sequence entered so far.
  mirrored = cv2.flip(image, 1)
  cv2.putText(
    mirrored, current_morse, (620, 30),
    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
  cv2.imshow('face', mirrored)


Speech-to-text dependencies


In [78]:
# Shared speech recognizer used by get_audio() for noise calibration, listening and transcription.
r = sr.Recognizer()

In [79]:
def SpeakText(command):
    """Say `command` aloud using a pyttsx3 engine obtained for this call.

    The text is queued with `say()` and then processed with `runAndWait()`.
    """
    tts_engine = pyttsx3.init()
    tts_engine.say(command)
    tts_engine.runAndWait()

In [80]:
# NOTE(review): this flag is not read by any code visible in this notebook — confirm whether it is still needed.
parrot = False

In [81]:
def _paste_text(text, settle_seconds=3):
    """Copy `text` to the clipboard and paste it into the focused window."""
    import sys  # local import so this cell does not depend on the imports cell changing

    pyperclip.copy(text)
    # Pause before pasting — presumably to let the user focus the target window.
    time.sleep(settle_seconds)
    # Command+V is the macOS shortcut; other platforms paste with Ctrl+V.
    paste_modifier = 'command' if sys.platform == 'darwin' else 'ctrl'
    pyautogui.hotkey(paste_modifier, 'v')


def get_audio():
    """Listen on the microphone and paste each recognised phrase until told to stop.

    Each utterance is transcribed with Google Speech Recognition, lower-cased,
    printed, copied to the clipboard and pasted into the focused window.
    Dictation ends when the last word of an utterance is exactly "terminate";
    that word is dropped and any preceding words are still pasted.

    Recognition failures (unintelligible audio, service errors) are reported
    on stdout and listening continues.

    Returns:
        list[str]: the phrases that were pasted, in order.
    """
    spoken_text = []

    while True:
        with sr.Microphone() as source2:
            r.adjust_for_ambient_noise(source2, duration=0.5)
            audio2 = r.listen(source2)

        try:
            heard = r.recognize_google(audio2).lower()
        except sr.UnknownValueError:
            print("Could not understand audio, please try again.")
            continue
        except sr.RequestError as e:
            print("Could not request results from Google Speech Recognition service; {0}".format(e))
            continue

        words = heard.split()
        # Compare the whole last word so that e.g. "exterminate" does not stop dictation.
        stop_requested = bool(words) and words[-1] == 'terminate'
        if stop_requested:
            print("Stopping...")
            phrase = " ".join(words[:-1])
        else:
            phrase = heard

        # Saying only "terminate" leaves nothing to paste.
        if phrase:
            print("parrot says : ", phrase)
            spoken_text.append(phrase)
            _paste_text(phrase)

        if stop_requested:
            return spoken_text


Main loop

In [85]:
# Main capture loop: read webcam frames, run MediaPipe FaceMesh on each one,
# derive eye and mouth aspect ratios, update the gesture state globals, and
# start speech dictation (get_audio) whenever the mouth is detected as open.
cap = cv2.VideoCapture(CAMERA)

# to get output video, set RECORDING to true
recording = None
if RECORDING:
  frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
  recording = cv2.VideoWriter(
    RECORDING_FILENAME, cv2.VideoWriter_fourcc(*'MJPG'), FPS, frame_size)

# try/finally so the camera, the recording and the preview window are released
# even when the cell is stopped with a kernel interrupt (KeyboardInterrupt) or
# an exception escapes the loop.
try:
  with mp_face_mesh.FaceMesh(
      max_num_faces=1,
      refine_landmarks=True,
      min_detection_confidence=0.5,
      min_tracking_confidence=0.5) as face_mesh:
    while cap.isOpened():
      success, image = cap.read()
      if not success:
        break

      # FaceMesh expects RGB; OpenCV delivers BGR.
      image.flags.writeable = False
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
      results = face_mesh.process(image)

      # Back to BGR for drawing and display.
      image.flags.writeable = True
      image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

      if results.multi_face_landmarks:
        face_landmarks = results.multi_face_landmarks[0]
        face = face_landmarks.landmark

        # --- Eyes: aspect ratio per eye plus their average ---
        eyeR_top = face[159]
        eyeR_bottom = face[145]
        eyeR_inner = face[133]
        eyeR_outer = face[33]
        eyeR_ar = get_aspect_ratio(eyeR_top, eyeR_bottom, eyeR_outer, eyeR_inner)

        eyeL_top = face[386]
        eyeL_bottom = face[374]
        eyeL_inner = face[362]
        eyeL_outer = face[263]
        eyeL_ar = get_aspect_ratio(eyeL_top, eyeL_bottom, eyeL_outer, eyeL_inner)
        eyeA_ar = (eyeR_ar + eyeL_ar) / 2

        # Per-frame flags are recomputed from scratch on every frame.
        command_on = False
        shift_on = False
        squinting = False
        bugeyed = False
        if eyeR_ar < EYE_BLINK_HEIGHT:
          if eyeL_ar > EYE_OPEN_HEIGHT:
            # Right eye closed, left eye clearly open: right wink (acts as shift).
            print("R wink", eyeR_ar)
            shift_on = True
            winkedR = True
            # A left wink shortly before this right wink means ESCAPE.
            if winkedL and (winkedL_frames < WAIT_FRAMES):
              print("ESCAPE")
              #keyboard.press_and_release('escape')
              winkedL_frames = 0
              winkedL = False
          else:
            # Right eye closed and left eye not clearly open: treated as a blink.
            # NOTE(review): the original tested `eyeR_ar < EYE_BLINK_HEIGHT`
            # again here, which is always true in this branch; it may have been
            # meant to test eyeL_ar — confirm the intended condition.
            if not blinking:
              blink_count += 1
              print("blink", blink_count)
              if duckfacing and blink_count == 2:
                print("BACKSPACE")
                #keyboard.press_and_release("backspace")
            blinking = True
        elif eyeL_ar < EYE_BLINK_HEIGHT and eyeR_ar > EYE_OPEN_HEIGHT:
          # Left eye closed, right eye clearly open: left wink (acts as command).
          print("L wink", eyeL_ar)
          command_on = True
          winkedL = True
          # A right wink shortly before this left wink clears the Morse queue.
          if winkedR and (winkedR_frames < WAIT_FRAMES):
            print("clear Morse queue")
            current_morse = ''
            winkedR_frames = 0
            winkedR = False
        elif eyeA_ar < EYE_SQUINT_HEIGHT:
          squinting = True
          squinting_frames += 1
          if squinting_frames > WAIT_FRAMES:
            print("squint", eyeA_ar)
            #keyboard.press_and_release("command+-") # zoom out
            squinting_frames = 0
        elif eyeA_ar > EYE_BUGGED_HEIGHT:
          bugeyed = True
          bugeyed_frames += 1
          if bugeyed_frames > WAIT_FRAMES:
            bugeyed_frames = 0
            print("big eyes", eyeA_ar)
            #keyboard.press_and_release("command+shift+=") # zoom in
        else:
          blinking = False

        # Expire pending winks / blink counts that were not followed up in time.
        winkedL, winkedL_frames = timeout_double(winkedL, winkedL_frames)
        winkedR, winkedR_frames = timeout_double(winkedR, winkedR_frames)
        blink_count, blinking_frames = timeout_double(blink_count, blinking_frames)

        # --- Mouth ---
        mouth_inner_top = face[13]
        mouth_inner_bottom = face[14]
        mouth_inner_right = face[78]
        mouth_inner_left = face[308]
        mouth_inner_ar = get_aspect_ratio(
          mouth_inner_top, mouth_inner_bottom, mouth_inner_right, mouth_inner_left)

        # Not used below; kept because later cells may read it.
        nose_bottom = face[2]

        mouth_open = mouth_inner_ar > MOUTH_OPEN_HEIGHT
        if mouth_open:
          print("mouth open", mouth_inner_ar)
          # Blocks this loop (no new frames are processed) until dictation ends.
          get_audio()

        draw_frame(image, face_landmarks)
        if RECORDING:
          # Writes the annotated but un-mirrored frame.
          recording.write(image)

      # Type 'q' on the video frame to quit
      if cv2.waitKey(5) & 0xFF == ord('q'):
        break
finally:
  if recording is not None:
    recording.release()

  cap.release()
  cv2.destroyAllWindows()

blink 1
mouth open 0.2540744589158107
result2:
{   'alternative': [   {   'confidence': 0.66399163,
                           'transcript': 'open my mouth hello 10 minut as '
                                         'whole'},
                       {'transcript': 'open my mouth hello 10 minut as hole'},
                       {'transcript': 'open my mouth hello cabinet as hole'},
                       {'transcript': 'open my mouth hello 10 minute as hole'},
                       {'transcript': 'open my mouth hello cabinet hole'}],
    'final': True}
parrot says :  open my mouth hello 10 minut as whole
result2:
[]
Could not understand audio, please try again.
result2:
{   'alternative': [   {   'confidence': 0.69034266,
                           'transcript': 'open my mouth hello 10 minutes as '
                                         'more'},
                       {'transcript': 'open my mouth hello 10 minut asthma'},
                       {'transcript': 'open my mouth hello 10 

KeyboardInterrupt: 

hellosolid terminatorhellohello daminihello germanytom open my mouth hello 10 minut as wholeopen my mouth hello 10 minutes as moretubemateimageindian stick minutaudio system

In [68]:
# Scratch cell: wait 3 seconds, then send Command+V (the macOS paste shortcut) to the focused window.
time.sleep(3)
pyautogui.hotkey('command', 'v')

In [None]:
# Scratch cell: short pause, then send Command+V to the focused window.
# `delay` is not defined anywhere in this notebook and would raise NameError,
# so the pause uses time.sleep instead.
# NOTE(review): 200 is interpreted as milliseconds (0.2 s) — confirm the intended duration.
time.sleep(0.2)
pyautogui.hotkey('command', 'v')