# Accessibility Improvements for the Disabled

### Importing all dependencies

In [2]:
# importing dependencies
import speech_recognition as sr
import time
from gtts import gTTS
from playsound import playsound
import os
import pickle
import mediapipe as mp
import cv2
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

## Sign language recognition

### Collect images

In [2]:
# Record `dataset_size` webcam frames for every label in `class_labels` and
# store them as ./data/<label>/<n>.jpg.
#
# Controls (while the preview window has focus):
#   'r' - start recording frames for the class currently announced
#   'x' - abort the whole collection run
DATA_DIR = './data'
os.makedirs(DATA_DIR, exist_ok=True)

class_labels = ['a', 'b', 'c']  # Example class labels
dataset_size = 100  # number of frames saved per class

cap = cv2.VideoCapture(0)
# Set when the user presses 'x' or the camera stops delivering frames; once
# set, no further class is processed (the camera is unusable / unwanted).
stop_collection = False

try:
    for class_name in class_labels:
        class_dir = os.path.join(DATA_DIR, class_name)
        os.makedirs(class_dir, exist_ok=True)

        print('Collecting data for class {}'.format(class_name))

        # Show a live preview until the user presses 'r' (start) or 'x' (quit).
        while True:
            ret, frame = cap.read()

            # Check if frame was successfully captured
            if not ret:
                print("Failed to grab frame")
                stop_collection = True
                break

            cv2.putText(frame, "Ready? Press 'r' to start capturing!", (50, 50),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 255), 2, cv2.LINE_AA)
            cv2.imshow("Capturing datasets", frame)

            # Keep only the low byte so the comparison with ord() also works
            # on backends that set higher bits in the key code.
            key = cv2.waitKey(25) & 0xFF

            if key == ord('r'):
                print('Starting image capture for class {}'.format(class_name))
                break
            if key == ord('x'):
                stop_collection = True
                break

        if stop_collection:
            break  # leave the class loop too, not just the preview loop

        # Image capture process: the saved frames carry no text overlay because
        # each one is read fresh from the camera.
        counter = 0
        while counter < dataset_size:
            ret, frame = cap.read()

            # Check if frame was successfully captured
            if not ret:
                print("Failed to grab frame during capture")
                stop_collection = True
                break

            cv2.imshow("Capturing datasets", frame)
            cv2.waitKey(25)
            cv2.imwrite(os.path.join(class_dir, '{}.jpg'.format(counter)), frame)
            counter += 1

        if stop_collection:
            break
finally:
    # Always free the webcam and close the preview window, even if the cell
    # is interrupted or raises.
    cap.release()
    cv2.destroyAllWindows()


Collecting data for class a
Starting image capture for class a
Collecting data for class b
Starting image capture for class b
Collecting data for class c
Starting image capture for class c


### Create the dataset

In [3]:
# Turn every collected image into a fixed-length feature vector of hand
# landmark coordinates and pickle the vectors together with their labels.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

DATA_DIR = './data'

data = []    # one feature vector per usable image
labels = []  # the class folder name of each vector, same order as `data`
for sub_dir in sorted(os.listdir(DATA_DIR)):
    class_dir = os.path.join(DATA_DIR, sub_dir)
    if not os.path.isdir(class_dir):
        continue  # ignore stray files sitting next to the class folders

    for img_path in sorted(os.listdir(class_dir)):
        img = cv2.imread(os.path.join(class_dir, img_path))
        if img is None:
            continue  # unreadable or non-image file

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        if not results.multi_hand_landmarks:
            continue  # no hand found in this image

        # Use only the first detected hand. Concatenating every detected hand
        # would make vectors of different lengths, which cannot be stacked
        # into the 2-D array the classifier is trained on.
        hand_landmarks = results.multi_hand_landmarks[0]

        x_ = [landmark.x for landmark in hand_landmarks.landmark]
        y_ = [landmark.y for landmark in hand_landmarks.landmark]
        min_x, min_y = min(x_), min(y_)

        # Store coordinates relative to the hand's top-left corner so the
        # features do not depend on where the hand is in the frame.
        data_aux = []
        for x, y in zip(x_, y_):
            data_aux.append(x - min_x)
            data_aux.append(y - min_y)

        data.append(data_aux)
        labels.append(sub_dir)

with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)

### Train classifier

In [14]:
# Train a random forest on the pickled landmark features, report hold-out
# accuracy and save the fitted model to model.p.

# NOTE: unpickling runs arbitrary code; only load files produced by this notebook.
with open('./data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

data = np.asarray(data_dict['data'])
labels = np.asarray(data_dict['labels'])

# Fixed seeds make the split, the fitted forest and the reported score
# reproducible across "Restart & Run All".
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=42)

model = RandomForestClassifier(random_state=42)

model.fit(x_train, y_train)

y_predict = model.predict(x_test)

# scikit-learn metrics take (y_true, y_pred) in that order.
score = accuracy_score(y_test, y_predict)

print('{}% of samples were classified correctly !'.format(score * 100))

with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)


100.0% of smaples were classified correctly !


### Make predictions with classifier

In [6]:
# Classify the hand sign seen by the webcam in real time and draw the
# predicted label on the preview. Press 'x' in the preview window to stop.

# NOTE: unpickling runs arbitrary code; only load files produced by this notebook.
with open('./model.p', 'rb') as f:
    model_dict = pickle.load(f)
model = model_dict['model']

cap = cv2.VideoCapture(0)

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# NOTE(review): static_image_mode=True is kept from the original cell; confirm
# whether the video-oriented mode (False) is preferable for a live stream.
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# NOTE(review): this mapping is not consulted below; the value returned by
# model.predict() is drawn as-is.
labels_dict = {0: 'A', 1: 'B', 2: 'C'}

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Without this check a failed read leaves `frame` unusable and
            # the shape lookup below raises.
            print("Failed to grab frame")
            break

        H, W, _ = frame.shape

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = hands.process(frame_rgb)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame,  # image to draw
                    hand_landmarks,  # model output
                    mp_hands.HAND_CONNECTIONS,  # hand connections
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())

            # Classify only the first detected hand: stacking several hands
            # into one vector would change its length and make predict() fail.
            hand_landmarks = results.multi_hand_landmarks[0]

            x_ = [landmark.x for landmark in hand_landmarks.landmark]
            y_ = [landmark.y for landmark in hand_landmarks.landmark]
            min_x, min_y = min(x_), min(y_)

            # Coordinates relative to the hand's top-left corner.
            data_aux = []
            for x, y in zip(x_, y_):
                data_aux.append(x - min_x)
                data_aux.append(y - min_y)

            # Bounding box around the hand with a 10 px margin on every side
            # (landmark coordinates are fractions of the frame size).
            x1 = int(min_x * W) - 10
            y1 = int(min_y * H) - 10

            x2 = int(max(x_) * W) + 10
            y2 = int(max(y_) * H) + 10

            prediction = model.predict([np.asarray(data_aux)])

            predicted_character = str(prediction[0])

            cv2.rectangle(frame, (x1, y1), (x2, y2), (25, 32, 48), 4)
            cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_COMPLEX, 1.3, (25, 32, 48), 3,
                        cv2.LINE_AA)

        cv2.imshow('Sign Language Detector', frame)

        # Keep only the low byte so the comparison with ord() also works on
        # backends that set higher bits in the key code.
        key = cv2.waitKey(1) & 0xFF

        if key == ord('x'):
            break  # Exit the entire program if 'x' is pressed
finally:
    # Single clean-up point: runs on 'x', on camera failure and on errors.
    cap.release()
    cv2.destroyAllWindows()


## Voice recognition and Command Execution

### Capture voice input

In [8]:
# Shared recognizer used for both listening and transcription.
recognizer = sr.Recognizer()


def capture_voice_input():
    """Record one spoken phrase from the default microphone.

    The recognizer is calibrated against ambient noise before listening.

    Returns:
        The audio object produced by ``recognizer.listen``, or ``None`` when
        listening timed out while waiting for speech to start.
    """
    with sr.Microphone() as mic:
        print("Listening...")
        recognizer.adjust_for_ambient_noise(mic)

        try:
            # Wait up to 5 s for speech to begin, record at most 5 s of it.
            return recognizer.listen(mic, timeout=5, phrase_time_limit=5)
        except sr.WaitTimeoutError:
            print("Listening timed out while waiting for you to speak")
    return None

### Convert text to speech

In [9]:
def text_to_speech(text):
    """Speak ``text`` aloud.

    The speech is synthesised with gTTS, written to ``speech.mp3`` in the
    current working directory, played, and the file is deleted afterwards.

    Args:
        text: The sentence to speak.

    Raises:
        Whatever gTTS or playsound raise when synthesis or playback fails;
        the temporary file is removed in that case as well.
    """
    # Initialize gTTS with the text to convert
    speech = gTTS(text, tld='com.ng', lang='en', slow=False)

    speech_file = 'speech.mp3'
    try:
        # Save the audio to a temporary file and play it
        speech.save(speech_file)
        playsound(speech_file)
    finally:
        # Remove the file even when saving or playback failed, so a stale or
        # partial mp3 is never left behind.
        if os.path.exists(speech_file):
            os.remove(speech_file)
    
# text_to_speech("Oh my God, what are you doing, stop that!")

### Convert Voice to Text

In [10]:
def convert_voice_to_text(audio):
    """Transcribe recorded audio and echo the outcome on screen and aloud.

    Args:
        audio: Audio returned by ``capture_voice_input``, or ``None``.

    Returns:
        The recognised text, or ``""`` when there was no audio, the speech
        could not be understood, or the recognition request failed.
    """
    if audio is None:
        return ""

    try:
        transcript = recognizer.recognize_google(audio)
        print("You said: " + transcript)
        text_to_speech(f"You said {transcript}")
        return transcript
    except sr.UnknownValueError:
        print("Sorry I didn't understand that.")
        text_to_speech("Sorry I didn't understand that")
    except sr.RequestError as err:
        # Only printed, not spoken.
        print("Error: {0}".format(err))
    return ""

### Process Voice Command

In [11]:
def process_voice_command(text):
    """Answer a recognised phrase on screen and aloud.

    Matching is a case-insensitive substring search; the first rule that
    matches wins.

    Args:
        text: The transcribed user utterance.

    Returns:
        ``True`` when the phrase was a farewell / stop request (the caller
        should end the session), ``False`` otherwise.
    """
    heard = text.lower()

    # (trigger phrases, printed reply, spoken reply, ends the session)
    # Longer variants such as "how are you doing today" or "alright, goodbye"
    # are not listed: they always contain one of the shorter triggers below.
    rules = (
        (("hello",),
         "Hello! How can I help you?",
         "Hello! How can I help you",
         False),
        (("what is your name",),
         "My name is Eden",
         "My name is Eden",
         False),
        (("how are you",),
         "I'm doing alright, thank you very much",
         "I'm doing  alright, thank you very much",
         False),
        (("alright", "goodbye", "all right", "stop"),
         "Goodbye! Have a nice day",
         "Goodbye! Have a nice day",
         True),
    )

    for triggers, shown, spoken, ends_session in rules:
        if any(trigger in heard for trigger in triggers):
            print(shown)
            text_to_speech(spoken)
            return ends_session

    fallback = "I didn't understand that command. Please try again."
    print(fallback)
    text_to_speech(fallback)
    return False

### Main Function

In [12]:
def main():
    """Run the voice assistant loop.

    Listens, transcribes and answers until a command asks to stop or three
    attempts in a row produce no usable text. The failure counter is reset
    after every command that was processed.
    """
    max_attempts = 3  # consecutive failures tolerated before giving up
    failures = 0
    finished = False

    while not finished and failures < max_attempts:
        audio = capture_voice_input()
        timed_out = audio is None
        text = "" if timed_out else convert_voice_to_text(audio)

        if text == "":
            failures += 1
            print(f"Retrying... ({failures}/{max_attempts})")
            if timed_out:
                time.sleep(1)  # Delay to prevent rapid looping
            continue

        finished = process_voice_command(text)
        failures = 0  # a command was processed, start counting afresh

    if failures >= max_attempts:
        giving_up = "Too many failed attempts due to timeout. Exiting program."
        print(giving_up)
        text_to_speech(giving_up)

# Start the assistant when this code is executed directly (as it is when the
# notebook cell is run) rather than imported from another module.
if __name__ == "__main__":
    main()


Listening...
Sorry I didn't understand that.
Retrying... (1/3)
Listening...
You said: hello how are you doing today
Hello! How can I help you?
Listening...
You said: it is enough good bad
I didn't understand that command. Please try again.
Listening...
You said: all right goodbye you like this time
Goodbye! Have a nice day
