# Spanish Sign Language Gesture Detection

The aim of this notebook is to make a first approach to the detection of Spanish sign language gestures. For this purpose, a limited set of words (Abajo, Amigo, Casa, Sentir, Colegio and Cerveza) have been selected and different videos have been taken to capture their movements.  These movements will be captured with the help of the Mediapipe library.

Mediapipe is an open-source library developed by Google that performs automatic detection of different body parts. Thanks to this library, the relative position of each point in an image or a video can be easily detected. The goal, or rather the main idea, is to obtain the coordinates of the different gestures and from these, to be able to infer which gestures are being performed.

![Mediapipe](https://google.github.io/mediapipe/images/mobile/holistic_sports_and_gestures_example.gif)

In [1]:
# Imports
import os
import cv2
import pandas as pd
import numpy as np
import mediapipe as mp
from mediapipe.python.solutions.face_mesh_connections import FACEMESH_CONTOURS

In [2]:
mp_drawing = mp.solutions.drawing_utils 
mp_holistic = mp.solutions.holistic 

# 1. Create dataset



In [3]:
# Create Dataframe columns names
coords = ["x", "y", "z"]
## Columns names list
list_column_names = []
## WORD
list_column_names.append("Word")
## RIGHT HAND
count = 0
# Code to access the landmarks
for landmark in mp_holistic.HandLandmark:
    for coord in coords:
        list_column_names.append("R"+str(landmark) + "_" + str(count) + "_" + coord)
    count+=1
## LEFT HAND
count = 0
# Code to access the landmarks
for landmark in mp_holistic.HandLandmark:
    for coord in coords:
        list_column_names.append("L"+str(landmark) + "_" + str(count) + "_" + coord)
    count+=1
## POSE
count = 0
# Code to access the landmarks
for landmark in mp_holistic.PoseLandmark:
    for coord in coords:
        list_column_names.append(str(landmark) + "_" + str(count) + "_" + coord)
    count+=1
    if count == 15:
        break

In [4]:
df = pd.DataFrame(columns = [list_column_names])
df

Unnamed: 0,Word,RHandLandmark.WRIST_0_x,RHandLandmark.WRIST_0_y,RHandLandmark.WRIST_0_z,RHandLandmark.THUMB_CMC_1_x,RHandLandmark.THUMB_CMC_1_y,RHandLandmark.THUMB_CMC_1_z,RHandLandmark.THUMB_MCP_2_x,RHandLandmark.THUMB_MCP_2_y,RHandLandmark.THUMB_MCP_2_z,...,PoseLandmark.LEFT_SHOULDER_11_z,PoseLandmark.RIGHT_SHOULDER_12_x,PoseLandmark.RIGHT_SHOULDER_12_y,PoseLandmark.RIGHT_SHOULDER_12_z,PoseLandmark.LEFT_ELBOW_13_x,PoseLandmark.LEFT_ELBOW_13_y,PoseLandmark.LEFT_ELBOW_13_z,PoseLandmark.RIGHT_ELBOW_14_x,PoseLandmark.RIGHT_ELBOW_14_y,PoseLandmark.RIGHT_ELBOW_14_z


In [None]:
# Read Videos
path = "../Train_Dataset/"
words = [words for words in os.listdir(path) if os.path.isdir(os.path.join(path, words))]
# All videos must have the same resolution (400x300)
HEIGHT = 600
WIDTH  = 900
for word in words:
    for video in os.listdir(os.path.join(path, word)):
        # Path to each video
        video_path = os.path.join(os.path.join(path, word), video)
        print(video_path)
        # Capture video
        cap = cv2.VideoCapture(video_path)
        # Initialize holistic model
        with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic:
            while cap.isOpened():
                # Read frame
                ret, frame = cap.read()
                if ret == True:
                    # Resize frame
                    #frame = cv2.resize(frame, (WIDTH, HEIGHT), interpolation = cv2.INTER_AREA)
                    # Change color from BGR to RGB
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frame.flags.writeable = False
                    # Detect landmarks
                    results = holistic.process(frame)
                    try:
                        # Add coords to list row
                        row = []
                        # The name of the word is added to the row list.
                        row.append(word)
                        ## Get rhand landmarks
                        for landmark in results.right_hand_landmarks.landmark:
                            row.append(landmark.x)
                            row.append(landmark.y)
                            row.append(landmark.z)
                        ## Get lhand landmarks
                        for landmark in results.left_hand_landmarks.landmark:
                            row.append(landmark.x)
                            row.append(landmark.y)
                            row.append(landmark.z)
                        ## Get pose landmarks
                        count = 0
                        for landmark in results.pose_landmarks.landmark:
                            row.append(landmark.x)
                            row.append(landmark.y)
                            row.append(landmark.z)
                            count += 1
                            if count == 15:
                                break
                        df.loc[len(df)] = row
                    except:
                        pass
                else:
                    break
            cap.release()

../Train_Dataset/Abajo\word-abajo-001-f-r10-rs10.avi
../Train_Dataset/Abajo\word-abajo-001-f-r10-rs13.avi
../Train_Dataset/Abajo\word-abajo-001-f-r10-t25-rs10.avi
../Train_Dataset/Abajo\word-abajo-001-f-r10-t25-rs13.avi
../Train_Dataset/Abajo\word-abajo-001-f-r10-t25.avi
../Train_Dataset/Abajo\word-abajo-001-f-r10-t30-rs10.avi
../Train_Dataset/Abajo\word-abajo-001-f-r10-t30-rs13.avi
../Train_Dataset/Abajo\word-abajo-001-f-r10-t30.avi
../Train_Dataset/Abajo\word-abajo-001-f-r10.avi
../Train_Dataset/Abajo\word-abajo-001-f-r13-rs10.avi
../Train_Dataset/Abajo\word-abajo-001-f-r13-rs13.avi
../Train_Dataset/Abajo\word-abajo-001-f-r13-t25-rs10.avi
../Train_Dataset/Abajo\word-abajo-001-f-r13-t25-rs13.avi
../Train_Dataset/Abajo\word-abajo-001-f-r13-t25.avi
../Train_Dataset/Abajo\word-abajo-001-f-r13-t30-rs10.avi
../Train_Dataset/Abajo\word-abajo-001-f-r13-t30-rs13.avi
../Train_Dataset/Abajo\word-abajo-001-f-r13-t30.avi
../Train_Dataset/Abajo\word-abajo-001-f-r13.avi
../Train_Dataset/Abajo\word-

In [179]:
df.to_csv("data.csv", index = False)

# Train

In [5]:
from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle 
import matplotlib as plt
# import seaborn as sns

In [6]:
# Load model
with open('../data/body_language.pkl', 'rb') as f:
    model = pickle.load(f)



In [7]:
df_detect = pd.DataFrame(columns = [list_column_names])
df_detect

Unnamed: 0,Word,RHandLandmark.WRIST_0_x,RHandLandmark.WRIST_0_y,RHandLandmark.WRIST_0_z,RHandLandmark.THUMB_CMC_1_x,RHandLandmark.THUMB_CMC_1_y,RHandLandmark.THUMB_CMC_1_z,RHandLandmark.THUMB_MCP_2_x,RHandLandmark.THUMB_MCP_2_y,RHandLandmark.THUMB_MCP_2_z,...,PoseLandmark.LEFT_SHOULDER_11_z,PoseLandmark.RIGHT_SHOULDER_12_x,PoseLandmark.RIGHT_SHOULDER_12_y,PoseLandmark.RIGHT_SHOULDER_12_z,PoseLandmark.LEFT_ELBOW_13_x,PoseLandmark.LEFT_ELBOW_13_y,PoseLandmark.LEFT_ELBOW_13_z,PoseLandmark.RIGHT_ELBOW_14_x,PoseLandmark.RIGHT_ELBOW_14_y,PoseLandmark.RIGHT_ELBOW_14_z


In [10]:
cap = cv2.VideoCapture(1)
# Initialize holistic model
with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic:
    while cap.isOpened():
        # Read frame
        ret, frame = cap.read()
        if ret == True:
            # Resize frame
            #frame = cv2.resize(frame, (WIDTH, HEIGHT), interpolation = cv2.INTER_AREA)
            # Change color from BGR to RGB
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame.flags.writeable = False
            # Detect landmarks
            results = holistic.process(frame)
            
             # Mano izquieda (azul)
            mp_drawing.draw_landmarks(
                frame, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(255, 255, 0), thickness=2, circle_radius=1),
                mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2))

           # Mano derecha (verde)
            mp_drawing.draw_landmarks(
                frame, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=1),
                mp_drawing.DrawingSpec(color=(57, 143, 0), thickness=2))

           # Postura
            mp_drawing.draw_landmarks(
                frame, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(128, 0, 255), thickness=2, circle_radius=1),
                mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=2))
            
            try:
                print("Try")
                # Add coords to list row
                row = []
                # The name of the word is added to the row list.
                row.append(word)
                ## Get rhand landmarks
                for landmark in results.right_hand_landmarks.landmark:
                    row.append(landmark.x)
                    row.append(landmark.y)
                    row.append(landmark.z)
                ## Get lhand landmarks
                for landmark in results.left_hand_landmarks.landmark:
                    row.append(landmark.x)
                    row.append(landmark.y)
                    row.append(landmark.z)
                ## Get pose landmarks
                count = 0
                for landmark in results.pose_landmarks.landmark:
                    row.append(landmark.x)
                    row.append(landmark.y)
                    row.append(landmark.z)
                    count += 1
                    if count == 15:
                        break
                print(row)
                df_detect.loc[len(df_detect)] = row
                body_language_class = model.predict(df_detect)[0]
                body_language_prob = model.predict_proba(df_detect)[0]
                print(body_language_class, body_language_prob)
            except:
                pass
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            cv2.imshow("Imagen a detectar", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        else:
            break
    cap.release()

Try


In [None]:
while True: