# Sign Language (ASL) recognition
An Artificial Intelligence project by Emile GATIGNON and Martin RAMPONT

> "Artificial Intelligence" - Course N° 12721 at Hanyang University with professor 백성용 / Sungyong Baik
> 
> Spring Semester 2023


In [None]:
# Installs
# Third-party packages needed by the cells below: OpenCV (imported as `cv2`) and MediaPipe.
# NOTE: `%pip` is an IPython magic, so this line only runs inside a notebook kernel.
%pip install opencv-python mediapipe


In [None]:
# Dependencies
import cv2
import hashlib
import json
import mediapipe as mp
import numpy as np
import os
import urllib.request
import zipfile


In [None]:
# Global parameters

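# Dataset
# ? Dataset source : https://www.kaggle.com/datasets/risangbaskoro/wlasl-processed
# NOTE: `data_url` below appears to be a time-limited signed link (see its X-Goog-Date and
# X-Goog-Expires query parameters); if the download is refused, get a fresh link from the dataset page above.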
data_url = r'https://storage.googleapis.com/kaggle-data-sets/1589971/2632847/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20230528%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230528T101545Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=3d32a329ea1d2da65583832ef994b2b0ada5e17e04f938afff9fdd04fca8643b22b889accfc807458f1f8616b28cc2b0412f464649c851bdd2d0d6d4bfca08bd85f37f0be1653fdd3c85fb44b6abf81faf3051ca1eb817c1a52158574d1545d7723f498008fb2b151c5a1a2ab855299e72727c9ecc3138965c81e33660024625a3779e065613c78c7520913c5279bdbe392010d66bab023509f1a1a792fb5567ddf7865fdb51f354fd737ac202a07d02481ec04eb5e26f44a94aa942d4dd395bd1f5984ba5eb60b46a80a5cd33b7229558c69bfd524c3e7ec49b18150956e3b0b96849ec2270cadb458b38f9cee415722a91fbbecc3acebabec79d25016c0217'
# Extraction directory; the downloaded archive itself is stored next to it as `data_path + '.zip'`
data_path = r'downloads/data'
# Sub-folder of `data_path` holding the source videos
videos_folder = r'videos'
# Sub-folder of `data_path` where the generated landmark arrays (.npy) are saved
landmarks_folder = r'landmarks'
# JSON description file, read from `data_path`, listing entries with a 'gloss' and their 'instances'
data_description_file = r'WLASL_v0.3.json'
# JSON file, written in `data_path`, mapping each video id to its gloss
labels_file = r'labels.json'


In [None]:
# Dataset download and extraction
#
# Steps, each skipped when its result is already on disk:
#   1. landmarks folder present        -> nothing to do
#   2. videos + description present    -> archive already unpacked
#   3. otherwise download the archive (unless a copy with the expected MD5 exists) and extract it
data_zip_hash = '1b8198227bb3de21de639146016a7368'
data_zip_path = data_path + '.zip'


def _md5_of_file(path, chunk_size=1 << 20):
    """Return the hexadecimal MD5 digest of the file at `path`, reading it in chunks."""
    digest = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _download_report_hook(count, block_size, total_size):
    """Progress callback for urlretrieve: prints a percentage, or a byte count when the size is unknown."""
    received = count * block_size
    if total_size > 0:
        # The last block is usually partial, so clamp to 100%
        percentage = min(received / total_size, 1.0) * 100
        print(f"Downloading data... {percentage:.2f}%", end='\r')
    else:
        print(f"Downloading data... {received} bytes", end='\r')


if os.path.isdir(os.path.join(data_path, landmarks_folder)):
    print("Landmarks found, skipping download")
elif os.path.isfile(os.path.join(data_path, data_description_file)) \
        and os.path.isdir(os.path.join(data_path, videos_folder)):
    print("Data already unpacked, skipping")
else:
    downloaded_hash = ''
    if os.path.isfile(data_zip_path):
        print("Data already downloaded, checking integrity...")
        downloaded_hash = _md5_of_file(data_zip_path)

    if data_zip_hash != downloaded_hash:
        # urlretrieve does not create missing parent directories
        zip_parent = os.path.dirname(data_zip_path)
        if zip_parent:
            os.makedirs(zip_parent, exist_ok=True)

        urllib.request.urlretrieve(data_url, data_zip_path, reporthook=_download_report_hook)
        print("\n")

        # Check the fresh download too; only warn, since the upstream archive may have been repackaged
        if _md5_of_file(data_zip_path) != data_zip_hash:
            print("Warning: downloaded zip does not match the expected hash")
    else:
        print("Downloaded zip integrity ok")

    with zipfile.ZipFile(data_zip_path, 'r') as zip_ref:
        members = zip_ref.namelist()
        total_files = len(members)
        for extracted_files, member in enumerate(members, start=1):
            zip_ref.extract(member, data_path)

            progress = (extracted_files / total_files) * 100
            print(f"Extracting... {progress:.2f}%", end='\r')
        print("\n")


In [None]:
# Landmark detection - variables and functions

# Shorthand aliases for the MediaPipe solution modules used by the functions below
MP_HOLISTIC = mp.solutions.holistic  # provides the Holistic model and the *_CONNECTIONS / FACEMESH_CONTOURS sets
MP_DRAWING = mp.solutions.drawing_utils  # provides draw_landmarks and DrawingSpec


def mediapipe_detection(image: cv2.Mat, model):
    """Run a mediapipe model on a single BGR frame.

    Args:
        image: Frame in BGR channel order (as produced by OpenCV).
        model: Object exposing a `process(image)` method, e.g. a Holistic instance.

    Returns:
        A tuple `(image, results)`: the frame converted back to BGR after
        processing, and whatever `model.process` returned.
    """
    # The model is fed an RGB copy of the frame
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Keep the buffer read-only for the duration of the inference
    # (presumably so the model can use it without copying - TODO confirm)
    rgb_frame.flags.writeable = False
    detection = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detection


def draw_styled_landmarks(image, results):
    """Draw the face, pose and hand landmarks of a holistic result onto an image, in place.

    Args:
        image: Image to draw on (modified in place by `draw_landmarks`).
        results: Holistic results exposing `face_landmarks`, `pose_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`.
    """
    spec = MP_DRAWING.DrawingSpec

    # One row per landmark group: (landmarks, connections, landmark style, connection style)
    layers = (
        # Face (colour channels must stay within 0-255; the original 256 was out of range)
        (results.face_landmarks, MP_HOLISTIC.FACEMESH_CONTOURS,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose
        (results.pose_landmarks, MP_HOLISTIC.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=2)),
        # Left hand
        (results.left_hand_landmarks, MP_HOLISTIC.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Right hand
        (results.right_hand_landmarks, MP_HOLISTIC.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )

    for landmarks, connections, landmark_style, connection_style in layers:
        MP_DRAWING.draw_landmarks(image, landmarks, connections, landmark_style, connection_style)


def _landmark_list_to_array(landmark_list, count: int) -> np.ndarray:
    """Convert one mediapipe landmark list to a (count, 3) float array; all NaN when the list is None."""
    coords = np.full((count, 3), np.nan)
    if landmark_list is not None:
        for i, landmark in enumerate(landmark_list.landmark):
            coords[i] = landmark.x, landmark.y, landmark.z
    return coords


def extract_landmarks(results) -> np.ndarray:
    """Transforms the results from a mediapipe process to a NumPy Array

    Args:
        results: Results from a mediapipe process, exposing `face_landmarks`,
            `pose_landmarks`, `left_hand_landmarks` and `right_hand_landmarks`
            (each either None or an object with a `landmark` sequence of x/y/z points)

    Returns:
        np.ndarray: Vectorized results, missing landmarks are represented as numpy.nan

        returned.shape = (4,), dtype=object

        returned[0].shape = (468, 3) (face), returned[1].shape = (33, 3) (pose),
        returned[2].shape = (21, 3) (left hand), returned[3].shape = (21, 3) (right hand)
    """
    groups = (
        (results.face_landmarks, 468),
        (results.pose_landmarks, 33),
        (results.left_hand_landmarks, 21),
        (results.right_hand_landmarks, 21),
    )

    # Fill an object array slot by slot so the result is always shape (4,),
    # regardless of how NumPy would otherwise try to stack the sub-arrays
    packed = np.empty(len(groups), dtype=object)
    for slot, (landmark_list, count) in enumerate(groups):
        packed[slot] = _landmark_list_to_array(landmark_list, count)
    return packed


def video_to_landmarks(video_path: str, display: bool = False) -> "tuple[np.ndarray, bool]":
    """Run holistic landmark detection on every frame of a video.

    Args:
        video_path: Path of the video file to open with OpenCV.
        display: When True, each annotated frame is shown in a window while
            converting. Pressing 'q' in that window closes it and turns the
            display off for the remaining frames.

    Returns:
        A tuple `(landmark_frames, display)`:
        `landmark_frames` is a 1-D object array with one entry per decoded
        frame, each entry being the value returned by `extract_landmarks`;
        `display` is the (possibly updated) display flag, so the caller can
        carry a 'q' press over to the next video.
    """
    # Collect per-frame results in a list: CAP_PROP_FRAME_COUNT comes from the container
    # metadata and may not match the number of frames that actually decode
    per_frame = []

    cap = cv2.VideoCapture(video_path)
    try:
        with MP_HOLISTIC.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            while cap.isOpened():
                success, frame = cap.read()
                if not success:
                    break

                image, results = mediapipe_detection(frame, holistic)
                per_frame.append(extract_landmarks(results))

                if display:
                    draw_styled_landmarks(image, results)
                    cv2.imshow(f"Converting '{video_path}'...", image)
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        display = False
                        cv2.destroyAllWindows()
    finally:
        # Release the capture and close windows even if the conversion is interrupted
        cap.release()
        cv2.destroyAllWindows()

    # Assign element by element so each slot holds one per-frame array
    # (building the array directly from the list would stack them into a 2-D array)
    landmark_frames = np.empty(len(per_frame), dtype=object)
    for i, frame_landmarks in enumerate(per_frame):
        landmark_frames[i] = frame_landmarks
    return landmark_frames, display


In [None]:
# Landmark detection - dataset conversion
#
# Converts every .mp4 in the videos folder to a .npy landmark array in the landmarks folder.
# Videos that already have an array (and non-.mp4 files) are skipped, so the cell can be
# interrupted (Ctrl+C / kernel interrupt) and re-run to resume.
display_conversion = False
videos_path = os.path.join(data_path, videos_folder)
landmarks_path = os.path.join(data_path, landmarks_folder)

os.makedirs(landmarks_path, exist_ok=True)
video_files = os.listdir(videos_path)
video_count = len(video_files)
skipped = 0
generated = 0

try:
    for i, video_path in enumerate(video_files):
        progress = 100 * i / video_count
        print(f"Generating landmarks: {progress:6.2f}% ({skipped:5} skipped) -> {video_path}", end='\r')

        array_path = os.path.join(landmarks_path, os.path.splitext(video_path)[0]) + '.npy'
        if video_path.endswith('.mp4') and not os.path.isfile(array_path):
            # The display flag is threaded through so a 'q' press disables the preview for later videos too
            landmarks, display_conversion = video_to_landmarks(os.path.join(videos_path, video_path), display_conversion)
            np.save(array_path, landmarks)
            generated += 1
        else:
            skipped += 1
except KeyboardInterrupt:
    # Count what this run actually handled rather than inferring it from the folder contents
    progress = 100 * (generated + skipped) / max(video_count, 1)
    print(f"Interrupted landmark generation at {progress:6.2f}% -> {skipped} skipped, {generated} generated")
else:
    # Only report completion when the loop was not interrupted
    print(f"Finished landmark generation -> {skipped} skipped, {generated} generated")


In [None]:
# Labels extraction
# Builds a {video id: gloss} mapping from the dataset description and stores it as JSON,
# unless the labels file already exists.

if not os.path.isfile(os.path.join(data_path, labels_file)):
    print("Loading data description and extracting labels...")
    with open(os.path.join(data_path, data_description_file), "r") as description_stream:
        data_desc = json.load(description_stream)

    # Every instance of an entry is labelled with that entry's gloss
    labels = {
        instance['video_id']: entry['gloss']
        for entry in data_desc
        for instance in entry['instances']
    }

    print("Saving labels...")
    with open(os.path.join(data_path, labels_file), "w") as labels_stream:
        json.dump(labels, labels_stream, indent=4)

    print("Labels generated.")
else:
    print("Labels already generated, skipping")
