In [1]:
import cv2
import time
import mediapipe as mp

import numpy as np 
import joblib
import tensorflow as tf

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.executors.pool import ThreadPoolExecutor
import sched
import threading
from difflib import get_close_matches
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [18]:
# Labels indexed by the classifier's predicted class index. The prediction
# loop treats single-character entries as spelled letters and longer entries
# as complete words.
class_labels = [
    'A', 'Apa', 'B', 'Baik', 'Berapa', 'C', 'D', 'Dimana', 'E', 'F',
    'G', 'H', 'Halo', 'I', 'J', 'K', 'Kapan', 'Kemana', 'L', 'M',
    'Mengapa', 'N', 'O', 'P', 'Q', 'R', 'S', 'Sabar', 'Siapa',
    'T', 'Tidur', 'U', 'V', 'W', 'X', 'Y', 'Z',
]

# TFLite model (file name suggests preprocessing is baked in); tensors must be
# allocated before any set_tensor/invoke call.
interpreter = tf.lite.Interpreter(model_path="./model_with_preprocessing.tflite")
interpreter.allocate_tensors()

# Tensor metadata, looked up once and reused by run_inference for every frame.
input_details, output_details = (
    interpreter.get_input_details(),
    interpreter.get_output_details(),
)


In [23]:


def extract_hand_features_and_draw(image):
    """Detect hands in a BGR frame, draw their landmarks, and build a feature vector.

    Args:
        image: BGR image (as produced by OpenCV). Landmarks are drawn onto it
            in place.

    Returns:
        A ``(features, image)`` tuple. ``features`` is a 1-D NumPy array holding
        the x, y, z coordinates of every landmark of the first hand followed by
        those of the second hand. When only one hand is found its coordinates
        are repeated so the vector length matches the two-hand case; when no
        hand is found the array is empty. ``image`` is the same object that was
        passed in.
        NOTE(review): MediaPipe Hands normally reports 21 landmarks per hand,
        which would make this 126 values -- confirm against the model input.
    """
    mp_hands = mp.solutions.hands
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    data = []

    hands = mp_hands.Hands(
        static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5
    )
    try:
        results = hands.process(image_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                landmarks = []
                for landmark in hand_landmarks.landmark:
                    landmarks.extend((landmark.x, landmark.y, landmark.z))
                data.append(landmarks)

                # Draw inside the loop so every detected hand is rendered,
                # not just the last one.
                mp.solutions.drawing_utils.draw_landmarks(
                    image, hand_landmarks, mp_hands.HAND_CONNECTIONS
                )
    finally:
        # Release the MediaPipe graph even if processing raised.
        hands.close()

    if len(data) == 2:
        # Concatenate the features of both hands
        features = np.concatenate((data[0], data[1]))
    elif len(data) == 1:
        # Duplicate the single hand's features to simulate two-hand input
        features = np.concatenate((data[0], data[0]))
    else:
        # Empty array signals "no hands detected" to the caller
        features = np.array([])

    return features, image


def run_inference(hand_features):
    """Classify one hand-feature vector with the module-level TFLite interpreter.

    Args:
        hand_features: NumPy array of hand features; an empty array means no
            hand was detected.

    Returns:
        ``None`` when ``hand_features`` is empty, otherwise the index of the
        highest-scoring entry in the model's first output tensor.
    """
    if not hand_features.size:
        return None

    # The model is fed a single-row float32 batch.
    model_input = hand_features.reshape(1, -1).astype(np.float32)
    input_index = input_details[0]["index"]
    interpreter.set_tensor(input_index, model_input)
    interpreter.invoke()

    output_index = output_details[0]["index"]
    scores = interpreter.get_tensor(output_index)
    return np.argmax(scores)

def generate_word_spelling(file_path, del_thresh):
    """Load the dictionary words used for spelling correction.

    Reads a header-less CSV and returns the entries of its first column whose
    length is at least ``del_thresh``, in file order.

    Args:
        file_path: Path to the CSV file; words are taken from the first column.
        del_thresh: Minimum word length to keep.

    Returns:
        List of words (strings) with ``len(word) >= del_thresh``.
    """
    # dtype=str + keep_default_na=False: pandas would otherwise parse entries
    # such as "null", "nan" or "NA" as missing values (silently dropping real
    # words) and infer a numeric dtype for digit-only columns, which breaks
    # string-length filtering.
    df = pd.read_csv(file_path, header=None, dtype=str, keep_default_na=False)
    return [
        word
        for word in df[0].tolist()
        if isinstance(word, str) and word and len(word) >= del_thresh
    ]

def correct_spelling(word, word_list):
    """Return the dictionary word that most plausibly matches ``word``.

    Strategies are tried in order and the first hit wins:

    1. Closest fuzzy match (similarity >= 0.8) among dictionary words of the
       same length.
    2. Replace a single character with another lowercase ASCII letter,
       scanning positions from the last character to the first.
    3. Append one lowercase ASCII letter to the end.

    Args:
        word: The (possibly misspelled) word.
        word_list: Known correct words.

    Returns:
        The corrected word, or ``word`` unchanged when nothing matches.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'

    same_length_words = [w for w in word_list if len(w) == len(word)]
    close_matches = get_close_matches(word, same_length_words, n=1, cutoff=0.8)
    if close_matches:
        return close_matches[0]

    # Built only when the fuzzy match fails: the loops below test up to
    # 26 * (len(word) + 1) candidates, so set membership avoids rescanning the
    # whole list for each one.
    known_words = set(word_list)

    for i in range(len(word) - 1, -1, -1):
        for char in alphabet:
            if char != word[i]:  # only substitute when the character differs
                possible_word = word[:i] + char + word[i + 1:]
                if possible_word in known_words:
                    return possible_word

    for char in alphabet:
        possible_word = word + char
        if possible_word in known_words:
            return possible_word

    return word
    

In [4]:
# Dictionary of known words (length >= 3) shared by the spelling-correction helpers.
list_word = generate_word_spelling('./10k-indonesia-common-words.csv', 3)

# Quick sanity check: first few entries and the dictionary size.
print(list_word[:5], len(list_word), sep="\n")

# Smoke test: a one-letter typo should resolve to a dictionary word.
correct_spelling('bagaimans', list_word)

['yang', 'dan', 'ini', 'untuk', 'mereka']
10651


'bagaimana'

In [5]:
def gabung_kalimat(kata_list):
    """Spell-correct a list of tokens and join them into a capitalized sentence.

    Each token is passed through ``correct_spelling`` against the module-level
    ``list_word``. Consecutive single-character tokens are glued together
    without spaces (so spelled-out letters form one word); every other pair of
    neighbours is separated by a single space.

    Args:
        kata_list: Sequence of tokens (words or single letters).

    Returns:
        The joined sentence with its first character upper-cased and the rest
        lower-cased, or ``""`` when ``kata_list`` is empty.
    """
    if len(kata_list) == 0:
        return ""

    print('true')
    corrected = [correct_spelling(token, list_word) for token in kata_list]

    pieces = [corrected[0]]
    for previous, current in zip(corrected, corrected[1:]):
        # A space is needed unless we are continuing a run of single letters
        # (or the previous token is empty).
        needs_space = len(previous) > 1 or (len(previous) == 1 and len(current) > 1)
        pieces.append(" " + current if needs_space else current)

    return "".join(pieces).capitalize()

# Demo: misspelled words mixed with a run of single letters that should merge
# into one word.
contoh_kata = ['aks', 'makan', 'aygm', 'b', 'e', 'r', 's', 'a', 'm', 'a', 'ibu']
hasil_gabung = gabung_kalimat(contoh_kata)
print(hasil_gabung)

true
Aku makan ayam bersama ibu


In [28]:
# Real-time loop: read webcam frames, classify the hand pose at most once per
# `prediction_delay` seconds, and accumulate the predicted letters/words into
# `sentence`, which is overlaid on the preview window. Press "q" to quit.

# State for sentence prediction
sentence = ""
last_detected_time = time.time()
last_prediction_time = time.time()
reset_time = 5  # seconds without an accepted prediction before the sentence is cleared
prediction_delay = 1  # seconds to wait after detection and between predictions
movement_threshold = 0.02  # minimum L2 change in features to count as a new pose
last_prediction = None
last_hand_features = None
hand_detected = False
detection_time = 0


def _correct_last_word(text, word_list):
    """Spell-correct only the last space-separated token of ``text``.

    The token is lower-cased before lookup (the dictionary sample shown in this
    notebook is lower-case while the letter labels are upper-case) and the
    result is written back upper-cased so it matches the spelled letters.
    """
    head, separator, last_word = text.rpartition(" ")
    if not last_word:
        return text
    corrected = correct_spelling(last_word.lower(), word_list)
    return head + separator + corrected.upper()


# Start video capture
cap = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        current_time = time.time()

        if not hand_detected:
            # Landmarks are drawn onto `frame` in place; only the features are needed here.
            hand_features, _ = extract_hand_features_and_draw(frame)
            if hand_features.size != 0:
                hand_detected = True
                detection_time = current_time

        # Predict only once `prediction_delay` seconds have passed since both
        # the hand detection and the previous prediction.
        if (
            hand_detected
            and (current_time - detection_time >= prediction_delay)
            and (current_time - last_prediction_time >= prediction_delay)
        ):
            hand_features, _ = extract_hand_features_and_draw(frame)
            prediction = run_inference(hand_features)
            last_prediction_time = current_time

            if prediction is not None:
                # Ignore the prediction when the hand has barely moved since
                # the last accepted one. A flag is used instead of `continue`
                # so the frame is still displayed and "q" is still polled.
                hand_moved = True
                if last_hand_features is not None:
                    movement_change = np.linalg.norm(hand_features - last_hand_features)
                    if movement_change < movement_threshold:
                        hand_moved = False

                if hand_moved:
                    # Reset the inactivity timer since a new pose was accepted
                    last_detected_time = current_time
                    predicted_label = class_labels[prediction]

                    if (
                        last_prediction is not None
                        and len(predicted_label) == 1
                        and len(last_prediction) == 1
                    ):
                        # Consecutive letters extend the word being spelled
                        sentence += predicted_label
                    else:
                        # A run of spelled letters just ended: correct that
                        # word only, not the whole sentence.
                        if last_prediction is not None and len(last_prediction) == 1:
                            sentence = _correct_last_word(sentence, list_word)
                        sentence += " " + predicted_label
                    last_prediction = predicted_label

                    last_hand_features = hand_features
            hand_detected = False

        else:
            # Clear everything once the reset time has elapsed
            if current_time - last_detected_time > reset_time:
                sentence = ""
                last_prediction = None
                last_hand_features = None

        # Display the current sentence
        cv2.putText(
            frame,
            f"Sentence: {sentence}",
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (255, 0, 0),
            2,
            cv2.LINE_AA,
        )
        cv2.imshow("Real-time Sentence Prediction", frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()