In [78]:
import cv2
import time
import mediapipe as mp

import numpy as np 
import joblib
import tensorflow as tf

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.executors.pool import ThreadPoolExecutor
import sched
import threading
from difflib import get_close_matches
import pandas as pd

import warnings
warnings.filterwarnings('ignore')


In [79]:
def generate_word_spelling(file_path, del_thresh):
    df = pd.read_csv(file_path, header=None)
    filtered_df = df[df[0].str.len() >= del_thresh]
    words = filtered_df[0].tolist()
    return words

def correct_spelling(word, word_list):
    same_length_words = [w for w in word_list if len(w) == len(word)]
    close_matches = get_close_matches(word, same_length_words, n=1, cutoff=0.8)
    
    if close_matches:
        return close_matches[0]
    
    for i in range(len(word)-1, -1, -1):
        for char in 'abcdefghijklmnopqrstuvwxyz':
            if char != word[i]:  # Hanya mengganti jika karakter berbeda
                possible_word = word[:i] + char + word[i+1:]
                if possible_word in word_list:
                    return possible_word
    
    for char in 'abcdefghijklmnopqrstuvwxyz':
        possible_word = word + char
        if possible_word in word_list:
            return possible_word
    
    return word

In [80]:
def get_valid_landmarks(hand_landmarks, image_width, image_height):
    valid_landmarks = []
    for landmark in hand_landmarks.landmark:
        x = int(landmark.x * image_width)
        y = int(landmark.y * image_height)
        
        if 0 <= x < image_width and 0 <= y < image_height:
            valid_landmarks.append((x, y))
    
    return valid_landmarks

def initialize_hand_model():
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)
    mp_drawing = mp.solutions.drawing_utils
    return hands, mp_drawing, mp_hands

def calculate_fps(prev_time):
    curr_time = time.time()
    fps = 1 / (curr_time - prev_time)
    return fps, curr_time

def extract_landmark(hand_landmarks):
    partial_landmarks = []
    for landmark in hand_landmarks.landmark:
        partial_landmarks.append(landmark.x)
        partial_landmarks.append(landmark.y)
        partial_landmarks.append(landmark.z)    
    # print('extract_landmark: ', partial_landmarks)
    return partial_landmarks

def predict_model_single(landmark_data):
    global class_labels, scaler, model
    
    landmark_data = np.array(landmark_data).reshape(1, -1)
    landmark_data = scaler.transform(landmark_data)
    
    prediction = model.predict(landmark_data, verbose=None)
    prediction = np.argmax(prediction, axis=1)
    prediction = class_labels[prediction[0]]
    
    return prediction

def batch_prediction(landmarks_data):
    global class_labels
    results = []
    
    for landmark_data in landmarks_data:
        prediction = predict_model_single(landmark_data)
        results.append(prediction)
    print("masuk fungsi batch prediction: ", results)
    return results

def load_model_and_scaler():
    try:
        scaler = joblib.load("scaler.pkl")
        model = tf.keras.models.load_model('./models/14-juni-1058am.h5')
    except Exception as e:
        print(f"Error loading model or scaler: {e}")
        exit()
    return scaler, model


def put_text_bottom_left(frame, text, text_color=(0, 255, 0), font=cv2.FONT_HERSHEY_SIMPLEX, font_scale=1, font_thickness=2, margin=10):
    text_size = cv2.getTextSize(text, font, font_scale, font_thickness)[0]
    text_width, text_height = text_size[0], text_size[1]
    text_x = margin
    text_y = frame.shape[0] - margin  # frame.shape[0] is the height of the frame (vertical dimension)
    
    cv2.putText(frame, text, (text_x, text_y), font, font_scale, text_color, font_thickness, cv2.LINE_AA)
        

def display_hand_status_with_buffer(frame, results, mp_drawing, mp_hands, image_width, image_height):
    global landmarks_buffer
    total_hands = 0
    status_text = ""
    
    if results.multi_hand_landmarks:
        landmarks = []
        for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
            total_hands += 1
            
            valid_landmarks = get_valid_landmarks(hand_landmarks, image_width, image_height)
            num_landmarks = len(valid_landmarks)
            
            if num_landmarks < 21:
                None
                # print(f'{idx+1} | {num_landmarks} Tangan ketutupan')
                # print(f'{idx+1} | {num_landmarks} Tangan ketutupan')
            elif num_landmarks == 21:
                landmarks.extend(extract_landmark(hand_landmarks))
                
            
            status = "Valid" if num_landmarks == 21 else "Not Valid"
            status_text += f"h{idx+1}_stats: {status}, h{idx+1}_lmk: {num_landmarks}\n"

            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        
        if landmarks != None and len(landmarks) == 63:
            landmarks_buffer.append(landmarks)
            print("append landmark buffer")
            # print(landmarks)
            # print(len(landmarks))
            # result_predict = predict_model_single(landmarks)
                
            
        
    status_text = f"hand detected: {total_hands}\n" + status_text.strip()
    y0, dy = 50, 30
    
    for i, line in enumerate(status_text.split('\n')):
        y = y0 + i * dy
        cv2.putText(frame, line, (10, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2, cv2.LINE_AA)
        
    return frame


def display_hand_status(frame, results, mp_drawing, mp_hands, image_width, image_height):
    total_hands = 0
    status_text = ""
    global result_predict, sentences
    
    if results.multi_hand_landmarks:
        landmarks = []
        for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
            total_hands += 1
            
            valid_landmarks = get_valid_landmarks(hand_landmarks, image_width, image_height)
            num_landmarks = len(valid_landmarks)
            
            if num_landmarks < 21:
                None
                # print(f'{idx+1} | {num_landmarks} Tangan ketutupan')
                # print(f'{idx+1} | {num_landmarks} Tangan ketutupan')
            elif num_landmarks == 21:
                landmarks.extend(extract_landmark(hand_landmarks))
                
            
            status = "Valid" if num_landmarks == 21 else "Not Valid"
            status_text += f"h{idx+1}_stats: {status}, h{idx+1}_lmk: {num_landmarks}\n"

            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        
        if landmarks != None and len(landmarks) == 63:
            result_predict = predict_model_single(landmarks)
            if len(sentences) > 0:
                if sentences[-1] != result_predict:
                    sentences.append(result_predict)
            else:
                sentences.append(result_predict)
            # print(result_predict)
                
            
        
    status_text = f"hand detected: {total_hands}\n" + status_text.strip()
    y0, dy = 50, 30
    
    for i, line in enumerate(status_text.split('\n')):
        y = y0 + i * dy
        cv2.putText(frame, line, (10, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2, cv2.LINE_AA)
        
    return frame

In [81]:
cv2.destroyAllWindows()

# without buffer

In [82]:
# list_word = generate_word_spelling('./kbbi.csv', 3)
list_word = generate_word_spelling('./10k-indonesia-common-words.csv', 3)
print(list_word[:5])
print(len(list_word))
correct_spelling('bagaimans', list_word)

['yang', 'dan', 'ini', 'untuk', 'mereka']
10651


'bagaimana'

In [83]:
correct_spelling('b', list_word)

'b'

In [100]:
def gabung_kalimat(kata_list):
    if len(kata_list) > 0:
        print('true')
        kata_list_cleaned = []
        for kata in kata_list:
            kata_list_cleaned.append(correct_spelling(kata, list_word))

        # kalimat = check_and_correct(kata_list[0])
        kalimat = kata_list_cleaned[0]
        
        for i in range(1, len(kata_list_cleaned)):
            kata = kata_list_cleaned[i]
            # kata = check_and_correct(kata_list_cleaned[i])
            prev_kata = kata_list_cleaned[i-1]

            if len(prev_kata) > 1 or (len(prev_kata) == 1 and len(kata) > 1):
                kalimat += " " + kata
            else:
                kalimat += kata
        
        return kalimat.capitalize()
    else:
        return ""

hasil_gabung = gabung_kalimat(['aks', 'makan', 'aygm', 'b', 'e', 'r', 's', 'a', 'm', 'a', 'ibu'])
# hasil_gabung = gabung_kalimat([])
print(hasil_gabung)

true
Aku makan ayam bersama ibu


In [101]:
class_labels = ['A', 'Aku', 'Apa', 'B', 'Bagaimana', 'Baik', 'Bapak', 'Berapa',
    'Besok', 'C', 'D', 'Dia', 'Dimana', 'E', 'F', 'G', 'H', 'Halo',
    'Hari ini', 'I', 'Ibu', 'J', 'Jawab', 'K', 'Kalian', 'Kamu',
    'Kantor', 'Kapan', 'Kemana', 'Kemarin', 'Kerja', 'L', 'Lelah',
    'Lusa', 'M', 'Maaf', 'Makan', 'Malam', 'Mengapa', 'N', 'Nanti',
    'O', 'P', 'Pagi', 'Q', 'R', 'S', 'Sabar', 'Sakit', 'Sama - sama',
    'Sedih', 'Sekarang', 'Senang', 'Siang', 'Siapa', 'Sore', 'T',
    'Terima kasih', 'Tidur', 'Tolong', 'U', 'V', 'W', 'X', 'Y', 'Z']

landmarks_buffer = []
last_prediction = ""
result_predict = None
scaler, model = load_model_and_scaler()

sentences = []
threshold_seconds = 5  # Waktu dalam detik untuk mengecek
start_time = time.time()
last_word = ""

def main():
    global landmarks_buffer, last_prediction, result_predict, last_word, start_time, sentences, threshold_seconds
    
    hands, mp_drawing, mp_hands = initialize_hand_model()
    current_time = time.time()
    elapsed_time = current_time - start_time

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Tidak dapat mengakses kamera.")
        return

    prev_time = 0
    
    try:
        while True:
            ret, frame = cap.read()
            
            if not ret:
                print("Error: Tidak dapat membaca frame dari kamera.")
                break
            
            print('sentences: ', sentences)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(frame_rgb)
            
            image_height, image_width, _ = frame.shape
                                
            frame = display_hand_status(frame, results, mp_drawing, mp_hands, image_width, image_height)
            fps, prev_time = calculate_fps(prev_time)
            
            cv2.putText(frame, f'FPS: {int(fps)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            
            if elapsed_time >= threshold_seconds:
                start_time = time.time()
                if sentences[-1] == last_word:
                    sentences = []
        
            # cv2.putText(frame, last_prediction, (10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            # print(gabung_kalimat(sentences))
            put_text_bottom_left(frame, gabung_kalimat(sentences).capitalize())
            cv2.imshow('Cam', frame)
            
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    finally:
        cap.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:  []
sentences:

# with buffer

In [None]:
# class_labels = ['A', 'Aku', 'Apa', 'B', 'Bagaimana', 'Baik', 'Bapak', 'Berapa',
#     'Besok', 'C', 'D', 'Dia', 'Dimana', 'E', 'F', 'G', 'H', 'Halo',
#     'Hari ini', 'I', 'Ibu', 'J', 'Jawab', 'K', 'Kalian', 'Kamu',
#     'Kantor', 'Kapan', 'Kemana', 'Kemarin', 'Kerja', 'L', 'Lelah',
#     'Lusa', 'M', 'Maaf', 'Makan', 'Malam', 'Mengapa', 'N', 'Nanti',
#     'O', 'P', 'Pagi', 'Q', 'R', 'S', 'Sabar', 'Sakit', 'Sama - sama',
#     'Sedih', 'Sekarang', 'Senang', 'Siang', 'Siapa', 'Sore', 'T',
#     'Terima kasih', 'Tidur', 'Tolong', 'U', 'V', 'W', 'X', 'Y', 'Z']

# landmarks_buffer = []
# last_prediction = ""
# result_predict = None

# def main():
#     global landmarks_buffer, last_prediction
#     hands, mp_drawing, mp_hands = initialize_hand_model()
#     scaler, model = load_model_and_scaler()

#     cap = cv2.VideoCapture(0)
#     if not cap.isOpened():
#         print("Error: Tidak dapat mengakses kamera.")
#         return

#     prev_time = 0

#     def scheduled_prediction():
#         global last_prediction, landmarks_buffer
#         try:
#             if landmarks_buffer:
#                 print(landmarks_buffer)
#                 print("get frame", len(landmarks_buffer))
#                 # print(f"sample: {landmarks_buffer[0]}")
#                 result_predict = batch_prediction(landmarks_buffer, scaler, model)
#                 if len(result_predict) > 0:
#                     last_prediction = result_predict[-1]
#                 else:
#                     print("no sign language deteced")
                
#                 print(f"result predict: {result_predict}")
#                 print(f"last predict: {last_prediction}")
#         except Exception as e:
#             print(f"Error in scheduled_prediction: {e}")
#         finally:
#             landmarks_buffer.clear()
#             # print("landmark buffer cleared")
#             # print('len after clear: ', len(landmarks_buffer))

#     def schedule_task():
#         while True:
#             scheduled_prediction()
#             # landmarks_buffer.clear()
#             time.sleep(0)

#     thread = threading.Thread(target=schedule_task)
#     thread.daemon = True  
#     thread.start()
    
#     try:
#         while True:
#             ret, frame = cap.read()
            
#             if not ret:
#                 print("Error: Tidak dapat membaca frame dari kamera.")
#                 break

#             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#             results = hands.process(frame_rgb)
            
#             image_height, image_width, _ = frame.shape
                                
#             frame = display_hand_status(frame, results, mp_drawing, mp_hands, image_width, image_height)
#             fps, prev_time = calculate_fps(prev_time)
            
#             cv2.putText(frame, f'FPS: {int(fps)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
#             # cv2.putText(frame, last_prediction, (10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
#             put_text_bottom_left(frame, last_prediction)
#             cv2.imshow('Cam', frame)
            
#             if cv2.waitKey(1) & 0xFF == ord('q'):
#                 break

#     finally:
#         cap.release()
#         cv2.destroyAllWindows()

# if __name__ == "__main__":
#     main()