In [None]:
# Build the dataset: record hand-landmark sequences from the webcam into MP_kata/<class>/<sequence>/<frame>.npy

import cv2
import mediapipe as mp
import numpy as np
import os

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Ask for the configuration before opening the webcam, so an invalid answer
# never leaves an opened capture device behind.
num_hands = input("Masukkan jumlah tangan yang akan dideteksi (1/2): ")
if num_hands not in ['1', '2']:
    print("Input tidak valid. Harap masukkan 1 atau 2.")
    # In a notebook `exit()` asks the kernel to shut down; raising SystemExit
    # only stops this cell and is equivalent in a plain script.
    raise SystemExit

class_label = input("Masukkan kelas: ")

cap = cv2.VideoCapture(0)

with mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=int(num_hands),
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:

    # Recordings are stored as DATA_PATH/<action>/<sequence>/<frame>.npy
    DATA_PATH = os.path.join('MP_kata')
    actions = np.array([class_label])
    no_sequences = 30
    sequence_length = 30

    os.makedirs(DATA_PATH, exist_ok=True)

    # exist_ok=True tolerates folders left by an earlier run while still
    # surfacing real failures (e.g. permission errors) that the former
    # bare `except: pass` silently swallowed.
    for action in actions:
        for sequence in range(no_sequences):
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

    def extract_keypoints(results, hand_index):
        """Return the flattened (x, y, z) landmark coordinates of one hand.

        Args:
            results: Detection result; only its ``multi_hand_landmarks``
                attribute is read.
            hand_index: Position of the wanted hand inside
                ``results.multi_hand_landmarks``.

        Returns:
            1-D array holding x, y, z of every landmark of the selected hand,
            or ``np.zeros(21*3)`` when no hand was detected or ``hand_index``
            does not refer to a detected hand.
        """
        detected_hands = results.multi_hand_landmarks or []
        # An index outside the detected hands gets the same zero vector as
        # "no hand at all" instead of raising IndexError.
        if not -len(detected_hands) <= hand_index < len(detected_hands):
            return np.zeros(21*3)
        landmarks = detected_hands[hand_index].landmark
        return np.array([[res.x, res.y, res.z] for res in landmarks]).flatten()

    def record_sequence(action, sequence):
        """Record one sequence of webcam frames and save one keypoint file per frame.

        For each of ``sequence_length`` frames the hand landmarks are detected,
        drawn on a preview image and stored as
        ``DATA_PATH/<action>/<sequence>/<frame_num>.npy``.

        Args:
            action: Class label; used as folder name and in the on-screen text.
            sequence: Sequence number; used as folder name and in the on-screen text.

        The loop stops early when a frame cannot be read or when 'q' is
        pressed; this only ends the current sequence.
        """
        for frame_num in range(sequence_length):
            success, image = cap.read()
            if not success:
                print("Tidak dapat membaca frame dari webcam.")
                break

            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)
            image_output = image.copy()

            detected_hands = results.multi_hand_landmarks or []
            for hand_landmarks in detected_hands:
                mp_drawing.draw_landmarks(
                    image_output, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2))

            cv2.putText(image_output, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15, 12),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 4, cv2.LINE_AA)

            # Write exactly one file per frame. Previously nothing was saved
            # when no hand was visible, leaving gaps in the sequence folder;
            # extract_keypoints returns zeros for such frames. With several
            # hands the last detected one is stored, which is what the former
            # per-hand overwrite ended up with.
            hand_index = max(len(detected_hands) - 1, 0)
            keypoints = extract_keypoints(results, hand_index)
            npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num) + '.npy')
            np.save(npy_path, keypoints)

            cv2.imshow('OpenCV feed', image_output)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

    sequence_num = 0

    # try/finally: the webcam is released and the preview window closed even
    # when recording is interrupted (e.g. kernel interrupt) or raises.
    try:
        while sequence_num < no_sequences:
            record_sequence(class_label, sequence_num)
            sequence_num += 1
    finally:
        cap.release()
        cv2.destroyAllWindows()


In [None]:
# Training data: load the recorded sequences, train the LSTM classifier and export the model

import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import ModelCheckpoint

# Root folder of the recorded keypoint sequences
DATA_PATH = os.path.join('MP_kata')

# Class labels; each label is also the name of a sub-folder of DATA_PATH
actions = np.array([
    'A', 'B', 'E', 'F', 'G', 'I', 'K', 'L', 'M', 'N', 'O', 'V', 'W', 'X', 'Y', 'Z',
    'halo', 'kamu', 'nama', 'siapa',
])
no_sequences = 30     # sequences ("videos") loaded per class
sequence_length = 30  # frames loaded per sequence
feature_dim = 63      # values per frame expected by the model input
label_map = dict(zip(actions, range(len(actions))))

# Read every frame file of every sequence; one window per sequence
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
        window = [
            np.load(os.path.join(sequence_dir, f"{frame_num}.npy"))
            for frame_num in range(sequence_length)
        ]
        sequences.append(window)
        labels.append(label_map[action])

X = np.array(sequences)
y = to_categorical(labels).astype(int)

# Hold out 5% of the sequences; they are passed to fit() as validation data
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

# Three stacked LSTM layers followed by dense layers; the softmax output has
# one unit per entry of `actions`
model = Sequential([
    LSTM(64, return_sequences=True, activation='tanh', input_shape=(sequence_length, feature_dim)),
    LSTM(128, return_sequences=True, activation='tanh'),
    LSTM(64, return_sequences=False, activation='tanh'),
    Dense(64, activation='tanh'),
    Dense(32, activation='tanh'),
    Dense(len(actions), activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
model.summary()

# Train, validating on the held-out split after every epoch
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=3000)

# Keep the trained Keras model on disk
model.save('modelkatatrain.h5')

# Convert the model to TensorFlow Lite format.
# This cell only imports tensorflow.keras sub-modules, so the name `tf` was
# undefined here and the conversion raised NameError after training finished.
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the converted model to a .tflite file
with open('modelkata.tflite', 'wb') as f:
    f.write(tflite_model)


In [None]:
import matplotlib.pyplot as plt

# Example accuracy and loss values; replace them with your own training data
epochs = [1, 2, 3, 4, 5]
train_accuracy = [0.85, 0.88, 0.90, 0.92, 0.94]
train_loss = [0.3, 0.25, 0.2, 0.18, 0.15]


def _plot_curve(values, label, y_label, title, **line_style):
    """Draw one per-epoch curve in its own 8x6 figure and show it."""
    plt.figure(figsize=(8, 6))
    plt.plot(epochs, values, label=label, marker='o', **line_style)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.show()


# Accuracy curve
_plot_curve(train_accuracy, 'Akurasi Pelatihan', 'Akurasi', 'Grafik Akurasi Pelatihan')

# Loss curve
_plot_curve(train_loss, 'Loss Pelatihan', 'Loss', 'Grafik Loss Pelatihan', color='r')


In [None]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

model = load_model('modelkatatrain.h5')

cap = cv2.VideoCapture(0)
sequence_length = 30
# Actions
# NOTE(review): the training cell of this notebook saves 'modelkatatrain.h5'
# with 20 classes while only 6 labels are listed here - confirm this list
# matches the loaded model, otherwise actions[np.argmax(prediction)] can
# raise IndexError or show the wrong label.
actions = np.array(['halo','nama','assalamualaikum','i love you','kamu','siapa'])

# try/finally: the webcam is released and the window closed even when the
# loop is interrupted or raises.
try:
    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as hands:

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Tidak dapat membaca frame dari webcam.")
                break

            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)

            image_output = frame.copy()

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image_output, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                        mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2))

                    keypoints = np.array([[res.x, res.y, res.z] for res in hand_landmarks.landmark]).flatten()
                    keypoints = np.expand_dims(keypoints, axis=0)

                    # Repeat the current frame so the input has the shape
                    # (1, sequence_length, feature_dim)
                    keypoints = np.repeat(keypoints, sequence_length, axis=0)
                    keypoints = np.expand_dims(keypoints, axis=0)

                    prediction = model.predict(keypoints)[0]
                    predicted_class = actions[np.argmax(prediction)]

                    # Status box with the first word of the predicted label
                    cv2.rectangle(image_output, (0,0), (250, 60), (245, 117, 16), -1)
                    cv2.putText(image_output, predicted_class.split(' ')[0],
                                (90,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow('OpenCV feed', image_output)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

import tensorflow as tf

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

sequence_length = 30


class TFLiteModel:
    """Run a ``.tflite`` file through the same ``predict`` call as a Keras model.

    ``load_model`` only reads Keras formats, so the TensorFlow Lite file has to
    be executed with ``tf.lite.Interpreter`` instead.
    """

    def __init__(self, model_path):
        self._interpreter = tf.lite.Interpreter(model_path=model_path)
        self._interpreter.allocate_tensors()
        self._input = self._interpreter.get_input_details()[0]
        self._output = self._interpreter.get_output_details()[0]

    def predict(self, batch):
        """Return the model output for ``batch``."""
        # NOTE(review): assumes the converted model accepts the single-sample
        # batch this notebook feeds it - confirm against the exported file.
        self._interpreter.set_tensor(
            self._input['index'], np.asarray(batch, dtype=self._input['dtype']))
        self._interpreter.invoke()
        return self._interpreter.get_tensor(self._output['index'])


model1 = TFLiteModel('modelkata.tflite')
model2 = load_model('model.h5')

actions_model1 = np.array(['A','B','E','F','G','I','K','L','M','N','O','V','W','X','Y','Z'])
actions_model2 = np.array(['halo', 'kamu', 'nama', 'siapa'])

# Initial active model
active_model = None
active_actions = None

# Open the webcam last so a failing model load does not leave it locked
cap = cv2.VideoCapture(0)

# try/finally: the webcam is released and the window closed even when the
# loop is interrupted or raises.
try:
    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as hands:

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Tidak dapat membaca frame dari webcam.")
                break

            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)

            image_output = frame.copy()

            if results.multi_hand_landmarks:
                num_hands = len(results.multi_hand_landmarks)

                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image_output, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Switch models based on number of hands detected
                if num_hands == 1:
                    active_model = model1
                    active_actions = actions_model1
                elif num_hands == 2:
                    active_model = model2
                    active_actions = actions_model2

            if active_model is not None and results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    keypoints = np.array([[res.x, res.y, res.z] for res in hand_landmarks.landmark]).flatten()
                    keypoints = np.expand_dims(keypoints, axis=0)

                    # Repeat the current frame so the input has the shape
                    # (1, sequence_length, feature_dim)
                    keypoints = np.repeat(keypoints, sequence_length, axis=0)
                    keypoints = np.expand_dims(keypoints, axis=0)

                    prediction = active_model.predict(keypoints)[0]
                    predicted_class = active_actions[np.argmax(prediction)]

                    cv2.putText(image_output, predicted_class.split(' ')[0], (30, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow('OpenCV feed', image_output)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Threaded version: the capture/inference loop runs in a background thread
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import threading

import tensorflow as tf

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

sequence_length = 30


class TFLiteModel:
    """Run a ``.tflite`` file through the same ``predict`` call as a Keras model.

    ``load_model`` only reads Keras formats, so the TensorFlow Lite file has to
    be executed with ``tf.lite.Interpreter`` instead.
    """

    def __init__(self, model_path):
        self._interpreter = tf.lite.Interpreter(model_path=model_path)
        self._interpreter.allocate_tensors()
        self._input = self._interpreter.get_input_details()[0]
        self._output = self._interpreter.get_output_details()[0]

    def predict(self, batch):
        """Return the model output for ``batch``."""
        # NOTE(review): assumes the converted model accepts the single-sample
        # batch this notebook feeds it - confirm against the exported file.
        self._interpreter.set_tensor(
            self._input['index'], np.asarray(batch, dtype=self._input['dtype']))
        self._interpreter.invoke()
        return self._interpreter.get_tensor(self._output['index'])


model1 = TFLiteModel('modelkata.tflite')
model2 = load_model('model.h5')

actions_model1 = np.array(['A', 'B', 'E', 'F', 'G', 'I', 'K', 'L', 'M', 'N', 'O', 'V', 'W', 'X', 'Y', 'Z'])
actions_model2 = np.array(['halo', 'kamu', 'nama', 'siapa'])

# Initial active model
active_model = None
active_actions = None

# Open the webcam last so a failing model load does not leave it locked
cap = cv2.VideoCapture(0)

def process_frames():
    """Capture webcam frames, classify the detected hands and show the result.

    Reads the module-level ``cap``, ``model1``/``model2`` and label arrays and
    rebinds the globals ``active_model``/``active_actions`` according to the
    number of detected hands. The loop ends when a frame cannot be read or
    'q' is pressed; the webcam is released and the window closed on the way
    out, also when an exception is raised.
    """
    global active_model, active_actions

    try:
        with mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            min_detection_confidence=0.3,
            min_tracking_confidence=0.3) as hands:

            while True:
                ret, frame = cap.read()
                if not ret:
                    print("Tidak dapat membaca frame dari webcam.")
                    break

                image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = hands.process(image_rgb)

                image_output = frame.copy()

                if results.multi_hand_landmarks:
                    num_hands = len(results.multi_hand_landmarks)

                    for hand_landmarks in results.multi_hand_landmarks:
                        mp_drawing.draw_landmarks(
                            image_output, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Switch models based on the number of hands detected
                    if num_hands == 1:
                        active_model = model1
                        active_actions = actions_model1
                    elif num_hands == 2:
                        active_model = model2
                        active_actions = actions_model2

                if active_model is not None and results.multi_hand_landmarks:
                    for hand_landmarks in results.multi_hand_landmarks:
                        keypoints = np.array([[res.x, res.y, res.z] for res in hand_landmarks.landmark]).flatten()
                        keypoints = np.expand_dims(keypoints, axis=0)

                        # Repeat the current frame so the input has the shape
                        # (1, sequence_length, feature_dim)
                        keypoints = np.repeat(keypoints, sequence_length, axis=0)
                        keypoints = np.expand_dims(keypoints, axis=0)

                        prediction = active_model.predict(keypoints)[0]
                        predicted_class = active_actions[np.argmax(prediction)]

                        cv2.putText(image_output, predicted_class.split(' ')[0], (30, 40),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                cv2.imshow('OpenCV feed', image_output)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Start the thread for processing frames
processing_thread = threading.Thread(target=process_frames)
processing_thread.start()


In [None]:
# Near-final version: pick the model by number of detected hands and show the label with its probability

import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

import tensorflow as tf

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

sequence_length = 30


class TFLiteModel:
    """Run a ``.tflite`` file through the same ``predict`` call as a Keras model.

    ``load_model`` only reads Keras formats, so the TensorFlow Lite file has to
    be executed with ``tf.lite.Interpreter`` instead.
    """

    def __init__(self, model_path):
        self._interpreter = tf.lite.Interpreter(model_path=model_path)
        self._interpreter.allocate_tensors()
        self._input = self._interpreter.get_input_details()[0]
        self._output = self._interpreter.get_output_details()[0]

    def predict(self, batch):
        """Return the model output for ``batch``."""
        # NOTE(review): assumes the converted model accepts the single-sample
        # batch this notebook feeds it - confirm against the exported file.
        self._interpreter.set_tensor(
            self._input['index'], np.asarray(batch, dtype=self._input['dtype']))
        self._interpreter.invoke()
        return self._interpreter.get_tensor(self._output['index'])


model1 = TFLiteModel('modelkata.tflite')
model2 = load_model('model.h5')

actions_model1 = np.array(['A','B','E','F','G','I','K','L','M','N','O','V','W','X','Y','Z'])
actions_model2 = np.array(['halo', 'kamu', 'nama', 'siapa'])

# Initial active model
active_model = None
active_actions = None

# Open the webcam last so a failing model load does not leave it locked
cap = cv2.VideoCapture(0)

# try/finally: the webcam is released and the window closed even when the
# loop is interrupted or raises.
try:
    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as hands:

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Tidak dapat membaca frame dari webcam.")
                break

            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)

            image_output = frame.copy()

            if results.multi_hand_landmarks:
                num_hands = len(results.multi_hand_landmarks)

                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image_output, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Switch models based on number of hands detected
                if num_hands == 1:
                    active_model = model1
                    active_actions = actions_model1
                elif num_hands == 2:
                    active_model = model2
                    active_actions = actions_model2

                # Only the first hand's label is displayed, so only that hand
                # is classified; the second hand's prediction was never used.
                first_hand = results.multi_hand_landmarks[0]
                keypoints = np.array([[res.x, res.y, res.z] for res in first_hand.landmark]).flatten()
                # Repeat the current frame so the input has the shape
                # (1, sequence_length, feature_dim)
                keypoints = np.expand_dims(keypoints, axis=0)
                keypoints = np.repeat(keypoints, sequence_length, axis=0)
                keypoints = np.expand_dims(keypoints, axis=0)

                prediction_hand1 = active_model.predict(keypoints)[0]

                # Label = first word of the predicted class plus the highest probability
                predicted_class_hand1 = active_actions[np.argmax(prediction_hand1)]
                proba_hand1 = np.max(prediction_hand1)
                label_hand1 = f"{predicted_class_hand1.split(' ')[0]} ({proba_hand1:.2f})"
                cv2.putText(image_output, label_hand1, (30, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow('OpenCV feed', image_output)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Near-final version with an on-screen FPS counter

import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import time

import tensorflow as tf

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

sequence_length = 30


class TFLiteModel:
    """Run a ``.tflite`` file through the same ``predict`` call as a Keras model.

    ``load_model`` only reads Keras formats, so the TensorFlow Lite file has to
    be executed with ``tf.lite.Interpreter`` instead.
    """

    def __init__(self, model_path):
        self._interpreter = tf.lite.Interpreter(model_path=model_path)
        self._interpreter.allocate_tensors()
        self._input = self._interpreter.get_input_details()[0]
        self._output = self._interpreter.get_output_details()[0]

    def predict(self, batch):
        """Return the model output for ``batch``."""
        # NOTE(review): assumes the converted model accepts the single-sample
        # batch this notebook feeds it - confirm against the exported file.
        self._interpreter.set_tensor(
            self._input['index'], np.asarray(batch, dtype=self._input['dtype']))
        self._interpreter.invoke()
        return self._interpreter.get_tensor(self._output['index'])


model1 = TFLiteModel('modelkata.tflite')
model2 = load_model('model.h5')

actions_model1 = np.array(['A','B','E','F','G','I','K','L','M','N','O','V','W','X','Y','Z'])
actions_model2 = np.array(['halo', 'kamu', 'nama', 'siapa'])

# Initial active model
active_model = None
active_actions = None

# Open the webcam last so a failing model load does not leave it locked
cap = cv2.VideoCapture(0)

# Request the smallest resolution (160x120)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 160)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 120)

# Counters for the running-average FPS
frame_count = 0
start_time = time.time()

# try/finally: the webcam is released and the window closed even when the
# loop is interrupted or raises.
try:
    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as hands:

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Tidak dapat membaca frame dari webcam.")
                break

            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)

            image_output = frame.copy()

            if results.multi_hand_landmarks:
                num_hands = len(results.multi_hand_landmarks)

                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image_output, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Switch models based on number of hands detected
                if num_hands == 1:
                    active_model = model1
                    active_actions = actions_model1
                elif num_hands == 2:
                    active_model = model2
                    active_actions = actions_model2

                # Only the first hand's label is displayed, so only that hand
                # is classified; the second hand's prediction was never used.
                first_hand = results.multi_hand_landmarks[0]
                keypoints = np.array([[res.x, res.y, res.z] for res in first_hand.landmark]).flatten()
                # Repeat the current frame so the input has the shape
                # (1, sequence_length, feature_dim)
                keypoints = np.expand_dims(keypoints, axis=0)
                keypoints = np.repeat(keypoints, sequence_length, axis=0)
                keypoints = np.expand_dims(keypoints, axis=0)

                prediction_hand1 = active_model.predict(keypoints)[0]

                # Label = first word of the predicted class plus the highest probability
                predicted_class_hand1 = active_actions[np.argmax(prediction_hand1)]
                proba_hand1 = np.max(prediction_hand1)
                label_hand1 = f"{predicted_class_hand1.split(' ')[0]} ({proba_hand1:.2f})"
                cv2.putText(image_output, label_hand1, (30, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # FPS as the average over all frames since start_time
            frame_count += 1
            elapsed_time = time.time() - start_time
            fps = frame_count / elapsed_time if elapsed_time > 0 else 0.0

            # Show the FPS below the label
            cv2.putText(image_output, f"FPS: {fps:.2f}", (30, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow('OpenCV feed', image_output)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import time

import tensorflow as tf

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

sequence_length = 30


class TFLiteModel:
    """Run a ``.tflite`` file through the same ``predict`` call as a Keras model.

    ``load_model`` only reads Keras formats, so the TensorFlow Lite file has to
    be executed with ``tf.lite.Interpreter`` instead.
    """

    def __init__(self, model_path):
        self._interpreter = tf.lite.Interpreter(model_path=model_path)
        self._interpreter.allocate_tensors()
        self._input = self._interpreter.get_input_details()[0]
        self._output = self._interpreter.get_output_details()[0]

    def predict(self, batch):
        """Return the model output for ``batch``."""
        # NOTE(review): assumes the converted model accepts the single-sample
        # batch this notebook feeds it - confirm against the exported file.
        self._interpreter.set_tensor(
            self._input['index'], np.asarray(batch, dtype=self._input['dtype']))
        self._interpreter.invoke()
        return self._interpreter.get_tensor(self._output['index'])


model1 = TFLiteModel('modelkata.tflite')
model2 = load_model('model.h5')

actions_model1 = np.array(['A','B','E','F','G','I','K','L','M','N','O','V','W','X','Y','Z'])
actions_model2 = np.array(['halo', 'kamu', 'nama', 'siapa'])

# Initial active model
active_model = None
active_actions = None

# Open the webcam last so a failing model load does not leave it locked
cap = cv2.VideoCapture(0)

# Request the smallest resolution (160x120)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 160)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 120)

# Counters for the running-average FPS
frame_count = 0
start_time = time.time()

# try/finally: the webcam is released and the window closed even when the
# loop is interrupted or raises.
try:
    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as hands:

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Tidak dapat membaca frame dari webcam.")
                break

            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)

            image_output = frame.copy()

            # Text position derived from the image size. Computed for every
            # frame: the FPS text below needs it even when no hand is visible
            # (it used to be set only inside the hand branch, so a first frame
            # without a hand raised NameError).
            image_height, image_width, _ = image_output.shape
            label_position_x = 30
            label_position_y = int(0.1 * image_height)

            if results.multi_hand_landmarks:
                num_hands = len(results.multi_hand_landmarks)

                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image_output, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Switch models based on number of hands detected
                if num_hands == 1:
                    active_model = model1
                    active_actions = actions_model1
                elif num_hands == 2:
                    active_model = model2
                    active_actions = actions_model2

                # Only the first hand's label is displayed, so only that hand
                # is classified; the second hand's prediction was never used.
                first_hand = results.multi_hand_landmarks[0]
                keypoints = np.array([[res.x, res.y, res.z] for res in first_hand.landmark]).flatten()
                # Repeat the current frame so the input has the shape
                # (1, sequence_length, feature_dim)
                keypoints = np.expand_dims(keypoints, axis=0)
                keypoints = np.repeat(keypoints, sequence_length, axis=0)
                keypoints = np.expand_dims(keypoints, axis=0)

                prediction_hand1 = active_model.predict(keypoints)[0]

                # Label = first word of the predicted class plus the highest probability
                predicted_class_hand1 = active_actions[np.argmax(prediction_hand1)]
                proba_hand1 = np.max(prediction_hand1)
                label_hand1 = f"{predicted_class_hand1.split(' ')[0]} ({proba_hand1:.2f})"

                cv2.putText(image_output, label_hand1, (label_position_x, label_position_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # FPS as the average over all frames since start_time
            frame_count += 1
            elapsed_time = time.time() - start_time
            fps = frame_count / elapsed_time if elapsed_time > 0 else 0.0

            # Show the FPS below the label
            cv2.putText(image_output, f"FPS: {fps:.2f}", (label_position_x, label_position_y + 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow('OpenCV feed', image_output)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import cv2
import mediapipe as mp
import serial
import time
from tensorflow.keras.models import load_model
import numpy as np

import tensorflow as tf

mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# # Serial port configuration
# ser = serial.Serial('COM6', 9600)  # Replace with the matching port and baud rate
# time.sleep(2)  # Give the connection a few seconds to start


class TFLiteModel:
    """Run a ``.tflite`` file through the same ``predict`` call as a Keras model.

    ``load_model`` only reads Keras formats, so the TensorFlow Lite file has to
    be executed with ``tf.lite.Interpreter`` instead.
    """

    def __init__(self, model_path):
        self._interpreter = tf.lite.Interpreter(model_path=model_path)
        self._interpreter.allocate_tensors()
        self._input = self._interpreter.get_input_details()[0]
        self._output = self._interpreter.get_output_details()[0]

    def predict(self, batch):
        """Return the model output for ``batch``."""
        # NOTE(review): assumes the converted model accepts the single-sample
        # batch this notebook feeds it - confirm against the exported file.
        self._interpreter.set_tensor(
            self._input['index'], np.asarray(batch, dtype=self._input['dtype']))
        self._interpreter.invoke()
        return self._interpreter.get_tensor(self._output['index'])


# Load hand gesture recognition models
model1 = TFLiteModel('modelkata.tflite')
model2 = load_model('model.h5')

actions_model1 = np.array(['A', 'B', 'E', 'F', 'G', 'I', 'K', 'L', 'M', 'N', 'O', 'V', 'W', 'X', 'Y', 'Z'])
actions_model2 = np.array(['halo', 'kamu', 'nama', 'siapa'])

# Initial active model
active_model = None
active_actions = None

sequence_length = 30

# Initialise MediaPipe face detection
face_detection = mp_face_detection.FaceDetection()

# Initialise MediaPipe hand detection
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# Open the webcam last so a failing model load does not leave it locked
cap = cv2.VideoCapture(0)  # Replace with the matching video device number

# Counters for the running-average FPS; `fps` was displayed below but never
# computed in this cell, which raised NameError on the first frame.
frame_count = 0
start_time = time.time()

# try/finally: webcam, window and MediaPipe solutions are released even when
# the loop is interrupted or raises.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)  # Mirror the image

        # MediaPipe expects RGB input
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Text position and frame centre derived from the real frame size;
        # computed for every frame because the FPS text needs the position
        # even when no hand is visible.
        image_height, image_width, _ = frame.shape
        label_position_x = 30
        label_position_y = int(0.1 * image_height)

        # Face detection
        results_face = face_detection.process(image_rgb)

        if results_face.detections:
            for detection in results_face.detections:
                # Bounding box is relative; scale it to pixel coordinates
                bbox = detection.location_data.relative_bounding_box
                x, y, w, h = int(bbox.xmin * frame.shape[1]), int(bbox.ymin * frame.shape[0]), \
                             int(bbox.width * frame.shape[1]), int(bbox.height * frame.shape[0])

#                 # Send the face centre coordinates to the Arduino
#                 string = 'X{0:d}Y{1:d}'.format(x + w // 2, y + h // 2)
#                 print(string)
#                 ser.write(string.encode('utf-8'))
#                 time.sleep(0.2)

                # Draw the face bounding box
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 3)
                # Mark the centre of the face
                cv2.circle(frame, (x + w // 2, y + h // 2), 2, (0, 255, 0), 2)

        # Hand detection
        results_hands = hands.process(image_rgb)

        if results_hands.multi_hand_landmarks:
            num_hands = len(results_hands.multi_hand_landmarks)

            for hand_landmarks in results_hands.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Switch models based on number of hands detected
            if num_hands == 1:
                active_model = model1
                active_actions = actions_model1
            elif num_hands == 2:
                active_model = model2
                active_actions = actions_model2

            # Only the first hand's label is displayed, so only that hand is
            # classified; the second hand's prediction was never used.
            first_hand = results_hands.multi_hand_landmarks[0]
            keypoints = np.array([[res.x, res.y, res.z] for res in first_hand.landmark]).flatten()
            # Repeat the current frame so the input has the shape
            # (1, sequence_length, feature_dim)
            keypoints = np.expand_dims(keypoints, axis=0)
            keypoints = np.repeat(keypoints, sequence_length, axis=0)
            keypoints = np.expand_dims(keypoints, axis=0)

            prediction_hand1 = active_model.predict(keypoints)[0]

            # Label = first word of the predicted class plus the highest probability
            predicted_class_hand1 = active_actions[np.argmax(prediction_hand1)]
            proba_hand1 = np.max(prediction_hand1)
            label_hand1 = f"{predicted_class_hand1.split(' ')[0]} ({proba_hand1:.2f})"

            cv2.putText(frame, label_hand1, (label_position_x, label_position_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # Square around the frame centre (identical to the former hard-coded
        # 640x480 centre when the frame has that size)
        center_x, center_y = image_width // 2, image_height // 2
        cv2.rectangle(frame, (center_x - 30, center_y - 30), (center_x + 30, center_y + 30), (255, 255, 255), 3)

        # FPS as the average over all frames since start_time
        frame_count += 1
        elapsed_time = time.time() - start_time
        fps = frame_count / elapsed_time if elapsed_time > 0 else 0.0
        cv2.putText(frame, f"FPS: {fps:.2f}", (label_position_x, label_position_y + 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        cv2.imshow('Face Detection', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    face_detection.close()
    # ser.close()


In [None]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import serial
import time
from playsound import playsound

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
mp_face_detection = mp.solutions.face_detection

model = load_model('model1.h5')

cap = cv2.VideoCapture(0)
sequence_length = 30
actions = np.array(['A','B','C','D','Delapan','E','Empat','Enam','F','G','H','halo','I','J','K','L','Lima','M','N','nama','O','Q','R','S','Sembilan','T','Tujuh','U','V','W','X','Y','Z'])

# Konfigurasi Serial Port
ser = serial.Serial('COM6', 9600)  # Ganti dengan port dan baudrate yang sesuai
time.sleep(2)  # Beri waktu beberapa detik untuk memulai komunikasi

def play_sound(file_path):
    """Play the audio file at ``file_path`` by delegating to ``playsound``.

    Args:
        file_path: Path of the sound file; passed to ``playsound`` unchanged.
    """
    playsound(file_path)

# Main loop of this cell: hand-sign recognition and face detection share one
# webcam. Each pass reads two consecutive frames, one for each detector, and
# shows the results in two separate windows.
with mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands, \
        mp_face_detection.FaceDetection() as face_detection:

    # Labels announced for the previous frame. play_sound() is called on every
    # frame a hand is visible, so without this the same clip would be replayed
    # (and the loop held up) continuously while a sign is being shown.
    labels_prev_frame = set()

    try:
        while cap.isOpened():
            # --- Sign-language recognition ---
            success, frame = cap.read()
            if not success:
                print("Tidak dapat membaca frame dari webcam.")
                break

            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)

            image_output = frame.copy()
            labels_this_frame = set()

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image_output, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                        mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2))

                    # Flatten this hand's (x, y, z) landmarks into one feature vector
                    keypoints = np.array([[res.x, res.y, res.z] for res in hand_landmarks.landmark]).flatten()
                    keypoints = np.expand_dims(keypoints, axis=0)

                    # Repeat the single frame so the input has shape
                    # (1, sequence_length, n_features)
                    keypoints = np.repeat(keypoints, sequence_length, axis=0)
                    keypoints = np.expand_dims(keypoints, axis=0)

                    prediction = model.predict(keypoints)[0]
                    best_index = np.argmax(prediction)
                    predicted_class = actions[best_index]
                    confidence = prediction[best_index]
                    label = predicted_class.split(" ")[0]

                    # Status box with the predicted label and its score
                    cv2.rectangle(image_output, (0, 0), (250, 60), (245, 117, 16), -1)
                    cv2.putText(image_output, f'{label} ({confidence:.2f})', (90, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    # Play the clip only when this label newly appears
                    if label not in labels_prev_frame and label not in labels_this_frame:
                        play_sound(f'{label}.mp3')
                    labels_this_frame.add(label)

            # An empty set (no hands) re-arms every label for its next appearance
            labels_prev_frame = labels_this_frame

            # Show the hand-detection result in its own window
            cv2.imshow('Hand Detection', image_output)

            # --- Face detection ---
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.flip(frame, 1)  # Mirror the image

            # MediaPipe expects RGB input
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_detection.process(frame_rgb)

            frame_height, frame_width = frame.shape[:2]

            if results.detections:
                for detection in results.detections:
                    # Convert the relative bounding box to pixel coordinates
                    bbox = detection.location_data.relative_bounding_box
                    x, y = int(bbox.xmin * frame_width), int(bbox.ymin * frame_height)
                    w, h = int(bbox.width * frame_width), int(bbox.height * frame_height)

                    # Disabled: send the face position to the Arduino.
                    # NOTE(review): this snippet uses x - w//2 / y - h//2, while the
                    # centre point drawn below uses x + w//2 / y + h//2 - confirm
                    # which is intended before re-enabling.
#                     string = 'X{0:d}Y{1:d}'.format(x - w//2, y - h//2)
#                     print(string)
#                     ser.write(string.encode('utf-8'))
#                     time.sleep(0.1)

                    # Face bounding box
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 3)
                    # Centre point of the face
                    cv2.circle(frame, (x + w//2, y + h//2), 2, (0, 255, 0), 2)

            # 60x60 square at the centre of the frame, derived from the actual
            # frame size (identical to the former constants for 640x480 input)
            center_x, center_y = frame_width // 2, frame_height // 2
            cv2.rectangle(frame, (center_x - 30, center_y - 30),
                          (center_x + 30, center_y + 30), (255, 255, 255), 3)

            # Show the face-detection result in its own window
            cv2.imshow('Face Detection', frame)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera, windows and serial port even when the loop exits
        # through an exception (e.g. a missing sound file).
        cap.release()
        cv2.destroyAllWindows()
        ser.close()


In [1]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import threading
import pyttsx3
import time

# Short aliases for the MediaPipe solution modules used in this cell
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
mp_face_detection = mp.solutions.face_detection

# Capture device at index 0; both detection loops defined below read from it
cap = cv2.VideoCapture(0)

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Function to set the voice to Indonesian (Bahasa Indonesia)
def set_indonesian_voice(engine):
    """Select the first Indonesian voice offered by a pyttsx3-style engine.

    A plain ``"ID" in voice.id`` substring test also matches identifiers that
    merely contain those letters (for example an English voice whose id
    contains ``DAVID``). A voice is therefore accepted only when one of these
    holds:

    * its id or name carries an Indonesian locale tag (``ID-ID``, ``id_ID``,
      ``idID``) that is not part of a longer word,
    * its id or name contains "indonesia" (case-insensitive),
    * one of its ``languages`` entries is the code ``id`` or starts with it
      (``id_ID``, ``id-ID``).

    Args:
        engine: Object exposing ``getProperty('voices')`` (an iterable of
            voice objects with an ``id`` attribute and, optionally, ``name``
            and ``languages``) and ``setProperty('voice', voice_id)``.

    Returns:
        None. If no voice matches, the engine's voice is left unchanged.
    """
    import re  # local import: only this function needs it

    locale_tag = re.compile(r'(?<![A-Za-z])id[-_]?id(?![A-Za-z])', re.IGNORECASE)

    def looks_indonesian(voice):
        """Return True when the voice's id, name or languages mark it as Indonesian."""
        described = f"{voice.id} {getattr(voice, 'name', '') or ''}"
        if locale_tag.search(described) or 'indonesia' in described.lower():
            return True
        for lang in getattr(voice, 'languages', None) or ():
            # Some drivers may report language codes as bytes - decode defensively
            if isinstance(lang, (bytes, bytearray)):
                lang = lang.decode('utf-8', 'ignore')
            code = re.sub(r'[^a-z]+', '-', str(lang).lower()).strip('-')
            if code == 'id' or code.startswith('id-'):
                return True
        return False

    for voice in engine.getProperty('voices') or ():
        if looks_indonesian(voice):
            engine.setProperty('voice', voice.id)
            break

# Prefer an Indonesian voice when the engine offers one
set_indonesian_voice(engine)

# Number of time steps in one model input window
sequence_length = 30

# NOTE(review): load_model is the Keras loader; a '.tflite' file normally has
# to be run through tf.lite.Interpreter instead - confirm this path loads.
model1 = load_model('modelkata.tflite')
model2 = load_model('model.h5')

# Class labels, in the order used to index each model's output
actions_model1 = np.array([
    'A', 'B', 'E', 'F', 'G', 'I', 'K', 'L',
    'M', 'N', 'O', 'V', 'W', 'X', 'Y', 'Z',
])
actions_model2 = np.array(['halo', 'kamu', 'nama', 'siapa'])

# Nothing is active until a detection loop picks a model
active_model = None
active_actions = None

# Frame counter and start timestamp
frame_count = 0
start_time = time.time()

def text_to_speech(text):
    """Speak ``text`` through the module-level ``engine``.

    Sets the speech rate and volume, queues the text and then calls
    ``engine.runAndWait()`` to process the queue.

    Args:
        text: The text to be spoken.
    """
    engine.setProperty('rate', 100)  # Speed of speech
    # Volume is a 0.0-1.0 fraction; the previous value 5.0 was out of range,
    # so use the maximum valid level instead.
    engine.setProperty('volume', 1.0)
    engine.say(text)
    engine.runAndWait()
    
def hand_sign_detection():
    """Recognise hand signs from the webcam, label the frame and speak the result.

    Reads frames from the module-level ``cap``, runs MediaPipe Hands on each
    one and classifies the first detected hand: ``model1``/``actions_model1``
    when one hand is visible, ``model2``/``actions_model2`` otherwise. The
    label and its score are drawn on the frame, which is shown in the
    'OpenCV feed' window. The loop ends when the capture closes, a frame
    cannot be read, or 'q' is pressed.

    Differences from the earlier version:
      * the window is refreshed on every frame, not only while a hand is
        visible (the preview used to freeze when no hand was detected);
      * a label is spoken only when it differs from the one spoken last, so
        the blocking speech call no longer runs on every frame;
      * only the first hand is classified, since the second hand's prediction
        was computed but never used.

    NOTE(review): this function is started on a background thread and calls
    cv2.imshow / cv2.waitKey there; GUI calls off the main thread are not
    reliable on every platform - confirm on the target OS.
    """
    # Label spoken most recently; reset whenever no hand is visible
    last_spoken = None

    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as hands:

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Tidak dapat membaca frame dari webcam.")
                break

            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)

            image_output = frame.copy()

            if results.multi_hand_landmarks:
                detected_hands = results.multi_hand_landmarks

                for hand_landmarks in detected_hands:
                    mp_drawing.draw_landmarks(
                        image_output, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Choose the model by the number of hands in view
                if len(detected_hands) == 1:
                    active_model, active_actions = model1, actions_model1
                else:
                    active_model, active_actions = model2, actions_model2

                # Flatten the first hand's (x, y, z) landmarks and repeat the
                # single frame to shape (1, sequence_length, n_features)
                first_hand = detected_hands[0]
                keypoints = np.array([[res.x, res.y, res.z] for res in first_hand.landmark]).flatten()
                keypoints = np.expand_dims(keypoints, axis=0)
                keypoints = np.repeat(keypoints, sequence_length, axis=0)
                keypoints = np.expand_dims(keypoints, axis=0)

                prediction = active_model.predict(keypoints)[0]
                predicted_class = active_actions[np.argmax(prediction)]
                proba = np.max(prediction)  # highest class score
                spoken_label = predicted_class.split(' ')[0]
                label_text = f"{spoken_label} ({proba:.2f})"

                # Place the label near the top-left, scaled by the frame height
                image_height = image_output.shape[0]
                label_position_x = 30
                label_position_y = int(0.1 * image_height)
                cv2.putText(image_output, label_text, (label_position_x, label_position_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                # Speak only when the label changes
                if spoken_label != last_spoken:
                    text_to_speech(spoken_label)
                    last_spoken = spoken_label
            else:
                # No hand in view: the next sign is spoken even if it repeats
                last_spoken = None

            # Refresh the window on every frame, with or without a detection
            cv2.imshow('OpenCV feed', image_output)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

def face_detection_worker():
    """Detect faces in mirrored webcam frames and display them.

    Reads frames from the module-level ``cap``, mirrors each one, runs
    MediaPipe face detection and draws a bounding box plus centre point for
    every detection. A 60x60 reference square is drawn at the centre of the
    frame; its position is now derived from the real frame size instead of
    being hard-coded for 640x480 (the result is identical at that size).
    The loop ends when a frame cannot be read or 'q' is pressed.
    """
    with mp_face_detection.FaceDetection() as face_detection:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.flip(frame, 1)  # Mirror the image

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_detection.process(frame_rgb)

            ih, iw = frame.shape[:2]

            if results.detections:
                for detection in results.detections:
                    # Relative bounding box -> pixel (x, y, width, height)
                    bboxC = detection.location_data.relative_bounding_box
                    bbox = (int(bboxC.xmin * iw), int(bboxC.ymin * ih),
                            int(bboxC.width * iw), int(bboxC.height * ih))
                    face_center = (bbox[0] + bbox[2] // 2, bbox[1] + bbox[3] // 2)

                    # Disabled: send the face centre to the Arduino. The former
                    # snippet used x/y/w/h, which do not exist here; it is
                    # expressed with face_center so it can be re-enabled as is
                    # (requires an open serial port `ser`).
#                     string = 'X{0:d}Y{1:d}'.format(face_center[0], face_center[1])
#                     print(string)
#                     ser.write(string.encode('utf-8'))
#                     time.sleep(0.2)

                    # Face bounding box
                    cv2.rectangle(frame, bbox, (0, 0, 255), 3)
                    # Centre point of the face
                    cv2.circle(frame, face_center, 2, (0, 255, 0), 2)

            # Reference square at the centre of the frame
            center_x, center_y = iw // 2, ih // 2
            cv2.rectangle(frame, (center_x - 30, center_y - 30),
                          (center_x + 30, center_y + 30), (255, 255, 255), 3)

            cv2.imshow('Face Detection', frame)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

# Run hand-sign detection on a background thread
hand_thread = threading.Thread(target=hand_sign_detection)
hand_thread.start()

# Run face detection on the current thread; returns once its loop ends
face_detection_worker()

# Wait for the hand-detection thread to finish before releasing resources.
# NOTE(review): both loops read from the same `cap` and each polls
# cv2.waitKey for 'q' on its own; join() returns only after the hand loop
# exits - confirm that one key press reliably stops both.
hand_thread.join()

cap.release()
cv2.destroyAllWindows()




In [None]:
# Uses gTTS for text-to-speech

import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import threading
from gtts import gTTS
from playsound import playsound

# Short aliases for the MediaPipe solution modules used in this cell
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
mp_face_detection = mp.solutions.face_detection

# Capture device at index 0; both detection loops defined below read from it
cap = cv2.VideoCapture(0)

# Function to set the voice to Indonesian (Bahasa Indonesia)
def set_indonesian_voice(engine):
    """Select the first Indonesian voice offered by a pyttsx3-style engine.

    A plain ``"ID" in voice.id`` substring test also matches identifiers that
    merely contain those letters (for example an English voice whose id
    contains ``DAVID``). A voice is therefore accepted only when one of these
    holds:

    * its id or name carries an Indonesian locale tag (``ID-ID``, ``id_ID``,
      ``idID``) that is not part of a longer word,
    * its id or name contains "indonesia" (case-insensitive),
    * one of its ``languages`` entries is the code ``id`` or starts with it
      (``id_ID``, ``id-ID``).

    Args:
        engine: Object exposing ``getProperty('voices')`` (an iterable of
            voice objects with an ``id`` attribute and, optionally, ``name``
            and ``languages``) and ``setProperty('voice', voice_id)``.

    Returns:
        None. If no voice matches, the engine's voice is left unchanged.
    """
    import re  # local import: only this function needs it

    locale_tag = re.compile(r'(?<![A-Za-z])id[-_]?id(?![A-Za-z])', re.IGNORECASE)

    def looks_indonesian(voice):
        """Return True when the voice's id, name or languages mark it as Indonesian."""
        described = f"{voice.id} {getattr(voice, 'name', '') or ''}"
        if locale_tag.search(described) or 'indonesia' in described.lower():
            return True
        for lang in getattr(voice, 'languages', None) or ():
            # Some drivers may report language codes as bytes - decode defensively
            if isinstance(lang, (bytes, bytearray)):
                lang = lang.decode('utf-8', 'ignore')
            code = re.sub(r'[^a-z]+', '-', str(lang).lower()).strip('-')
            if code == 'id' or code.startswith('id-'):
                return True
        return False

    for voice in engine.getProperty('voices') or ():
        if looks_indonesian(voice):
            engine.setProperty('voice', voice.id)
            break

import time  # needed for start_time below; this cell's import list omits it

# Number of time steps in one model input window
sequence_length = 30

# NOTE(review): load_model is the Keras loader; a '.tflite' file normally has
# to be run through tf.lite.Interpreter instead - confirm this path loads.
model1 = load_model('modelkata.tflite')
model2 = load_model('model.h5')

# Class labels, in the order used to index each model's output
actions_model1 = np.array(['A','B','E','F','G','I','K','L','M','N','O','V','W','X','Y','Z'])
actions_model2 = np.array(['halo', 'kamu', 'nama', 'siapa'])

# Initial active model
active_model = None
active_actions = None

# Frame counter and start timestamp
frame_count = 0
start_time = time.time()

def text_to_speech(text):
    """Synthesise ``text`` as Indonesian speech with gTTS and play it.

    The audio is saved as ``output.mp3`` (relative path) and that file is
    then passed to ``playsound``.

    Args:
        text: The text to be spoken.
    """
    audio_path = 'output.mp3'

    # 'id' selects the Indonesian language in gTTS
    gTTS(text=text, lang='id').save(audio_path)

    playsound(audio_path)

def hand_sign_detection():
    """Recognise hand signs from the webcam, label the frame and speak the result.

    Reads frames from the module-level ``cap``, runs MediaPipe Hands on each
    one and classifies the first detected hand: ``model1``/``actions_model1``
    when one hand is visible, ``model2``/``actions_model2`` otherwise. The
    label and its score are drawn on the frame, which is shown in the
    'OpenCV feed' window. The loop ends when the capture closes, a frame
    cannot be read, or 'q' is pressed.

    Differences from the earlier version:
      * the window is refreshed on every frame, not only while a hand is
        visible (the preview used to freeze when no hand was detected);
      * a label is spoken only when it differs from the one spoken last, so
        the speech call no longer runs on every frame;
      * only the first hand is classified, since the second hand's prediction
        was computed but never used.

    NOTE(review): this function is started on a background thread and calls
    cv2.imshow / cv2.waitKey there; GUI calls off the main thread are not
    reliable on every platform - confirm on the target OS.
    """
    # Label spoken most recently; reset whenever no hand is visible
    last_spoken = None

    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as hands:

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Tidak dapat membaca frame dari webcam.")
                break

            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)

            image_output = frame.copy()

            if results.multi_hand_landmarks:
                detected_hands = results.multi_hand_landmarks

                for hand_landmarks in detected_hands:
                    mp_drawing.draw_landmarks(
                        image_output, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Choose the model by the number of hands in view
                if len(detected_hands) == 1:
                    active_model, active_actions = model1, actions_model1
                else:
                    active_model, active_actions = model2, actions_model2

                # Flatten the first hand's (x, y, z) landmarks and repeat the
                # single frame to shape (1, sequence_length, n_features)
                first_hand = detected_hands[0]
                keypoints = np.array([[res.x, res.y, res.z] for res in first_hand.landmark]).flatten()
                keypoints = np.expand_dims(keypoints, axis=0)
                keypoints = np.repeat(keypoints, sequence_length, axis=0)
                keypoints = np.expand_dims(keypoints, axis=0)

                prediction = active_model.predict(keypoints)[0]
                predicted_class = active_actions[np.argmax(prediction)]
                proba = np.max(prediction)  # highest class score
                spoken_label = predicted_class.split(' ')[0]
                label_text = f"{spoken_label} ({proba:.2f})"

                # Place the label near the top-left, scaled by the frame height
                image_height = image_output.shape[0]
                label_position_x = 30
                label_position_y = int(0.1 * image_height)
                cv2.putText(image_output, label_text, (label_position_x, label_position_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                # Speak only when the label changes
                if spoken_label != last_spoken:
                    text_to_speech(spoken_label)
                    last_spoken = spoken_label
            else:
                # No hand in view: the next sign is spoken even if it repeats
                last_spoken = None

            # Refresh the window on every frame, with or without a detection
            cv2.imshow('OpenCV feed', image_output)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

def face_detection_worker():
    """Detect faces in mirrored webcam frames and display them.

    Reads frames from the module-level ``cap``, mirrors each one, runs
    MediaPipe face detection and draws a bounding box plus centre point for
    every detection. A 60x60 reference square is drawn at the centre of the
    frame; its position is now derived from the real frame size instead of
    being hard-coded for 640x480 (the result is identical at that size).
    The loop ends when a frame cannot be read or 'q' is pressed.
    """
    with mp_face_detection.FaceDetection() as face_detection:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.flip(frame, 1)  # Mirror the image

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_detection.process(frame_rgb)

            ih, iw = frame.shape[:2]

            if results.detections:
                for detection in results.detections:
                    # Relative bounding box -> pixel (x, y, width, height)
                    bboxC = detection.location_data.relative_bounding_box
                    bbox = (int(bboxC.xmin * iw), int(bboxC.ymin * ih),
                            int(bboxC.width * iw), int(bboxC.height * ih))

                    # Face bounding box
                    cv2.rectangle(frame, bbox, (0, 0, 255), 3)
                    # Centre point of the face
                    cv2.circle(frame, (bbox[0] + bbox[2] // 2, bbox[1] + bbox[3] // 2),
                               2, (0, 255, 0), 2)

            # Reference square at the centre of the frame
            center_x, center_y = iw // 2, ih // 2
            cv2.rectangle(frame, (center_x - 30, center_y - 30),
                          (center_x + 30, center_y + 30), (255, 255, 255), 3)

            cv2.imshow('Face Detection', frame)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

# Run hand-sign detection on a background thread
hand_thread = threading.Thread(target=hand_sign_detection)
hand_thread.start()

# Run face detection on the current thread; returns once its loop ends
face_detection_worker()

# Wait for the hand-detection thread to finish before releasing resources.
# NOTE(review): both loops read from the same `cap` and each polls
# cv2.waitKey for 'q' on its own; join() returns only after the hand loop
# exits - confirm that one key press reliably stops both.
hand_thread.join()

cap.release()
cv2.destroyAllWindows()
