# MODEL

In [8]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.14-cp310-cp310-win_amd64.whl.metadata (9.9 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-win_amd64.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.14-cp310-cp310-win_amd64.whl (50.8 MB)
   ---------------------------------------- 50.8/50.8 MB 4.3 MB/s eta 0:00:00
Downloading sounddevice-0.5.1-py3-none-win_amd64.whl (363 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.14 sounddevice-0.5.1



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [23]:
import tensorflow as tf
import os
import cv2
import numpy as np
import mediapipe as mp
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.models import Model
tf.get_logger().setLevel('ERROR')

In [29]:
# Inisialisasi MediaPipe Hands untuk deteksi tangan
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.7)

# Fungsi untuk memuat gambar dan label dari folder dengan MediaPipe
def load_data_with_mediapipe(folder_path):
    images = []
    labels = []
    classes = sorted(os.listdir(folder_path))  # Daftar huruf A-Z

    for label, class_name in enumerate(classes):
        class_path = os.path.join(folder_path, class_name)
        
        if not os.path.isdir(class_path):
            continue  # Lewati file non-folder seperti desktop.ini jika ada
        
        for img_file in os.listdir(class_path):
            img_path = os.path.join(class_path, img_file)
            img = cv2.imread(img_path)
            if img is not None:
                # Gunakan MediaPipe untuk mendeteksi tangan
                rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                result = hands.process(rgb_image)
                
                if result.multi_hand_landmarks:
                    # Mendapatkan bounding box tangan berdasarkan landmark
                    h, w, _ = img.shape
                    landmarks = result.multi_hand_landmarks[0]
                    x_min = int(min([lm.x for lm in landmarks.landmark]) * w)
                    x_max = int(max([lm.x for lm in landmarks.landmark]) * w)
                    y_min = int(min([lm.y for lm in landmarks.landmark]) * h)
                    y_max = int(max([lm.y for lm in landmarks.landmark]) * h)

                    # Crop ROI (Region of Interest) yang mengandung tangan
                    roi = img[y_min:y_max, x_min:x_max]
                    
                    # Resize ROI ke ukuran yang diharapkan oleh model
                    roi_resized = cv2.resize(roi, (300, 300))
                    images.append(roi_resized)
                    labels.append(label)  # Gunakan indeks folder sebagai label

    return np.array(images), np.array(labels)

# Muat dataset menggunakan fungsi dengan MediaPipe
train_dir = "DATASET/training"
val_dir = "DATASET/validation"
test_dir = "DATASET/test"

train_images, train_labels = load_data_with_mediapipe(train_dir)
val_images, val_labels = load_data_with_mediapipe(val_dir)
test_images, test_labels = load_data_with_mediapipe(test_dir)

# Normalisasi gambar
train_images = train_images / 255.0
val_images = val_images / 255.0
test_images = test_images / 255.0

In [30]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [31]:
import os

def count_images_in_folder(folder_path):
    total_count = 0
    class_counts = {}

    # Cek apakah folder memiliki sub-folder (untuk kasus training dan validation)
    if any(os.path.isdir(os.path.join(folder_path, d)) for d in os.listdir(folder_path)):
        # Hitung jumlah gambar di setiap sub-folder (kelas)
        for class_name in sorted(os.listdir(folder_path)):
            class_path = os.path.join(folder_path, class_name)
            
            if not os.path.isdir(class_path):
                continue  # Lewati file non-folder
            
            num_images = len(os.listdir(class_path))
            class_counts[class_name] = num_images
            total_count += num_images
    else:
        # Jika tidak ada sub-folder, hitung jumlah file langsung
        total_count = len(os.listdir(folder_path))
        class_counts = None  # Tidak ada kelas yang terdefinisi dalam folder test

    return total_count, class_counts

# Tentukan path dataset
train_dir = "DATASET/training"
val_dir = "DATASET/validation"
test_dir = "DATASET/test"

# Hitung jumlah data pada setiap set
train_total, train_class_counts = count_images_in_folder(train_dir)
val_total, val_class_counts = count_images_in_folder(val_dir)
test_total, test_class_counts = count_images_in_folder(test_dir)

print("Total images in training set:", train_total)
print("Image count per class in training set:", train_class_counts)
print("\nTotal images in validation set:", val_total)
print("Image count per class in validation set:", val_class_counts)
print("\nTotal images in test set:", test_total)


Total images in training set: 494
Image count per class in training set: {'A': 19, 'B': 19, 'C': 19, 'D': 19, 'E': 19, 'F': 19, 'G': 19, 'H': 19, 'I': 19, 'J': 19, 'K': 19, 'L': 19, 'M': 19, 'N': 19, 'O': 19, 'P': 19, 'Q': 19, 'R': 19, 'S': 19, 'T': 19, 'U': 19, 'V': 19, 'W': 19, 'X': 19, 'Y': 19, 'Z': 19}

Total images in validation set: 78
Image count per class in validation set: {'A': 3, 'B': 3, 'C': 3, 'D': 3, 'E': 3, 'F': 3, 'G': 3, 'H': 3, 'I': 3, 'J': 3, 'K': 3, 'L': 3, 'M': 3, 'N': 3, 'O': 3, 'P': 3, 'Q': 3, 'R': 3, 'S': 3, 'T': 3, 'U': 3, 'V': 3, 'W': 3, 'X': 3, 'Y': 3, 'Z': 3}

Total images in test set: 26


In [32]:
# Mengatur direktori
train_dir = "DATASET/training"
val_dir = "DATASET/validation"
test_dir = "DATASET/test"

# Data Augmentation untuk training
train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Data untuk validasi dan testing
val_datagen = ImageDataGenerator(rescale=1.0/255)
test_datagen = ImageDataGenerator(rescale=1.0/255)

# Memuat data
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(300, 300),  # Sesuaikan dengan input_shape model Anda
    batch_size=32,
    class_mode='sparse',  # Gunakan 'sparse' untuk sparse categorical crossentropy
    shuffle=True
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(300, 300),
    batch_size=32,
    class_mode='sparse'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(300, 300),
    batch_size=32,
    class_mode='sparse'
)


Found 468 images belonging to 26 classes.
Found 52 images belonging to 26 classes.
Found 0 images belonging to 0 classes.


In [33]:
# Mengatur input shape dan model dasar
input_shape = (300, 300, 3)
base_model = MobileNetV2(input_shape=input_shape, include_top=False, weights="imagenet")

# Menambahkan layer baru
x = Flatten()(base_model.output)
x = Dropout(0.5)(x)  # Menambahkan Dropout
output = Dense(26, activation='softmax')(x)  # 26 kelas untuk huruf A-Z

# Membangun model
ssd_model = Model(inputs=base_model.input, outputs=output)

# Freeze beberapa layer dari model dasar
for layer in base_model.layers[:-20]:
    layer.trainable = False

# Compile model
ssd_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Learning rate scheduler
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)


In [35]:
# Fit model
ssd_model.fit(train_generator,
               validation_data=val_generator,
               epochs=50,  # Sesuaikan jumlah epoch sesuai kebutuhan
               callbacks=[reduce_lr])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x2a65a08fcd0>

In [None]:
# Evaluasi model
test_loss, test_accuracy = ssd_model.evaluate(test_generator)
print(f"Test accuracy: {test_accuracy:.4f}, Test loss: {test_loss:.4f}")

In [26]:
# Simpan model
model_path = "MODEL/ssd_sibi_model_mediapipe.h5"
ssd_model.save(model_path)
print(f"Model berhasil disimpan di {model_path}")

Model berhasil disimpan di MODEL/ssd_sibi_model_mediapipe.h5


In [37]:
# Fungsi untuk testing gambar dengan MediaPipe
def test_with_mediapipe(image_path):
    img = cv2.imread(image_path)
    rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_image)
    
    if result.multi_hand_landmarks:
        # Mendapatkan ROI dan resize
        h, w, _ = img.shape
        landmarks = result.multi_hand_landmarks[0]
        x_min = int(min([lm.x for lm in landmarks.landmark]) * w)
        x_max = int(max([lm.x for lm in landmarks.landmark]) * w)
        y_min = int(min([lm.y for lm in landmarks.landmark]) * h)
        y_max = int(max([lm.y for lm in landmarks.landmark]) * h)
        
        roi = img[y_min:y_max, x_min:x_max]
        roi_resized = cv2.resize(roi, (300, 300)) / 255.0  # Normalisasi
        
        # Prediksi huruf dari gambar
        roi_resized = np.expand_dims(roi_resized, axis=0)
        prediction = ssd_model.predict(roi_resized)
        predicted_class = np.argmax(prediction, axis=1)
        return chr(predicted_class[0] + 65)  # Konversi ke huruf A-Z
    else:
        return "No hand detected"

# Contoh penggunaan testing
test_image_path = "DATASET/test/W (3).jpg"
predicted_letter = test_with_mediapipe(test_image_path)
print(f"Predicted letter: {predicted_letter}")

Predicted letter: L


# TESTING

In [None]:
cap = cv2.VideoCapture(0)  # 0 untuk webcam pertama

ssd_model = "MODEL/ssd_sibi_model.h5"

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Preprocessing frame
    input_frame = cv2.resize(frame, (300, 300))
    input_frame = np.expand_dims(input_frame, axis=0) / 255.0

    # Prediksi label huruf SIBI
    preds = ssd_model.predict(input_frame)
    pred_label = np.argmax(preds, axis=1)[0]  # Dapatkan label prediksi
    class_name = chr(pred_label + 65)  # Ubah indeks menjadi huruf A-Z

    # Tampilkan hasil pada frame
    cv2.putText(frame, f"Predicted: {class_name}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
    cv2.imshow("Real-Time SIBI Detection", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf

# Load model SSD yang telah dilatih
model_path = "MODEL/ssd_sibi_model.h5"
ssd_model = tf.keras.models.load_model(model_path)

# Setup MediaPipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Inisialisasi MediaPipe Hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.7)

cap = cv2.VideoCapture(0)  # 0 untuk webcam pertama

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Ubah frame ke RGB
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Deteksi tangan dengan MediaPipe
    results = hands.process(rgb_frame)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Gambarkan landmark tangan
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Dapatkan bounding box dari posisi landmark
            h, w, c = frame.shape
            x_min = min([lm.x for lm in hand_landmarks.landmark]) * w
            y_min = min([lm.y for lm in hand_landmarks.landmark]) * h
            x_max = max([lm.x for lm in hand_landmarks.landmark]) * w
            y_max = max([lm.y for lm in hand_landmarks.landmark]) * h

            # Ekstrak ROI dari bounding box
            roi = frame[int(y_min):int(y_max), int(x_min):int(x_max)]

            # Resize ROI sesuai input model
            if roi.size > 0:
                roi_resized = cv2.resize(roi, (300, 300))
                roi_normalized = np.expand_dims(roi_resized / 255.0, axis=0)

                # Prediksi huruf dengan model SSD
                preds = ssd_model.predict(roi_normalized)
                pred_label = np.argmax(preds, axis=1)[0]
                class_name = chr(pred_label + 65)  # Ubah indeks ke huruf A-Z

                # Tampilkan prediksi di frame
                cv2.putText(frame, f"Predicted: {class_name}", (int(x_min), int(y_min) - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    # Tampilkan frame
    cv2.imshow("Real-Time SIBI Detection with MediaPipe", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
hands.close()
