<a href="https://colab.research.google.com/github/Nofryntii/sibi/blob/main/gesture_recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mediapipe

In [None]:
!pip install --upgrade pip
!pip install mediapipe-model-maker

In [None]:
import os
import pathlib
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import sklearn
import cv2
import mediapipe as mp
from mediapipe_model_maker.python.vision import gesture_recognizer
from mediapipe.tasks.python.vision.gesture_recognizer import GestureRecognizer
from mediapipe.framework.formats import landmark_pb2


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
dataset_root = pathlib.Path("/content/drive/MyDrive/Sibi")

In [None]:
import utils

In [None]:
# Class labels are the names of the sub-directories of the dataset root.
# iterdir() yields entries in arbitrary, filesystem-dependent order, so sort
# them to keep the printed class list stable across runs and machines.
labels = sorted(p.name for p in dataset_root.iterdir() if p.is_dir())
print(f"Classes: {labels}")

# Collect the image paths under the dataset root with the project-local helper.
# NOTE(review): despite the name, this is the whole un-split dataset; the
# train/test split is only created by a later cell.
train_files = utils.find_images(dataset_root)
print(f"Number of training images: {len(train_files)}")


In [None]:
def check_and_convert_images(dataset_path, size=(192, 192)):
    """Resize every image under ``dataset_path`` in place.

    Walks the directory tree, loads each ``.png``/``.jpg``/``.jpeg`` file with
    OpenCV, resizes it to ``size`` and writes it back to the same path.

    Args:
        dataset_path: Root directory that is scanned recursively.
        size: Target ``(width, height)`` handed to ``cv2.resize``. Defaults to
            the 192x192 that was previously hard-coded.

    Note:
        The original files are overwritten and re-encoded on every call.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    for root, _dirs, files in os.walk(dataset_path):
        for file in files:
            if not file.lower().endswith(image_suffixes):
                continue
            img_path = os.path.join(root, file)

            # cv2.imread signals failure by returning None instead of raising.
            img = cv2.imread(img_path)
            if img is None:
                print(f"Gagal membaca gambar, dilewati: {img_path}")
                continue

            # The array stays in OpenCV's BGR order from imread to imwrite, so
            # no colour conversion is needed: the former BGR->RGB->BGR round
            # trip cancelled itself out.
            img = cv2.resize(img, size)

            # cv2.imwrite reports failure through its boolean return value.
            if not cv2.imwrite(img_path, img):
                print(f"Gagal menyimpan gambar: {img_path}")

# Run the conversion on dataset_root.
check_and_convert_images(dataset_root)

In [None]:
import os
import cv2

def check_and_convert_images(dataset_path, size=(192, 192)):
    """Resize every image under ``dataset_path`` in place and report grayscale ones.

    NOTE(review): an earlier cell defines a function with the same name; this
    definition replaces it once this cell runs. Keep only one of the two.

    Args:
        dataset_path: Root directory that is scanned recursively for
            ``.png``/``.jpg``/``.jpeg`` files.
        size: Target ``(width, height)`` handed to ``cv2.resize``. Defaults to
            the 192x192 that was previously hard-coded.

    Note:
        The original files are overwritten and re-encoded on every call. The
        file written back is always a 3-channel image.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    for root, _dirs, files in os.walk(dataset_path):
        for file in files:
            if not file.lower().endswith(image_suffixes):
                continue
            img_path = os.path.join(root, file)

            # cv2.imread signals failure by returning None instead of raising.
            img = cv2.imread(img_path)
            if img is None:
                print(f"Gagal membaca gambar, dilewati: {img_path}")
                continue

            # With its default flag cv2.imread always returns a 3-channel BGR
            # array, so the former `len(img.shape) == 2` check could never be
            # true. Grayscale content is detected instead by all three
            # channels being identical (this also covers single-channel files,
            # which OpenCV expands by replicating the channel).
            blue, green, red = img[..., 0], img[..., 1], img[..., 2]
            if (blue == green).all() and (green == red).all():
                print(f"Gambar grayscale terdeteksi: {img_path}")

            # The array stays in BGR order from imread to imwrite, so the
            # former BGR->RGB->BGR round trip was a no-op and is dropped.
            img = cv2.resize(img, size)

            # cv2.imwrite reports failure through its boolean return value.
            if not cv2.imwrite(img_path, img):
                print(f"Gagal menyimpan gambar: {img_path}")

# Run the conversion.
# Keep dataset_root a pathlib.Path: other cells call Path methods on it
# (e.g. dataset_root.iterdir()), which fail if it is rebound to a plain str.
dataset_root = pathlib.Path("/content/drive/MyDrive/Sibi")
check_and_convert_images(dataset_root)


In [None]:
def verify_dataset_structure(dataset_path):
    """Print the number of images per class folder and the overall total.

    Only the direct sub-directories of ``dataset_path`` are treated as
    classes, and only files directly inside them whose names end in
    ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) are counted.

    Args:
        dataset_path: Directory containing one sub-directory per class.

    Raises:
        ValueError: If ``dataset_path`` does not exist.
    """
    if not os.path.exists(dataset_path):
        raise ValueError(f"Dataset path {dataset_path} tidak ditemukan")

    image_suffixes = ('.png', '.jpg', '.jpeg')
    grand_total = 0

    for entry in os.listdir(dataset_path):
        entry_path = os.path.join(dataset_path, entry)
        # Loose files next to the class folders are not classes.
        if not os.path.isdir(entry_path):
            continue

        count = sum(
            1
            for name in os.listdir(entry_path)
            if name.lower().endswith(image_suffixes)
        )
        print(f"Kelas {entry}: {count} gambar")
        grand_total += count

    print(f"\nTotal keseluruhan gambar: {grand_total} gambar")

# Run the per-class image count on dataset_root.
verify_dataset_structure(dataset_root)


In [None]:
import pathlib
import shutil
import numpy as np
import os

def split_dataset(input_root, output_root, splits="80:20", seed=None):
    """Copy a class-per-folder dataset into ``train``/``test`` sub-folders.

    For every class directory under ``input_root`` the files are shuffled and
    copied to ``output_root/train/<class>`` and ``output_root/test/<class>``.

    Args:
        input_root: Directory with one sub-directory per class.
        output_root: Destination root; missing directories are created.
        splits: ``"train:test"`` percentages; they must sum to 100.
        seed: If not None, seeds NumPy's global RNG before shuffling.

    Raises:
        AssertionError: If the percentages do not sum to 100.
        ValueError: If ``splits`` has more than two parts.
    """
    if seed is not None:
        # Seeds the *global* NumPy RNG, so np.random calls made after this
        # function returns are affected by the seed as well.
        np.random.seed(seed)

    input_root = pathlib.Path(input_root)
    output_root = pathlib.Path(output_root)

    split_percentages = [int(s) for s in splits.split(':')]
    assert sum(split_percentages) == 100, "Total persentase harus 100"
    # Only a train/test split is produced; a third part used to be silently
    # folded into the test set.
    if len(split_percentages) > 2:
        raise ValueError(
            f"splits harus berformat 'train:test', diterima: {splits!r}"
        )

    # iterdir() order is arbitrary, and every shuffle advances the same RNG,
    # so the class order must be fixed for a seeded split to be reproducible.
    for labelpath in sorted(p for p in input_root.iterdir() if p.is_dir()):
        # Regular files only: shutil.copy fails on a nested directory.
        files = sorted(p for p in labelpath.iterdir() if p.is_file())
        np.random.shuffle(files)

        total_files = len(files)
        train_size = int(total_files * (split_percentages[0] / 100))
        test_size = total_files - train_size

        subsets = {
            'train': files[:train_size],
            'test': files[train_size:]
        }

        for split_name, subset_files in subsets.items():
            subset_root = output_root / split_name / labelpath.name
            subset_root.mkdir(parents=True, exist_ok=True)
            for file in subset_files:
                shutil.copy(file, subset_root)

        print(f"Kelas {labelpath.name} - Train: {train_size}, Test: {test_size}")

# Split the dataset: 80% for training and 20% for testing.
output_root = "/content/processed_data"
split_dataset(dataset_root, output_root, splits="80:20", seed=42)


In [None]:
def split_dataset(input_root, output_root, splits="80:20", seed=None):
    """Copy a class-per-folder dataset into ``train``/``test`` sub-folders.

    NOTE(review): an earlier cell defines a function with the same name; this
    definition replaces it once this cell runs. Keep only one of the two.

    Args:
        input_root: Directory with one sub-directory per class.
        output_root: Destination root; missing directories are created.
        splits: ``"train:test"`` percentages; they must sum to 100.
        seed: If not None, seeds NumPy's global RNG before shuffling.

    Raises:
        AssertionError: If the percentages do not sum to 100.
        ValueError: If ``splits`` has more than two parts.
    """
    # np.random.seed(None) would re-seed the global RNG from fresh entropy and
    # throw away any seed set earlier, so only seed when one was given.
    if seed is not None:
        np.random.seed(seed)
    input_root, output_root = pathlib.Path(input_root), pathlib.Path(output_root)
    split_percentages = [int(s) for s in splits.split(':')]
    assert sum(split_percentages) == 100, "Total persentase harus 100"
    # Only train/test are produced; reject extra parts instead of silently
    # folding them into the test set.
    if len(split_percentages) > 2:
        raise ValueError(
            f"splits harus berformat 'train:test', diterima: {splits!r}"
        )

    # Fixed class order keeps a seeded split reproducible, because iterdir()
    # order is arbitrary and every shuffle advances the same RNG.
    for labelpath in sorted(p for p in input_root.iterdir() if p.is_dir()):
        # Regular files only: shutil.copy fails on a nested directory.
        files = sorted(p for p in labelpath.iterdir() if p.is_file())
        np.random.shuffle(files)
        train_size = int(len(files) * (split_percentages[0] / 100))
        subsets = {'train': files[:train_size], 'test': files[train_size:]}

        for split_name, subset_files in subsets.items():
            target_dir = output_root / split_name / labelpath.name
            target_dir.mkdir(parents=True, exist_ok=True)
            for file in subset_files:
                shutil.copy(file, target_dir)

        print(f"Kelas {labelpath.name} - Train: {train_size}, Test: {len(files) - train_size}")

# Usage: 80/20 train/test split into /content/processed_data with a fixed seed.
split_dataset(dataset_root, "/content/processed_data", splits="80:20", seed=42)


In [None]:
# Build a Model Maker dataset from the dataset_root folder and save its
# unbatched tf.data form to /content/train_data.
# NOTE(review): this reads dataset_root, not the train split under
# /content/processed_data, and train_data is reassigned by a later cell
# (data.split) — confirm this cell is still needed.
train_data = gesture_recognizer.Dataset.from_folder(str(dataset_root))
train_data.gen_tf_dataset().unbatch().save("/content/train_data")

In [None]:
data_root = pathlib.Path("/content/processed_data")

In [None]:
import pathlib
import numpy as np

import utils

# Point at the directory the split cells write to. The former relative
# "./processed_data" only resolved to it when the working directory happened
# to be /content.
data_root = pathlib.Path("/content/processed_data")
dataset_train = data_root / "train"
trainfiles = utils.find_images(dataset_train)

# Draw 10 training images at random (np.random.choice samples with
# replacement by default, so duplicates are possible) and plot them in a grid.
sample_files = np.random.choice(np.asarray(trainfiles), 10)
fig, axarr = utils.plot_image_files(sample_files, ncols=5)
# NOTE(review): a later cell saves its prediction plot under this same
# filename and overwrites this image.
fig.savefig("example-output.jpg", dpi=150, bbox_inches="tight")

In [None]:
from mediapipe_model_maker.python.vision import gesture_recognizer

# Hand-preprocessing settings shared by the train and test datasets.
handparams = gesture_recognizer.HandDataPreprocessingParams(
    min_detection_confidence=0.5
)

# Load the train split and carve a validation set out of it.
# NOTE(review): presumably split(0.8) keeps 80% for training and 20% for
# validation — confirm against the Model Maker Dataset.split documentation.
dataset_train = data_root / "train"
data = gesture_recognizer.Dataset.from_folder(str(dataset_train), handparams)
train_data, validation_data = data.split(0.8)

# Load the test split with the same preprocessing settings.
dataset_test = data_root / "test"
test_data = gesture_recognizer.Dataset.from_folder(
    str(dataset_test), handparams
)


In [None]:
# Plot 10 randomly drawn training images in a 5-column grid and save the figure.
trainfiles = utils.find_images(dataset_train)

# np.random.choice samples with replacement by default, so duplicates are possible.
sample_files = np.random.choice(np.asarray(trainfiles), 10)
fig, axarr = utils.plot_image_files(sample_files, ncols=5)
fig.savefig("outputdataset.jpg", dpi=150, bbox_inches="tight")

In [None]:
# Training hyper-parameters. export_dir is also read by a later cell to locate
# the exported model.task file.
hparams = gesture_recognizer.HParams(
    export_dir="exported_model",
    batch_size=32,
    epochs=100,
    shuffle=True,
    learning_rate=0.001,
    lr_decay=0.95,
)
moptions = gesture_recognizer.ModelOptions(dropout_rate=0.05)
# NOTE(review): the name `options` is reused later for the MediaPipe Tasks
# inference options, a different type; re-run this cell before retraining.
options = gesture_recognizer.GestureRecognizerOptions(
    hparams=hparams, model_options=moptions
)

# Create the recognizer from the train/validation datasets.
# NOTE(review): presumably create() also runs the training — confirm.
model = gesture_recognizer.GestureRecognizer.create(
    train_data=train_data, validation_data=validation_data, options=options
)


In [None]:
# Evaluate the trained model on the held-out test dataset.
loss, acc = model.evaluate(test_data, batch_size=1)
print(f"Test loss: {loss:.4f}, Test accuracy: {acc:.2%}")

In [None]:
# Export the trained model; a later cell loads it from
# hparams.export_dir + "/model.task".
model.export_model("model.task")

In [None]:
# Collect the held-out test images for the inference cells below.
# A dedicated name is used instead of rebinding dataset_root: earlier cells
# use dataset_root for the raw dataset on Drive, and re-running them after
# this cell would otherwise operate on the test split.
test_root = pathlib.Path("/content/processed_data/test")
# Accept the same extensions as the preprocessing cells, case-insensitively.
# The former "**/*.jpg" glob silently left .png/.jpeg/.JPG files out of the
# evaluation. Sorted so the file order is stable between runs.
testfiles = sorted(
    p
    for p in test_root.rglob("*")
    if p.is_file() and p.suffix.lower() in {".png", ".jpg", ".jpeg"}
)

In [None]:
# Pick one test image at random for the single-image inference check.
filename = np.random.choice(testfiles)

In [None]:
print(len(testfiles), testfiles[:10])  # Check the number of test images and a few example paths

In [None]:
import mediapipe as mp
from mediapipe.tasks.python.vision.gesture_recognizer import GestureRecognizer

# Load the exported model with the MediaPipe Tasks API in single-image mode.
base_options = mp.tasks.BaseOptions(
    model_asset_path=hparams.export_dir + "/model.task"
)
# NOTE(review): this rebinds `options`, which held the Model Maker training
# options in an earlier cell.
options = mp.tasks.vision.GestureRecognizerOptions(
    base_options=base_options, running_mode=mp.tasks.vision.RunningMode.IMAGE
)

# Run the recognizer once on the randomly chosen test image.
with GestureRecognizer.create_from_options(options) as recognizer:
    mp_image = mp.Image.create_from_file(str(filename))
    result = recognizer.recognize(mp_image)


In [None]:
# Draw 10 test images at random (with replacement, the np.random.choice default).
test_samples = np.random.choice(np.asarray(testfiles), 10)

# Plot the recognizer's predictions for the samples with the project-local helper.
with GestureRecognizer.create_from_options(options) as recognizer:
    fig, axarr = utils.plot_recognizer_predictions(test_samples, recognizer, 5)
# NOTE(review): an earlier cell saves its training-sample grid under this same
# filename, so that image is overwritten here.
fig.savefig("example-output.jpg", dpi=150, bbox_inches="tight")

In [None]:
!pip install tqdm

In [None]:
from tqdm import tqdm  # Tambahkan ini jika belum ada

# Run the recognizer over every test image and record
# (file path, true label, predicted label).
test_results = []
with mp.tasks.vision.GestureRecognizer.create_from_options(options) as recognizer:
    for filename in tqdm(testfiles, desc="Processing test files"):
        mp_image = mp.Image.create_from_file(str(filename))
        result = recognizer.recognize(mp_image)
        if len(result.gestures) > 0:
            # First category of the first gesture entry; an empty name becomes "n/a".
            pred = result.gestures[0][0].category_name or "n/a"
        else:
            # The recognizer returned no gestures for this image.
            pred = "empty"
        # The true label is the name of the folder that contains the image.
        test_results.append((filename, filename.parent.name, pred))

# Convert to DataFrame
results_df = pd.DataFrame(test_results, columns=["filename", "label", "pred"])


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np
import sklearn

# Class order for both axes: the dataset labels plus the two pseudo-labels
# used for missing predictions.
classes = sorted(test_data.label_names + ["n/a", "empty"])

# Compute the confusion matrix (raw counts, no normalisation).
cm = sklearn.metrics.confusion_matrix(
    results_df["label"], results_df["pred"], labels=classes
)

# Build the ConfusionMatrixDisplay object.
disp = sklearn.metrics.ConfusionMatrixDisplay(cm, display_labels=classes)

# Create the figure and axis.
fig, ax = plt.subplots()  # default figure size; no figsize is passed

# Plot the confusion matrix without a grid; cell values are added manually below.
disp.plot(include_values=False, cmap="Blues", ax=ax)
ax.grid(False)
ax.set_facecolor("white")

# Write the count into each non-zero cell (text colour is gold).
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        if cm[i, j] != 0:  # Show the value only when it is non-zero
            ax.text(
                j, i, f"{cm[i, j]}",
                ha="center", va="center", color="gold", fontsize=8
            )

# Save the plot to a file and display it.
# NOTE(review): the filename says "gray_text" but the text is drawn in gold.
plt.savefig("confusion_matrix_filtered_gray_text.png", dpi=150, bbox_inches="tight")
plt.show()


In [None]:
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

# Add a boolean 'result' column: True where the prediction matches the label.
results_df["result"] = results_df["pred"] == results_df["label"]

# Per-class precision, recall and f1-score.
report = classification_report(
    results_df["label"],
    results_df["pred"],
    labels=results_df["label"].unique(),  # Report on the true labels only, in order of first appearance
    zero_division=0
)

print("Classification Report:")
print(report)


In [None]:
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score
import pandas as pd

# Add a boolean 'result' column: True where the prediction matches the label.
results_df["result"] = results_df["pred"] == results_df["label"]

# Evaluate on the true classes only, in alphabetical order.
classes = sorted(results_df["label"].unique())

# Per-class precision, recall and f1-score.
report = classification_report(
    results_df["label"],
    results_df["pred"],
    labels=classes,
    zero_division=0
)

print("Classification Report:")
print(report)

# Macro averages over the same label set as the report above. Without
# `labels`, scikit-learn averages over every label found in y_true or y_pred,
# so the "empty"/"n/a" pseudo-predictions would be counted as extra classes
# with zero scores and the figures would disagree with the report.
macro_kwargs = dict(labels=classes, average='macro', zero_division=0)
precision_avg = precision_score(results_df["label"], results_df["pred"], **macro_kwargs)
recall_avg = recall_score(results_df["label"], results_df["pred"], **macro_kwargs)
f1_avg = f1_score(results_df["label"], results_df["pred"], **macro_kwargs)

print("\nAverage Metrics:")
print(f"Precision (average): {precision_avg:.4f}")
print(f"Recall (average): {recall_avg:.4f}")
print(f"F1 Score (average): {f1_avg:.4f}")


In [None]:
# Bucket every prediction: exact match, no usable prediction, or wrong class.
results_df["result"] = np.where(
    results_df.pred == results_df.label,
    "correct",
    np.where(results_df.pred.isin(["empty", "n/a"]), "not found", "incorrect")
)

# Fix the category order of the 'result' column so the hue levels are stable.
results_df["result"] = pd.Categorical(
    results_df["result"],
    categories=["not found", "incorrect", "correct"],
    ordered=True
)

# Order the x axis alphabetically by label.
label_order = sorted(results_df["label"].unique())
results_df["label"] = pd.Categorical(results_df["label"], categories=label_order, ordered=True)

# Seaborn style and theme.
sns.set_theme(style="whitegrid")
sns.set_palette("pastel")

# Stacked histogram of outcomes per label.
plt.figure(figsize=(12, 8))
ax = sns.histplot(
    data=results_df,
    x="label",
    hue="result",
    multiple="stack",
    stat="count",
    palette={"correct": "mediumseagreen", "incorrect": "coral", "not found": "gray"},
    legend=True
)

# Title and axis labels.
ax.set_title("Prediction Results by Label", fontsize=16)
ax.set_xlabel("Labels", fontsize=14)
ax.set_ylabel("Count", fontsize=14)

# Restyle the legend seaborn already built instead of calling
# plt.legend(labels=[...]): that form pairs labels with artists purely by
# draw order, so it mislabels the colours as soon as one category is absent.
sns.move_legend(ax, "upper right", title="Result", title_fontsize=14, fontsize=12)
for legend_text in ax.get_legend().get_texts():
    # "not found" -> "Not Found", etc.; each text stays attached to its own handle.
    legend_text.set_text(legend_text.get_text().title())

# Save and show the figure.
plt.savefig("prediction_results_with_ordered_labels.png", bbox_inches="tight")
plt.show()


In [None]:
# For the rows marked 'not found', count the pred values per true label,
# largest counts first.
results_df.query("result == 'not found'").groupby(
    "label"
).pred.value_counts().sort_values(ascending=False)

In [None]:
# Pull the whole training set out as one batch (batch_size = dataset size).
train_ds = train_data.gen_tf_dataset(batch_size=train_data.size)
xy = train_ds.take(1).get_single_element()

# xy[0] is used as the embeddings and xy[1] as the class targets; argmax over
# axis 1 turns each target row into a class index.
# NOTE(review): assumes xy[1] is one-hot encoded — confirm.
embeddings, classes_onehot = xy[0].numpy(), xy[1].numpy()  # type: ignore
class_indices = np.argmax(classes_onehot, axis=1)

print(embeddings.shape, class_indices.shape)
# -> (1861, 128) (1861,)

In [None]:
import sklearn.manifold

# Project the embeddings to 2-D for plotting. t-SNE is stochastic, so
# random_state is pinned to give the same layout on every run.
tsne = sklearn.manifold.TSNE(random_state=42)
emb = tsne.fit_transform(embeddings)


In [None]:
import seaborn as sns
import pandas as pd

# Scatter plot of the 2-D embedding, coloured by class index.
embdf = pd.DataFrame(emb, columns=["X1", "X2"]).assign(label=class_indices)
sns.scatterplot(
    data=embdf, x="X1", y="X2", hue="label", palette="Spectral", legend=False
)
# Annotate each class that has samples, with the text offset from the
# centroid of its points.
for i, c in enumerate(train_data.label_names):
    if np.all(class_indices != i):
        continue
    center = emb[class_indices == i].mean(axis=0)
    plt.annotate(c, center, center - 6)
# Save once, after every label has been drawn. Saving inside the loop rewrote
# the file for each class.
plt.savefig("result.png")


In [None]:
import seaborn as sns

# Bucket every prediction: exact match, no usable prediction ("n/a"/"empty"),
# or wrong class.
results_df["result"] = np.where(
    results_df.pred == results_df.label,
    "correct",
    np.where(results_df.pred.isin(["n/a", "empty"]), "not found", "incorrect"),
)
# Share of each outcome over the whole test set.
print(results_df.result.value_counts(normalize=True))
# Stacked count of outcomes per true label.
sns.histplot(
    data=results_df, x="label", hue="result", multiple="stack", stat="count"
)

In [None]:
import seaborn as sns
import pandas as pd

# NOTE(review): this cell repeats an earlier cell and writes the same
# result.png; keep only one of the two.
# Scatter plot of the 2-D embedding, coloured by class index.
embdf = pd.DataFrame(emb, columns=["X1", "X2"]).assign(label=class_indices)
sns.scatterplot(
    data=embdf, x="X1", y="X2", hue="label", palette="Spectral", legend=False
)
# Annotate each class that has samples, with the text offset from the
# centroid of its points.
for i, c in enumerate(train_data.label_names):
    if np.all(class_indices != i):
        continue
    center = emb[class_indices == i].mean(axis=0)
    plt.annotate(c, center, center - 6)
# Save once, after every label has been drawn. Saving inside the loop rewrote
# the file for each class.
plt.savefig("result.png")
