In [1]:
import os
import cv2
import pickle
import numpy as np
import mediapipe as mp
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score,accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

****

**INITIALIZE**

In [None]:
# Root folder of the image dataset. Each sub-folder name is used as a class
# label and the files inside it are read as that class's images.
# The path is relative, so it resolves against the kernel's working directory.
DATASET_PATH='../create_dataset/dataset'

In [2]:
# Short aliases for the MediaPipe hand-tracking solution and its drawing helpers.
mp_hands, mp_drawing = mp.solutions.hands, mp.solutions.drawing_utils

****

**TRAIN DATA**

In [None]:
# Build the training set: run MediaPipe Hands on every image under DATASET_PATH
# and store, for each detected hand, a flat feature vector [x0, y0, x1, y1, ...]
# of landmark coordinates together with the name of the class folder.
DATA = []
LABELS = []

# The context manager releases MediaPipe's resources even if an image fails mid-run.
with mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.4) as hands:
    # sorted() makes the sample order deterministic (os.listdir order is arbitrary).
    for label in sorted(os.listdir(DATASET_PATH)):
        class_dir = os.path.join(DATASET_PATH, label)
        if not os.path.isdir(class_dir):
            # Stray files in the dataset root (e.g. .DS_Store) are not classes.
            continue

        print('Dang xu ly class: ' + label)
        for img_file in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, img_file)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None (it does not raise) for files it cannot decode.
                print(f'Không đọc được ảnh: {img_path}')
                continue

            # MediaPipe expects RGB input, while OpenCV loads images as BGR.
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            result = hands.process(img_rgb)

            if not result.multi_hand_landmarks:
                print(f'Không phát hiện tay: {img_path}')
                continue

            # Every detected hand in the image becomes its own sample of this class.
            for hand_landmarks in result.multi_hand_landmarks:
                data_aux = []
                for lm in hand_landmarks.landmark:
                    data_aux.append(lm.x)
                    data_aux.append(lm.y)
                LABELS.append(label)
                DATA.append(data_aux)

# Cache the extracted features so training does not need to re-run detection.
with open('MLP_data.pickle', 'wb') as f:
    pickle.dump({'data': DATA, 'labels': LABELS}, f)

****

**HUẤN LUYỆN VÀ ĐÁNH GIÁ**

In [None]:
# Load the cached landmark features; `with` guarantees the file handle is closed.
# NOTE: pickle can execute arbitrary code on load -- only open files produced by
# this notebook, never pickles from an untrusted source.
with open('MLP_data.pickle', 'rb') as f:
    data_dict = pickle.load(f)
DATA = data_dict['data']
LABELS = data_dict['labels']

# Stratified 70/30 split. The fixed random_state makes the split, and therefore
# every metric reported below, reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    DATA,
    LABELS,
    test_size=0.3,
    shuffle=True,
    stratify=LABELS,
    random_state=42,
)

# Two hidden layers (128 and 64 units); seeded so weight initialisation is repeatable.
model = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=1000, random_state=42)
model.fit(x_train, y_train)

# Predictions on the held-out split, reused by all evaluation cells.
y_pred = model.predict(x_test)

*Độ chính xác*

In [None]:
# Overall accuracy on the held-out split, shown as a percentage.
accuracy_pct = accuracy_score(y_test, y_pred) * 100
print('Accuracy:', accuracy_pct, '%')

*Đánh giá từng chỉ số*

In [None]:
# Support-weighted precision, recall and F1 on the held-out split.
weighted_avg = {'average': 'weighted'}
precision = precision_score(y_test, y_pred, **weighted_avg)
recall = recall_score(y_test, y_pred, **weighted_avg)
f1 = f1_score(y_test, y_pred, **weighted_avg)

# Report each metric as a percentage rounded to two decimals.
for caption, score in (
    ("🔸 Precision:", precision),
    ("🔸 Recall:", recall),
    ("🔸 F1 Score:", f1),
):
    print(caption, round(score * 100, 2), "%")

*Ma trận nhầm lẫn*

In [None]:
# Write the confusion matrix of the held-out split to a text file.
confusion = confusion_matrix(y_test, y_pred)
with open('ConfusionMatrix.txt', 'w') as f:
    # str(ndarray) abbreviates arrays with more than 1000 elements using "...",
    # which would drop cells once there are more than 31 classes. Setting the
    # threshold to the array size forces every cell to be written; for smaller
    # matrices the text is the same as str() would give.
    f.write(np.array2string(confusion, threshold=confusion.size))

*Báo cáo phân loại theo từng lớp*

In [None]:
# Write the detailed per-class report (precision / recall / F1 / support) to a text file.
# Class labels come from folder names and may contain non-ASCII characters, so the
# file is written as UTF-8 rather than the platform default encoding (which can
# raise UnicodeEncodeError on Windows).
with open('ClassificationReport.txt', 'w', encoding='utf-8') as f:
    # classification_report already returns a str by default.
    f.write(classification_report(y_test, y_pred))

*Biểu đồ chính xác theo từng lớp*

In [None]:
# Per-class accuracy: correctly classified samples of a class divided by the
# number of true samples of that class (diagonal over row sums).

# Fix the label order explicitly so matrix rows and bar positions always line up.
# Without `labels=`, confusion_matrix uses the union of true and predicted labels,
# which can be longer than np.unique(y_test) and break the bar chart.
class_labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(y_pred)]))
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# A class that was predicted but has no true samples has a zero row sum;
# report 0 for it instead of dividing by zero.
support = cm.sum(axis=1)
per_class_acc = np.divide(
    cm.diagonal(),
    support,
    out=np.zeros(len(class_labels), dtype=float),
    where=support > 0,
)

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(class_labels, per_class_acc, color='skyblue')
ax.set_xlabel('Class Labels')
ax.set_ylabel('Accuracy')
ax.set_title('Per-class Accuracy')
ax.set_ylim([0, 1])
ax.grid(True, linestyle='--', alpha=0.5)
plt.show()


****

**SAVE MODEL**

In [None]:
# Persist the trained classifier so the camera demo can load it later.
with open('MLP_model.p', 'wb') as model_file:
    pickle.dump(model, model_file)

****

**TEST WITH CAMERA**

In [3]:
# Real-time demo: classify the hand gesture seen by the default webcam and show
# the annotated frame next to a panel listing every class probability.
# Press ESC to quit.

# NOTE: pickle can execute arbitrary code on load -- only load a model file that
# was produced by this notebook.
with open('MLP_model.p', 'rb') as f:
    model = pickle.load(f)

num_classes = len(model.classes_)

hands = mp_hands.Hands(static_image_mode=False, min_detection_confidence=0.7)
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError('Cannot open camera 0')

    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            # Camera disconnected or stream ended: stop instead of crashing on frame.shape.
            break

        H, W, _ = frame.shape
        # MediaPipe expects RGB input, while OpenCV captures frames as BGR.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        # Probabilities of the last processed hand; None when no hand is visible.
        probs = None

        if results.multi_hand_landmarks:
            # Each hand is classified and labelled on its own, so one hand's
            # landmarks never leak into another hand's bounding box.
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame,
                                          hand_landmarks,
                                          mp_hands.HAND_CONNECTIONS)

                x_ = [lm.x for lm in hand_landmarks.landmark]
                y_ = [lm.y for lm in hand_landmarks.landmark]

                # Same feature layout as the training data: [x0, y0, x1, y1, ...].
                data_aux = []
                for lm in hand_landmarks.landmark:
                    data_aux.append(lm.x)
                    data_aux.append(lm.y)

                # One forward pass per hand: the predicted class is the one with
                # the highest probability, so a separate predict() call is not needed.
                probs = model.predict_proba([np.asarray(data_aux)])[0]
                predicted_label = str(model.classes_[np.argmax(probs)])

                # Landmark coordinates are scaled by the frame size to get pixels.
                x1 = int(min(x_) * W)
                y1 = int(min(y_) * H)

                # Keep the label inside the frame when the hand touches the top edge.
                text_y = max(y1 - 10, 20)
                cv2.putText(frame, predicted_label, (x1, text_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # White side panel, tall enough for one text row per class.
        panel_height = max(H, num_classes * 30)
        output_panel = np.ones((panel_height, 250, 3), dtype=np.uint8) * 255

        if probs is not None:
            # List classes from most to least likely.
            label_probs = sorted(zip(model.classes_, probs),
                                 key=lambda pair: pair[1], reverse=True)

            for idx, (label, prob) in enumerate(label_probs):
                y_pos = 50 + idx * 24
                cv2.putText(output_panel, f"{label}: {prob * 100:.2f}%", (10, y_pos),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)

        # Match the frame height to the panel so the two can be stacked side by side.
        frame_resized = cv2.resize(frame, (W, panel_height))
        combined = np.hstack((frame_resized, output_panel))
        cv2.imshow('Realtime Hand Detection', combined)

        # ESC key (code 27) exits the loop.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Always free the camera, MediaPipe resources and windows, even on error
    # or when the kernel is interrupted.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()