<a href="https://colab.research.google.com/github/MaiKhoa0101/MachineLearning/blob/main/CODE_Detect.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install --upgrade mediapipe



In [None]:
"""
Sign Language Gesture Detection System
Phát hiện động tác ngón tay từ video và xuất JSON
"""

import json
import math
import unicodedata

import cv2
import mediapipe as mp
import numpy as np
import pandas as pd


class SignLanguageDetector:
    """Detect per-finger gestures in a video and export them as JSON.

    Hand landmarks come from MediaPipe Hands.  Every finger is classified as
    ``'duoi'`` (tip farther from the reference than the base), ``'thu'``
    (tip closer than the base) or ``'cong'`` (in between), and the label is
    translated into a gesture code through a rule table read from Excel.
    """

    # (finger type, keywords) pairs tested in order against the lower-cased
    # body-part name of a rule row.  'ring' must precede 'pinky' because the
    # pinky keyword is a substring of the ring keyword.
    _FINGER_KEYWORDS = (
        ('thumb', ('cái',)),
        ('index', ('trỏ',)),
        ('middle', ('giữa',)),
        ('ring', ('áp út', 'ap ut')),
        ('pinky', ('út',)),
    )

    def __init__(self, excel_path='detech.xlsx'):
        """Create the MediaPipe Hands tracker and load the gesture rules.

        Args:
            excel_path: Path of the Excel rule sheet handed to ``load_rules``.
        """
        # NOTE(review): this relies on the legacy ``mp.solutions`` API; the
        # notebook output recorded "module 'mediapipe' has no attribute
        # 'solutions'", so confirm the installed mediapipe version ships it.
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7
        )
        self.mp_drawing = mp.solutions.drawing_utils

        # Load rules
        self.rules = self.load_rules(excel_path)

        # Landmark indices of each finger, ordered from base joint to tip.
        self.finger_landmarks = {
            'thumb': [1, 2, 3, 4],
            'index': [5, 6, 7, 8],
            'middle': [9, 10, 11, 12],
            'ring': [13, 14, 15, 16],
            'pinky': [17, 18, 19, 20]
        }

        print("✅ Khởi tạo Sign Language Detector thành công!")

    def close(self):
        """Release the resources held by the MediaPipe Hands tracker."""
        self.hands.close()

    # --------------------------------------------------
    # LOAD RULES
    # --------------------------------------------------
    @staticmethod
    def _nfc(text):
        """Return ``str(text)`` in Unicode NFC form.

        Accented Vietnamese text can be stored precomposed or decomposed;
        normalising both the data and the keywords makes substring matching
        independent of how either was saved.
        """
        return unicodedata.normalize('NFC', str(text))

    def _match_finger_type(self, finger_name):
        """Return the finger type named in *finger_name*, or None if none is."""
        for finger_type, keywords in self._FINGER_KEYWORDS:
            if any(self._nfc(keyword) in finger_name for keyword in keywords):
                return finger_type
        return None

    def load_rules(self, excel_path):
        """Read the gesture rule table from an Excel sheet.

        The sheet is read with ``pandas.read_excel`` and must provide the
        columns ``Ten_bo_phan`` (body-part name), ``Dong_tac`` (gesture name)
        and ``Ma_dong_tac`` (gesture code).  Rows whose body-part name does
        not mention a known finger are skipped.

        Args:
            excel_path: Path of the Excel file.

        Returns:
            A dict keyed by ``"<finger>_<left|right>"`` whose values map the
            normalised gesture name to its code.  If anything goes wrong the
            error is printed and an empty dict is returned.
        """
        try:
            df = pd.read_excel(excel_path)
            rules = {}
            left_marker = self._nfc('trái')

            for _, row in df.iterrows():
                finger_name = self._nfc(str(row['Ten_bo_phan']).lower().strip())
                gesture = str(row['Dong_tac']).lower().strip()
                code = str(row['Ma_dong_tac']).strip()

                finger_type = self._match_finger_type(finger_name)
                if finger_type is None:
                    continue

                # Anything not explicitly marked as the left hand counts as right.
                hand_side = 'left' if left_marker in finger_name else 'right'
                key = f"{finger_type}_{hand_side}"

                rules.setdefault(key, {})[self.normalize_gesture_name(gesture)] = code

            print(f"✅ Đã load {len(rules)} nhóm quy tắc từ Excel")
            return rules

        except Exception as e:
            # Best effort: a missing or malformed sheet yields an empty rule set.
            print(f"❌ Lỗi khi đọc Excel: {e}")
            return {}

    def normalize_gesture_name(self, gesture):
        """Map an accented gesture name onto the ASCII labels used internally.

        The input is first brought to NFC so that decomposed spellings are
        recognised as well.

        Args:
            gesture: Gesture name as written in the rule sheet.

        Returns:
            The name with the known accented words replaced and surrounding
            whitespace removed.
        """
        name = self._nfc(gesture)
        for accented, plain in (('duỗi', 'duoi'), ('chạm', 'cham'), ('út', 'ut')):
            name = name.replace(self._nfc(accented), plain)
        return name.strip()

    # --------------------------------------------------
    # UTILS
    # --------------------------------------------------
    def calculate_distance(self, p1, p2):
        """Return the Euclidean distance between two points with x, y, z."""
        return math.sqrt(
            (p1.x - p2.x) ** 2 +
            (p1.y - p2.y) ** 2 +
            (p1.z - p2.z) ** 2
        )

    @staticmethod
    def _frame_timestamp(frame_id, fps):
        """Return the frame time in seconds, or None when *fps* is unusable.

        A frame rate that is not strictly positive (0 or NaN) would otherwise
        raise or produce a meaningless value.
        """
        if fps > 0:
            return round(frame_id / fps, 2)
        return None

    # --------------------------------------------------
    # FINGER DETECTION
    # --------------------------------------------------
    def detect_thumb_gesture(self, landmarks, hand_side):
        """Classify the thumb from the x order of its tip and the index base.

        Args:
            landmarks: Indexable hand landmarks; items 4 and 5 are read.
            hand_side: ``'left'`` or anything else (treated as right).

        Returns:
            ``'duoi'`` when the tip lies on the outer side of landmark 5 for
            the given hand, otherwise ``'thu'``.
        """
        thumb_tip = landmarks[4]
        index_mcp = landmarks[5]

        if hand_side == 'left':
            return 'duoi' if thumb_tip.x < index_mcp.x else 'thu'
        return 'duoi' if thumb_tip.x > index_mcp.x else 'thu'

    def detect_finger_gesture(self, landmarks, finger_type, margin=0.02):
        """Classify a non-thumb finger by comparing distances from landmark 0.

        Args:
            landmarks: Indexable hand landmarks with x, y, z attributes.
            finger_type: Key of ``self.finger_landmarks``.
            margin: Dead band around the base-joint distance inside which the
                finger is reported as ``'cong'``.

        Returns:
            ``'duoi'`` if the tip is more than *margin* farther from landmark
            0 than the base joint, ``'thu'`` if it is more than *margin*
            closer, otherwise ``'cong'``.
        """
        indices = self.finger_landmarks[finger_type]

        wrist = landmarks[0]
        dist_mcp = self.calculate_distance(wrist, landmarks[indices[0]])
        dist_tip = self.calculate_distance(wrist, landmarks[indices[3]])

        if dist_tip > dist_mcp + margin:
            return 'duoi'
        if dist_tip < dist_mcp - margin:
            return 'thu'
        return 'cong'

    # --------------------------------------------------
    # TOUCH DETECTION
    # --------------------------------------------------
    def detect_touching(self, landmarks, idx1, idx2, threshold=0.05):
        """Return True when two landmarks are closer than *threshold*."""
        return self.calculate_distance(landmarks[idx1], landmarks[idx2]) < threshold

    # --------------------------------------------------
    # ANALYZE HAND
    # --------------------------------------------------
    def analyze_hand(self, landmarks, hand_label):
        """Classify every finger of one hand and look up its gesture code.

        Args:
            landmarks: Indexable hand landmarks with x, y, z attributes.
            hand_label: Handedness label; ``'Left'`` selects the left-hand
                rules, any other value the right-hand rules.

        Returns:
            A dict keyed by finger name.  Only fingers whose detected gesture
            has a rule are included; each value holds ``code``, ``gesture``,
            ``finger`` and ``hand``.
        """
        # NOTE(review): the label is taken as reported by MediaPipe — confirm
        # whether it needs flipping for non-mirrored video.
        hand_side = 'left' if hand_label == 'Left' else 'right'
        gestures = {}

        for finger in self.finger_landmarks:
            if finger == 'thumb':
                gesture = self.detect_thumb_gesture(landmarks, hand_side)
            else:
                gesture = self.detect_finger_gesture(landmarks, finger)

            finger_rules = self.rules.get(f"{finger}_{hand_side}", {})
            if gesture in finger_rules:
                gestures[finger] = {
                    'code': finger_rules[gesture],
                    'gesture': gesture,
                    'finger': finger,
                    'hand': hand_side
                }

        return gestures

    # --------------------------------------------------
    # VIDEO PROCESS
    # --------------------------------------------------
    def process_video(self, video_path, output_path='output.json', show_video=False):
        """Run gesture detection on every frame of a video and save the result.

        Args:
            video_path: Path of the video handed to ``cv2.VideoCapture``.
            output_path: Path of the JSON file to write.
            show_video: When True, show each annotated frame in a window and
                stop early if ``q`` is pressed.

        Returns:
            A list with one entry per frame in which at least one gesture was
            recognised.  Each entry holds ``frame``, ``timestamp`` (seconds,
            or None when the frame rate is unknown) and ``gestures``.
        """
        cap = cv2.VideoCapture(video_path)
        results = []

        try:
            if not cap.isOpened():
                # Keep the best-effort behaviour (empty output) but say why.
                print(f"❌ Không mở được video: {video_path}")

            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_id = 0

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                res = self.hands.process(rgb)

                frame_data = {
                    'frame': frame_id,
                    'timestamp': self._frame_timestamp(frame_id, fps),
                    'gestures': []
                }

                if res.multi_hand_landmarks:
                    for hand_lm, handedness in zip(res.multi_hand_landmarks,
                                                   res.multi_handedness):
                        label = handedness.classification[0].label
                        gestures = self.analyze_hand(hand_lm.landmark, label)
                        frame_data['gestures'].extend(gestures.values())

                        # The overlay is only ever seen in the preview window.
                        if show_video:
                            self.mp_drawing.draw_landmarks(
                                frame, hand_lm, self.mp_hands.HAND_CONNECTIONS
                            )

                if frame_data['gestures']:
                    results.append(frame_data)

                if show_video:
                    cv2.imshow("Sign Detection", frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                frame_id += 1
        finally:
            # Always give the capture back, even if a frame fails to process.
            cap.release()
            if show_video:
                cv2.destroyAllWindows()

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)

        print(f"✅ Hoàn thành – phát hiện {len(results)} frame có gesture")
        return results


# --------------------------------------------------
# MAIN
# --------------------------------------------------
if __name__ == "__main__":
    # When executed as a script: build a detector from the rule sheet, then
    # analyse the test video and write the detected gestures to JSON.
    detector = SignLanguageDetector(excel_path='detech.xlsx')
    detector.process_video(
        video_path='test_video.mp4',
        output_path='detected_gestures.json',
        show_video=False,
    )


AttributeError: module 'mediapipe' has no attribute 'solutions'

In [None]:
pip install mediapipe opencv-python pandas numpy


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import json
from math import atan2, degrees

# =======================
# UTILS
# =======================

def dist(a, b):
    """Return the Euclidean distance between *a* and *b* in the x/y plane.

    Only the ``x`` and ``y`` attributes of the two points are read.
    """
    offset = np.subtract([a.x, a.y], [b.x, b.y])
    return np.linalg.norm(offset)

def angle(a, b, c):
    """Return the angle at vertex *b* between rays b→a and b→c, in degrees.

    Only the ``x`` and ``y`` attributes of the points are used.  A small
    constant is added to the denominator so that a zero-length ray does not
    divide by zero; in that case the cosine is 0 and the result is 90.
    """
    ray_a = np.array([a.x - b.x, a.y - b.y])
    ray_c = np.array([c.x - b.x, c.y - b.y])

    scale = np.linalg.norm(ray_a) * np.linalg.norm(ray_c) + 1e-6
    cosine = np.dot(ray_a, ray_c) / scale

    return degrees(np.arccos(cosine))

# =======================
# FINGER DETECTOR
# =======================

class FingerGestureDetector:
    """Label each finger of a hand as 'duoi', 'thu' or 'cong'."""

    # Landmark indices of every finger, ordered from base joint to tip.
    finger_map = {
        "thumb": [1, 2, 3, 4],
        "index": [5, 6, 7, 8],
        "middle": [9, 10, 11, 12],
        "ring": [13, 14, 15, 16],
        "pinky": [17, 18, 19, 20]
    }

    def detect_thumb(self, lm, hand):
        """Classify the thumb from the x order of landmarks 4 and 5.

        For ``hand == "left"`` the thumb counts as 'duoi' when landmark 4 is
        left of landmark 5; for any other value when it is right of it.
        Everything else is 'thu'.
        """
        thumb_tip_x = lm[4].x
        index_base_x = lm[5].x

        if hand == "left":
            stretched = thumb_tip_x < index_base_x
        else:
            stretched = thumb_tip_x > index_base_x

        return "duoi" if stretched else "thu"

    def detect_finger(self, lm, finger):
        """Classify a finger by comparing tip and base distances to landmark 0.

        A tip more than 0.02 farther away than the base joint gives 'duoi',
        more than 0.02 closer gives 'thu', anything in between 'cong'.
        """
        joint_ids = self.finger_map[finger]
        origin = lm[0]
        base_joint = lm[joint_ids[0]]
        fingertip = lm[joint_ids[-1]]

        reach = dist(origin, fingertip)
        base_reach = dist(origin, base_joint)

        if reach > base_reach + 0.02:
            return "duoi"
        if reach < base_reach - 0.02:
            return "thu"
        return "cong"

    def analyze(self, lm, hand):
        """Return a dict mapping every finger name to its detected label."""
        return {
            name: (
                self.detect_thumb(lm, hand)
                if name == "thumb"
                else self.detect_finger(lm, name)
            )
            for name in self.finger_map
        }

# =======================
# HAND POSE
# =======================

class HandPoseDetector:
    """Classify the tilt of a hand from the direction of its knuckle line."""

    def detect(self, lm, threshold_deg=20.0):
        """Return the tilt label of the hand described by *lm*.

        The angle of the vector from landmark 17 to landmark 5 is measured
        with ``atan2`` on the x/y attributes and compared with the threshold.

        Args:
            lm: Indexable hand landmarks; items 5 and 17 are read.
            threshold_deg: Angle in degrees below which (in absolute value)
                the hand counts as level.

        Returns:
            ``"sap_thang"`` when the absolute angle is below the threshold,
            ``"nghieng_phai"`` when the angle is at or above it, and
            ``"nghieng_trai"`` otherwise.
        """
        # NOTE(review): a knuckle line pointing in the -x direction gives an
        # angle near ±180 and is therefore reported as tilted — confirm this
        # is intended for both hands and both palm orientations.
        index_mcp = lm[5]
        pinky_mcp = lm[17]

        dx = index_mcp.x - pinky_mcp.x
        dy = index_mcp.y - pinky_mcp.y
        ang = degrees(atan2(dy, dx))

        if abs(ang) < threshold_deg:
            return "sap_thang"
        # ">=" so that an angle exactly on the threshold is a right tilt
        # instead of falling through to the left-tilt branch.
        if ang >= threshold_deg:
            return "nghieng_phai"
        return "nghieng_trai"

# =======================
# ARM DETECTOR
# =======================

class ArmAnalyzer:
    """Classify how far an arm is bent from its elbow angle."""

    def detect(self, shoulder, elbow, wrist):
        """Return the arm label for the angle at *elbow*.

        The angle between the elbow→shoulder and elbow→wrist rays is obtained
        from ``angle``.  Above 160 degrees the result is ``"duoi_thang"``,
        above 90 degrees ``"nghieng"``, otherwise ``"co"``.
        """
        elbow_deg = angle(shoulder, elbow, wrist)

        # Thresholds are checked from the widest opening downwards.
        for lower_limit, label in ((160, "duoi_thang"), (90, "nghieng")):
            if elbow_deg > lower_limit:
                return label
        return "co"

# =======================
# BODY POSITION
# =======================

class BodyPositionAnalyzer:
    """Place a wrist relative to the nose along the y axis."""

    def detect(self, wrist, nose):
        """Return where *wrist* sits compared with *nose*.

        ``"tren"`` when the wrist's y is more than 0.05 below the nose's y
        value, ``"duoi"`` when it is more than 0.05 above it, and ``"giua"``
        inside that band.
        """
        band_start = nose.y - 0.05
        if wrist.y < band_start:
            return "tren"

        band_end = nose.y + 0.05
        return "duoi" if wrist.y > band_end else "giua"

# =======================
# MAIN DETECTOR
# =======================

class SignLanguageDetector:
    """Describe both hands in a frame using MediaPipe Holistic.

    For every detected hand the result combines the finger labels, the hand
    tilt, the arm bend and the wrist position relative to the nose.
    """

    def __init__(self):
        """Create the Holistic model and the per-aspect analyzers."""
        self.mp_holistic = mp.solutions.holistic
        self.holistic = self.mp_holistic.Holistic(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )

        self.finger = FingerGestureDetector()
        self.hand_pose = HandPoseDetector()
        self.arm = ArmAnalyzer()
        self.body_pos = BodyPositionAnalyzer()

    def close(self):
        """Release the resources held by the Holistic model."""
        self.holistic.close()

    def _pose_joints(self, side):
        """Return the (shoulder, elbow, wrist) pose landmark ids of *side*."""
        ids = self.mp_holistic.PoseLandmark
        if side == "left":
            return ids.LEFT_SHOULDER, ids.LEFT_ELBOW, ids.LEFT_WRIST
        return ids.RIGHT_SHOULDER, ids.RIGHT_ELBOW, ids.RIGHT_WRIST

    def _describe_hand(self, side, lm, pose):
        """Build the result entry for one hand.

        Args:
            side: ``"left"`` or ``"right"``.
            lm: Landmarks of that hand.
            pose: Pose landmarks of the same frame.

        Returns:
            A dict with the keys ``hand``, ``fingers``, ``hand_pose``,
            ``arm`` and ``body_position``.
        """
        shoulder_id, elbow_id, wrist_id = self._pose_joints(side)
        nose = pose[self.mp_holistic.PoseLandmark.NOSE]

        fingers = self.finger.analyze(lm, side)
        pose_hand = self.hand_pose.detect(lm)
        arm_pose = self.arm.detect(pose[shoulder_id], pose[elbow_id], pose[wrist_id])
        body_pos = self.body_pos.detect(pose[wrist_id], nose)

        return {
            "hand": side,
            "fingers": fingers,
            "hand_pose": pose_hand,
            "arm": arm_pose,
            "body_position": body_pos
        }

    def process_frame(self, frame):
        """Analyse one BGR frame.

        Args:
            frame: Image converted from BGR to RGB before being passed to the
                Holistic model.

        Returns:
            A list with one entry per detected hand, left hand first.  The
            list is empty when no pose was found, because the arm and body
            measurements need the pose landmarks.
        """
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        res = self.holistic.process(img)

        if not res.pose_landmarks:
            return []

        pose = res.pose_landmarks.landmark
        hands = (
            ("left", res.left_hand_landmarks),
            ("right", res.right_hand_landmarks),
        )

        return [
            self._describe_hand(side, hand.landmark, pose)
            for side, hand in hands
            if hand
        ]

# =======================
# VIDEO PROCESS
# =======================

def process_video(video_path, output_path="detected_gestures.json"):
    """Run the detector on every frame of a video and write the result as JSON.

    Args:
        video_path: Path of the video handed to ``cv2.VideoCapture``.
        output_path: Path of the JSON file to write.

    Returns:
        The list that was written: one entry per frame in which at least one
        hand was described, holding ``frame``, ``timestamp`` (seconds, or
        None when the reported frame rate is not positive) and ``gestures``.
    """
    detector = SignLanguageDetector()
    cap = cv2.VideoCapture(video_path)
    results = []

    try:
        if not cap.isOpened():
            # Keep the best-effort behaviour (empty output) but say why.
            print(f"❌ Cannot open video: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        # A frame rate of 0 (or NaN) would make the timestamp division fail.
        fps_known = fps > 0
        frame_id = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            gestures = detector.process_frame(frame)
            if gestures:
                results.append({
                    "frame": frame_id,
                    "timestamp": round(frame_id / fps, 2) if fps_known else None,
                    "gestures": gestures
                })

            frame_id += 1
    finally:
        # Release the capture and the Holistic model even if a frame fails.
        cap.release()
        detector.holistic.close()

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"✅ DONE – {output_path}")
    return results

# =======================
# RUN
# =======================

if __name__ == "__main__":
    # When executed as a script, run the detector on the test video.
    process_video(video_path="test_video.mp4")
