In [1]:
!pip install numpy pandas scikit-learn tensorflow opencv-python mediapipe joblib




In [2]:
import cv2
import mediapipe as mp
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
import joblib

In [3]:
class handDetector():
    """Wrapper around the MediaPipe Hands solution.

    Call ``findHands`` on a BGR frame to run detection, then ``findPosition``
    to read the pixel coordinates of one detected hand and ``normalize_hand``
    to obtain a fixed-size crop around those landmarks.
    """

    # Result of the most recent ``findHands`` call. Declared at class level so
    # that ``findPosition`` can be called safely before any frame was processed.
    results = None

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        """Store the settings and build the MediaPipe ``Hands`` object.

        Args:
            mode: Forwarded as ``static_image_mode``.
            maxHands: Forwarded as ``max_num_hands``.
            detectionCon: Forwarded as ``min_detection_confidence``.
            trackCon: Forwarded as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode, max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon, min_tracking_confidence=self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True):
        """Run hand detection on a BGR image and optionally draw the landmarks.

        The detection result is kept in ``self.results`` for ``findPosition``.

        Args:
            img: BGR image; it is drawn on in place when ``draw`` is true.
            draw: Whether to draw the landmarks and their connections on ``img``.

        Returns:
            The same ``img`` object that was passed in.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if draw and self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms,
                                           self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Return the pixel coordinates of the landmarks of one detected hand.

        Args:
            img: Image whose height and width are used to scale the landmark
                x/y values; it is drawn on in place when ``draw`` is true.
            handNo: Index of the hand in the latest detection result.
            draw: Whether to draw a filled circle on every landmark.

        Returns:
            A list of ``[index, x, y]`` entries (integers), or an empty list
            when no detection has been run yet, no hand was found, or
            ``handNo`` does not refer to a detected hand.
        """
        lmList = []
        detected = self.results.multi_hand_landmarks if self.results is not None else None
        if not detected:
            return lmList
        try:
            myHand = detected[handNo]
        except IndexError:
            # Fewer hands were detected than the requested index.
            return lmList

        # Only height and width are needed, so 2-D (grayscale) images work too.
        h, w = img.shape[:2]
        for idx, lm in enumerate(myHand.landmark):
            cx, cy = int(lm.x * w), int(lm.y * h)
            lmList.append([idx, cx, cy])
            if draw:
                cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)
        return lmList

    def normalize_hand(self, img, lmList):
        """Crop the landmark bounding box out of ``img`` and resize it to 200x200.

        Args:
            img: Image to crop from.
            lmList: Landmark list as produced by ``findPosition``.

        Returns:
            The resized crop, or ``None`` when ``lmList`` is empty or the
            bounding box (after clamping to the image) has no area.
        """
        if not lmList:
            return None
        xs = [lm[1] for lm in lmList]
        ys = [lm[2] for lm in lmList]

        # Landmarks may fall outside the frame, so clamp the box to the image.
        x_min = max(0, min(xs))
        y_min = max(0, min(ys))
        x_max = min(img.shape[1], max(xs))
        y_max = min(img.shape[0], max(ys))

        if x_max <= x_min or y_max <= y_min:
            return None

        hand_img = img[y_min:y_max, x_min:x_max]
        standard_size = (200, 200)
        return cv2.resize(hand_img, standard_size, interpolation=cv2.INTER_AREA)

In [4]:
# Step 4: collect hand-landmark samples from the camera
def collect_data(detector, cap, num_samples_per_label=1000, output_file='hand_landmarks_0_to_5.csv'):
    """Capture labelled hand-landmark samples and write them to a CSV file.

    For each label from 0 to 5 the function reads frames from ``cap`` until
    ``num_samples_per_label`` frames with a usable hand (21 landmarks and a
    non-empty crop) have been recorded. Pressing ``q`` in the preview window
    stops the whole collection; the samples gathered so far are still saved.

    Each CSV row holds the landmark pixel coordinates in the order
    ``x0, y0, x1, y1, ..., x20, y20`` followed by ``label``.

    Args:
        detector: Object providing ``findHands``, ``findPosition`` and
            ``normalize_hand`` (see ``handDetector``).
        cap: Opened video capture providing ``read()`` and ``release()``.
        num_samples_per_label: Number of samples to record for every label.
        output_file: Path of the CSV file to write.

    Raises:
        RuntimeError: If the camera fails to deliver a frame many times in a
            row (for example because it was unplugged or never opened).

    The capture device is released and all OpenCV windows are closed before
    returning, even when an error occurs.
    """
    max_failed_reads = 100  # consecutive failed reads tolerated before giving up
    data = []
    stop_requested = False
    try:
        for label in range(6):  # labels 0 to 5
            print(f"Mostrando el número {label}")
            count = 0
            failed_reads = 0
            while count < num_samples_per_label:
                success, img = cap.read()
                if not success:
                    failed_reads += 1
                    if failed_reads >= max_failed_reads:
                        raise RuntimeError(
                            f"Camera returned no frame {max_failed_reads} times in a row")
                    continue
                failed_reads = 0

                img = detector.findHands(img)
                lmList = detector.findPosition(img, draw=False)
                normalized_hand = detector.normalize_hand(img, lmList)
                if normalized_hand is not None and len(lmList) == 21:
                    # Drop the landmark index and keep (x, y) for every landmark.
                    lm_flattened = [coord for lm in lmList for coord in lm[1:]]
                    data.append(lm_flattened + [label])
                    count += 1
                cv2.imshow("Image", img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    # Leave both loops, not only the current label.
                    stop_requested = True
                    break
            if stop_requested:
                break

        # The header must follow the interleaved order used for lm_flattened.
        columns = [f'{axis}{i}' for i in range(21) for axis in ('x', 'y')] + ['label']
        df = pd.DataFrame(data, columns=columns)
        df.to_csv(output_file, index=False)
    finally:
        cap.release()
        cv2.destroyAllWindows()


In [5]:
# Collect the data
if __name__ == "__main__":
    detector = handDetector(detectionCon=0.75)
    cap = cv2.VideoCapture(0)
    # Fail fast when the camera cannot be opened instead of starting a capture
    # loop that can never receive a frame.
    if not cap.isOpened():
        cap.release()
        raise RuntimeError("Could not open camera 0")
    collect_data(detector, cap, num_samples_per_label=1000, output_file='hand_landmarks_0_to_5.csv')

Mostrando el número 0




Mostrando el número 1
Mostrando el número 2
Mostrando el número 3
Mostrando el número 4
Mostrando el número 5


error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'


In [None]:
# Step 5: train the neural-network model
def train_model(data_file='hand_landmarks_0_to_5.csv'):
    """Train a dense classifier on the landmark CSV and persist model and scaler.

    The CSV must contain a ``label`` column with integer classes 0-5; every
    other column is used as a feature. 20% of the rows are held out (fixed
    ``random_state``) and used as validation data during training.

    The scaler is fitted on the training split only and then applied to the
    held-out split, so the validation metrics are not influenced by statistics
    of the held-out rows.

    Args:
        data_file: Path of the CSV file to read.

    Returns:
        A ``(model, scaler)`` tuple: the trained Keras model and the fitted
        ``StandardScaler``.

    Side effects:
        Writes the model to ``hand_gesture_model_0_to_5.h5`` and the scaler to
        ``scaler.pkl`` in the current working directory.
    """
    data = pd.read_csv(data_file)
    X = data.drop(['label'], axis=1).values
    # One-hot encode the 6 classes (digits 0 to 5).
    y = tf.keras.utils.to_categorical(data['label'].values, num_classes=6)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the scaler after splitting to avoid leaking test statistics.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    model = Sequential([
        Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(6, activation='softmax')  # one output per class (digits 0 to 5)
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=50, validation_data=(X_test, y_test))

    model.save('hand_gesture_model_0_to_5.h5')
    joblib.dump(scaler, 'scaler.pkl')
    return model, scaler

In [None]:
# Train the model on the collected landmark CSV; keeps the trained model and
# the fitted scaler available as notebook-level variables.
if __name__ == "__main__":
    model, scaler = train_model('hand_landmarks_0_to_5.csv')