# Initialisation

In [1]:
# Initialisation des modules nécessaires
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import os
import cv2
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
import mediapipe as mp
import pickle
import time

# Collecte du dataset

In [6]:
# Dataset collection: record `dataset_size` webcam frames for every sign class
# into ./dataset/<class index>/<frame index>.jpg.
DATA_DIR = './dataset'
os.makedirs(DATA_DIR, exist_ok=True)

# Number of classes and number of images recorded per class
number_of_classes = 9
dataset_size = 100
labels_dict = {0: 'A', 1: 'C', 2: 'H', 3: "L", 4:"O", 5:"S", 6: "Oui", 7:"Non", 8:"Bonjour"}

# Open the default webcam
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even
# when the cell is interrupted (e.g. "Interrupt kernel" -> KeyboardInterrupt).
try:
    if not cap.isOpened():
        raise RuntimeError("Impossible d'ouvrir la webcam (index 0)")

    stop_requested = False

    # Record the images of each class in turn
    for j in range(number_of_classes):
        class_dir = os.path.join(DATA_DIR, str(j))
        os.makedirs(class_dir, exist_ok=True)

        print(f'Collecte en cours pour : {labels_dict[j]}')

        # Preview loop: "r" starts the recording, "q" aborts the whole collection.
        while True:
            ret, frame = cap.read()
            if not ret:
                # A failed read returns frame=None; fail with a clear message
                # instead of an opaque OpenCV error inside putText.
                raise RuntimeError("Impossible de lire une image depuis la webcam")
            cv2.putText(frame, f'Pret? Presse "R" pour enregistrer ! (Cible : {labels_dict[j]})', (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.imshow('frame', frame)
            # Mask to the low byte: some platforms return extra high bits.
            key = cv2.waitKey(25) & 0xFF
            if key == ord('r'):
                break
            if key == ord('q'):
                stop_requested = True
                break

        if stop_requested:
            break

        # Capture the frames of the current class
        counter = 0
        while counter < dataset_size:
            ret, frame = cap.read()
            if not ret:
                raise RuntimeError("Impossible de lire une image depuis la webcam")
            cv2.imshow('frame', frame)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                stop_requested = True
                break
            cv2.imwrite(os.path.join(class_dir, f'{counter}.jpg'), frame)
            counter += 1

        if stop_requested:
            break
finally:
    # Release the camera and close every OpenCV window
    cap.release()
    cv2.destroyAllWindows()



Collecte en cours pour : A


KeyboardInterrupt: 

In [5]:
# Landmark extraction: run MediaPipe Hands on every dataset image and store
# the hand landmarks (x/y offsets from the hand's top-left corner) with the
# class label taken from the directory name.
DATA_DIR = './dataset'
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Feature vectors and their labels
data = []
labels = []

# max_num_hands=1: exactly one hand per sample, so every feature vector holds
# 21 landmarks * 2 coordinates = 42 values. Previously a second detected hand
# was appended to the same vector and normalised against the first hand.
# The context manager releases the MediaPipe graph when the loop ends.
with mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5) as hands:
    # sorted() makes the sample order deterministic across runs/platforms.
    for dir_ in sorted(os.listdir(DATA_DIR)):
        class_path = os.path.join(DATA_DIR, dir_)
        if not os.path.isdir(class_path):
            # Skip stray files such as .DS_Store that would break listdir below.
            continue

        for img_path in sorted(os.listdir(class_path)):
            img = cv2.imread(os.path.join(DATA_DIR, dir_, img_path))
            if img is None:
                print(f"Erreur : Impossible de lire l'image {os.path.join(DATA_DIR, dir_, img_path)}")
                continue
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            results = hands.process(img_rgb)
            if not results.multi_hand_landmarks:
                # No hand detected: the image contributes no sample.
                continue

            hand_landmarks = results.multi_hand_landmarks[0]
            x_ = [lm.x for lm in hand_landmarks.landmark]
            y_ = [lm.y for lm in hand_landmarks.landmark]
            min_x, min_y = min(x_), min(y_)

            # Interleaved (x, y) offsets relative to the hand's bounding-box origin
            data_aux = []
            for lm in hand_landmarks.landmark:
                data_aux.append(lm.x - min_x)
                data_aux.append(lm.y - min_y)

            # Draw the landmarks on the image (landmark coordinates are
            # relative to the image size, hence the scaling).
            img_height, img_width, _ = img.shape
            for landmark in hand_landmarks.landmark:
                cx, cy = int(landmark.x * img_width), int(landmark.y * img_height)
                cv2.circle(img, (cx, cy), 5, (0, 255, 0), -1)

            data.append(data_aux)
            labels.append(dir_)

            # Show the image with its landmarks
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.title(f'Classe {dir_} - Image {img_path}')
            plt.axis('off')
            plt.show()

# Save the extracted features to a pickle file
with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)

# data.pickle stores the pre-processed data extracted from the dataset images:
# the normalised coordinates of the hand landmarks detected in each image.


I0000 00:00:1726325303.706025 2096152 gl_context.cc:357] GL version: 2.1 (2.1 ATI-5.5.17), renderer: AMD Radeon Pro 5500M OpenGL Engine
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1726325303.735403 2097767 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


NameError: name 'DATA_DIR' is not defined

# Entraînement du modèle de classification

In [3]:
# Load the pre-processed landmark features.
# NOTE(security): pickle.load can execute arbitrary code; only load a
# data.pickle file produced by this notebook.
with open('data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

expected_length = 42  # Expected number of x and y coordinates for 21 landmarks
RANDOM_STATE = 42     # Fixed seed so the split and the forest are reproducible

# Keep only the samples with the expected number of features
data = []
labels = []
skipped = 0
for d, label in zip(data_dict['data'], data_dict['labels']):
    if len(d) == expected_length:
        data.append(d)
        labels.append(label)
    else:
        skipped += 1

# Report discarded samples instead of dropping them silently
if skipped:
    print(f"{skipped} échantillon(s) ignoré(s) (longueur différente de {expected_length})")

if not data:
    raise ValueError("Aucun échantillon valide dans data.pickle : impossible d'entraîner le modèle")

# Convert to NumPy arrays
data = np.asarray(data)
labels = np.asarray(labels)

# Split into training and test sets (stratified to keep class proportions)
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=RANDOM_STATE
)

# Create and train the model
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(x_train, y_train)

# Predict on the held-out test set and evaluate.
# accuracy_score expects (y_true, y_pred) in that order.
y_predict = model.predict(x_test)
score = accuracy_score(y_test, y_predict)

# The printed percentage is the accuracy measured on the test split.
print(f'{score * 100}% de réussite sur l\'entrainement de ce modele !')

# Save the model
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)

# Once trained and evaluated, the model is saved to a file so it can be reused
# without retraining each time, which saves time and computing resources.

100.0% de réussite sur l'entrainement de ce modele !


# Test du modèle

In [3]:
# Live test: classify the hand sign seen by the webcam and draw the predicted
# label next to each detected hand. Press "q" to quit.

# Load the model.
# NOTE(security): pickle.load can execute arbitrary code; only load a model.p
# file produced by this notebook.
with open('./model.p', 'rb') as f:
    model_dict = pickle.load(f)
model = model_dict['model']

# Open the default webcam
cap = cv2.VideoCapture(0)

# Initialise MediaPipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Mapping from the recorded class index to its label
labels_dict = {0: 'A', 1: 'C', 2: 'H', 3: "L", 4:"O", 5:"S", 6: "Oui", 7:"Non", 8:"Bonjour"}

BOX_PADDING = 10        # Pixels added around the hand's bounding box
MAX_FAILED_READS = 30   # Consecutive failed reads tolerated before giving up
failed_reads = 0
prediction_error_reported = False

# try/finally releases the camera, the MediaPipe graph and the window even if
# the cell is interrupted or an unexpected error occurs.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Tolerate transient failures, but do not spin forever when the
            # camera is unavailable (no window means "q" can never be pressed).
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("Erreur : impossible de lire la webcam, arrêt de la capture.")
                break
            continue
        failed_reads = 0

        H, W, _ = frame.shape
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS, mp_drawing_styles.get_default_hand_landmarks_style(), mp_drawing_styles.get_default_hand_connections_style())

                # Features are rebuilt for each hand so a second hand is not
                # appended to (and normalised against) the first one, which
                # made the vector 84 values long and broke the prediction.
                x_ = [lm.x for lm in hand_landmarks.landmark]
                y_ = [lm.y for lm in hand_landmarks.landmark]
                min_x, min_y = min(x_), min(y_)

                data_aux = []
                for lm in hand_landmarks.landmark:
                    data_aux.append(lm.x - min_x)
                    data_aux.append(lm.y - min_y)

                # Padded bounding box: subtract on the top-left corner and add
                # on the bottom-right corner so the box surrounds the hand.
                x1 = int(min_x * W) - BOX_PADDING
                y1 = int(min_y * H) - BOX_PADDING
                x2 = int(max(x_) * W) + BOX_PADDING
                y2 = int(max(y_) * H) + BOX_PADDING

                try:
                    prediction = model.predict([np.asarray(data_aux)])
                    predicted_character = labels_dict[int(prediction[0])]
                except (ValueError, KeyError) as exc:
                    # Feature-count mismatch or unknown label: skip this hand,
                    # but report the problem once instead of hiding it.
                    if not prediction_error_reported:
                        print(f"Erreur de prédiction : {exc}")
                        prediction_error_reported = True
                    continue

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
                cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)

        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

I0000 00:00:1726752834.970478 2738878 gl_context.cc:357] GL version: 2.1 (2.1 ATI-5.5.17), renderer: AMD Radeon Pro 5500M OpenGL Engine
W0000 00:00:1726752834.988283 2740310 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726752835.009881 2740310 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


# Mini jeu : Le pendu

In [4]:
# Hangman mini-game: the player spells the hidden word with hand signs.
# A sign is validated once it has been predicted on enough consecutive frames.
# Press "q" to quit.

# Load the model.
# NOTE(security): pickle.load can execute arbitrary code; only load a model.p
# file produced by this notebook.
with open('./model.p', 'rb') as f:
    model_dict = pickle.load(f)
model = model_dict['model']

# Open the default webcam
cap = cv2.VideoCapture(0)

# Initialise MediaPipe Hands.
# max_num_hands=1: the game tracks a single sign; with two hands the features
# of both hands were concatenated and every prediction failed silently.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.3)

# Mapping from the class index to its label
labels_dict = {0: 'A', 1: 'C', 2: 'H', 3: "L", 4: "O", 5: "S", 6: "Oui", 7: "Non", 8: "Bonjour"}

# Game state
word_to_guess = "HALO"
word_found = ["_"] * len(word_to_guess)
vies = 5
letters_attempted = set()
previous_letter = ""
stability_time = 0      # Consecutive frames with the same prediction
stability_duration = 1  # Time a sign must stay stable to be validated, in seconds
cross_start_time = 0
cross_duration = 2      # How long the red cross stays on screen, in seconds

ASSUMED_FPS = 30        # Frame rate used to convert stability_duration to frames
BOX_PADDING = 10        # Pixels added around the hand's bounding box
MAX_FAILED_READS = 30   # Consecutive failed reads tolerated before giving up
failed_reads = 0
prediction_error_reported = False

# try/finally releases the camera, the MediaPipe graph and the window even if
# the cell is interrupted or an unexpected error occurs.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Tolerate transient failures, but do not spin forever when the
            # camera is unavailable (no window means "q" can never be pressed).
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("Erreur : impossible de lire la webcam, arrêt du jeu.")
                break
            continue
        failed_reads = 0

        H, W, _ = frame.shape
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS, mp_drawing_styles.get_default_hand_landmarks_style(), mp_drawing_styles.get_default_hand_connections_style())

                # Interleaved (x, y) offsets relative to the hand's top-left corner
                x_ = [lm.x for lm in hand_landmarks.landmark]
                y_ = [lm.y for lm in hand_landmarks.landmark]
                min_x, min_y = min(x_), min(y_)

                data_aux = []
                for lm in hand_landmarks.landmark:
                    data_aux.append(lm.x - min_x)
                    data_aux.append(lm.y - min_y)

                try:
                    prediction = model.predict([np.asarray(data_aux)])
                    predicted_character = labels_dict[int(prediction[0])]
                except (ValueError, KeyError) as exc:
                    # Feature-count mismatch or unknown label: skip this hand,
                    # but report the problem once instead of hiding it.
                    if not prediction_error_reported:
                        print(f"Erreur de prédiction : {exc}")
                        prediction_error_reported = True
                    continue

                # Count how many consecutive frames gave the same prediction
                if predicted_character == previous_letter:
                    stability_time += 1
                else:
                    previous_letter = predicted_character
                    stability_time = 0

                if stability_time >= stability_duration * ASSUMED_FPS:
                    if predicted_character not in letters_attempted:
                        letters_attempted.add(predicted_character)
                        if predicted_character in word_to_guess:
                            # Reveal every occurrence of the letter
                            for i, char in enumerate(word_to_guess):
                                if char == predicted_character:
                                    word_found[i] = predicted_character
                        else:
                            vies -= 1
                            cross_start_time = time.time()

                    # Restart the stability counter after a validation. The
                    # original assigned a misspelled variable here, so the
                    # counter was never actually reset.
                    stability_time = 0

                # Padded bounding box: subtract on the top-left corner and add
                # on the bottom-right corner so the box surrounds the hand.
                x1 = int(min_x * W) - BOX_PADDING
                y1 = int(min_y * H) - BOX_PADDING
                x2 = int(max(x_) * W) + BOX_PADDING
                y2 = int(max(y_) * H) + BOX_PADDING

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
                cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)

        # Heads-up display: word progress and remaining lives
        mot_affiche = " ".join(word_found)
        cv2.putText(frame, mot_affiche, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (255, 255, 255), 3, cv2.LINE_AA)
        cv2.putText(frame, f'Vies restantes: {vies}', (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (255, 255, 255), 3, cv2.LINE_AA)

        # Show a red cross for cross_duration seconds after a wrong guess
        if time.time() - cross_start_time < cross_duration:
            cv2.putText(frame, 'X', (W - 50, H - 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 4, cv2.LINE_AA)

        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # Win condition
        if "_" not in word_found:
            cv2.putText(frame, "Victoire !", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3, cv2.LINE_AA)
            cv2.imshow('frame', frame)
            cv2.waitKey(5000)
            break

        # Lose condition
        if vies <= 0:
            cv2.putText(frame, "Perdu !", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 255), 3, cv2.LINE_AA)
            cv2.imshow('frame', frame)
            cv2.waitKey(5000)
            break
finally:
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

I0000 00:00:1726752855.741478 2738878 gl_context.cc:357] GL version: 2.1 (2.1 ATI-5.5.17), renderer: AMD Radeon Pro 5500M OpenGL Engine
W0000 00:00:1726752855.751474 2740854 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726752855.767658 2740859 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


# Détection de la silhouette

In [5]:
import cv2
import mediapipe as mp

# Silhouette detection: draw the body pose, hand and face-mesh landmarks on
# the live webcam feed. Press "q" to quit.

# Open the default webcam
cap = cv2.VideoCapture(0)

mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5)
# refine_landmarks=True makes Face Mesh output the extra iris landmarks;
# without it only 468 landmarks are returned and the iris drawing below
# could never run.
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, refine_landmarks=True, min_detection_confidence=0.5, min_tracking_confidence=0.5)

MAX_FAILED_READS = 30   # Consecutive failed reads tolerated before giving up
failed_reads = 0

# try/finally releases the camera, the MediaPipe graphs and the window even if
# the cell is interrupted or an unexpected error occurs.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Tolerate transient failures, but do not spin forever when the
            # camera is unavailable (no window means "q" can never be pressed).
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("Erreur : impossible de lire la webcam, arrêt de la détection.")
                break
            continue
        failed_reads = 0

        # Convert the frame to RGB
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Run the three detectors on the same frame
        results_pose = pose.process(frame_rgb)
        results_hands = hands.process(frame_rgb)
        results_face = face_mesh.process(frame_rgb)

        # Draw the key points and their connections on the frame
        if results_pose.pose_landmarks:
            mp_drawing.draw_landmarks(
                frame,
                results_pose.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
            )

        if results_hands.multi_hand_landmarks:
            for hand_landmarks in results_hands.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    landmark_drawing_spec=mp_drawing_styles.get_default_hand_landmarks_style(),
                    connection_drawing_spec=mp_drawing_styles.get_default_hand_connections_style()
                )

        if results_face.multi_face_landmarks:
            for face_landmarks in results_face.multi_face_landmarks:
                mp_drawing.draw_landmarks(
                    frame,
                    face_landmarks,
                    mp_face_mesh.FACEMESH_TESSELATION,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
                )
                mp_drawing.draw_landmarks(
                    frame,
                    face_landmarks,
                    mp_face_mesh.FACEMESH_CONTOURS,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style()
                )
                # Only draw the irises when the iris landmarks (indices past
                # the 468 base mesh points) are present in the result.
                if len(face_landmarks.landmark) > 468:
                    mp_drawing.draw_landmarks(
                        frame,
                        face_landmarks,
                        mp_face_mesh.FACEMESH_IRISES,
                        landmark_drawing_spec=None,
                        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_iris_connections_style()
                    )

        # Show the annotated frame
        cv2.imshow('Pose, Hands et Face Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    pose.close()
    hands.close()
    face_mesh.close()
    cv2.destroyAllWindows()


I0000 00:00:1726753081.940780 2738878 gl_context.cc:357] GL version: 2.1 (2.1 ATI-5.5.17), renderer: AMD Radeon Pro 5500M OpenGL Engine
I0000 00:00:1726753081.947852 2738878 gl_context.cc:357] GL version: 2.1 (2.1 ATI-5.5.17), renderer: AMD Radeon Pro 5500M OpenGL Engine
W0000 00:00:1726753081.967003 2746172 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1726753081.968292 2738878 gl_context.cc:357] GL version: 2.1 (2.1 ATI-5.5.17), renderer: AMD Radeon Pro 5500M OpenGL Engine
W0000 00:00:1726753081.971935 2746192 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726753081.977295 2746190 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726753081.983771 2746172 inferenc