In [2]:
# Block 1: Import necessary libraries
import os
import cv2
import numpy as np
import shutil
from ultralytics import YOLO
import time

# Reaching this line means every import above succeeded; a missing package
# would already have raised ImportError and stopped the cell.
print("Libraries imported successfully!")

Libraries imported successfully!


In [3]:
# Block 2: Define dataset path and check if it exists
dataset_path = r"D:\objdetection_imp_model\Image"


def sample_subdirs(root, limit=5):
    """Return up to `limit` sub-directory names found directly under `root`.

    Plain files are ignored and the names are sorted, so the preview is
    stable between runs (os.listdir returns entries in arbitrary order).
    """
    subdirs = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    )
    return subdirs[:limit]


# isdir (not exists): a plain file at this path would make os.listdir raise.
if os.path.isdir(dataset_path):
    print(f"Dataset path found: {dataset_path}")
    # List a few sample directories to verify
    sample_dirs = sample_subdirs(dataset_path)  # First 5 directories
    print(f"Sample directories: {sample_dirs}")
else:
    print(f"Warning: Dataset path not found: {dataset_path}")
    # You might need to update the path

Dataset path found: D:\objdetection_imp_model\Image
Sample directories: ['A', 'B', 'C', 'D', 'E']


In [4]:
# Block 3: Define the HandGestureRecognition class
class HandGestureRecognition:
    """Configuration holder for the dataset-preparation and training steps.

    The working methods are attached to this class in later cells.
    """

    def __init__(self, dataset_path, output_path='runs/detect'):
        """Store the paths and defaults used by the rest of the notebook.

        Args:
            dataset_path: Directory that holds the source images.
            output_path: Directory under which training runs are expected.
        """
        # Labels 'A'..'Z', one class per uppercase letter, in alphabetical order.
        first_code, last_code = ord('A'), ord('Z')
        self.class_names = [chr(code) for code in range(first_code, last_code + 1)]

        self.dataset_path = dataset_path
        self.output_path = output_path
        self.model = None  # filled in once a model has been trained

        # Region-of-interest origin and size.
        # NOTE(review): not read by any method visible in this notebook - confirm it is still needed.
        self.roi_x = self.roi_y = 100
        self.roi_width = self.roi_height = 300

        print(f"HandGestureRecognition initialized with classes: {self.class_names}")


In [5]:
# Block 4: Create an instance of the class
# `dataset_path` comes from Block 2; the constructor prints its class list.
hgr = HandGestureRecognition(dataset_path)
print("HandGestureRecognition instance created")

HandGestureRecognition initialized with classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
HandGestureRecognition instance created


In [7]:
# Block 5: Add the prepare_dataset method to the class
def _export_split(image_files, folder_path, split_dir, alphabet, class_id):
    """Copy readable images into `split_dir` and write one YOLO label per image.

    Args:
        image_files: File names (not paths) inside `folder_path` to export.
        folder_path: Source folder of the current class.
        split_dir: Destination split folder containing `images/` and `labels/`.
        alphabet: Class name, used as a prefix so names from different
            class folders cannot collide.
        class_id: Integer class index written to the label file.

    Returns:
        Number of images actually exported (unreadable files are skipped).
    """
    exported = 0
    for img_file in image_files:
        img_path = os.path.join(folder_path, img_file)
        # Skip anything OpenCV cannot decode so it never reaches training.
        if cv2.imread(img_path) is None:
            continue

        stem = os.path.splitext(img_file)[0]
        shutil.copy(img_path, f"{split_dir}/images/{alphabet}_{img_file}")

        # YOLO label format: class_id center_x center_y width height (normalised).
        # Placeholder box: assumes the object is centred and fills ~70% of the image.
        with open(f"{split_dir}/labels/{alphabet}_{stem}.txt", "w") as f:
            f.write(f"{class_id} 0.5 0.5 0.7 0.7\n")

        exported += 1
    return exported


def prepare_dataset(self, yaml_file="hand_gestures.yaml"):
    """
    Prepares dataset in YOLOv8 format.

    Rebuilds ``hand_gestures_dataset/`` in the current working directory
    (an existing folder is deleted first) with ``train`` and ``val`` splits,
    and writes the dataset YAML describing it.

    Each class in ``self.class_names`` is read from a sub-folder of
    ``self.dataset_path`` with the same name. Files are sorted before the
    80/20 split so that the split is reproducible between runs.

    Args:
        yaml_file: Path of the dataset YAML file to write.

    Returns:
        The `yaml_file` path that was written.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    # Create directory structure (start from a clean slate)
    dataset_dir = "hand_gestures_dataset"
    if os.path.exists(dataset_dir):
        shutil.rmtree(dataset_dir)

    for split in ("train", "val"):
        for kind in ("images", "labels"):
            os.makedirs(f"{dataset_dir}/{split}/{kind}", exist_ok=True)

    # Create YAML file for dataset
    with open(yaml_file, "w") as f:
        f.write(f"path: {os.path.abspath(dataset_dir)}\n")
        f.write("train: train/images\n")
        f.write("val: val/images\n")
        # Derived from class_names so the count can never disagree with the names.
        f.write(f"nc: {len(self.class_names)}\n")
        f.write(f"names: {self.class_names}\n")

    print("Converting dataset to YOLOv8 format...")

    # Process each alphabet folder
    processed_images = 0
    for idx, alphabet in enumerate(self.class_names):
        folder_path = os.path.join(self.dataset_path, alphabet)
        if not os.path.isdir(folder_path):
            print(f"Warning: Folder for {alphabet} not found at {folder_path}")
            continue

        # Case-insensitive extension match (keeps e.g. ".JPG"); sorted because
        # os.listdir order is arbitrary and would make the split unstable.
        images = sorted(
            f for f in os.listdir(folder_path)
            if f.lower().endswith(image_extensions)
        )
        print(f"Processing {alphabet}: {len(images)} images found")

        # Split into train (80%) and validation (20%) sets
        split_idx = int(len(images) * 0.8)
        processed_images += _export_split(
            images[:split_idx], folder_path, f"{dataset_dir}/train", alphabet, idx
        )
        processed_images += _export_split(
            images[split_idx:], folder_path, f"{dataset_dir}/val", alphabet, idx
        )

    print(f"Dataset prepared successfully. Total processed images: {processed_images}")
    print(f"YAML file created at {yaml_file}")
    return yaml_file

# Add the method to the class
# Assigning the function as a class attribute makes it available on instances
# that already exist (such as `hgr`) without re-running the class-definition cell.
HandGestureRecognition.prepare_dataset = prepare_dataset

In [8]:
# Block 6: Prepare the dataset
# Rebuilds the YOLO-format dataset folder from scratch (an existing one is
# deleted) and returns the path of the generated YAML file.
yaml_file = hgr.prepare_dataset()
print(f"Dataset preparation complete. YAML file: {yaml_file}")

Converting dataset to YOLOv8 format...
Processing A: 101 images found
Processing B: 101 images found
Processing C: 101 images found
Processing D: 101 images found
Processing E: 101 images found
Processing F: 101 images found
Processing G: 101 images found
Processing H: 101 images found
Processing I: 101 images found
Processing J: 101 images found
Processing K: 101 images found
Processing L: 101 images found
Processing M: 101 images found
Processing N: 101 images found
Processing O: 101 images found
Processing P: 101 images found
Processing Q: 101 images found
Processing R: 101 images found
Processing S: 101 images found
Processing T: 101 images found
Processing U: 101 images found
Processing V: 101 images found
Processing W: 101 images found
Processing X: 101 images found
Processing Y: 101 images found
Processing Z: 101 images found
Dataset prepared successfully. Total processed images: 2626
YAML file created at hand_gestures.yaml
Dataset preparation complete. YAML file: hand_gestures.

In [11]:
# Block 7: Add the train_model method to the class
def train_model(self, yaml_file, epochs=35, img_size=640, batch_size=16):
    """Train YOLOv8 model on the prepared dataset.

    Fine-tunes the pretrained nano checkpoint and then reloads the best
    weights produced by this run into ``self.model``.

    Args:
        yaml_file: Dataset YAML describing the train/val image folders.
        epochs: Maximum number of training epochs.
        img_size: Square image size passed to the trainer as ``imgsz``.
        batch_size: Batch size passed to the trainer as ``batch``.

    Returns:
        The YOLO model loaded from the best weights of this run.
    """
    print(f"Starting model training with {epochs} epochs...")

    # Initialize YOLOv8 model
    model = YOLO('yolov8n.pt')  # Start with pretrained nano model

    # Train the model
    model.train(
        data=yaml_file,
        epochs=epochs,
        imgsz=img_size,
        batch=batch_size,
        patience=10,  # Early stopping
        name='hand_gesture_model'
    )

    # Ask the trainer where it actually wrote the weights. When a run named
    # 'hand_gesture_model' already exists the run directory gets a numeric
    # suffix, so a hard-coded path would silently load weights from an older run.
    trainer = getattr(model, "trainer", None)
    best_weights = getattr(trainer, "best", None)
    if best_weights is None:
        # Fallback: the location used when the run name was not incremented.
        best_weights = f'{self.output_path}/hand_gesture_model/weights/best.pt'
    best_weights = str(best_weights)

    print(f"Training completed. Best weights saved at {best_weights}")

    # Load the best model
    self.model = YOLO(best_weights)
    return self.model

# Add the method to the class
# Assigning the function as a class attribute makes it available on instances
# that already exist (such as `hgr`) without re-running the class-definition cell.
HandGestureRecognition.train_model = train_model


In [13]:
# Block 8: Train the model (you can adjust epochs for faster testing)
# Running this cell starts a full training run immediately.
# The recorded run below took about 6 hours for 35 epochs on CPU.
model = hgr.train_model(yaml_file, epochs=35)  # Pass a smaller `epochs` value for a quick test
print("Model training complete")

Starting model training with 35 epochs...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:01<00:00, 5.92MB/s]


Ultralytics 8.3.91  Python-3.10.11 torch-2.6.0+cpu CPU (AMD Ryzen 7 5700U with Radeon Graphics)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=hand_gestures.yaml, epochs=35, time=None, patience=10, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=hand_gesture_model, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=T

[34m[1mtrain: [0mScanning D:\objdetection_imp_model\hand_gestures_dataset\train\labels... 2080 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2080/2080 [00:03<00:00, 534.75it/s]


[34m[1mtrain: [0mNew cache created: D:\objdetection_imp_model\hand_gestures_dataset\train\labels.cache


[34m[1mval: [0mScanning D:\objdetection_imp_model\hand_gestures_dataset\val\labels... 546 images, 0 backgrounds, 0 corrupt: 100%|██████████| 546/546 [00:01<00:00, 497.54it/s]


[34m[1mval: [0mNew cache created: D:\objdetection_imp_model\hand_gestures_dataset\val\labels.cache
Plotting labels to runs\detect\hand_gesture_model\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000333, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\hand_gesture_model[0m
Starting training for 35 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/35         0G      1.329      4.171      1.834         40        640: 100%|██████████| 130/130 [09:11<00:00,  4.24s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:51<00:00,  2.83s/it]

                   all        546        546      0.323      0.417      0.199      0.163






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/35         0G      0.665      2.972        1.2         37        640: 100%|██████████| 130/130 [09:30<00:00,  4.39s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:51<00:00,  2.85s/it]

                   all        546        546      0.495      0.739       0.67      0.602






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/35         0G     0.5607      2.211      1.118         42        640: 100%|██████████| 130/130 [09:29<00:00,  4.38s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:48<00:00,  2.69s/it]

                   all        546        546      0.685      0.835      0.854      0.813






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/35         0G     0.4998      1.756       1.07         40        640: 100%|██████████| 130/130 [09:31<00:00,  4.40s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:47<00:00,  2.66s/it]

                   all        546        546      0.859      0.957      0.979      0.904






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/35         0G     0.4455       1.45      1.028         47        640: 100%|██████████| 130/130 [09:19<00:00,  4.30s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:48<00:00,  2.68s/it]

                   all        546        546      0.939      0.926      0.986      0.952






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/35         0G     0.4007      1.253      1.002         40        640: 100%|██████████| 130/130 [09:25<00:00,  4.35s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:49<00:00,  2.74s/it]

                   all        546        546      0.925      0.957      0.988      0.953






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/35         0G     0.3814      1.122     0.9877         36        640: 100%|██████████| 130/130 [09:21<00:00,  4.32s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:48<00:00,  2.71s/it]

                   all        546        546      0.965      0.988      0.991      0.964






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/35         0G     0.3737      1.046     0.9857         45        640: 100%|██████████| 130/130 [09:21<00:00,  4.32s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:48<00:00,  2.70s/it]

                   all        546        546      0.971      0.984      0.994      0.981






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/35         0G     0.3413     0.9848     0.9693         37        640: 100%|██████████| 130/130 [09:20<00:00,  4.31s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:49<00:00,  2.74s/it]

                   all        546        546      0.979      0.992      0.995      0.986






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/35         0G     0.3269      0.901     0.9617         42        640: 100%|██████████| 130/130 [09:27<00:00,  4.37s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:49<00:00,  2.77s/it]

                   all        546        546      0.989      0.988      0.992      0.974






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/35         0G     0.3144     0.8536     0.9572         49        640: 100%|██████████| 130/130 [09:25<00:00,  4.35s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:51<00:00,  2.85s/it]

                   all        546        546      0.983      0.993      0.995      0.975






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/35         0G     0.2949     0.8082     0.9411         44        640: 100%|██████████| 130/130 [09:21<00:00,  4.32s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:50<00:00,  2.83s/it]

                   all        546        546      0.952      0.979      0.995      0.963






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/35         0G     0.2938     0.7647     0.9387         53        640: 100%|██████████| 130/130 [09:27<00:00,  4.36s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:51<00:00,  2.85s/it]

                   all        546        546      0.978      0.992      0.995      0.983






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/35         0G      0.278     0.7481     0.9323         38        640: 100%|██████████| 130/130 [09:22<00:00,  4.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:53<00:00,  2.96s/it]

                   all        546        546      0.989      0.991      0.995      0.967






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/35         0G     0.2811     0.7224     0.9392         39        640: 100%|██████████| 130/130 [09:26<00:00,  4.35s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:53<00:00,  2.96s/it]

                   all        546        546      0.982      0.995      0.995      0.986






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/35         0G     0.2651     0.7185     0.9264         38        640: 100%|██████████| 130/130 [09:23<00:00,  4.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:52<00:00,  2.94s/it]

                   all        546        546      0.958      0.996      0.994      0.987






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/35         0G     0.2571     0.6828     0.9208         40        640: 100%|██████████| 130/130 [09:16<00:00,  4.28s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:52<00:00,  2.89s/it]

                   all        546        546      0.985      0.991      0.995      0.989






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/35         0G     0.2512     0.6355      0.917         32        640: 100%|██████████| 130/130 [09:23<00:00,  4.34s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:52<00:00,  2.91s/it]

                   all        546        546      0.985      0.994      0.995      0.993






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/35         0G      0.241      0.612     0.9217         42        640: 100%|██████████| 130/130 [09:15<00:00,  4.27s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:53<00:00,  2.95s/it]

                   all        546        546      0.986      0.999      0.995      0.994






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/35         0G     0.2352     0.6029     0.9152         42        640: 100%|██████████| 130/130 [09:16<00:00,  4.28s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:53<00:00,  2.98s/it]

                   all        546        546      0.991      0.996      0.995      0.994






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/35         0G     0.2234     0.5918     0.9069         40        640: 100%|██████████| 130/130 [09:13<00:00,  4.26s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:53<00:00,  2.95s/it]

                   all        546        546      0.992      0.993      0.995      0.992






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/35         0G     0.2267     0.5696     0.9084         44        640: 100%|██████████| 130/130 [09:20<00:00,  4.31s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:54<00:00,  3.01s/it]

                   all        546        546      0.972      0.994      0.995      0.992






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/35         0G     0.2118     0.5345     0.8975         45        640: 100%|██████████| 130/130 [09:41<00:00,  4.48s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [01:06<00:00,  3.68s/it]

                   all        546        546      0.994      0.993      0.995      0.992






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/35         0G     0.2085     0.5261     0.8995         45        640: 100%|██████████| 130/130 [11:00<00:00,  5.08s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:56<00:00,  3.14s/it]

                   all        546        546      0.988      0.986      0.995      0.994






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/35         0G     0.2086     0.5263     0.8993         41        640: 100%|██████████| 130/130 [10:36<00:00,  4.90s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:54<00:00,  3.01s/it]

                   all        546        546      0.993      0.993      0.995      0.994





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/35         0G     0.2422     0.4757     0.9161         16        640: 100%|██████████| 130/130 [09:20<00:00,  4.31s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:52<00:00,  2.91s/it]

                   all        546        546      0.981      0.998      0.995      0.994






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/35         0G     0.2059     0.4155     0.8863         16        640: 100%|██████████| 130/130 [09:16<00:00,  4.28s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:51<00:00,  2.88s/it]

                   all        546        546      0.989      0.995      0.995      0.995






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/35         0G     0.1925     0.3918     0.8751         16        640: 100%|██████████| 130/130 [09:22<00:00,  4.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:53<00:00,  2.98s/it]

                   all        546        546      0.994      0.997      0.995      0.995






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/35         0G     0.1873     0.3768     0.8791         16        640: 100%|██████████| 130/130 [09:20<00:00,  4.31s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:52<00:00,  2.94s/it]

                   all        546        546      0.993      0.996      0.995      0.995






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/35         0G     0.1777     0.3617     0.8629         16        640: 100%|██████████| 130/130 [09:15<00:00,  4.27s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:53<00:00,  2.95s/it]

                   all        546        546      0.994      0.997      0.995      0.995






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      31/35         0G     0.1712      0.353     0.8531         16        640: 100%|██████████| 130/130 [09:17<00:00,  4.29s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:51<00:00,  2.88s/it]

                   all        546        546      0.994      0.996      0.995      0.995






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      32/35         0G     0.1586     0.3384     0.8423         16        640: 100%|██████████| 130/130 [09:22<00:00,  4.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:51<00:00,  2.89s/it]


                   all        546        546      0.993      0.998      0.995      0.995

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      33/35         0G     0.1534     0.3314     0.8451         16        640: 100%|██████████| 130/130 [09:30<00:00,  4.39s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:55<00:00,  3.08s/it]

                   all        546        546      0.995      0.998      0.995      0.995






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      34/35         0G     0.1484      0.323     0.8323         16        640: 100%|██████████| 130/130 [09:23<00:00,  4.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:52<00:00,  2.92s/it]


                   all        546        546      0.995      0.998      0.995      0.995

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      35/35         0G     0.1419     0.3162     0.8321         16        640: 100%|██████████| 130/130 [09:23<00:00,  4.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:48<00:00,  2.72s/it]


                   all        546        546      0.993      0.999      0.995      0.995

35 epochs completed in 6.029 hours.
Optimizer stripped from runs\detect\hand_gesture_model\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\hand_gesture_model\weights\best.pt, 6.2MB

Validating runs\detect\hand_gesture_model\weights\best.pt...
Ultralytics 8.3.91  Python-3.10.11 torch-2.6.0+cpu CPU (AMD Ryzen 7 5700U with Radeon Graphics)
Model summary (fused): 72 layers, 3,010,718 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:37<00:00,  2.08s/it]


                   all        546        546      0.993      0.999      0.995      0.995
                     A         21         21          1      0.965      0.995      0.995
                     B         21         21      0.993          1      0.995      0.995
                     C         21         21      0.993          1      0.995      0.995
                     D         21         21      0.993          1      0.995      0.995
                     E         21         21      0.992          1      0.995      0.995
                     F         21         21      0.993          1      0.995      0.995
                     G         21         21      0.998          1      0.995      0.995
                     H         21         21      0.993          1      0.995      0.995
                     I         21         21      0.994          1      0.995      0.995
                     J         21         21      0.993          1      0.995      0.995
                     

In [3]:
import os

# Verify that training left a best-weights file at the expected location
model_path = 'runs/detect/hand_gesture_model/weights/best.pt'
model_found = os.path.exists(model_path)
if not model_found:
    print(f"Model file not found at: {model_path}")
else:
    print(f"Model file found at: {model_path}")

Model file found at: runs/detect/hand_gesture_model/weights/best.pt


In [4]:
# Try to load the model
# Reset first: if loading fails, `model` must not keep pointing at an object
# left over from an earlier cell.
model = None
try:
    from ultralytics import YOLO
    model = YOLO('runs/detect/hand_gesture_model/weights/best.pt')
    print("Model loaded successfully!")
    # You can print some model information
    print(f"Model type: {type(model)}")
    # model.names maps class ids to labels; it is not the network architecture.
    print(f"Model classes: {model.names}")
except Exception as e:
    # Best-effort diagnostic cell: report the problem instead of raising.
    print(f"Error loading model: {e}")
    

Model loaded successfully!
Model type: <class 'ultralytics.models.yolo.model.YOLO'>
Model architecture: {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'J', 10: 'K', 11: 'L', 12: 'M', 13: 'N', 14: 'O', 15: 'P', 16: 'Q', 17: 'R', 18: 'S', 19: 'T', 20: 'U', 21: 'V', 22: 'W', 23: 'X', 24: 'Y', 25: 'Z'}


In [None]:
import cv2
import numpy as np
import time
from datetime import datetime
import torch
from ultralytics import YOLO

class HandGestureRecognition:
    """Webcam loop that turns per-frame letter detections into words and a sentence.

    NOTE(review): the notebook already defines a training-time class with this
    same name; running this cell rebinds the name in the kernel.
    """

    # Key code compared against cv2.waitKey for the Enter key.
    _ENTER_KEY = 13
    # Consecutive failed frame grabs tolerated before giving up on the camera.
    _MAX_READ_FAILURES = 30

    def __init__(self, model_path):
        """Load the detector and its id -> label table.

        Args:
            model_path: Path to the trained YOLO weights file.
        """
        self.model = YOLO(model_path)
        # Prefer the label table stored with the weights; fall back to A-Z.
        names = getattr(self.model, "names", None)
        if isinstance(names, dict) and names:
            self.class_names = dict(names)
        else:
            self.class_names = {i: chr(ord('A') + i) for i in range(26)}

    @staticmethod
    def _roi_bounds(frame_width, frame_height, roi_width=300, roi_height=300, right_margin=50):
        """Return (x0, y0, x1, y1) of the capture box: right side, vertically centred.

        The box is clamped to the frame so small frames never produce
        negative slice indices.
        """
        width = min(roi_width, frame_width)
        height = min(roi_height, frame_height)
        start_x = max(frame_width - width - right_margin, 0)
        start_y = max((frame_height - height) // 2, 0)
        return start_x, start_y, start_x + width, start_y + height

    def _best_prediction(self, results):
        """Return (label, confidence) of the most confident box, or (None, 0)."""
        label, confidence = None, 0
        for result in results or []:
            boxes = result.boxes
            if boxes is None or len(boxes) == 0:
                continue
            # Tensor-native argmax: also works when the tensors live on a GPU,
            # where np.argmax would fail to convert them.
            best_idx = int(boxes.conf.argmax())
            confidence = float(boxes.conf[best_idx])
            label = self.class_names.get(int(boxes.cls[best_idx]))
        return label, confidence

    def real_time_detection(self, camera_index=0, confidence_threshold=0.65, debounce_time=0.5):
        """Run the interactive webcam loop until 'q' is pressed.

        Letters detected inside the capture box are appended to the current
        word (immediate repeats are ignored); Space or Enter moves the word
        into the sentence.

        Args:
            camera_index: Index passed to cv2.VideoCapture.
            confidence_threshold: Minimum confidence for a detection to count.
            debounce_time: Minimum seconds between two accepted detections.
        """
        cap = cv2.VideoCapture(camera_index)
        try:
            if not cap.isOpened():
                print("Error: Could not open webcam.")
                return

            cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

            print("Press 'q' to quit. Press 'Enter' to add the word to the sentence. Press 'Space' to add a space.")

            prev_time = 0
            current_word = []
            sentence = ""
            last_prediction_time = time.time()
            failed_reads = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    # A camera that stops delivering frames must not hang the
                    # kernel: give up after a bounded number of retries.
                    failed_reads += 1
                    if failed_reads >= self._MAX_READ_FAILURES:
                        print("Error: Lost connection to webcam.")
                        break
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                    continue
                failed_reads = 0

                frame = cv2.flip(frame, 1)
                screen_height, screen_width = frame.shape[:2]

                # ROI settings
                roi_start_x, roi_start_y, roi_end_x, roi_end_y = self._roi_bounds(screen_width, screen_height)

                # Run the detector on the clean crop, before anything is drawn on the frame.
                roi = frame[roi_start_y:roi_end_y, roi_start_x:roi_end_x]
                results = self.model(roi, verbose=False)
                predicted_label, predicted_confidence = self._best_prediction(results)

                now = time.time()
                if (
                    predicted_label
                    and predicted_confidence >= confidence_threshold
                    and now - last_prediction_time > debounce_time
                ):
                    last_prediction_time = now
                    # Ignore an immediate repeat of the previous letter.
                    if not current_word or current_word[-1] != predicted_label:
                        current_word.append(predicted_label)

                # Overlay for ROI with stable display
                overlay = frame.copy()
                cv2.rectangle(overlay, (roi_start_x, roi_start_y), (roi_end_x, roi_end_y), (255, 200, 150), -1)
                alpha = 0.4
                cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)
                cv2.rectangle(frame, (roi_start_x, roi_start_y), (roi_end_x, roi_end_y), (0, 0, 255), 2)

                # Key events for sentence formation
                key = cv2.waitKey(1) & 0xFF
                if key in (ord(' '), self._ENTER_KEY):
                    # Both keys commit the current word followed by a space.
                    if current_word:
                        sentence += ''.join(current_word) + " "
                        current_word = []
                elif key == ord('q'):
                    break

                # FPS Calculation
                curr_time = time.time()
                fps = 1 / (curr_time - prev_time) if curr_time - prev_time > 0 else 0
                prev_time = curr_time

                # Display info
                cv2.rectangle(frame, (20, screen_height - 120), (screen_width - 20, screen_height - 20), (0, 0, 0), -1)
                cv2.putText(frame, f'Current Word: {"".join(current_word)}', (30, screen_height - 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
                cv2.putText(frame, f'Sentence: {sentence.strip()}', (30, screen_height - 50), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
                cv2.putText(frame, f'FPS: {int(fps)}', (roi_start_x + 10, roi_end_y + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

                # Display datetime in non-overlapping area
                datetime_text = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                cv2.putText(frame, datetime_text, (30, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

                cv2.imshow("Hand Gesture Recognition", frame)
        finally:
            # Always free the camera and close the window, even on an exception.
            cap.release()
            cv2.destroyAllWindows()

# Load the trained weights and start the webcam demo. The call blocks while
# the capture loop runs; press 'q' in the video window to stop it.
# NOTE: `hgr` is rebound here; earlier in the notebook it referred to the
# training helper instance.
model_path = "runs/detect/hand_gesture_model/weights/best.pt"
hgr = HandGestureRecognition(model_path)
hgr.real_time_detection()


Press 'q' to quit. Press 'Enter' to add the word to the sentence. Press 'Space' to add a space.


In [None]:
from ultralytics import YOLO
import os
from pathlib import Path

# Paths
teacher_model_path = "runs/detect/hand_gesture_model/weights/best.pt"
# NOTE(review): this is the same nano checkpoint the teacher was fine-tuned
# from, so the student is not actually a smaller architecture.
student_model_path = "yolov8n.pt"
# Raw string: in a normal literal the "\t" of "\train" is a TAB character, so
# the path would point nowhere and no image would ever be found.
images_dir = r"D:\objdetection_imp_model\hand_gestures_dataset\train\images"  # Replace with actual path
distilled_labels_dir = "distilled_labels/"
data_yaml = "hand_gestures.yaml"  # Replace with your dataset YAML
image_suffixes = {".jpg", ".jpeg", ".png"}  # same image types the dataset step accepts

# Load models
teacher_model = YOLO(teacher_model_path)
student_model = YOLO(student_model_path)

# Step 1: Generate pseudo-labels using teacher model
os.makedirs(distilled_labels_dir, exist_ok=True)

print("\nGenerating pseudo-labels using teacher model...")
image_paths = sorted(
    path for path in Path(images_dir).iterdir()
    if path.suffix.lower() in image_suffixes
)
if not image_paths:
    # Fail loudly instead of reporting success for zero processed images.
    raise FileNotFoundError(f"No images found in {images_dir}")

for img_path in image_paths:
    label_file = Path(distilled_labels_dir) / f"{img_path.stem}.txt"
    # save_txt may append to an existing file; start clean so re-running the
    # cell does not duplicate label lines.
    label_file.unlink(missing_ok=True)
    # Calling the model returns a list of Results (one per image); each
    # Results object writes its own label file.
    for result in teacher_model(img_path, verbose=False):
        result.save_txt(str(label_file))

print("Pseudo-labels generated and saved.")

# Step 2: Optional - Replace original labels with distilled labels
# (This step depends on your dataset structure)
# shutil.copytree(distilled_labels_dir, 'path/to/your/train/labels')
# NOTE: until Step 2 is enabled, `data_yaml` still points at the original
# labels, so the student below does not see the pseudo-labels.

# Step 3: Train the student model (in-process, no shell command needed)
print("\nStarting training for student model using distilled labels...")

student_model.train(data=data_yaml, epochs=50, imgsz=640, name="distilled_student")

print("\nStudent model training complete.")



Generating pseudo-labels using teacher model...
Pseudo-labels generated and saved.

Starting training for student model using distilled labels...
