# Install necessary libraries

In [5]:
!pip install cvzone tensorflow opencv-python

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


# Script to create a dataset and save it in a folder for each letter of the alphabet

In [None]:
import cv2
from cvzone.HandTrackingModule import HandDetector
import numpy as np
import math
import time

# Dataset capture loop: shows the webcam feed, crops the detected hand onto a
# white imgSize x imgSize canvas, and writes that canvas to `folder` each time
# "s" is pressed. Press "q" to stop.
offset = 100   # Padding (pixels) added around the detected hand's bounding box
imgSize = 300  # Side length (pixels) of the square canvas that gets saved
counter = 0    # Number of images successfully written during this run

# Change the trailing folder name (A-Z) to collect samples for another letter
folder = "C:/Users/Raj/Desktop/CVIP project/ASl_data/A"
cap = cv2.VideoCapture(0)              # Start webcam
detector = HandDetector(maxHands = 1)  # Set the maximum number of hands to 1
try:
    while True:
        success, img = cap.read()      # Read one frame from the webcam
        if not success:
            # Without a frame `img` is unusable; stop instead of crashing in findHands
            print("Could not read a frame from the webcam")
            break
        hands, img = detector.findHands(img)   # Detect hands
        cv2.imshow("imm", img)

        # Reset every frame so "s" can never save a canvas from an earlier frame
        # (and never hits an undefined name before the first detection).
        imgWhite = None
        if hands:
            hand1 = hands[0]
            x, y, w, h = hand1['bbox']
            # Crop the image to the region of interest (the hand plus padding);
            # indices are clamped at 0 so a box near the border does not wrap around
            imgCrop = img[max(0, y-offset):max(0, y+h+offset),
                          max(0, x-offset):max(0, x+w+offset)]
            # Skip degenerate boxes: h/w would divide by zero and cv2.resize
            # rejects an empty crop
            if w > 0 and h > 0 and imgCrop.size > 0:
                imgWhite = np.ones((imgSize, imgSize, 3), np.uint8)*255
                aspectRatio = h/w
                # Scale the longer side of the box to imgSize and centre the
                # result on the white canvas
                if aspectRatio > 1:
                    k = imgSize/h
                    wCal = math.ceil(k*w)
                    imgResize = cv2.resize(imgCrop, (wCal, imgSize))
                    wGap = math.ceil((imgSize - wCal)/2)
                    imgWhite[:, wGap:wCal+wGap] = imgResize
                else:
                    k = imgSize/w
                    hCal = math.ceil(k*h)
                    imgResize = cv2.resize(imgCrop, (imgSize, hCal))
                    hGap = math.ceil((imgSize - hCal)/2)
                    imgWhite[hGap:hCal+hGap, :] = imgResize

                cv2.imshow("ImageWhite", imgWhite)

        key = cv2.waitKey(1)
        if key == ord("s"):
            if imgWhite is None:
                print("No hand in the current frame - nothing saved")
            elif cv2.imwrite(f'{folder}/Image_{time.time()}.jpg', imgWhite):
                # imwrite returns False (without raising) when the file cannot be written
                counter += 1
                print(counter)
            else:
                print(f"Could not write image to {folder}")
        elif key == ord('q'):
            break
finally:
    # Always free the camera and close the preview windows, even after an error
    cap.release()
    cv2.destroyAllWindows()

# Preprocessing the image dataset before feeding it to the CNN

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Image geometry, batch size and number of target classes
image_size = (300, 300)
batch_size = 32
num_classes = 26

# Root folder of the dataset
dataset_path = 'C:/Users/Raj/Desktop/CVIP project/ASl_data'

# One generator serves both subsets: pixel values are multiplied by 1/255
# and 20% of the data is held out for validation
data_generator = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Options shared by the training and validation iterators
flow_options = dict(
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
)

# Training portion of the split
train_generator = data_generator.flow_from_directory(
    dataset_path, subset='training', **flow_options
)

# Validation portion of the split
validation_generator = data_generator.flow_from_directory(
    dataset_path, subset='validation', **flow_options
)

# Show the class-name -> index mapping
print(train_generator.class_indices)



Found 1809 images belonging to 26 classes.
Found 440 images belonging to 26 classes.
{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25}


# CNN model trained on the dataset

In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Settings for this training run
image_size = (300, 300)
batch_size = 1
num_classes = 26

dataset_path = 'C:/Users/Raj/Desktop/CVIP project/ASl_data'

# Multiply pixel values by 1/255 and hold out 20% of the data for validation
data_generator = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Options shared by the training and validation iterators
flow_options = dict(
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
)
train_generator = data_generator.flow_from_directory(
    dataset_path, subset='training', **flow_options
)
validation_generator = data_generator.flow_from_directory(
    dataset_path, subset='validation', **flow_options
)

# CNN: three 3x3 convolution layers (max-pooling after the first two),
# followed by a dense layer and a softmax over the classes
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=image_size + (3,)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Number of whole batches available per epoch in each subset
train_steps = train_generator.samples // batch_size
validation_steps = validation_generator.samples // batch_size

model.fit(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=10,
    validation_data=validation_generator,
    validation_steps=validation_steps,
)

# Write the trained model to the current working directory
model.save('alphabet_classifier.h5')

Found 1803 images belonging to 26 classes.
Found 438 images belonging to 26 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Testing Script


In [15]:
import cv2
import torch
import math
from torchvision.transforms import transforms
from cvzone.HandTrackingModule import HandDetector
from cvzone.ClassificationModule import Classifier
import numpy as np

# Live test loop: detects a hand in each webcam frame, places the padded crop
# on a white imgSize x imgSize canvas, classifies the canvas and draws the
# predicted letter on the displayed frame. Press "q" to stop.
cap = cv2.VideoCapture(0)
detector = HandDetector(maxHands =1)

# labels[i] is the text drawn for predicted class index i
labels =["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"]

# NOTE(review): the training cell saves 'alphabet_classifier.h5' to the working
# directory, while this loads it from ./model/ - confirm the file was copied there.
# NOTE(review): cvzone's Classifier appears to resize its input to 224x224 and
# scale it to [-1, 1] internally, whereas the training cell uses 300x300 inputs
# scaled by 1/255 - confirm the model actually receives matching preprocessing.
classifier = Classifier("./model/alphabet_classifier.h5","./model/labels.txt")

imgSize = 300
# NOTE(review): the dataset-collection cell pads the hand box with offset = 100;
# a different padding here frames the hand differently from the training images - confirm.
offset = 50

try:
    while True:
        ret, img = cap.read()
        if not ret:
            # Without a frame `img` is unusable; stop instead of crashing in img.copy()
            print("Could not read a frame from the webcam")
            break
        imgout = img.copy()   # Copy taken before detection; the label is drawn on this copy
        hands, _ = detector.findHands(img)
        if hands:
            for hand in hands:
                x, y, w, h = hand['bbox']
                # Padded crop around the hand; indices are clamped at 0 so a box
                # near the border does not wrap around
                imgCrop = img[max(0, y-offset):max(0, y+h+offset),
                              max(0, x-offset):max(0, x+w+offset)]
                # Skip degenerate boxes: h/w would divide by zero and cv2.resize
                # rejects an empty crop
                if w <= 0 or h <= 0 or imgCrop.size == 0:
                    continue

                imgWhite = np.ones((imgSize, imgSize, 3), np.uint8)*255
                aspectRatio = h/w

                # Scale the longer side of the box to imgSize and centre the
                # result on the white canvas
                if aspectRatio > 1:
                    k = imgSize/h
                    wCal = math.ceil(k*w)
                    imgResize = cv2.resize(imgCrop, (wCal, imgSize))
                    wGap = math.ceil((imgSize - wCal)/2)
                    imgWhite[:, wGap:wCal+wGap] = imgResize
                else:
                    k = imgSize/w
                    hCal = math.ceil(k*h)
                    imgResize = cv2.resize(imgCrop, (imgSize, hCal))
                    hGap = math.ceil((imgSize - hCal)/2)
                    imgWhite[hGap:hCal+hGap, :] = imgResize

                # Single prediction call shared by both branches
                prediction, index = classifier.getPrediction(imgWhite)

                cv2.putText(imgout, labels[index], (x, y-20), cv2.FONT_HERSHEY_COMPLEX, 2, (255, 0, 255), 2)
                cv2.imshow("im", imgCrop)
        cv2.imshow('Webcam', imgout)

        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even after an error
    cap.release()
    cv2.destroyAllWindows()






