In [1]:
import gc
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical




In [2]:
# Enable memory growth on every GPU TensorFlow can see - this helps with memory issues
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # Must run before any GPU has been initialized; if it is too late TensorFlow
    # raises RuntimeError, which is printed and otherwise ignored.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    print(e)

In [3]:
# Set memory limits to avoid crashes: cap what TensorFlow may allocate on the first GPU.
# NOTE(review): the cell above also enables memory growth on this device; confirm that
# combining both settings is intended.
GPU_MEMORY_LIMIT_MB = 3072  # Adjust this value based on your GPU
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.set_logical_device_configuration(
            physical_devices[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)],
        )
    except RuntimeError as e:
        # Logical devices can only be configured before the GPU has been initialized
        # (e.g. this cell was run after TensorFlow already used the GPU). Report it
        # instead of aborting the notebook, as the memory-growth cell does.
        print(e)

In [4]:
# Path to your dataset: it is expected to contain one sub-directory per class,
# named after the class label.
dataset_path = "C:/Users/Tisha Verma/Desktop/UML Project - Hand Signs/Data"

# Keep only directories so that stray files in the dataset folder (e.g. Thumbs.db,
# desktop.ini) are not counted as classes. Sorting fixes the label -> index mapping.
labels = sorted(
    entry
    for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)
print(f"Found {len(labels)} classes: {labels}")

Found 26 classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']


In [5]:
# Settings shared by the cells below; both are kept small to limit memory usage
IMAGE_SIZE = (64, 64)  # size every image is resized to with cv2.resize
BATCH_SIZE = 16  # mini-batch size handed to model.fit

In [6]:
def load_data_in_batches(chunk_size=500):
    """Yield the dataset as ``(images, class_indices)`` chunks.

    For every entry of the module-level ``labels`` list, the files in
    ``dataset_path/<label>`` are read with ``cv2.imread``, resized to
    ``IMAGE_SIZE`` and divided by 255. Files that OpenCV cannot decode, or whose
    processing raises, are reported, counted and skipped.

    Args:
        chunk_size: Number of images collected before a chunk is yielded
            (default 500, the value previously hard-coded). Values below 1
            behave like 1.

    Yields:
        ``(images, targets)``: ``images`` is a float32 NumPy array stacking the
        resized images of the chunk, ``targets`` holds the matching indices into
        ``labels``. The last chunk may be shorter than ``chunk_size``.
    """
    data = []
    target = []
    failed_images = 0

    print("Loading and preprocessing images...")
    for label_idx, label in enumerate(labels):
        label_path = os.path.join(dataset_path, label)
        print(f"Processing class {label} ({label_idx+1}/{len(labels)})")

        # A stray file next to the class folders would make os.listdir() raise
        if not os.path.isdir(label_path):
            print(f"Warning: {label_path} is not a directory, skipping")
            continue

        # Sorted so the sample order (and therefore the seeded train/validation
        # split made later) does not depend on the file system's listing order.
        image_files = sorted(os.listdir(label_path))
        for i, img_name in enumerate(image_files):
            # Bound outside the try so the error message below can always use it
            img_path = os.path.join(label_path, img_name)
            try:
                image = cv2.imread(img_path)
                if image is None:
                    print(f"Warning: Could not read image {img_path}")
                    failed_images += 1
                    continue

                image = cv2.resize(image, IMAGE_SIZE)
                # float32 instead of NumPy's default float64: half the memory
                image = image.astype(np.float32) / 255.0  # Normalize
            except Exception as e:
                print(f"Error loading image {img_path}: {e}")
                failed_images += 1
                continue

            data.append(image)
            target.append(label_idx)

            # Hand over a chunk as soon as it is full and start a new one
            if len(data) >= chunk_size:
                yield np.array(data), np.array(target)
                data, target = [], []  # Clear lists to free memory

            if (i+1) % 100 == 0:
                print(f"  Processed {i+1}/{len(image_files)} images in {label}")

    # Yield any remaining data
    if data:
        yield np.array(data), np.array(target)

    print(f"Completed processing. Failed to load {failed_images} images.")

In [7]:
# Drain the loader generator, keeping each chunk for the concatenation step below
all_data, all_targets = [], []

for batch_data, batch_targets in load_data_in_batches():
    all_data += [batch_data]
    all_targets += [batch_targets]

Loading and preprocessing images...
Processing class A (1/26)
  Processed 100/122 images in A
Processing class B (2/26)
Processing class C (3/26)
Processing class D (4/26)
Processing class E (5/26)
Processing class F (6/26)
  Processed 100/108 images in F
Processing class G (7/26)
  Processed 100/102 images in G
Processing class H (8/26)
  Processed 100/101 images in H
Processing class I (9/26)
  Processed 100/111 images in I
Processing class J (10/26)
Processing class K (11/26)
  Processed 100/106 images in K
Processing class L (12/26)
  Processed 100/105 images in L
Processing class M (13/26)
Processing class N (14/26)
  Processed 100/110 images in N
Processing class O (15/26)
Processing class P (16/26)
Processing class Q (17/26)
  Processed 100/105 images in Q
Processing class R (18/26)
  Processed 100/103 images in R
Processing class S (19/26)
  Processed 100/103 images in S
Processing class T (20/26)
  Processed 100/106 images in T
Processing class U (21/26)
  Processed 100/113 im

In [8]:
# Merge the per-chunk arrays into one image array and one label vector
# (an empty array stands in when nothing was loaded)
data, target_array = (
    np.concatenate(chunks) if chunks else np.array([])
    for chunks in (all_data, all_targets)
)

if not len(data):
    raise ValueError("No valid images were loaded from the dataset!")

print(f"Total loaded images: {len(data)}")

# One-hot encode the integer class indices
target = to_categorical(target_array, num_classes=len(labels))

# Hold out 20% for validation; stratifying on the integer labels keeps the
# class proportions the same in both parts
x_train, x_val, y_train, y_val = train_test_split(
    data,
    target,
    stratify=target_array,
    test_size=0.2,
    random_state=42,
)

Total loaded images: 2598


In [9]:
# Report the array shapes of both splits, one per line
print(
    f"Training data shape: {x_train.shape}",
    f"Validation data shape: {x_val.shape}",
    sep="\n",
)

Training data shape: (2078, 64, 64, 3)
Validation data shape: (520, 64, 64, 3)


In [10]:
# Free up memory: the train/validation arrays are all that is needed from here on.
# batch_data / batch_targets are the loading loop's variables and still reference the
# last loaded chunk, so they are dropped together with the chunk lists.
del data, target, all_data, all_targets, batch_data, batch_targets
gc.collect()

0

In [11]:
# Compact CNN (kept small to avoid memory issues), assembled layer by layer
model = models.Sequential()

# Stage 1: 32 filters; the first layer also declares the input shape
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(*IMAGE_SIZE, 3)))
model.add(layers.MaxPooling2D((2, 2)))

# Stage 2: 64 filters
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Stage 3: 128 filters
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Classifier head: flatten, dropout, one hidden dense layer, softmax over the classes
model.add(layers.Flatten())
model.add(layers.Dropout(0.3))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(len(labels), activation='softmax'))





In [12]:
# Model summary: print each layer with its output shape and parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 62, 62, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 31, 31, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 29, 29, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 14, 14, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 12, 12, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 6, 6, 128)         0

In [13]:
# Compile model: Adam optimizer, categorical cross-entropy on the one-hot targets,
# accuracy reported as the metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])




In [14]:
# Callback: stop training after 5 epochs without val_loss improvement and
# restore the best weights seen
early_stopping = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

In [15]:
# Callback: built-in ReduceLROnPlateau (used instead of a custom scheduler to save
# memory) scales the learning rate by 0.2 after 3 epochs without val_loss
# improvement, never going below 0.0001
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    min_lr=0.0001,
    patience=3,
    factor=0.2,
    monitor='val_loss',
)

In [16]:
# Train model on the training split, validating on the held-out split each epoch
print("Training model...")
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    batch_size=BATCH_SIZE,
    epochs=15,
    callbacks=[early_stopping, reduce_lr],
    verbose=1,
)

Training model...
Epoch 1/15


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [17]:
# Save model to the current working directory under a fixed file name.
# NOTE(review): the recorded output of this cell shows a warning raised from
# saving_api.save_model - presumably Keras' notice that the ".h5" (HDF5) format is
# legacy. Confirm whether whatever loads this file could use the ".keras" format.
model.save('hand_sign_model_main.h5')
print("Model saved as 'hand_sign_model_main.h5'")

Model saved as 'hand_sign_model_main.h5'


  saving_api.save_model(


In [18]:
# Evaluate model.
# NOTE: x_val / y_val is the same split that was passed to model.fit as
# validation_data (and therefore drove early stopping and the learning-rate
# schedule), so the figure printed as "Test accuracy" is a validation score,
# not a score on data the training procedure never saw.
test_loss, test_acc = model.evaluate(x_val, y_val)
print(f"Test accuracy: {test_acc:.4f}")

Test accuracy: 1.0000


In [19]:
def predict_hand_sign(image):
    """Classify one image with the module-level ``model``.

    The image is resized to ``IMAGE_SIZE`` and divided by 255, the same
    preprocessing the training loader applies, before being passed to the model
    as a batch of one.

    Args:
        image: Image array accepted by ``cv2.resize``; the callers in this
            notebook pass crops of OpenCV webcam frames.

    Returns:
        ``(label, confidence)``: the entry of ``labels`` with the highest
        predicted probability, and that probability as a percentage.

    Raises:
        ValueError: If ``image`` is ``None`` or contains no pixels.
    """
    if image is None or getattr(image, "size", 0) == 0:
        raise ValueError("predict_hand_sign() requires a non-empty image")

    # Preprocess the image
    resized_img = cv2.resize(image, IMAGE_SIZE)
    normalized_img = resized_img / 255.0

    # Make prediction. verbose=0: this function is called once per video frame and
    # predict() would otherwise print a progress bar for every single call.
    pred = model.predict(np.expand_dims(normalized_img, axis=0), verbose=0)[0]

    # Get the predicted class
    predicted_class = np.argmax(pred)
    confidence = pred[predicted_class] * 100

    return labels[predicted_class], confidence

In [20]:
# Webcam implementation function
def run_webcam():
    """Run live hand-sign recognition in an OpenCV window until 'q' is pressed.

    Each frame from camera 0 is mirrored, a centred square (one third of the
    shorter frame side) is classified with ``predict_hand_sign`` and the result
    is drawn on the frame. The loop also ends when a frame cannot be read. The
    camera and the window are released even if an error occurs inside the loop.
    """
    cap = cv2.VideoCapture(0)

    print("Starting webcam. Press 'q' to quit.")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            # Flip frame for mirror effect
            frame = cv2.flip(frame, 1)

            # Corners of the centred square used for hand placement
            h, w = frame.shape[:2]
            center_x, center_y = w // 2, h // 2
            rect_size = min(w, h) // 3
            left, right = center_x - rect_size // 2, center_x + rect_size // 2
            top, bottom = center_y - rect_size // 2, center_y + rect_size // 2

            # Copy the hand region BEFORE drawing on the frame: cv2.rectangle paints
            # in place, and a crop taken afterwards would contain the green guide
            # border and feed it to the classifier.
            hand_region = frame[top:bottom, left:right].copy()

            # Draw rectangle for hand placement
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)

            if hand_region.size > 0:
                sign, confidence = predict_hand_sign(hand_region)

                # Display prediction on frame
                cv2.putText(
                    frame,
                    f"{sign}: {confidence:.1f}%",
                    (20, 50),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1, (0, 255, 0), 2
                )

            # Display the frame
            cv2.imshow('Hand Sign Recognition', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always give the camera back and close the window, also on errors
        cap.release()
        cv2.destroyAllWindows()

In [21]:
# Function for Streamlit
def streamlit_app():
    """Streamlit front end for live hand-sign recognition.

    Shows "Start Webcam" / "Stop Webcam" buttons. After Start is pressed, frames
    from camera 0 are mirrored, a centred square is classified with
    ``predict_hand_sign`` and the annotated frame plus the prediction are shown
    in placeholders that are updated in place.

    If Streamlit cannot be imported, a hint is printed and ``run_webcam`` is
    used instead.
    """
    # Only the import is guarded, so an ImportError raised later is not mistaken
    # for "Streamlit is missing".
    try:
        import streamlit as st
    except ImportError:
        print("Streamlit not installed. Please install with: pip install streamlit")
        print("Running with OpenCV interface instead.")
        run_webcam()
        return

    st.title("Hand Sign Recognition System")
    st.write("This application recognizes hand signs for letters A-Z.")

    run = st.button("Start Webcam")
    stop = st.button("Stop Webcam")

    if not run:
        return

    cap = cv2.VideoCapture(0)
    stframe = st.empty()
    result_text = st.empty()

    try:
        # `stop` is read once per script run and does not change inside the loop.
        # NOTE(review): pressing "Stop Webcam" presumably ends the loop by making
        # Streamlit rerun the script; the finally block releases the camera then.
        while not stop:
            ret, frame = cap.read()
            if not ret:
                st.error("Webcam not accessible!")
                break

            # Flip frame for mirror effect
            frame = cv2.flip(frame, 1)

            # Corners of the centred square used for hand placement
            h, w = frame.shape[:2]
            center_x, center_y = w // 2, h // 2
            rect_size = min(w, h) // 3
            left, right = center_x - rect_size // 2, center_x + rect_size // 2
            top, bottom = center_y - rect_size // 2, center_y + rect_size // 2

            # Copy the hand region BEFORE drawing on the frame: cv2.rectangle paints
            # in place, and a crop taken afterwards would contain the green guide
            # border and feed it to the classifier.
            hand_region = frame[top:bottom, left:right].copy()

            # Draw rectangle for hand placement
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)

            if hand_region.size > 0:
                sign, confidence = predict_hand_sign(hand_region)

                # Display prediction on frame
                cv2.putText(
                    frame,
                    f"{sign}: {confidence:.1f}%",
                    (20, 50),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1, (0, 255, 0), 2
                )

                # Display prediction in Streamlit. An st.empty() placeholder holds a
                # single element, so two consecutive writes would leave only the
                # confidence line visible; both lines go out in one call.
                result_text.markdown(
                    f"### Detected Sign: {sign}\n\nConfidence: {confidence:.1f}%"
                )

            stframe.image(frame, channels="BGR")
            # No cv2.waitKey() here: no OpenCV window exists in the Streamlit UI,
            # so there is nothing for it to read key presses from.
    finally:
        # Release the camera however the loop ends
        cap.release()

In [22]:
# Main execution: ask for the interface to use; anything other than "1" selects
# Streamlit, which falls back to the OpenCV window if it raises
if __name__ == "__main__":
    print(
        "Choose an interface:",
        "1. OpenCV (simple window)",
        "2. Streamlit (web interface)",
        sep="\n",
    )
    choice = input("Enter 1 or 2: ")

    if choice != "1":
        try:
            streamlit_app()
        except Exception as e:
            print(
                f"Error running Streamlit: {e}",
                "Falling back to OpenCV interface...",
                sep="\n",
            )
            run_webcam()
    else:
        run_webcam()

Choose an interface:
1. OpenCV (simple window)
2. Streamlit (web interface)
Enter 1 or 2: 2


2025-03-30 10:17:26.244 
  command:

    streamlit run C:\Users\Tisha Verma\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
