In [1]:
# Import necessary libraries
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load the dataset
# Set the SIGN_MNIST_DIR environment variable to the folder that holds the
# extracted archive (for example your Kaggle download location). When it is
# unset, the original hard-coded location is used, so existing setups keep
# working unchanged.
DATA_DIR = os.environ.get('SIGN_MNIST_DIR', '/home/snorpiii/pro/AI/ASl/archive')


def load_sign_mnist(csv_path):
    """Read one sign-MNIST CSV and split it into images and labels.

    Parameters
    ----------
    csv_path : str
        Path to a CSV with a ``label`` column plus 28 * 28 pixel columns.

    Returns
    -------
    frame : pandas.DataFrame
        The raw table exactly as read from disk.
    images : numpy.ndarray
        ``float32`` array of shape ``(rows, 28, 28, 1)``, pixel values
        divided by 255.
    labels : numpy.ndarray
        The ``label`` column as a 1-D array.

    Raises
    ------
    ValueError
        If the table does not carry exactly 28 * 28 pixel columns.
    """
    frame = pd.read_csv(csv_path)
    pixels = frame.drop('label', axis=1).values
    if pixels.shape[1] != 28 * 28:
        # Fail with a readable message instead of NumPy's generic reshape error.
        raise ValueError(
            f"{csv_path}: expected {28 * 28} pixel columns, found {pixels.shape[1]}"
        )
    # Add the trailing channel axis Conv2D expects, then normalize.
    images = pixels.reshape(len(frame), 28, 28, 1).astype('float32') / 255
    labels = frame['label'].values
    return frame, images, labels


# Separate features and labels (already reshaped and normalized by the helper)
train_data, X_train, y_train = load_sign_mnist(
    os.path.join(DATA_DIR, 'sign_mnist_train', 'sign_mnist_train.csv'))
test_data, X_test, y_test = load_sign_mnist(
    os.path.join(DATA_DIR, 'sign_mnist_test', 'sign_mnist_test.csv'))

# One-hot encode labels
y_train = to_categorical(y_train, num_classes=26)
y_test = to_categorical(y_test, num_classes=26)

# Fix the random state before any weights are created so that a
# Restart-and-Run-All reproduces the same initialisation. This single call
# seeds Python's `random`, NumPy and TensorFlow.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Define the model
# Two conv/pool stages followed by a small dense classifier. The input is a
# single-channel 28x28 image and the output is a 26-way softmax, matching the
# num_classes=26 one-hot encoding of the labels.
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # regularisation between the dense layers
    Dense(26, activation='softmax')
])

# Compile the model
# categorical_crossentropy pairs with the one-hot labels and softmax output.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Data augmentation
# Small rotations, shifts and zooms only; horizontal flipping is disabled.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=False
)

# Hold out part of the TRAINING data for validation. Monitoring the test set
# every epoch would mean the final "test accuracy" is no longer measured on
# data that stayed unseen during model development.
SPLIT_SEED = 42
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train,
    test_size=0.1,
    random_state=SPLIT_SEED,
    stratify=y_train.argmax(axis=1),  # y_train is one-hot; stratify on class ids
)

# Train the model
# Only the fitting split is augmented; validation images are used as-is.
history = model.fit(datagen.flow(X_fit, y_fit, batch_size=32, seed=SPLIT_SEED),
                    epochs=20,
                    validation_data=(X_val, y_val),
                    verbose=2)

# Evaluate the model
# The test set is touched exactly once, after training has finished.
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test accuracy: {test_accuracy*100:.2f}%")


2024-03-28 20:24:27.509255: I tensorflow/core/platform/cpu_feature_guard.cc:181] Beginning TensorFlow 2.15, this package will be updated to install stock TensorFlow 2.15 alongside Intel's TensorFlow CPU extension plugin, which provides all the optimizations available in the package and more. If a compatible version of stock TensorFlow is present, only the extension will get installed. No changes to code or installation setup is needed as a result of this change.
More information on Intel's optimizations for TensorFlow, delivered as TensorFlow extension plugin can be viewed at https://github.com/intel/intel-extension-for-tensorflow.
2024-03-28 20:24:27.509334: I tensorflow/core/platform/cpu_feature_guard.cc:192] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


KeyboardInterrupt: 

In [None]:
# Import necessary libraries
import numpy as np
import cv2
import tensorflow as tf


# Define the alphabet mapping here
# Ensure this matches the classes your model was trained on
alphabet = 'abcdefghijklmnopqrstuvwxyz'  # Add or remove letters depending on your model

# Region of interest (ROI) in pixel coordinates. It never changes during a
# session, so it is defined once instead of on every frame.
x, y, w, h = 100, 100, 200, 200


def preprocess_roi(roi):
    """Turn a BGR crop into a one-image batch for the classifier.

    Parameters
    ----------
    roi : numpy.ndarray
        Non-empty BGR image crop.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(1, 28, 28, 1)`` with values divided by
        255, i.e. the same layout and scaling the training cell applies.
    """
    roi_resized = cv2.resize(roi, (28, 28))  # Resize to model's expected input size
    roi_gray = cv2.cvtColor(roi_resized, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
    roi_normalized = roi_gray.astype('float32') / 255.0  # Normalize pixel values
    # Add batch and channel axes: (28, 28) -> (1, 28, 28, 1)
    return roi_normalized[np.newaxis, ..., np.newaxis]


# NOTE: `model` is not created in this cell; the trained model from the
# training cell must already exist in the kernel before this cell is run.

# Start the webcam capture
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        print("Could not open webcam")

    while cap.isOpened():
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        # Extract and preprocess the ROI BEFORE drawing any overlay: the
        # rectangle's corner (x, y) is also the crop's first pixel, so drawing
        # first would put the green outline into the model input.
        roi = frame[y:y+h, x:x+w]
        if roi.size == 0:
            # The frame is smaller than the ROI offset; resizing an empty
            # crop would raise, and later frames will have the same size.
            print("ROI is outside the captured frame")
            break
        roi_reshaped = preprocess_roi(roi)

        # Perform the prediction (verbose=0: no progress bar for every frame)
        prediction = model.predict(roi_reshaped, verbose=0)
        predicted_index = int(np.argmax(prediction))
        # Guard against an alphabet shorter than the model's output layer.
        if predicted_index < len(alphabet):
            predicted_letter = alphabet[predicted_index]
        else:
            predicted_letter = '?'

        # Draw the ROI outline and the predicted letter on the video frame
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        cv2.putText(frame, predicted_letter, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the resulting frame
        cv2.imshow('Sign Language Detection', frame)

        # Break the loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised
    # or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()

