# ***ASL Prediction/Inference Pipeline***

### **Imports**

In [1]:
import numpy as np
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.activations import swish
from tensorflow.keras.layers import Dropout
import matplotlib.pyplot as plt

### **Loading my best model after training**

In [2]:
# Custom layer class registered under the name 'FixedDropout' when the model is loaded
class FixedDropout(Dropout):
    """Thin ``Dropout`` subclass that adds no behavior of its own.

    Both methods simply forward to the parent ``Dropout`` implementation.
    Presumably the saved model references a layer called 'FixedDropout',
    which is why this name has to exist at load time -- TODO confirm against
    the training code.
    """

    def __init__(self, rate, **kwargs):
        """Forward ``rate`` and any extra keyword arguments to ``Dropout``."""
        super().__init__(rate, **kwargs)

    def call(self, inputs, training=None):
        """Apply the parent layer's ``call`` with the given ``training`` flag."""
        parent = super()
        return parent.call(inputs, training=training)

# Restores the trained network from disk. The 'swish' activation and the
# 'FixedDropout' layer are passed as custom objects so that load_model can
# resolve those names while rebuilding the architecture.
model = load_model(
    'efficientnet_hand_gesture_model.h5',
    custom_objects=dict(swish=swish, FixedDropout=FixedDropout),
)



### **Utils**

In [33]:
def preprocess_image(image):
    """Preprocesses the image for model input.

    Args:
        image: Image as a NumPy array, either 2-D (grayscale) or 3-D
            (height, width, channels), e.g. a frame from ``cv2.VideoCapture``
            or the result of ``cv2.imread``.

    Returns:
        A float32 array of shape (1, 224, 224, 3) with values divided by 255.

    Raises:
        ValueError: If ``image`` is None (which is what ``cv2.imread`` returns
            for an unreadable path) or contains no pixels.
    """
    # Fail early with a clear message instead of an AttributeError on `.shape`
    # or an opaque cv2.error inside `cv2.resize`.
    if image is None or image.size == 0:
        raise ValueError(
            "preprocess_image received an empty image; "
            "check that the file path or camera frame is valid."
        )

    if image.ndim == 2:  # If the image is grayscale
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    elif image.ndim == 3 and image.shape[2] == 4:
        # Drop the alpha channel so the batch always has 3 channels.
        image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

    # NOTE(review): OpenCV frames/imread results are in BGR order and are NOT
    # reordered here -- confirm this matches the channel order used in training.
    image = cv2.resize(image, (224, 224))  # Resizes to 224x224
    image = np.asarray(image, dtype="float32") / 255.0  # Normalizes pixel values
    return np.expand_dims(image, axis=0)  # Adds batch dimension for model input

def predict_asl_sign(image):
    """Runs the loaded model on a single image and returns the winning class.

    The image is first passed through ``preprocess_image`` and the resulting
    batch is fed to the module-level ``model``.

    Returns:
        The result of ``np.argmax`` over axis 1 of the model output, i.e. one
        class index per batch entry (index it with ``[0]`` for the single
        image passed in).
    """
    batch = preprocess_image(image)
    class_scores = model.predict(batch)
    return np.argmax(class_scores, axis=1)

def convert_index_to_sign(index):
    """Maps a predicted class index to its sign label.

    Indices 0-25 map to the letters "A"-"Z"; 26, 27 and 28 map to "DEL",
    "NOTHING" and "SPACE". Any other index yields "Unknown".
    """
    letters = [chr(ord("A") + offset) for offset in range(26)]
    labels = letters + ["DEL", "NOTHING", "SPACE"]
    lookup = dict(enumerate(labels))
    return lookup.get(index, "Unknown")

### **Handling the webcam for inference**

In [31]:
# Starts the webcam (device 0) and shows a live preview.
# Press 'p' to classify the current frame, 'q' to quit.
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and preview window are released even if
# prediction raises or the kernel is interrupted mid-loop.
try:
    if not cap.isOpened():
        print("Error: Could not open webcam.")
    else:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame.")
                break

            # Displays the captured frame
            cv2.imshow('Webcam', frame)

            # Keep only the low byte of the key code so the comparison with
            # ord() is reliable; "no key pressed" (-1) becomes 255.
            key = cv2.waitKey(1) & 0xFF

            if key == ord('p'):
                # Makes prediction on the frame currently being displayed
                predicted_class = predict_asl_sign(frame)
                sign = convert_index_to_sign(predicted_class[0])
                print(f"Predicted Sign: {sign}")
            elif key == ord('q'):
                # Exits on 'q' key press
                break
finally:
    # Releases the webcam and closes all OpenCV windows
    cap.release()
    cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Predicted Sign: B
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Predicted Sign: A
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Predicted Sign: A
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Predicted Sign: B
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Predicted Sign: B


In [32]:
## Manual testing for correcting the key value pairs for IDs
# Runs the model on a known "nothing" sample and prints the raw class index so
# it can be compared against the index-to-label mapping.
test_image_path = r'dataset_asl\asl_alphabet_test\asl_alphabet_test\nothing\nothing_test.jpg'
raw_test_image = cv2.imread(test_image_path)

# cv2.imread signals an unreadable file by returning None instead of raising,
# so check explicitly and report the offending path.
if raw_test_image is None:
    raise FileNotFoundError(f"Could not read test image: {test_image_path}")

test_image = preprocess_image(raw_test_image)
prediction = model.predict(test_image)
predicted_class = np.argmax(prediction, axis=1)
print(predicted_class)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[27]
