#**DEVELOP A MODEL FOR RECOGNIZING 24 STATIC ALPHABETS OF THE NIGERIAN SIGN LANGUAGE (NSL)**

#Data Preprocessing

**Normalize and Resize Images**

Images are resized to a uniform size (64x64), converted to grayscale, and normalized to the range [0, 1].

In [None]:
import cv2
import numpy as np
import os

# Function to preprocess an image
def preprocess_image(image_path, target_size=(64, 64)):
    """Load an image and return it resized, in grayscale and scaled by 1/255.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        target_size: Size tuple handed to ``cv2.resize`` as the output size.

    Returns:
        The resized grayscale image divided by 255.0 (a float array).

    Raises:
        ValueError: If ``cv2.imread`` cannot read ``image_path``.
    """
    img = cv2.imread(image_path)

    # cv2.imread signals a missing or undecodable file by returning None
    # instead of raising; fail here with a message that names the file rather
    # than letting cv2.resize fail later with an opaque error.
    if img is None:
        raise ValueError(f"Could not read image: {image_path}")

    # Resize first, then drop the colour channels.
    img_resized = cv2.resize(img, target_size)
    img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)

    # Scale pixel values by 1/255 (8-bit input ends up in [0, 1]).
    return img_gray / 255.0

# Example: Apply preprocessing to all images in the folder
image_folder = '/content/drive/MyDrive/Colab Notebooks/sign image corpuses'
image_extensions = ('.jpg', '.png')
processed_images = []

# Sort the listing so the order of 'processed_images' is reproducible
# (os.listdir returns entries in arbitrary order).
for img_filename in sorted(os.listdir(image_folder)):
    # Compare the extension case-insensitively: a case-sensitive test silently
    # skips files whose extension is upper-case (e.g. 'a1.PNG').
    if not img_filename.lower().endswith(image_extensions):
        continue

    img_path = os.path.join(image_folder, img_filename)
    processed_img = preprocess_image(img_path)
    processed_images.append(processed_img)

# Now 'processed_images' contains your normalized and resized images


**Data Augmentation**

Data augmentation is applied to increase the variability of the dataset, which helps the model generalize better.

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

# Initialize the ImageDataGenerator for real-time data augmentation
datagen = ImageDataGenerator(
    # Random rotation, shear and zoom.
    rotation_range=40,       # rotation range of 40 (degrees)
    shear_range=0.2,         # shear intensity
    zoom_range=0.2,          # zoom range
    # Random translations, given as a fraction of the image size.
    width_shift_range=0.2,   # horizontal shift of up to 20%
    height_shift_range=0.2,  # vertical shift of up to 20%
    # Random left-right mirroring.
    # NOTE(review): mirroring swaps the signing hand - confirm that is
    # acceptable for these signs.
    horizontal_flip=True,
    # How pixels uncovered by a transformation are filled in.
    fill_mode='nearest',
)




# Function to augment image (add channel dimension)
def augment_image(image):
    """Return one randomly augmented version of a single image.

    A trailing channel axis and then a leading batch axis are added to
    ``image`` (so a 2-D ``(height, width)`` input becomes
    ``(1, height, width, 1)``), the batch is passed to the module-level
    ``datagen`` and the first image of the first generated batch is returned.
    """
    # Channel axis last, batch axis first.
    single_batch = np.expand_dims(np.expand_dims(image, axis=-1), axis=0)

    # Iterator yielding augmented batches of size one.
    batch_iterator = datagen.flow(single_batch, batch_size=1)

    # Take the only image of the first batch.
    return next(batch_iterator)[0]

# Example: Apply preprocessing and augmentation to all images in the folder
# One augmented variant per preprocessed image, in the same order.
augmented_images = [augment_image(img) for img in processed_images]

# Now 'augmented_images' contains your augmented images




#Feature Extraction

In this step, the relevant features are extracted from the preprocessed images. The goal is to obtain meaningful information that will help the model classify the images into one of the 24 static NSL alphabets. Four different feature extraction techniques are considered (the cells below implement HOG, LBP, and EOH; SURF is listed but not implemented there):

    1. Histogram of Oriented Gradients (HOG)
    2. Local Binary Patterns (LBP)
    3. Edge Orientation Histogram (EOH)
    4. Speeded-Up Robust Features (SURF)

**Histogram of Oriented Gradients (HOG)**

HOG is a feature descriptor that captures the edge directions and magnitudes of an image. It is commonly used in object detection and image classification tasks.

In [None]:
from skimage.feature import hog
from skimage import exposure
import numpy as np

# Function to compute HOG features for grayscale images
def extract_hog_features(image):
    """Return the HOG feature vector of a grayscale image.

    The descriptor is computed with 8x8-pixel cells and 2x2-cell blocks; all
    other ``skimage.feature.hog`` parameters keep their defaults.

    Args:
        image: Grayscale image array.

    Returns:
        The HOG features as returned by ``skimage.feature.hog``.
    """
    # Only the feature vector is needed. The HOG visualisation image used to
    # be computed and intensity-rescaled on every call and then discarded,
    # which was pure overhead.
    return hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2))

# Example usage for one image
#hog_features = extract_hog_features(processed_images[0])  # process one image


**Local Binary Patterns (LBP)**


In [None]:
from skimage.feature import local_binary_pattern
import numpy as np

# Function to compute LBP features
def extract_lbp_features(image, radius=1, n_points=8):
    """Return the normalized histogram of uniform Local Binary Patterns.

    Args:
        image: Grayscale image. Non-integer input (e.g. floats in [0, 1]) is
            multiplied by 255 before conversion to ``uint8``; integer input is
            taken to be on the 0-255 scale already and is only cast.
        radius: Radius passed to ``local_binary_pattern``.
        n_points: Number of neighbour points passed to ``local_binary_pattern``.

    Returns:
        A float array of ``n_points + 2`` bin frequencies that sum to ~1.
    """
    image = np.asarray(image)

    # local_binary_pattern warns on floating-point input, so work on uint8.
    # Only rescale non-integer images: multiplying an 8-bit image by 255
    # wraps around modulo 256 and corrupts the pixel values.
    if np.issubdtype(image.dtype, np.integer):
        image = image.astype(np.uint8)
    else:
        image = (image * 255).astype(np.uint8)

    # Compute the 'uniform' LBP code of every pixel.
    lbp = local_binary_pattern(image, n_points, radius, method='uniform')

    # One unit-width bin per code value 0 .. n_points + 1.
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))

    # Normalize to frequencies; the epsilon guards against division by zero.
    lbp_hist = lbp_hist.astype('float')
    lbp_hist /= (lbp_hist.sum() + 1e-6)

    return lbp_hist

# Example usage for one image
#lbp_features = extract_lbp_features(processed_images[0])  # process one image


**Edge Orientation Histogram (EOH)**

EOH captures the gradient direction and magnitude, similar to HOG but focuses on the orientation of edges.

Here’s the implementation for EOH:

In [None]:
import cv2
import numpy as np

# Function to compute EOH features
def extract_eoh_features(image):
    """Return an 18-bin, magnitude-weighted Edge Orientation Histogram.

    Gradients are computed with 3x3 Sobel filters. Each pixel contributes its
    gradient magnitude to the bin of its edge orientation, taken modulo 180
    degrees, and the histogram is normalized to sum to ~1.

    Args:
        image: Grayscale image array accepted by ``cv2.Sobel``.

    Returns:
        A float array of 18 normalized bin weights (10 degrees per bin).
    """
    # Horizontal and vertical derivatives.
    grad_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=3)

    # Gradient strength and direction (degrees, full circle).
    magnitude, angle = cv2.cartToPolar(grad_x, grad_y, angleInDegrees=True)

    # cartToPolar reports angles over the full circle, but the histogram only
    # spans 0-180. Fold opposite directions together so gradients pointing
    # into the lower half-plane are counted instead of silently discarded.
    orientation = np.mod(angle, 180.0)

    # Weight by magnitude so flat pixels (zero gradient, reported angle 0)
    # do not inflate the first bin.
    angle_hist, _ = np.histogram(
        orientation.ravel(), bins=18, range=(0, 180), weights=magnitude.ravel()
    )

    # Normalize; the epsilon guards against division by zero on flat images.
    angle_hist = angle_hist.astype('float')
    angle_hist /= (angle_hist.sum() + 1e-6)

    return angle_hist

# Example usage for one image
#eoh_features = extract_eoh_features(processed_images[0])  # process one image


**Combining the Features**

Combining the HOG, LBP, and EOH features into one feature vector:

In [None]:
# Function to concatenate HOG, LBP, and EOH features
def combine_features(hog_features, lbp_features, eoh_features):
    """Stack the HOG, LBP and EOH descriptors, in that order, with ``np.hstack``."""
    descriptor_parts = [hog_features, lbp_features, eoh_features]
    return np.hstack(descriptor_parts)

# Example for one image
#combined_features = combine_features(hog_features, lbp_features, eoh_features)


#Classification of the extracted features using a Support Vector Machine

We'll compare several SVM kernels: linear, Gaussian radial basis function (RBF), polynomial, and sigmoid.

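We will classify the features we extracted (HOG, LBP, and EOH) using an SVM classifier. Note that scikit-learn's `SVC` handles multi-class classification internally with a one-vs-one scheme; a One-vs-All (one-vs-rest) strategy would require wrapping the classifier in `OneVsRestClassifier`.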


**SVM Classification**


In [None]:
import os
import cv2
import numpy as np

# Folder containing the images
image_folder = '/content/drive/MyDrive/Colab Notebooks/sign image corpuses'  # Update this to your actual path

# The 24 class letters: 'a' to 'y' without 'j' ('z' is excluded as well).
# A letter's position in this list is its numeric label (0-23).
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y']

# Initialize lists to store features and labels
combined_features = []  # One combined HOG + LBP + EOH vector per image
labels = []  # The matching label index for each vector

# Sort the listing: os.listdir returns entries in arbitrary order, and the
# seeded train/test split performed later depends on the sample order.
for image_name in sorted(os.listdir(image_folder)):
    # Accept any PNG regardless of extension case ('.png', '.PNG', '.png.PNG').
    if not image_name.lower().endswith('.png'):
        continue

    # The first character of the filename is the letter (e.g. 'a' from 'a1.png').
    letter = image_name[0]
    if letter not in alphabet:
        continue
    label_idx = alphabet.index(letter)

    # Read the image as grayscale.
    image_path = os.path.join(image_folder, image_name)
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread returns None for files it cannot decode; skip them instead of
    # failing inside cv2.resize.
    if image is None:
        print(f"Skipping unreadable image: {image_path}")
        continue

    # Bring every image to the same 64x64 size.
    image_resized = cv2.resize(image, (64, 64))

    # Extract HOG, LBP, and EOH features for each image
    hog_features = extract_hog_features(image_resized)
    lbp_features = extract_lbp_features(image_resized)
    eoh_features = extract_eoh_features(image_resized)

    # Store the combined feature vector with its label (0-23).
    combined_features.append(np.hstack((hog_features, lbp_features, eoh_features)))
    labels.append(label_idx)

# Fail early with a clear message instead of letting the later split fail on
# empty arrays.
if not labels:
    raise ValueError(f"No usable PNG images found in: {image_folder}")

# Check the first few labels and corresponding letters (up to 10).
for i, label in enumerate(labels[:10]):
    print(f"Image {i+1}: Label = {label}, Letter = {alphabet[label]}")

# Convert combined_features and labels to numpy arrays
combined_features = np.array(combined_features)
labels = np.array(labels)

# Check the distribution of classes in the full dataset
print("\nFull dataset labels distribution:")
print(np.unique(labels, return_counts=True))



Image 1: Label = 1, Letter = b
Image 2: Label = 2, Letter = c
Image 3: Label = 1, Letter = b
Image 4: Label = 2, Letter = c
Image 5: Label = 1, Letter = b
Image 6: Label = 1, Letter = b
Image 7: Label = 0, Letter = a
Image 8: Label = 0, Letter = a
Image 9: Label = 0, Letter = a
Image 10: Label = 0, Letter = a

Full dataset labels distribution:
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23]), array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4,
       4, 4]))


**Training SVM Classifier with Linear Kernel**

In [None]:
#We'll use scikit-learn for implementing SVM linear kernel.

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Split the dataset into training and testing sets using stratified sampling.
# A stratified split needs at least one test sample per class, so hold out
# exactly one sample's worth per class (an absolute count) instead of a
# fraction tied to a hard-coded dataset size.
n_classes = len(alphabet)
class_ids = np.arange(n_classes)
X_train, X_test, y_train, y_test = train_test_split(
    combined_features, labels, test_size=n_classes, random_state=42, stratify=labels
)

# Step 2: Standardize the features; the scaler is fitted on the training data
# only and then applied to the test data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 3: Train the SVM classifier with a linear kernel.
# 'gamma' is not passed: the linear kernel does not use it.
svm_linear = SVC(kernel='linear', random_state=42, C=1.0)
svm_linear.fit(X_train_scaled, y_train)

# Step 4: Predict on the test set
y_pred = svm_linear.predict(X_test_scaled)

# Step 5: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Detailed evaluation: classification report over all classes (0 to 23).
# zero_division=0 reports 0.00 for classes that were never predicted without
# emitting an undefined-metric warning for each of them.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=alphabet, labels=class_ids, zero_division=0))

# Confusion matrix over all classes (0 to 23)
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_ids))


Accuracy: 12.50%

Classification Report:
              precision    recall  f1-score   support

           a       0.00      0.00      0.00         1
           b       0.00      0.00      0.00         1
           c       0.00      0.00      0.00         1
           d       0.00      0.00      0.00         1
           e       0.00      0.00      0.00         1
           f       0.00      0.00      0.00         1
           g       0.00      0.00      0.00         1
           h       0.00      0.00      0.00         1
           i       0.50      1.00      0.67         1
           k       0.00      0.00      0.00         1
           l       1.00      1.00      1.00         1
           m       0.00      0.00      0.00         1
           n       0.00      0.00      0.00         1
           o       0.00      0.00      0.00         1
           p       0.00      0.00      0.00         1
           q       0.00      0.00      0.00         1
           r       0.00      0.00      0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Training SVM Classifier with Radial Basis Function (RBF) Kernel**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix



# Step 1: Split the dataset into training and testing sets using stratified sampling.
# Hold out an absolute number of samples equal to the number of classes, so
# the split does not depend on a hard-coded dataset size.
n_classes = len(alphabet)
class_ids = np.arange(n_classes)
X_train, X_test, y_train, y_test = train_test_split(
    combined_features, labels, test_size=n_classes, random_state=42, stratify=labels
)

# Step 2: Standardize the features; the scaler is fitted on the training data
# only and then applied to the test data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 3: Train the SVM classifier with the RBF (non-linear) kernel
svm_rbf = SVC(kernel='rbf', random_state=42, C=1.0, gamma='scale')
svm_rbf.fit(X_train_scaled, y_train)

# Step 4: Predict on the test set
y_pred = svm_rbf.predict(X_test_scaled)

# Step 5: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Detailed evaluation: classification report over all classes (0 to 23).
# zero_division=0 reports 0.00 for classes that were never predicted without
# emitting an undefined-metric warning for each of them.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=alphabet, labels=class_ids, zero_division=0))

# Confusion matrix over all classes (0 to 23)
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_ids))


Accuracy: 4.17%

Classification Report:
              precision    recall  f1-score   support

           a       0.00      0.00      0.00         1
           b       0.00      0.00      0.00         1
           c       0.00      0.00      0.00         1
           d       0.00      0.00      0.00         1
           e       0.00      0.00      0.00         1
           f       0.00      0.00      0.00         1
           g       0.00      0.00      0.00         1
           h       0.00      0.00      0.00         1
           i       0.00      0.00      0.00         1
           k       0.00      0.00      0.00         1
           l       1.00      1.00      1.00         1
           m       0.00      0.00      0.00         1
           n       0.00      0.00      0.00         1
           o       0.00      0.00      0.00         1
           p       0.00      0.00      0.00         1
           q       0.00      0.00      0.00         1
           r       0.00      0.00      0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Tuning the RBF Kernel Hyperparameters (C and gamma) to Improve Performance**

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate hyperparameter values for the RBF-kernel SVM.
param_grid = dict(
    C=[0.1, 1, 10],
    gamma=['scale', 0.1, 1],
    kernel=['rbf'],
)

# Exhaustive search over the grid with 3-fold cross-validation on the
# scaled training data.
grid_search = GridSearchCV(SVC(), param_grid, cv=3)
grid_search.fit(X_train_scaled, y_train)
print(f"Best Parameters: {grid_search.best_params_}")

# Score the search's predictions on the held-out test set.
y_pred_rbf = grid_search.predict(X_test_scaled)
rbf_test_accuracy = accuracy_score(y_test, y_pred_rbf)
print(f"Accuracy with optimized RBF kernel: {rbf_test_accuracy * 100:.2f}%")



Best Parameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}
Accuracy with optimized RBF kernel: 12.50%


**Training SVM Classifier with Polynomial Kernel**

In [None]:
from sklearn.svm import SVC

# Degree-3 polynomial-kernel SVM fitted on the scaled training features.
svm_poly = SVC(kernel='poly', degree=3, gamma='scale', C=1.0, random_state=42)
svm_poly.fit(X_train_scaled, y_train)

# Predict the held-out test set and report the accuracy.
y_pred_poly = svm_poly.predict(X_test_scaled)
poly_test_accuracy = accuracy_score(y_test, y_pred_poly)
print(f"Accuracy with Polynomial Kernel: {poly_test_accuracy * 100:.2f}%")


Accuracy with Polynomial Kernel: 4.17%


**Linear SVM with Principal Component Analysis (PCA)**

In [None]:
from sklearn.decomposition import PCA

# Reduce dimensionality with PCA.
# PCA cannot keep more components than there are training samples or
# features, so cap the requested 50 components accordingly.
n_components = min(50, X_train_scaled.shape[0], X_train_scaled.shape[1])

# A fixed random_state keeps the projection reproducible when PCA selects its
# randomized SVD solver.
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_scaled)  # fit on the training data only
X_test_pca = pca.transform(X_test_scaled)

# Train the linear kernel SVM on the reduced data
# (this rebinds the name 'svm_linear').
svm_linear = SVC(kernel='linear', random_state=42, C=1.0)
svm_linear.fit(X_train_pca, y_train)
y_pred_linear = svm_linear.predict(X_test_pca)

print(f"Accuracy with PCA + Linear Kernel: {accuracy_score(y_test, y_pred_linear) * 100:.2f}%")


Accuracy with PCA + Linear Kernel: 8.33%


**Training SVM with Sigmoid Kernel (Hyperbolic Tangent)**

In [None]:
# Sigmoid-kernel SVM fitted on the scaled training features.
svm_sigmoid = SVC(kernel='sigmoid', gamma='scale', C=1.0, random_state=42)
svm_sigmoid.fit(X_train_scaled, y_train)

# Predict the held-out test set and report the accuracy.
y_pred_sigmoid = svm_sigmoid.predict(X_test_scaled)
sigmoid_test_accuracy = accuracy_score(y_test, y_pred_sigmoid)
print(f"Accuracy with Sigmoid Kernel: {sigmoid_test_accuracy * 100:.2f}%")


Accuracy with Sigmoid Kernel: 4.17%
