In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file found
for dirname, _, filenames in os.walk('/kaggle/input'):
    for full_path in (os.path.join(dirname, entry) for entry in filenames):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Define the path to the dataset
train_folder = '/kaggle/input/indian-sign-language-dataset/data'

# Step 1: Data Exploration and Visualization
# Each sub-directory of the dataset root is treated as one class (label).
# Stray files in the root are ignored so that listing a "class folder" below
# cannot fail; sorting keeps the class order stable between runs.
class_names = sorted(
    entry for entry in os.listdir(train_folder)
    if os.path.isdir(os.path.join(train_folder, entry))
)

# Number of classes to preview (one sample image is shown per class)
num_images_to_display = 5

plt.figure(figsize=(15, 5))
for i, class_name in enumerate(class_names[:num_images_to_display]):
    class_folder = os.path.join(train_folder, class_name)
    # os.listdir returns entries in arbitrary order; sort so the previewed
    # image is the same on every run.
    image_files = sorted(os.listdir(class_folder))

    if image_files:
        image_path = os.path.join(class_folder, image_files[0])
        plt.subplot(1, num_images_to_display, i + 1)
        # Image.open is lazy and keeps the file handle open; the context
        # manager closes it once imshow has copied the pixel data.
        with Image.open(image_path) as image:
            plt.imshow(image)
        plt.axis('off')
        plt.title(class_name)

plt.tight_layout()
plt.show()

In [None]:
def load_images_and_labels_with_augmentation(data_folder):
    """Load every decodable image under ``data_folder`` together with augmented copies.

    ``data_folder`` is expected to contain one sub-directory per class; the
    sub-directory name is used as the label. Entries of ``data_folder`` that
    are not directories are skipped, as are files OpenCV cannot decode.

    For each decoded image five samples are appended, in this order, each
    passed through ``preprocess_image`` (64x64, 3 channels):

    1. the original image,
    2. the image resized to 64x64 beforehand,
    3. the top-left 64x64 crop,
    4. a grayscale version (replicated to 3 channels),
    5. a copy with Gaussian noise from ``add_noise``.

    Args:
        data_folder: Path of the dataset root directory.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays of equal length.
    """
    images, labels = [], []

    # os.listdir gives no ordering guarantee; sorting keeps the sample order
    # (and therefore any seeded train/test split downstream) reproducible.
    for label in sorted(os.listdir(data_folder)):
        label_folder = os.path.join(data_folder, label)
        if not os.path.isdir(label_folder):
            # A stray file next to the class folders would otherwise make the
            # inner os.listdir raise NotADirectoryError.
            continue

        for image_file in sorted(os.listdir(label_folder)):
            image = cv2.imread(os.path.join(label_folder, image_file))
            if image is None:
                # cv2.imread signals an unreadable / non-image file with None
                continue

            variants = [
                # Original image
                preprocess_image(image),
                # Data Augmentation: Resize
                # NOTE(review): preprocess_image already resizes to 64x64, so
                # this sample looks identical to the first one - confirm
                # whether a different augmentation was intended.
                preprocess_image(cv2.resize(image, (64, 64))),
                # Data Augmentation: Crop (top-left 64x64 region; adjust as needed)
                preprocess_image(crop_image(image, (0, 0, 64, 64))),
                # Data Augmentation: Grayscale Conversion; preprocess_image
                # resizes the 2-D result and expands it back to 3 channels
                preprocess_image(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)),
                # Data Augmentation: Add Noise
                preprocess_image(add_noise(image)),
            ]
            images.extend(variants)
            labels.extend([label] * len(variants))

    return np.array(images), np.array(labels)

def preprocess_image(image):
    """Bring an image to the common 64x64, 3-channel layout.

    A 3-D array is only resized to 64x64. A 2-D (grayscale) array is resized
    and then expanded to three channels. An array of any other rank is
    returned untouched.
    """
    rank = len(image.shape)
    if rank not in (2, 3):
        return image

    resized = cv2.resize(image, (64, 64))
    if rank == 2:
        # Grayscale input: replicate the single channel into BGR
        return cv2.cvtColor(resized, cv2.COLOR_GRAY2BGR)
    return resized

def crop_image(image, roi):
    """Return the rectangular region ``roi`` of ``image``.

    ``roi`` is ``(x1, y1, x2, y2)``: columns ``x1:x2`` and rows ``y1:y2`` are
    kept, following normal slice semantics.
    """
    left, top, right, bottom = roi
    row_span = slice(top, bottom)
    col_span = slice(left, right)
    return image[row_span, col_span]

def add_noise(image, sigma=25, rng=None):
    """Return a copy of ``image`` with zero-mean Gaussian noise added.

    The noise is added in floating point and the result is rounded and
    clipped to [0, 255] before converting back to ``uint8``. Casting the
    signed noise itself to ``uint8`` (as was done previously) corrupts every
    negative sample, so the noise could only brighten the image.

    Args:
        image: Image array; treated as 8-bit data (values in 0..255).
        sigma: Standard deviation of the noise (default 25).
        rng: Optional ``numpy.random.Generator`` (or any object with a
            compatible ``normal`` method) for reproducible noise. When
            omitted, NumPy's global random state is used.

    Returns:
        numpy.ndarray: ``uint8`` array with the same shape as ``image``.
        The input array is not modified.
    """
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, sigma, image.shape)  # Mean=0, Stddev=sigma
    noisy_image = image.astype(np.float32) + noise
    # Saturate instead of wrapping around at the ends of the 8-bit range
    return np.clip(np.rint(noisy_image), 0, 255).astype(np.uint8)

# Build the full (original + augmented) sample set from the training folder
images, labels = load_images_and_labels_with_augmentation(data_folder=train_folder)
print("Done")


In [None]:
from skimage.feature import hog

# Keyword arguments forwarded unchanged to skimage.feature.hog
HOG_PARAMS = dict(
    orientations=9,
    pixels_per_cell=(8, 8),
    cells_per_block=(2, 2),
    block_norm='L2-Hys',
)

def extract_hog_features(images, hog_params):
    """Compute one HOG descriptor per image and stack them into an array.

    Images with three dimensions are converted from BGR to grayscale first;
    any other image is handed to ``hog`` as is. ``hog_params`` is expanded
    into keyword arguments of ``hog``.
    """
    def _to_gray(picture):
        # Only colour (3-D) inputs need the BGR -> gray conversion
        if len(picture.shape) == 3:
            return cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY)
        return picture

    descriptors = [hog(_to_gray(picture), **hog_params) for picture in images]
    return np.array(descriptors)

# Turn every loaded sample into its HOG descriptor using the shared settings
hog_features = extract_hog_features(images=images, hog_params=HOG_PARAMS)

print("HOG feature extraction completed!")
print("HOG features shape:", hog_features.shape)
print("Done")


In [None]:
# Step 4: Split the Dataset
# Stratify on the labels so every class keeps the same share of samples in the
# training and test partitions (80% / 20%, fixed seed for repeatability).
# NOTE(review): the loader emits several augmented variants of each source
# image, and a plain random split can place variants of the same image on both
# sides, which would inflate the reported test accuracy. Consider splitting by
# source image before augmenting.
X_train, X_test, y_train, y_test = train_test_split(
    hog_features,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Step 5: Scale Features
# Fit the scaler on the training split only, then apply the same transform to
# the test split so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print("Done")


In [None]:
# Step 6: Train SVM Classifier
# Linear-kernel SVM with C=0.1, fitted on the scaled training features
svm = SVC(kernel='linear', C=0.1).fit(X_train, y_train)

print("Done")


In [None]:
# Step 6: Train KNN Classifier with best parameters
# 3 nearest neighbours, Euclidean distance, votes weighted by inverse distance.
# NOTE(review): the search that selected these "best" values is not part of
# this notebook - confirm where they came from.
knn = KNeighborsClassifier(
    metric='euclidean',
    weights='distance',
    n_neighbors=3,
).fit(X_train, y_train)

print("Done")


In [None]:
# Step 7: Evaluate Model
# Score the SVM on the held-out split: accuracy, confusion matrix, per-class report
y_pred = svm.predict(X_test)
accuracy, conf_matrix = (
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
)

print(f"SVM Model Accuracy: {accuracy:.2f}")
print("Confusion Matrix:\n", conf_matrix)
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred),
)



In [None]:
# Score the KNN on the same held-out split.
# Note: this reuses (overwrites) y_pred / accuracy / conf_matrix from the SVM evaluation.
y_pred = knn.predict(X_test)
accuracy, conf_matrix = (
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
)

print(f"KNN Model Accuracy: {accuracy:.2f}")
print("Confusion Matrix:\n", conf_matrix)
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred),
)


In [None]:
# Optional: Save the Model and Scaler
# Persist both fitted classifiers and the fitted StandardScaler under
# /kaggle/working/ (the directory Kaggle keeps as notebook output).
# Both models were trained on scaled HOG features, so anything that loads them
# must apply the same HOG settings and this saved scaler before predicting.
import joblib
joblib.dump(svm, '/kaggle/working/svm_model.pkl')
joblib.dump(knn, '/kaggle/working/knn_model.pkl')
joblib.dump(scaler, '/kaggle/working/scaler.pkl')