In [None]:
import opendatasets as od
# Fetch the Kaggle "asl-alphabet" dataset through opendatasets.
# NOTE(review): the second argument is an empty string — presumably the target
# directory (i.e. the current working directory); confirm against opendatasets.
od.download_kaggle_dataset('https://www.kaggle.com/datasets/grassknoted/asl-alphabet','')

In [None]:
import subprocess

def run_command(command):
    """Execute a shell command, report the outcome, and return what it produced.

    Args:
        command: Command line handed to the shell (``shell=True``).

    Returns:
        A ``(stdout, stderr, returncode)`` tuple for the finished process, or
        ``(None, None, -1)`` when running or reporting raised an exception.
    """
    try:
        # capture_output=True pipes both stdout and stderr; text=True decodes them to str.
        completed = subprocess.run(command, shell=True, capture_output=True, text=True)

        if completed.returncode != 0:
            print("Command failed with return code:", completed.returncode)
            print("Error output:\n", completed.stderr)
        else:
            print("Command executed successfully!")
            print("Output:\n", completed.stdout)

        return completed.stdout, completed.stderr, completed.returncode

    except Exception as e:
        print("An error occurred:", e)
        return None, None, -1

# Install the pinned xgboost build with the interpreter that runs this kernel
# ("<python> -m pip") rather than whatever `pip` happens to be first on PATH,
# so the package lands in the environment the notebook later imports from.
import sys

output, error, returncode = run_command(f'"{sys.executable}" -m pip install xgboost==1.7.6')

In [None]:
import xgboost
# Look up the `__sklearn_tags__` attribute on XGBClassifier and print it; the
# attribute access raises AttributeError when the class does not expose it.
# NOTE(review): presumably a compatibility probe between the installed xgboost
# and scikit-learn versions — confirm intent or remove from the final notebook.
xg = xgboost.XGBClassifier.__sklearn_tags__
print(xg)

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read and write files
# under /content/drive/MyDrive.
drive.mount('/content/drive')

# Processing the word dataset
For each split, read the CSV that holds the one-hot encoded classes and store every image path together with its class label in a dataframe.


In [None]:
# Root of the Roboflow export on Drive; each split lives in its own sub-directory
# and carries its one-hot label table in a file named `_classes.csv`.
_roboflow_root = '/content/drive/MyDrive/ASL Dataset.v4-v1-aug-and-prep-with-validation-set.multiclass'
train_dir, test_dir, valid_dir = (
    f'{_roboflow_root}/{split}' for split in ('train', 'test', 'valid')
)
train_csv, test_csv, valid_csv = (
    f'{split_dir}/_classes.csv' for split_dir in (train_dir, test_dir, valid_dir)
)

In [None]:
import pandas as pd
import os

def get_class_label(row):
    """Return the class name of a one-hot encoded row.

    The first entry of ``row`` is skipped (it is the 'filename' column); among
    the remaining entries the column holding the largest value wins, with ties
    resolved in favour of the left-most column.
    """
    one_hot = row.iloc[1:]
    winner = one_hot.values.argmax()
    return one_hot.index[winner]

# Load CSV and map filenames to class labels
def process_roboflow_csv(csv_path, image_dir):
    """Turn one Roboflow `_classes.csv` into a (filepath, label) table.

    Args:
        csv_path: CSV with a 'filename' column followed by one-hot class columns.
        image_dir: Directory the file names are joined onto.

    Returns:
        DataFrame with the columns 'filepath' and 'label'.
    """
    table = pd.read_csv(csv_path)
    # The label must be derived before 'filepath' is appended, because
    # get_class_label treats every column after the first as a class column.
    table["label"] = table.apply(get_class_label, axis=1)
    table["filepath"] = table["filename"].map(lambda name: os.path.join(image_dir, name))
    return table[["filepath", "label"]]

# Example usage
# Build one (filepath, label) frame per split ...
train_df, valid_df, test_df = (
    process_roboflow_csv(split_csv, split_dir)
    for split_csv, split_dir in (
        (train_csv, train_dir),
        (valid_csv, valid_dir),
        (test_csv, test_dir),
    )
)

# ... and stack them; each split keeps its own row index here.
roboflow_df = pd.concat([train_df, valid_df, test_df])

# Handling the ASL alphabet data
Load each image path and its class label (the name of the directory that contains the image) into a dataframe.

In [None]:
# Directory scanned by the next cell: each sub-directory is taken as one class.
# NOTE(review): presumably where the Kaggle download above ends up when the
# notebook runs from /content — confirm.
asl_dir = "/content/asl-alphabet/asl_alphabet_train/asl_alphabet_train"

In [None]:
# One record per file: every sub-directory of asl_dir is a class, and every
# entry inside it is a sample of that class. Non-directory entries are skipped.
kaggle_data = [
    {"filepath": os.path.join(asl_dir, class_label, img_name), "label": class_label}
    for class_label in os.listdir(asl_dir)
    if os.path.isdir(os.path.join(asl_dir, class_label))
    for img_name in os.listdir(os.path.join(asl_dir, class_label))
]

kaggle_df = pd.DataFrame(kaggle_data)

# Combine both dataframes

In [None]:
# Stack the Roboflow and Kaggle tables; ignore_index=True gives the result a
# fresh 0..n-1 row index.
combined_df = pd.concat([roboflow_df, kaggle_df], ignore_index=True)

# Preprocessing
Use MediaPipe to extract hand landmarks from the images.

In [None]:
import cv2
import mediapipe as mp
import numpy as np

In [None]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=2, min_detection_confidence=0.5)

def extract_landmarks(image_path):
    """Return the flattened landmarks of the first hand MediaPipe finds in an image.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        ``None`` when OpenCV cannot read the file; an empty list when the image
        loads but no hand is detected; otherwise a flat list holding the
        ``x, y, z`` values of every landmark of the first detected hand.
    """
    image = cv2.imread(image_path)
    if image is None:
        return None
    # OpenCV loads BGR; the frame is converted to RGB before it is handed to MediaPipe.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image)

    landmarks = []
    if results.multi_hand_landmarks:
        # Use first detected hand
        for landmark in results.multi_hand_landmarks[0].landmark:
            landmarks.extend([landmark.x, landmark.y, landmark.z])
    return landmarks

# Process all images
combined_df["landmarks"] = combined_df["filepath"].apply(extract_landmarks)

# Drop rows whose image could not be read (landmarks is None). Images without a
# detected hand carry an empty list, which dropna does not treat as missing, so
# they stay and become all-NaN landmark rows once expanded into columns.
# The index is reset because any dropped row leaves a gap: expanding the
# landmarks into a new frame (which gets a fresh 0..n-1 index) and joining it
# column-wise would otherwise pair labels with the wrong landmark rows.
combined_df = combined_df.dropna(subset=["landmarks"]).reset_index(drop=True)

# Export the dataframe as CSV for further processing

In [None]:
# Expand landmarks into columns
# Expand landmarks into columns: x_0, y_0, z_0, ..., x_20, y_20, z_20
landmark_cols = [f"{ax}_{i}" for i in range(21) for ax in ["x", "y", "z"]]
landmark_df = pd.DataFrame(
    combined_df["landmarks"].tolist(),
    columns=landmark_cols,
    # Reuse combined_df's index: pd.concat(axis=1) aligns rows by index label,
    # so a default 0..n-1 index here would mismatch labels and landmarks
    # whenever rows were dropped from combined_df beforehand.
    index=combined_df.index,
)

final_df = pd.concat([combined_df[["label"]], landmark_df], axis=1)
final_df.to_csv("asl_landmarks_combined.csv", index=False)

# Remove rows in which no landmarks were detected

In [None]:
import pandas as pd

def process_landmarks_dataset(csv_path):
    """Report, per class, how many rows of a landmark CSV actually hold landmarks.

    A row counts as unpopulated when every column other than 'label' is missing.
    The function prints the overall number and percentage of such rows and
    returns the per-class statistics; it does not return or save the cleaned rows.

    Args:
        csv_path: CSV file with a 'label' column; all other columns are treated
            as landmark columns.

    Returns:
        DataFrame indexed by class label with the columns 'Original Records'
        and 'Populated Records', sorted by 'Populated Records' in descending order.
    """
    # Load the dataset
    df = pd.read_csv(csv_path)

    # Identify landmark columns (all columns except 'label')
    landmark_cols = df.columns.drop('label')

    # Find rows where ALL landmarks are missing
    all_missing_mask = df[landmark_cols].isna().all(axis=1)

    # Keep only the rows that have at least one landmark value
    cleaned_df = df[~all_missing_mask].copy()

    # Count records per class before/after cleaning
    original_counts = df['label'].value_counts().rename('Original Records')
    cleaned_counts = cleaned_df['label'].value_counts().rename('Populated Records')

    # Classes that lost every row are absent from cleaned_counts; show them as 0
    count_df = pd.concat([original_counts, cleaned_counts], axis=1).fillna(0)
    count_df['Populated Records'] = count_df['Populated Records'].astype(int)

    # Calculate removal stats; an empty file has nothing removed (avoids 0/0)
    total_removed = len(df) - len(cleaned_df)
    removal_rate = total_removed / len(df) * 100 if len(df) else 0.0

    print(f"Total records removed: {total_removed} ({removal_rate:.2f}%)\n")
    print("Record counts per class:")
    return count_df.sort_values(by='Populated Records', ascending=False)


# result_df is the per-class count table returned by process_landmarks_dataset
# (columns 'Original Records' / 'Populated Records'), not the landmark rows.
result_df = process_landmarks_dataset("/content/asl_landmarks_combined.csv")
print(result_df.to_string())

In [None]:
# result_df holds the per-class record counts, not landmark data, so it goes to
# its own file: writing it to asl_landmarks_combined.csv would replace the
# landmark export with two count columns. The index carries the class labels,
# so it is kept in the output.
result_df.to_csv('/content/asl_landmarks_class_counts.csv')

In [None]:
# Sanity check: print the column names currently stored in the combined CSV.
print(pd.read_csv('/content/asl_landmarks_combined.csv').columns)

Index(['Original Records', 'Populated Records'], dtype='object')


In [None]:
import pandas as pd
csv_path = '/content/drive/MyDrive/asl_landmarks_combined.csv'
df = pd.read_csv(csv_path)

# Identify landmark columns (all columns except 'label')
landmark_cols = df.columns.drop('label')

# Find rows where ALL landmarks are missing
all_missing_mask = df[landmark_cols].isna().all(axis=1)

# Create cleaned dataset
cleaned_df = df[~all_missing_mask].copy()

# Persist the cleaned rows: the balancing step reads
# /content/drive/MyDrive/asl_landmarks_cleaned.csv, and nothing else in the
# notebook writes that file. index=False keeps the row index out of the CSV.
cleaned_df.to_csv('/content/drive/MyDrive/asl_landmarks_cleaned.csv', index=False)

In [None]:
# Show the last rows of the cleaned frame as a quick visual check.
print(cleaned_df.tail())

In [None]:
import pandas as pd
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
import numpy as np

# Load cleaned data
df = pd.read_csv("/content/drive/MyDrive/asl_landmarks_cleaned.csv")

# 1. Remove 'nothing' class
df = df[df['label'] != 'nothing']

# 2. Separate features and labels
X = df.drop('label', axis=1)
y = df['label']

# 3. Define sampling strategy: cap large classes at 2000 rows, lift small
#    classes to 500 rows, leave everything in between untouched
class_counts = y.value_counts()

sampling_strategy = {
    class_name: (
        2000 if count > 2000 else  # Undersample majority
        500 if count < 500 else    # Oversample minority
        count                      # Leave mid-tier
    )
    for class_name, count in class_counts.items()
}

# 4. Create pipeline: SMOTE only receives the classes that must grow, the
#    undersampler only the classes that must shrink
pipeline = Pipeline([
    ('oversample', SMOTE(
        sampling_strategy={k: v for k, v in sampling_strategy.items() if v > class_counts[k]},
        k_neighbors=2,  # Reduced for small classes
        random_state=42
    )),
    ('undersample', RandomUnderSampler(
        sampling_strategy={k: v for k, v in sampling_strategy.items() if v < class_counts[k]},
        random_state=42
    ))
])

# 5. Apply resampling
X_res, y_res = pipeline.fit_resample(X, y)

# 6. Add Gaussian noise to the oversampled classes. The mask selects every row
#    of such a class, so original and synthetic samples are both perturbed.
minority_classes = [cls for cls, count in sampling_strategy.items() if count > class_counts[cls]]
noise_scale = 0.02  # standard deviation of the noise, in landmark coordinate units

# Dedicated seeded generator: the samplers above are seeded with 42, and drawing
# the noise from the unseeded global NumPy state made the saved dataset differ
# from run to run.
rng = np.random.default_rng(42)

for cls in minority_classes:
    mask = y_res == cls
    X_res[mask] += rng.normal(loc=0, scale=noise_scale, size=X_res[mask].shape)

# 7. Save balanced dataset
balanced_df = pd.concat([pd.DataFrame(X_res, columns=X.columns), pd.Series(y_res, name='label')], axis=1)
balanced_df.to_csv("balanced_asl_dataset.csv", index=False)

# 8. Print new class distribution
print("Balanced Class Counts:")
print(pd.Series(y_res).value_counts().sort_values(ascending=False))

In [None]:
import pandas as pd
# Load the balanced dataset from Drive and list its columns to inspect the layout.
df = pd.read_csv('/content/drive/MyDrive/asl_landmarks_balanced.csv')
print(df.columns)

In [None]:
# Remove the leftover index columns that pandas names "Unnamed: 0",
# "Unnamed: 0.1", ... when a CSV was saved with its row index. Selecting them by
# name keeps the cell idempotent: dropping the first two columns by position
# would delete real data columns as soon as the cell is run on an already
# cleaned file.
df = df.loc[:, ~df.columns.str.startswith('Unnamed')]
print(df.columns)

In [None]:
# index=False: without it to_csv writes the row index as an extra first column,
# which comes back as "Unnamed: 0" on the next read_csv.
df.to_csv('/content/drive/MyDrive/asl_landmarks_balanced.csv', index=False)

# Train an XGBoost classifier

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from xgboost import XGBClassifier
import joblib

# Load the dataset
data = pd.read_csv('/content/drive/MyDrive/asl_landmarks_balanced.csv')

# Define feature columns: mediapipe landmarks from x_0,y_0,z_0 to x_20,y_20,z_20
# Selecting by name means any extra column in the file (e.g. a saved index) is ignored.
feature_columns = [f"{axis}_{i}" for i in range(21) for axis in ['x', 'y', 'z']]
X = data[feature_columns]
y = data['label']

# Encode the string labels to integers for training
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets (stratify to preserve class distribution)
# NOTE(review): the split is drawn from the balanced file; if that file contains
# resampled/synthetic rows, the held-out score may be optimistic — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Scale the features: the scaler is fitted on the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the XGBoost classifier
model = XGBClassifier(
    objective='multi:softmax',
    num_class=len(np.unique(y_encoded)),
    eval_metric='mlogloss',  # multi-class log loss
    use_label_encoder=False,
    random_state=42
)
model.fit(X_train, y_train)

# Make predictions (numeric)
y_pred_numeric = model.predict(X_test)

# Convert numeric predictions back to original string labels
y_pred = label_encoder.inverse_transform(y_pred_numeric)
y_test_str = label_encoder.inverse_transform(y_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test_str, y_pred))
print("\nClassification Report:")
print(classification_report(y_test_str, y_pred))

# Save the model, scaler, and label encoder for deployment
# (written to the current working directory; the test cell loads them by the same names)
joblib.dump(model, "xgb_classifier.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")


# Testing the model

In [None]:
import os
import cv2
import pandas as pd
import numpy as np
import mediapipe as mp
import joblib
from sklearn.metrics import accuracy_score, classification_report

# --- Load test CSV ---
# The CSV has a "filename" column; every other column is a one-hot class flag.
test_csv_path = "/content/drive/MyDrive/ASL Dataset.v4-v1-aug-and-prep-with-validation-set.multiclass/test/_classes.csv"
test_df = pd.read_csv(test_csv_path)

# Determine the one-hot encoded label columns (all columns except "filename").
# The column is called "filename" (it is read as row['filename'] below); filtering
# on "filenames" left the file-name column among the class columns.
onehot_columns = [col for col in test_df.columns if col != "filename"]

def one_hot_to_label(row):
    """Return the name of the first class column set to 1, or None if there is none."""
    for col in onehot_columns:
        if row[col] == 1:
            return col
    return None

test_df['true_label'] = test_df.apply(one_hot_to_label, axis=1)

# --- Load saved model artifacts ---
model = joblib.load("xgb_classifier.pkl")
scaler = joblib.load("scaler.pkl")
label_encoder = joblib.load("label_encoder.pkl")

# --- Initialize MediaPipe Hands ---
mp_hands = mp.solutions.hands

# --- Process each test image ---
predictions = []
ground_truths = []

# The context manager releases the MediaPipe resources even if an image raises.
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.5
) as hands:
    for idx, row in test_df.iterrows():
        filename = row['filename']
        true_label = row['true_label']

        # Construct full file path inside the test split directory on Drive
        filepath = os.path.join("/content/drive/MyDrive/ASL Dataset.v4-v1-aug-and-prep-with-validation-set.multiclass/test/", filename)

        # Load image using cv2 (BGR format)
        image = cv2.imread(filepath)
        if image is None:
            print(f"Error reading image: {filepath}")
            continue

        # Convert to RGB as required by MediaPipe
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Process the image to detect hand landmarks
        results = hands.process(image_rgb)
        if not results.multi_hand_landmarks:
            print(f"No hand landmarks detected for image: {filepath}")
            continue

        # Use the first detected hand
        hand_landmarks = results.multi_hand_landmarks[0]

        # Extract features: we expect 21 landmarks with x, y, z each -> 63 features
        features = []
        for landmark in hand_landmarks.landmark:
            features.extend([landmark.x, landmark.y, landmark.z])

        if len(features) != 63:
            print(f"Unexpected number of landmarks for image: {filepath}")
            continue

        # Prepare features for prediction (single row, scaled like the training data)
        features = np.array(features).reshape(1, -1)
        features_scaled = scaler.transform(features)

        # Predict the numeric class id and map it back to its string label
        pred_numeric = model.predict(features_scaled)
        pred_label = label_encoder.inverse_transform(pred_numeric)[0]

        predictions.append(pred_label)
        ground_truths.append(true_label)

# --- Evaluate predictions ---
# Images that were skipped above (unreadable, no hand) are not part of the score.
if ground_truths:
    acc = accuracy_score(ground_truths, predictions)
    report = classification_report(ground_truths, predictions)
    print("Test Accuracy:", acc)
    print("\nClassification Report:")
    print(report)
else:
    print("No valid predictions were made. Check if the test images are processed correctly.")


# Data processing
The test CSV stores a file name and one-hot encoded labels per image. Convert it to an absolute file path and a string label to make testing easier.

In [None]:
import pandas as pd

def process_csv(input_file, output_file,
                image_dir='/content/drive/MyDrive/ASL Dataset.v4-v1-aug-and-prep-with-validation-set.multiclass/test/'):
    """Convert a one-hot label CSV into a two-column (filepath, label) CSV.

    Args:
        input_file: CSV with a 'filename' column; every other column is a
            one-hot class flag.
        output_file: Destination CSV; receives the columns 'filepath' and 'label'.
        image_dir: Directory prefixed to every file name. Defaults to the test
            split directory on Drive.

    Returns:
        None. A missing input file or a failed write is reported with a printed
        message instead of an exception.

    Each row is labelled with the first class column whose value is 1; rows
    without any flagged class get an empty-string label.
    """
    try:
        df = pd.read_csv(input_file)
    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found.")
        return

    # Class columns are selected by name. Slicing by position after 'filepath'
    # was appended and 'filename' dropped skipped the first class column, so
    # images of that class were written with an empty label.
    label_cols = df.columns.drop('filename')

    prefix = image_dir if image_dir.endswith('/') else image_dir + '/'
    result = pd.DataFrame({'filepath': prefix + df['filename'], 'label': ''})

    # Vectorised lookup: idxmax returns the first column holding the maximum,
    # i.e. the first flagged class; it is only applied to rows with a flag.
    flagged = df[label_cols].eq(1)
    has_label = flagged.any(axis=1)
    if has_label.any():
        result.loc[has_label, 'label'] = flagged.loc[has_label].astype(int).idxmax(axis=1)

    df = result[['filepath', 'label']]
    print(df)

    try:
        df.to_csv(output_file, index=False)
        print(f"Processed '{input_file}' and saved to '{output_file}'.")
    except Exception as e:
        print(f"Error writing to output file: {e}")



# Example usage:
# Convert the test split's one-hot CSV and store the result next to it as
# classes_procesed.csv (this exact file name is read by the final test cell).
input_csv = "/content/drive/MyDrive/ASL Dataset.v4-v1-aug-and-prep-with-validation-set.multiclass/test/_classes.csv"
output_csv = "/content/drive/MyDrive/ASL Dataset.v4-v1-aug-and-prep-with-validation-set.multiclass/test/classes_procesed.csv"
process_csv(input_csv, output_csv)

# Training a second model
The previous model did not reach good test accuracy. This time, take the wrist landmark (x_0, y_0, z_0) as the origin and train the model on each landmark's position relative to the wrist.

In [None]:
# ====================
# TRAINING PIPELINE
# ====================
import cv2
import mediapipe as mp
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

# 1. Load and preprocess training data
df = pd.read_csv('/content/drive/MyDrive/asl_landmarks_balanced.csv')

# A CSV written by DataFrame.to_csv() without index=False stores the row index as
# an unnamed first column, which read_csv brings back as "Unnamed: 0" (and
# "Unnamed: 0.1", ... after repeated save/load round trips). Any such column
# would break the 21 x 3 reshape of the feature matrix below, so all of them are
# dropped, not only the first one.
df = df.loc[:, ~df.columns.str.startswith('Unnamed')]

# Convert absolute landmarks to relative (wrist as origin)
def convert_to_relative(landmarks):
    """Express hand landmarks relative to the wrist (landmark 0).

    Args:
        landmarks: Array-like that reshapes to (n_samples, 21, 3), i.e. 63
            values per sample ordered x, y, z for each landmark.

    Returns:
        Array of shape (n_samples, 60): the 20 non-wrist landmarks with the
        wrist position subtracted. The wrist itself is omitted because it would
        always be (0, 0, 0).
    """
    points = np.asarray(landmarks).reshape(-1, 21, 3)
    origin = points[:, :1, :]            # wrist, kept as (n, 1, 3) so it broadcasts
    offsets = points[:, 1:, :] - origin  # landmarks 1..20 relative to the wrist
    return offsets.reshape(-1, 60)

# Every column except 'label' is treated as a landmark value; convert_to_relative
# reshapes each row to 21 x 3, so the frame must hold exactly 63 feature columns.
X_absolute = df.drop('label', axis=1).values
X = convert_to_relative(X_absolute)
y = df['label'].values

# 2. Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 3. Split and scale (scaler is fitted on the training split only)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 4. Train model
model = XGBClassifier(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=7,
    subsample=0.8,
    colsample_bytree=0.8,
    tree_method='hist',
    eval_metric='mlogloss',
    early_stopping_rounds=50,
    use_label_encoder=False
)

# NOTE(review): the split named X_test is also the early-stopping eval_set, so
# any score computed on it afterwards would not be an independent estimate.
model.fit(X_train_scaled, y_train,
          eval_set=[(X_test_scaled, y_test)],
          verbose=True)

# 5. Save artifacts (current working directory; the inference class loads
# these three files by the same names)
joblib.dump(scaler, 'scaler_relative.pkl')
joblib.dump(le, 'label_encoder_relative.pkl')
model.save_model('asl_model_relative.json')


# Model inference

In [None]:
import cv2
import mediapipe as mp
import joblib
import xgboost as xgb
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin

class XGBoostCompatWrapper(BaseEstimator, ClassifierMixin):
    """Scikit-learn style wrapper around an XGBoost Booster loaded from disk.

    Args:
        model_path: Path of a model file readable by ``xgb.Booster.load_model``.
    """
    def __init__(self, model_path):
        # BaseEstimator.get_params() (also used when the object is printed)
        # reads every __init__ parameter back from an attribute of the same
        # name, so the path has to be stored on the instance.
        self.model_path = model_path
        self.model = xgb.Booster()
        self.model.load_model(model_path)

    def predict_proba(self, X):
        """Return the booster's raw prediction output for ``X``.

        NOTE(review): assumes the saved model yields one probability per class
        for each row (shape (n_samples, n_classes)) — confirm against the
        objective the model was trained with.
        """
        dmat = xgb.DMatrix(X)
        return self.model.predict(dmat)

    def predict(self, X):
        """Return, for each row of ``X``, the index of the highest-scoring class."""
        return np.argmax(self.predict_proba(X), axis=1)

class ASLClassifier:
    """Predict an ASL label for an image file from wrist-relative hand landmarks.

    The instance owns a MediaPipe Hands object. Release it with ``close()`` or
    use the classifier as a context manager (``with ASLClassifier() as clf:``).
    """
    def __init__(self):
        # Initialize MediaPipe
        self.hands = mp.solutions.hands.Hands(
            static_image_mode=True,
            max_num_hands=2,
            min_detection_confidence=0.5
        )

        # Load preprocessing artifacts
        self.scaler = joblib.load('scaler_relative.pkl')
        self.le = joblib.load('label_encoder_relative.pkl')

        # Load XGBoost model with compatibility wrapper
        self.model = XGBoostCompatWrapper('asl_model_relative.json')

    def close(self):
        """Release the MediaPipe Hands object; calling it again is a no-op."""
        hands = getattr(self, 'hands', None)
        if hands is not None:
            hands.close()
            self.hands = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never suppress an exception raised inside the with-block

    def _process_landmarks(self, landmarks):
        """Convert 63 absolute landmark values into a (1, 60) wrist-relative row.

        The wrist (landmark 0) is subtracted from every landmark and then
        dropped from the result, since it would always be (0, 0, 0).
        """
        landmarks = np.array(landmarks).reshape(1, 21, 3)
        wrist = landmarks[:, 0, :]
        relative = landmarks - wrist
        return relative[:, 1:, :].reshape(1, 60)

    def predict(self, image_path):
        """Classify the first hand detected in an image.

        Args:
            image_path: Path of the image file.

        Returns:
            ``(label, confidence)``. When no prediction can be made, the first
            element is a message ("Invalid image", "No hands detected",
            "Invalid landmarks" or "Error: ...") and the confidence is 0.0;
            callers must tell these messages apart from real class labels.
        """
        try:
            # Load and process image
            image = cv2.imread(image_path)
            if image is None:
                return "Invalid image", 0.0

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = self.hands.process(image)

            if not results.multi_hand_landmarks:
                return "No hands detected", 0.0

            # Extract landmarks of the first detected hand only
            raw_landmarks = []
            for landmark in results.multi_hand_landmarks[0].landmark:
                raw_landmarks.extend([landmark.x, landmark.y, landmark.z])

            if len(raw_landmarks) != 63:
                return "Invalid landmarks", 0.0

            # Process and predict
            processed = self._process_landmarks(raw_landmarks)
            scaled = self.scaler.transform(processed)

            proba = self.model.predict_proba(scaled)[0]
            pred_class = self.le.inverse_transform([np.argmax(proba)])[0]
            return pred_class, np.max(proba)

        except Exception as e:
            # Deliberate best-effort: a failing image is reported, not raised.
            return f"Error: {str(e)}", 0.0

# Testing

In [None]:
# Read the (filepath, label) table of the test split
test_df = pd.read_csv('/content/drive/MyDrive/ASL Dataset.v4-v1-aug-and-prep-with-validation-set.multiclass/test/classes_procesed.csv')

# Build the classifier (loads scaler, label encoder and model)
classifier = ASLClassifier()

# Run every test image through the classifier. Status messages returned for
# images without a usable hand are stored as predictions too, with confidence 0.0.
y_true, y_pred, confidences = [], [], []
for image_path, expected in zip(test_df['filepath'], test_df['label']):
    pred, conf = classifier.predict(image_path)
    y_true.append(expected)
    y_pred.append(pred)
    confidences.append(conf)

# Per-class report
print("\nClassification Report:")
print(classification_report(y_true, y_pred,))

# Overall figures
true_arr = np.array(y_true)
pred_arr = np.array(y_pred)
accuracy = np.mean(true_arr == pred_arr)
print(f"\nOverall Accuracy: {accuracy:.4f}")
print(f"Average Confidence: {np.mean(confidences):.4f}")

# List every image whose prediction differs from its label
error_mask = true_arr != pred_arr
print("\nTop Misclassified Examples:")
wrong_paths = test_df[error_mask]['filepath']
for path, true, pred in zip(wrong_paths, true_arr[error_mask], pred_arr[error_mask]):
    print(f"{path}: {true} -> {pred}")