In [None]:
!pip install mediapipe opencv-python pandas tqdm




In [None]:
import os
import cv2
import mediapipe as mp
import pandas as pd
from tqdm import tqdm

In [None]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1)


In [None]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
!pip install -q kaggle


In [1]:
!kaggle datasets download -d grassknoted/asl-alphabet


Traceback (most recent call last):
  File "/usr/local/bin/kaggle", line 10, in <module>
    sys.exit(main())
             ^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/kaggle/cli.py", line 68, in main
    out = args.func(**command_args)
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/kaggle/api/kaggle_api_extended.py", line 1741, in dataset_download_cli
    with self.build_kaggle_client() as kaggle:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/kaggle/api/kaggle_api_extended.py", line 688, in build_kaggle_client
    username=self.config_values['username'],
             ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^
KeyError: 'username'


In [None]:
!unzip -q asl-alphabet.zip -d asl_data

In [None]:
import os
import cv2
import pickle
import mediapipe as mp

# Path to the ASL dataset folder
DATA_DIR = '/kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train'

# MediaPipe setup
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Output storage
data = []
labels = []

# Limit to speed up
max_images_per_class = 300

# Process each class folder
for dir_ in sorted(os.listdir(DATA_DIR)):
    print(f"\n🔤 Processing letter: {dir_}")
    img_count = 0

    for img_name in os.listdir(os.path.join(DATA_DIR, dir_)):
        if img_count >= max_images_per_class:
            break

        img_path = os.path.join(DATA_DIR, dir_, img_name)
        img = cv2.imread(img_path)
        if img is None:
            continue  # Skip broken images

        # Resize and convert to RGB
        img = cv2.resize(img, (256, 256))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Detect hand landmarks
        results = hands.process(img_rgb)

        if results.multi_hand_landmarks:
            # Process only the first detected hand to maintain consistent feature vector length
            hand_landmarks = results.multi_hand_landmarks[0]
            data_aux = []
            x_ = []
            y_ = []

            for lm in hand_landmarks.landmark:
                x_.append(lm.x)
                y_.append(lm.y)
            for lm in hand_landmarks.landmark:
                data_aux.append(lm.x - min(x_))
                data_aux.append(lm.y - min(y_))

            data.append(data_aux)
            labels.append(dir_)
            img_count += 1

        if img_count % 50 == 0 and img_count > 0:
            print(f"  ✅ {img_count} images processed for '{dir_}'")

print(f"\n✅ Total samples collected: {len(data)}")

# Save as pickle file
with open('asl_landmarks.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)

print("\n📦 Saved landmark dataset as 'asl_landmarks.pickle'")


🔤 Processing letter: A
  ✅ 50 images processed for 'A'
  ✅ 50 images processed for 'A'
  ✅ 100 images processed for 'A'
  ✅ 150 images processed for 'A'
  ✅ 200 images processed for 'A'
  ✅ 250 images processed for 'A'
  ✅ 300 images processed for 'A'

🔤 Processing letter: B
  ✅ 50 images processed for 'B'
  ✅ 100 images processed for 'B'
  ✅ 100 images processed for 'B'
  ✅ 150 images processed for 'B'
  ✅ 200 images processed for 'B'
  ✅ 250 images processed for 'B'
  ✅ 300 images processed for 'B'

🔤 Processing letter: C
  ✅ 50 images processed for 'C'
  ✅ 100 images processed for 'C'
  ✅ 150 images processed for 'C'
  ✅ 150 images processed for 'C'
  ✅ 200 images processed for 'C'
  ✅ 250 images processed for 'C'
  ✅ 300 images processed for 'C'

🔤 Processing letter: D
  ✅ 50 images processed for 'D'
  ✅ 100 images processed for 'D'
  ✅ 150 images processed for 'D'
  ✅ 150 images processed for 'D'
  ✅ 200 images processed for 'D'
  ✅ 250 images processed for 'D'
  ✅ 300 images pro

In [None]:
import pickle
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# Load your landmark data
with open('asl_landmarks.pickle', 'rb') as f:
    dataset = pickle.load(f)

X = dataset['data']
y = dataset['labels']

# Check if all data points have the same length
lengths = [len(x) for x in X]
if len(set(lengths)) > 1:
    print(f"Error: Data points have inconsistent lengths. Found lengths: {set(lengths)}")
    print("This is likely due to variations in the number of hands detected in the images.")
    print("Please revisit the data collection step (cell SYneibg6OZmK) to ensure consistent data representation.")
else:
    # Split into train/test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train KNN model
    model = KNeighborsClassifier(n_neighbors=3)
    model.fit(X_train, y_train)

    # Evaluate
    y_pred = model.predict(X_test)
    print(f"\n✅ Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
    print("\n📊 Classification Report:\n", classification_report(y_test, y_pred))

    # Save model
    with open('asl_knn_model.pickle', 'wb') as f:
        pickle.dump(model, f)

    print("\n💾 Model saved as 'asl_knn_model.pickle'")


✅ Accuracy: 97.33%

📊 Classification Report:
               precision    recall  f1-score   support

           A       0.96      0.98      0.97        65
           B       0.99      1.00      0.99        66
           C       1.00      0.98      0.99        59
           D       1.00      0.97      0.98        63
           E       1.00      0.98      0.99        65
           F       0.99      0.99      0.99        68
           G       0.96      1.00      0.98        51
           H       0.98      1.00      0.99        61
           I       0.93      0.96      0.95        54
           J       1.00      0.92      0.96        53
           K       1.00      1.00      1.00        58
           L       1.00      1.00      1.00        60
           M       0.90      0.86      0.88        51
           N       0.95      0.91      0.93        64
           O       0.98      1.00      0.99        57
           P       1.00      0.91      0.95        68
           Q       0.95      1.00 

In [None]:
import cv2
import pickle
import mediapipe as mp

# Load trained model
with open('asl_knn_model.pickle', 'rb') as f:
    model = pickle.load(f)

# MediaPipe setup
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5)
mp_draw = mp.solutions.drawing_utils

# Webcam
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    h, w, _ = frame.shape
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process frame
    results = hands.process(frame_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            data_aux = []
            x_ = []
            y_ = []

            for lm in hand_landmarks.landmark:
                x_.append(lm.x)
                y_.append(lm.y)

            for lm in hand_landmarks.landmark:
                data_aux.append(lm.x - min(x_))
                data_aux.append(lm.y - min(y_))

            # Predict
            prediction = model.predict([data_aux])[0]

            # Draw landmarks
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Show prediction
            cv2.putText(frame, prediction, (10, 70), cv2.FONT_HERSHEY_SIMPLEX,
                        2, (0, 255, 0), 3, cv2.LINE_AA)

    cv2.imshow("ASL Recognition", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release
cap.release()
cv2.destroyAllWindows()
