In [1]:
import os
import zipfile
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle
import csv
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [2]:
try:
    import kaggle
except (OSError, ModuleNotFoundError):
    print("Kaggle API not found. Please run 'pip install kaggle' and set up your kaggle.json.")
    raise

print("Cell 1: All libraries imported successfully.")


Cell 1: All libraries imported successfully.


In [None]:
KAGGLE_DATASET = "alvinbintang/sibi-dataset"
DATASET_FOLDER = "SIBI_Alphabet_Dataset"
DATASET_PATH = os.path.join("../",DATASET_FOLDER, "train")

if not os.path.exists(DATASET_FOLDER):
    print(f"Downloading and unzipping '{KAGGLE_DATASET}'...")
    try:
        # The kaggle API downloads the file as 'sibi-dataset.zip'
        kaggle.api.dataset_download_files(KAGGLE_DATASET, path='.', unzip=False, quiet=False)
        zip_filename = "sibi-dataset.zip"
        with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
            zip_ref.extractall(DATASET_FOLDER)
        os.remove(zip_filename) # Clean up the zip file
        print("Download and extraction complete.")
    except Exception as e:
        print(f"An error occurred during download/unzip: {e}")
        raise
else:
    print(f"Dataset folder '{DATASET_FOLDER}' already exists.")

if not os.path.exists(DATASET_PATH):
    raise FileNotFoundError(f"Error: Training path not found at '{DATASET_PATH}'.")

print("Cell 2: SIBI Alphabet Dataset is ready.")

Dataset folder 'SIBI_Alphabet_Dataset' already exists.
Cell 2: SIBI Alphabet Dataset is ready.


In [4]:
RAW_CSV_FILE = "sibi_alphabet_landmarks.csv"

if not os.path.exists(RAW_CSV_FILE):
    print(f"'{RAW_CSV_FILE}' not found. Starting image processing...")
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)
    
    processed_data = []
    gesture_labels = sorted([d for d in os.listdir(DATASET_PATH) if os.path.isdir(os.path.join(DATASET_PATH, d))])

    for label in tqdm(gesture_labels, desc="Processing Labels"):
        label_path = os.path.join(DATASET_PATH, label)
        for image_file in os.listdir(label_path):
            image_path = os.path.join(label_path, image_file)
            image = cv2.imread(image_path)
            if image is None: continue
            
            results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if results.multi_hand_landmarks:
                hand_landmarks = results.multi_hand_landmarks[0]
                landmarks = np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]).flatten()
                processed_data.append([label] + list(landmarks))

    with open(RAW_CSV_FILE, 'w', newline='') as f:
        writer = csv.writer(f)
        headers = ['label'] + [f'{axis}{i}' for i in range(21) for axis in ['x', 'y', 'z']]
        writer.writerow(headers)
        writer.writerows(processed_data)
    
    hands.close()
    print(f"Raw landmark data saved to '{RAW_CSV_FILE}'.")
else:
    print(f"'{RAW_CSV_FILE}' already exists. Skipping image processing.")

print("Cell 3: Raw data processing complete.")


'sibi_alphabet_landmarks.csv' already exists. Skipping image processing.
Cell 3: Raw data processing complete.


In [5]:
print("--- Preprocessing and Feature Engineering ---")
df = pd.read_csv(RAW_CSV_FILE)
X = df.drop('label', axis=1).values
y = df['label'].values

# Feature Engineering: Normalize landmarks relative to the wrist and hand size
X_processed = []
for row in X:
    landmarks = row.reshape(21, 3)
    wrist = landmarks[0]
    relative_landmarks = landmarks - wrist
    max_dist = np.max(np.linalg.norm(relative_landmarks, axis=1))
    if max_dist == 0: max_dist = 1
    normalized_landmarks = relative_landmarks / max_dist
    X_processed.append(normalized_landmarks.flatten())

X_processed = np.array(X_processed)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

print(f"Data preprocessed. Training features shape: {X_train.shape}")
print("Cell 4: Preprocessing complete.")

--- Preprocessing and Feature Engineering ---
Data preprocessed. Training features shape: (1965, 63)
Cell 4: Preprocessing complete.


In [6]:
print("--- Modeling ---")
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)
]

history = model.fit(X_train, y_train,
                    epochs=100,
                    batch_size=32,
                    validation_data=(X_test, y_test),
                    callbacks=callbacks)

print("Cell 5: Model training complete.")

--- Modeling ---


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.0809 - loss: 3.0758 - val_accuracy: 0.3232 - val_loss: 2.7965 - learning_rate: 0.0010
Epoch 2/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1913 - loss: 2.7141 - val_accuracy: 0.4207 - val_loss: 2.3195 - learning_rate: 0.0010
Epoch 3/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.2799 - loss: 2.3665 - val_accuracy: 0.6565 - val_loss: 1.9198 - learning_rate: 0.0010
Epoch 4/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3567 - loss: 2.0618 - val_accuracy: 0.6992 - val_loss: 1.5692 - learning_rate: 0.0010
Epoch 5/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4270 - loss: 1.8008 - val_accuracy: 0.7500 - val_loss: 1.2683 - learning_rate: 0.0010
Epoch 6/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

In [7]:
print("--- Model Validation ---")
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\nModel Accuracy on Test Set: {accuracy * 100:.2f}%")

y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
print("Cell 6: Validation complete.")

--- Model Validation ---

Model Accuracy on Test Set: 97.36%
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 

Classification Report:
              precision    recall  f1-score   support

           A       1.00      1.00      1.00        28
           B       0.96      1.00      0.98        24
           C       1.00      0.89      0.94        18
           D       0.96      1.00      0.98        22
           E       1.00      1.00      1.00        26
           F       1.00      1.00      1.00        22
           G       0.95      1.00      0.98        20
           H       1.00      1.00      1.00        21
           I       1.00      1.00      1.00        22
           K       1.00      1.00      1.00        19
           L       0.95      1.00      0.98        20
           M       1.00      0.95      0.98        22
           N       0.92      1.00      0.96        22
           O       0.89      1.00      0.94        17
           P       1.00      0

In [8]:
print("--- Exporting Artifacts ---")
STATIC_MODEL_FILE = 'sibi_static_alphabet_model.h5'
STATIC_ENCODER_FILE = 'sibi_static_alphabet_encoder.pkl'

model.save(STATIC_MODEL_FILE)
with open(STATIC_ENCODER_FILE, 'wb') as f:
    pickle.dump(label_encoder, f)

print(f"Model saved to '{STATIC_MODEL_FILE}'")
print(f"Label encoder saved to '{STATIC_ENCODER_FILE}'")
print("Cell 7: Export complete.")



--- Exporting Artifacts ---
Model saved to 'sibi_static_alphabet_model.h5'
Label encoder saved to 'sibi_static_alphabet_encoder.pkl'
Cell 7: Export complete.


In [None]:
live_hands = mp.solutions.hands.Hands(model_complexity=0, min_detection_confidence=0.5, min_tracking_confidence=0.5)
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)

while cap.isOpened():
    success, image = cap.read()
    if not success: continue

    image = cv2.flip(image, 1)
    results = live_hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    prediction_text = "No hand detected"
    if results.multi_hand_landmarks:
        hand_landmarks = results.multi_hand_landmarks[0]
        mp.solutions.drawing_utils.draw_landmarks(image, hand_landmarks, mp.solutions.hands.HAND_CONNECTIONS)
        
        # Preprocess landmarks for prediction (MUST MATCH TRAINING)
        landmarks = np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark])
        wrist = landmarks[0]
        relative_landmarks = landmarks - wrist
        max_dist = np.max(np.linalg.norm(relative_landmarks, axis=1))
        if max_dist == 0: max_dist = 1
        normalized_landmarks = relative_landmarks / max_dist
        
        landmarks_input = np.expand_dims(normalized_landmarks.flatten(), axis=0)
        
        # Predict
        prediction = model.predict(landmarks_input, verbose=0)
        predicted_class_index = np.argmax(prediction)
        confidence = np.max(prediction)
        
        if confidence > 0.7: # Confidence threshold
            predicted_letter = label_encoder.inverse_transform([predicted_class_index])[0]
            prediction_text = f'{predicted_letter} ({confidence:.2f})'
        else:
            prediction_text = 'Uncertain'

    # Display Prediction
    cv2.rectangle(image, (0, 0), (300, 60), (0, 0, 0), -1)
    cv2.putText(image, prediction_text, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 255, 255), 2, cv2.LINE_AA)
    cv2.imshow('Phase 1: SIBI Alphabet Recognition', image)

    if cv2.waitKey(5) & 0xFF == ord('q'): break

cap.release()
cv2.destroyAllWindows()
live_hands.close()
print("Webcam feed stopped.")



KeyboardInterrupt: 

: 