In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import joblib

In [3]:
# 1. Read the keypoint dataset from disk
csv_path = "hand_keypoints_xy_dataset.csv"
df = pd.read_csv(csv_path)

# 2. Separate the target column from the feature columns.
#    Every column other than "label" is treated as a feature.
y = df["label"].values
X = df.drop(columns=["label"]).values

# 3. Normalize keypoints relative to bounding box
def normalize_keypoints(keypoints):
    """Rescale one sample's keypoints into its own axis-aligned bounding box.

    Args:
        keypoints: Array-like of 42 numbers laid out as 21 consecutive
            (x, y) pairs (anything that reshapes to ``(21, 2)``).

    Returns:
        A new 1-D float64 array of 42 values in the same (x, y) order. Each
        axis is shifted by its minimum and divided by its min-to-max extent
        over the 21 points. The input is never modified.

    Raises:
        ValueError: If the input cannot be reshaped to ``(21, 2)``.
    """
    # A float copy is required here: reshaping the caller's row yields a view,
    # so writing normalized values into it would overwrite the source data and,
    # for integer input, truncate every result to 0 or 1.
    points = np.array(keypoints, dtype=np.float64).reshape(21, 2)

    # Column 0 holds x, column 1 holds y; reduce over the 21 points.
    lower = points.min(axis=0)
    extent = points.max(axis=0) - lower

    # Avoid division by zero when all points share a coordinate on an axis
    extent[extent == 0] = 1e-6

    return ((points - lower) / extent).flatten()

# Apply the bounding-box normalization to every row of X
X_norm = np.array([normalize_keypoints(sample) for sample in X])

# 4. Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# 5. Split dataset BEFORE fitting the scaler.
#    Fitting the scaler on all rows would let test-set statistics leak into the
#    training features and into the persisted scaler.
X_train_norm, X_test_norm, y_train, y_test = train_test_split(
    X_norm, y_encoded, test_size=0.2, random_state=42
)

# 6. Scale features: statistics come from the training rows only, and the
#    same transform is then applied to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_norm)
X_test = scaler.transform(X_test_norm)

# Full scaled matrix, kept so any code that still reads X_scaled keeps working.
X_scaled = scaler.transform(X_norm)

# Save the scaler and label encoder
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')


In [4]:
# 7. Create PyTorch Dataset
class HandKeypointDataset(Dataset):
    """Map-style dataset pairing feature rows with integer class labels.

    Both inputs are converted to tensors once, at construction time:
    features as float32 and labels as int64 (``torch.long``).
    """

    def __init__(self, X, y):
        self.y = torch.tensor(y, dtype=torch.long)
        self.X = torch.tensor(X, dtype=torch.float32)

    def __len__(self):
        """Return the number of samples (length of the feature tensor)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        features = self.X[idx]
        target = self.y[idx]
        return features, target

# Wrap each split in a Dataset
train_dataset = HandKeypointDataset(X_train, y_train)
test_dataset = HandKeypointDataset(X_test, y_test)

# Only the training batches are shuffled; the test loader keeps dataset order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 8. Define a simple dense neural network
class PashtoSignClassifier(nn.Module):
    """Fully connected classifier: input_size -> 128 -> 64 -> num_classes.

    Each hidden layer is followed by ReLU and dropout (0.3, then 0.2). The
    last layer returns raw class scores with no softmax applied.
    """

    # NOTE: the num_classes default is evaluated once, when this class
    # statement runs, from whatever y_encoded holds at that moment.
    def __init__(self, input_size=42, num_classes=len(np.unique(y_encoded))):
        super().__init__()
        layers = [
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, num_classes),
        ]
        # Unpacking keeps the same positional indices inside the Sequential,
        # so parameter names in the state dict are unchanged.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the class scores."""
        logits = self.model(x)
        return logits

# Instantiate the network with its default sizes, plus the loss and optimizer
model = PashtoSignClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [5]:
# 9. Train the model
epochs = 100
for epoch in range(epochs):
    model.train()  # training mode: the Dropout layers are active

    # Per-epoch accumulators. total_loss is the SUM of the per-batch losses,
    # not their mean, which is why the printed value scales with batch count.
    total_loss = 0
    correct = 0

    for batch_X, batch_y in train_loader:
        # Standard optimisation step: clear grads, forward, backward, update
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Predicted class = index of the highest score in each row
        preds = outputs.argmax(dim=1)
        correct += preds.eq(batch_y).sum().item()

    # Fraction of training samples predicted correctly during this epoch
    acc = correct / len(train_dataset)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss:.4f} - Accuracy: {acc:.4f}")

# 10. Save the trained model (parameters only, via the state dict)
torch.save(model.state_dict(), "gesture_classifier.pth")
print("✅ Model saved as gesture_classifier.pth")

Epoch 1/100 - Loss: 453.2146 - Accuracy: 0.3900
Epoch 2/100 - Loss: 188.3826 - Accuracy: 0.7048
Epoch 3/100 - Loss: 139.2650 - Accuracy: 0.7757
Epoch 4/100 - Loss: 115.2219 - Accuracy: 0.8171
Epoch 5/100 - Loss: 102.6006 - Accuracy: 0.8425
Epoch 6/100 - Loss: 94.5724 - Accuracy: 0.8513
Epoch 7/100 - Loss: 86.2952 - Accuracy: 0.8605
Epoch 8/100 - Loss: 79.8466 - Accuracy: 0.8741
Epoch 9/100 - Loss: 76.3992 - Accuracy: 0.8798
Epoch 10/100 - Loss: 71.3558 - Accuracy: 0.8852
Epoch 11/100 - Loss: 67.0358 - Accuracy: 0.8916
Epoch 12/100 - Loss: 64.7149 - Accuracy: 0.9006
Epoch 13/100 - Loss: 60.8593 - Accuracy: 0.9016
Epoch 14/100 - Loss: 58.4216 - Accuracy: 0.9075
Epoch 15/100 - Loss: 56.0912 - Accuracy: 0.9104
Epoch 16/100 - Loss: 53.9604 - Accuracy: 0.9145
Epoch 17/100 - Loss: 52.4011 - Accuracy: 0.9155
Epoch 18/100 - Loss: 51.7641 - Accuracy: 0.9193
Epoch 19/100 - Loss: 50.4821 - Accuracy: 0.9183
Epoch 20/100 - Loss: 48.2360 - Accuracy: 0.9204
Epoch 21/100 - Loss: 46.8544 - Accuracy: 0.9

In [6]:
# Restore the weights written by the training cell and switch to inference
# mode so the Dropout layers are disabled.
model.load_state_dict(torch.load("gesture_classifier.pth"))
model.eval()

# Evaluate on the test set
total = 0
correct = 0
with torch.no_grad():  # no gradients are needed for scoring
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X)
        # Predicted class = index of the highest score in each row
        preds = outputs.argmax(dim=1)
        correct += preds.eq(batch_y).sum().item()
        total += batch_y.shape[0]

test_accuracy = correct / total
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.9777
