In [7]:
import pickle
import numpy as np
from collections import Counter
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [8]:
# Load the pickled keypoint dataset. The context manager guarantees the file
# handle is closed even if unpickling fails.
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load dataset files that come from a trusted source.
with open('../asl_keypoints_dataset.pickle', 'rb') as dataset_file:
    data_dict = pickle.load(dataset_file)

# The pickle is expected to hold a dict with 'data' (feature rows) and
# 'labels' (one label per row).
data = np.asarray(data_dict['data'])
labels = np.asarray(data_dict['labels'])

print("Dataset loaded!")
print("Shape of data:", data.shape)
print("Number of labels:", len(labels))

# Fail loudly on a corrupt dataset: the index-based filtering below would
# otherwise silently pair rows with the wrong labels (or drop trailing rows).
if len(data) != len(labels):
    raise ValueError(
        f"data/labels length mismatch: {len(data)} samples vs {len(labels)} labels"
    )

# Count samples per class
label_counts = Counter(labels)

# Keep only classes with >= 2 samples: a stratified train/test split cannot
# place a class with a single sample on both sides of the split.
valid_indices = [i for i, label in enumerate(labels) if label_counts[label] >= 2]
filtered_data = data[valid_indices]
filtered_labels = labels[valid_indices]

print("Filtered dataset shape:", filtered_data.shape)
print("Number of labels after filtering:", len(filtered_labels))

Dataset loaded!
Shape of data: (73283, 42)
Number of labels: 73283
Filtered dataset shape: (73283, 42)
Number of labels after filtering: 73283


In [9]:
# Stratified 80/20 hold-out split: stratify keeps the per-class proportions
# the same in the training and test partitions.
# random_state is fixed so that Restart & Run All reproduces the same split
# (and therefore the same reported accuracy) on every run.
x_train, x_test, y_train, y_test = train_test_split(
    filtered_data,
    filtered_labels,
    test_size=0.2,
    shuffle=True,
    stratify=filtered_labels,
    random_state=42,
)

print(f"Training samples: {x_train.shape[0]}")
print(f"Testing samples: {x_test.shape[0]}")

Training samples: 58626
Testing samples: 14657


In [10]:
# Random forests are stochastic (bootstrap sampling and random feature
# selection); a fixed random_state makes the fitted model, and the metrics
# computed from it, reproducible across runs.
model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)
print("Model training complete!")

Model training complete!


In [11]:
# Evaluate on the held-out test partition.
y_predict = model.predict(x_test)
# accuracy_score's documented signature is (y_true, y_pred). Accuracy happens
# to be symmetric, but passing the arguments in the documented order keeps this
# cell correct if it is later adapted to an asymmetric metric
# (precision, recall, confusion matrix, ...).
score = accuracy_score(y_test, y_predict)
print(f"{score * 100:.2f}% of samples were classified correctly!")

98.62% of samples were classified correctly!


In [12]:
# Persist the fitted classifier. It is wrapped in a dict under the 'model'
# key, so whatever loads this file must read it back via that key.
payload = {'model': model}
with open('../model.p', 'wb') as model_file:
    pickle.dump(payload, model_file)

print("Model saved to model.p")

Model saved to model.p
