**Importing Libraries**

In [None]:
# Install gdown library
!pip install gdown

# For data loading and analysis
import pandas as pd
import numpy as np

# For downloading and unzipping
import gdown

# For model training
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# For saving the model
import joblib
from google.colab import files



**Downloading the dataset for 2D mapping**

In [None]:
# Google Drive location of the hand-keypoints CSV and the local file name it
# is saved under. `csv_file_name` is the single source of truth for the name;
# `output` is kept as an alias so any cell that refers to it still works.
url = 'https://drive.google.com/uc?id=10gaNXaGy25fZMiRUlHILw5Kzi0g0Tw0Z'
csv_file_name = 'hand_keypoints.csv'
output = csv_file_name

# Fetch the file. gdown returns the output path on success; depending on the
# installed version a failed download may return None instead of raising
# (version-dependent -- confirm against the gdown release in use), so check
# the result explicitly rather than reporting success unconditionally.
downloaded_path = gdown.download(url, output, quiet=False)
if downloaded_path is None:
    raise RuntimeError(
        f"Download of '{csv_file_name}' failed. "
        "Check the Drive link and its sharing permissions, then re-run this cell."
    )
print("\nData downloaded.")

Downloading...
From: https://drive.google.com/uc?id=10gaNXaGy25fZMiRUlHILw5Kzi0g0Tw0Z
To: /content/hand_keypoints.csv
100%|██████████| 11.0M/11.0M [00:00<00:00, 46.3MB/s]


Data downloaded.





**Filtering out the 'space' class from the CSV**

In [None]:
# Load the downloaded keypoints CSV. A missing file is reported and then
# re-raised: continuing would leave `data` / `df_alphabet` undefined and make
# every later cell fail with a misleading NameError.
try:
    data = pd.read_csv(csv_file_name, sep=',')
except FileNotFoundError:
    print(f"ERROR: Could not find '{csv_file_name}'. Please re-run Block 2.")
    raise

print(f"Successfully loaded '{csv_file_name}'.")
print("First 5 rows of data:\n", data.head())

# Keep every row whose 'folder' label is not 'space'; .copy() gives an
# independent frame so later edits do not touch (or warn about) `data`.
df_alphabet = data[data['folder'] != 'space'].copy()
print(f"Filtered out 'space' class. Working with {len(df_alphabet)} alphabet samples.")

# Only reached when loading and filtering both succeeded.
print("Data is loaded and filtered.")

Successfully loaded 'hand_keypoints.csv'.
First 5 rows of data:
   folder filename        x0        y0        x1        y1        x2        y2  \
0      A    0.jpg  0.195703  0.405481  0.251328  0.378401  0.293216  0.316613   
1      A    1.jpg  0.195529  0.405478  0.250318  0.381913  0.292142  0.316723   
2      A   10.jpg  0.199760  0.439257  0.256980  0.407103  0.297316  0.346919   
3      A  100.jpg  0.434312  0.610718  0.486101  0.579385  0.526760  0.509808   
4      A  101.jpg  0.434486  0.611838  0.485553  0.581089  0.526583  0.511406   

         x3        y3  ...       x16       y16       x17       y17       x18  \
0  0.300587  0.247013  ...  0.202290  0.343694  0.173678  0.279436  0.178582   
1  0.299324  0.245252  ...  0.204136  0.345952  0.173469  0.278215  0.178356   
2  0.303871  0.277043  ...  0.206453  0.376744  0.175420  0.311579  0.179300   
3  0.545438  0.448159  ...  0.443862  0.549600  0.407171  0.465315  0.419606   
4  0.545746  0.448663  ...  0.445705  0.549681  

**Defining Features (X) & Target (y)**

In [None]:
# Target: the class label stored in the 'folder' column.
y = df_alphabet['folder']

# Features: every remaining column once the two non-feature columns
# ('folder' and 'filename') are removed.
X = df_alphabet.drop(columns=['folder', 'filename'])

print("Features (X) and Target (y) are now defined.")

# Report the shapes; the recorded run showed (12950, 42) for X and (12950,) for y.
for shape_label, obj in (("Feature (X)", X), ("Target (y)", y)):
    print(f"{shape_label} shape: {obj.shape}")

# Preview the first five rows of each.
for preview_label, obj in (("X (Features)", X), ("y (Target)", y)):
    print(f"\nFirst 5 rows of {preview_label}:\n {obj.head()}")

print("\nX and y are ready for training.")

Features (X) and Target (y) are now defined.
Feature (X) shape: (12950, 42)
Target (y) shape: (12950,)

First 5 rows of X (Features):
          x0        y0        x1        y1        x2        y2        x3  \
0  0.195703  0.405481  0.251328  0.378401  0.293216  0.316613  0.300587   
1  0.195529  0.405478  0.250318  0.381913  0.292142  0.316723  0.299324   
2  0.199760  0.439257  0.256980  0.407103  0.297316  0.346919  0.303871   
3  0.434312  0.610718  0.486101  0.579385  0.526760  0.509808  0.545438   
4  0.434486  0.611838  0.485553  0.581089  0.526583  0.511406  0.545746   

         y3        x4        y4  ...       x16       y16       x17       y17  \
0  0.247013  0.307915  0.191208  ...  0.202290  0.343694  0.173678  0.279436   
1  0.245252  0.307751  0.190003  ...  0.204136  0.345952  0.173469  0.278215   
2  0.277043  0.311055  0.220881  ...  0.206453  0.376744  0.175420  0.311579   
3  0.448159  0.559467  0.397122  ...  0.443862  0.549600  0.407171  0.465315   
4  0.448663  0

**Training and Testing**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Hold out 20% of the samples for evaluation. `stratify=y` keeps each class's
# share the same in the training and test sets, so no class ends up under- or
# over-represented in the held-out data. (Stratification requires at least two
# samples per class; train_test_split raises ValueError otherwise.)
# NOTE(review): the preview rows for consecutive filenames (e.g. 0.jpg, 1.jpg)
# are nearly identical, so a random row-level split may place near-duplicate
# samples in both sets and make the reported accuracy optimistic -- consider a
# grouped split if the images come from continuous recordings; TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Data split: {len(X_train)} training samples, {len(X_test)} testing samples.")

# 100-tree random forest; the fixed random_state makes training repeatable.
model = RandomForestClassifier(n_estimators=100, random_state=42)

print("Starting model training")
model.fit(X_train, y_train)
print("Training complete!")

# Score the fitted model on the held-out split only.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\n--- Model Accuracy on Test Data: {accuracy * 100:.2f}% ---")

print("\nModel is trained and tested.")

Data split: 10360 training samples, 2590 testing samples.
Starting model training
Training complete!

--- Model Accuracy on Test Data: 99.88% ---

Model is trained and tested.


**Saving the Model**

In [None]:
import joblib
from google.colab import files

# File name of the serialized model written to the current working directory.
model_filename = 'asl_model.joblib'

# Persist the fitted estimator to disk with joblib.
joblib.dump(value=model, filename=model_filename)
print(f"Model saved to file: {model_filename}")

# Hand the saved file to the browser via Colab's download helper.
files.download(model_filename)
print("A download prompt should appear. Please save the file.")

# Trailing blank lines to separate this cell's output from what follows.
print("\n")

Model saved to file: asl_model.joblib


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

A download prompt should appear. Please save the file.


