Import Libraries and Load Data

In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Read the semicolon-separated dataset into a DataFrame
csv_path = 'combined_data.csv'  # point this at your own CSV file if it lives elsewhere
data = pd.read_csv(csv_path, sep=';')

# Preview the leading rows as a sanity check on the parse
print(data.head())


  Thumb Curl    Thumb Direction Index Curl   Index Direction Middle Curl  \
0  Half Curl  Diagonal Up Right  Full Curl  Diagonal Up Left   Full Curl   
1  Half Curl  Diagonal Up Right  Full Curl  Diagonal Up Left   Full Curl   
2  Half Curl  Diagonal Up Right  Full Curl  Diagonal Up Left   Full Curl   
3  Half Curl  Diagonal Up Right  Full Curl  Diagonal Up Left   Full Curl   
4  Half Curl  Diagonal Up Right  Full Curl  Diagonal Up Left   Full Curl   

  Middle Direction  Ring Curl Ring Direction Pinky Curl Pinky Direction Label  
0      Vertical Up  Full Curl    Vertical Up    No Curl     Vertical Up     I  
1      Vertical Up  Full Curl    Vertical Up    No Curl     Vertical Up     I  
2      Vertical Up  Full Curl    Vertical Up    No Curl     Vertical Up     I  
3      Vertical Up  Full Curl    Vertical Up    No Curl     Vertical Up     I  
4      Vertical Up  Full Curl    Vertical Up    No Curl     Vertical Up     I  


Encode Categorical Features

In [45]:
# Fit one LabelEncoder per feature column and keep every fitted encoder, so the
# exact same string -> integer mapping can be re-applied to new samples at
# prediction time (a single shared encoder is overwritten by each new fit).
feature_columns = data.columns[:-1]  # every column except the trailing 'Label'
feature_encoders = {}
for column in feature_columns:
    encoder = LabelEncoder()
    # Cast to str first so every value in the column is encoded as text
    data[column] = encoder.fit_transform(data[column].astype(str))
    feature_encoders[column] = encoder

# Dedicated encoder for the target. The name `le` is kept because the
# evaluation cell uses it to map class indices back to the label strings.
le = LabelEncoder()
data['Label'] = le.fit_transform(data['Label'].astype(str))

# NOTE: this cell overwrites `data` in place. Reload the CSV before re-running
# it, otherwise the integer codes themselves get re-encoded as strings.

# Display the first few rows to verify the encoding
print(data.head())

   Thumb Curl  Thumb Direction  Index Curl  Index Direction  Middle Curl  \
0           0                2           0                0            0   
1           0                2           0                0            0   
2           0                2           0                0            0   
3           0                2           0                0            0   
4           0                2           0                0            0   

   Middle Direction  Ring Curl  Ring Direction  Pinky Curl  Pinky Direction  \
0                 4          0               4           2                4   
1                 4          0               4           2                4   
2                 4          0               4           2                4   
3                 4          0               4           2                4   
4                 4          0               4           2                4   

   Label  
0      8  
1      8  
2      8  
3      8  
4      8  


Split Data into Training and Testing Sets

In [46]:
# Separate the target column from the predictor columns
y = data['Label']      # target: the encoded class label
X = data.iloc[:, :-1]  # predictors: everything before the last column

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report how many rows/columns landed in each partition
print(f'Training data shape: {X_train.shape}')
print(f'Testing data shape: {X_test.shape}')


Training data shape: (3998, 10)
Testing data shape: (1000, 10)


Train the Random Forest Classifier

In [47]:
# Fit a 100-tree random forest; the fixed seed keeps the run repeatable
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Score the held-out split
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Per-class precision / recall / F1.
# `labels` is passed explicitly so the report rows stay aligned with
# `target_names` even when a rare class is absent from the test split;
# without it scikit-learn raises on the length mismatch. `le.classes_` is
# already the ordered list of original label strings (index i -> class i).
class_indices = list(range(len(le.classes_)))
print(
    classification_report(
        y_test,
        y_pred,
        labels=class_indices,
        target_names=le.classes_,
        zero_division=0,  # report 0 (without a warning) for classes with no samples
    )
)


Accuracy: 0.94
              precision    recall  f1-score   support

           A       0.98      0.93      0.95        56
           B       1.00      1.00      1.00         5
           C       0.87      1.00      0.93        27
           D       1.00      1.00      1.00        36
           E       1.00      1.00      1.00        50
           F       1.00      1.00      1.00        28
           G       0.97      1.00      0.98        31
           H       1.00      1.00      1.00        39
           I       1.00      1.00      1.00        49
           K       0.90      0.98      0.94        58
           L       1.00      1.00      1.00        36
           M       0.71      1.00      0.83        29
           N       0.98      0.79      0.88        71
           O       1.00      0.92      0.96        48
           P       1.00      1.00      1.00        31
           Q       1.00      0.98      0.99        59
           R       0.68      0.85      0.76        60
           S

Save the Model

In [48]:
import joblib

# Single source of truth for the output file, so the path written to disk and
# the path reported to the user cannot drift apart.
model_path = 'asl_rf_model.pkl'

# Persist the fitted classifier to disk
joblib.dump(rf_model, model_path)

print(f"Model saved to '{model_path}'")

Model saved to 'asl_rf_model.pkl'
