Train and test data are inside combined_data.csv file. 

In [11]:
import pandas as pd

df = pd.read_csv('./combined_data.csv', delimiter=';')

In [12]:
df_encoded = pd.get_dummies(df, columns=['Thumb Curl', 'Thumb Direction', 'Index Curl', 'Index Direction', 
                                         'Middle Curl', 'Middle Direction', 'Ring Curl', 'Ring Direction', 
                                         'Pinky Curl', 'Pinky Direction'])

In [17]:
from sklearn.model_selection import train_test_split

X = df_encoded.drop('Label', axis=1)
y = df_encoded['Label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [18]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.939
Classification Report:
               precision    recall  f1-score   support

           A       0.98      0.93      0.95        56
           B       1.00      1.00      1.00         5
           C       0.87      1.00      0.93        27
           D       1.00      1.00      1.00        36
           E       1.00      1.00      1.00        50
           F       1.00      1.00      1.00        28
           G       0.97      1.00      0.98        31
           H       1.00      1.00      1.00        39
           I       1.00      1.00      1.00        49
           K       0.90      0.98      0.94        58
           L       1.00      1.00      1.00        36
           M       0.71      1.00      0.83        29
           N       0.98      0.79      0.88        71
           O       1.00      0.92      0.96        48
           P       1.00      1.00      1.00        31
           Q       1.00      0.98      0.99        59
           R       0.68      0.85      0.

In [19]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, 30],
}

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42),
                           param_grid=param_grid,
                           cv=5,
                           n_jobs=-1,
                           verbose=2)

# Fit GridSearchCV
grid_search.fit(X_train, y_train)

# Get the best parameters
print("Best Parameters:", grid_search.best_params_)


Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] END ......................max_depth=10, n_estimators=50; total time=   0.1s
[CV] END ......................max_depth=10, n_estimators=50; total time=   0.1s
[CV] END ......................max_depth=10, n_estimators=50; total time=   0.1s
[CV] END .....................max_depth=10, n_estimators=100; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=200; total time=   0.3s
[CV] END ......................max_depth=10, n_estimators=50; total time=   0.1s
[CV] END ......................max_depth=10, n_estimators=50; total time=   0.1s
[CV] END .....................max_depth=10, n_estimators=100; total time=   0.2s
[CV] END ......................max_depth=20, n_estimators=50; total time=   0.1s
[CV] END .....................max_depth=10, n_estimators=100; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=100; total time=   0.2s
[CV] END ......................max_depth=20, n_es

In [20]:
import joblib

# Save the model
joblib.dump(model, 'random_forest_model.pkl')

# Load the model
loaded_model = joblib.load('random_forest_model.pkl')
