# Gesture Classification using Random Forest Classifier (Integral)

## Model Training

In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
import pickle

In [2]:
# Define a function to load all the CSVs
def load_csv_to_lists(gesture_folder_path: str, labels=range(1, 6)) -> tuple:

    """
    Load every gesture CSV into a list of DataFrames and add a relative-time feature.

    Each CSV must contain a "Timestamp" column. It is replaced by "relative_time",
    the offset of every row from the first row of the same file, because the
    absolute timestamp is not a meaningful feature.

    Parameters:
        - gesture_folder_path (str): Path to the folder containing one subfolder per person (class),
          named after the label (e.g. "Gesture/1"), each containing CSV files.
        - labels (iterable): Labels / subfolder names to load. Defaults to persons 1 to 5.

    Returns:
        - X (list): List of DataFrames, one per CSV file
        - y (list): List of labels, one per DataFrame in X (same order)

    Raises:
        - FileNotFoundError: if one of the label subfolders does not exist
        - KeyError: if a CSV file has no "Timestamp" column
    """

    X = []
    y = []
    for label in labels:
        # generate path of the label subfolder, eg "Gesture/1"
        folder_path = os.path.join(gesture_folder_path, str(label))
        # os.listdir() returns entries in arbitrary order; sorting keeps the sample
        # order (and therefore any seeded train/test split) identical across runs and machines.
        for file_name in sorted(os.listdir(folder_path)):
            # ignore .DS_Store file or other hidden file
            if file_name.startswith('.'):
                continue
            df = pd.read_csv(os.path.join(folder_path, file_name))
            timestamps = df['Timestamp']
            if len(timestamps):
                # Vectorized offset from the first row (positional, so it does not depend on index labels)
                df['relative_time'] = timestamps - timestamps.iloc[0]
            else:
                # Header-only file: keep an (empty) relative_time column so the schema stays uniform
                df['relative_time'] = timestamps
            # Drop the absolute "Timestamp" column so it is not used as a feature
            df = df.drop(columns=['Timestamp'])
            X.append(df)
            y.append(label)

    return X, y

In [3]:
# Root folder of the training data, relative to the notebook's working directory.
# load_csv_to_lists expects it to contain one sub-folder per label ("1" .. "5").
gesture_folder_path = "Gesture" 

# X: list of DataFrames (one per CSV file); y: the integer folder label of each DataFrame
X, y = load_csv_to_lists(gesture_folder_path)

In [4]:
# Number of readings loaded (one DataFrame per CSV file, across all label folders)
len(X)

500

In [5]:
# Peek at the first rows of the first reading: the sensor columns plus the derived relative_time
X[0].head()

Unnamed: 0,AccelX,AccelY,AccelZ,GyroX,GyroY,GyroZ,relative_time
0,4.419696,7.029675,5.210082,3.634725,-5.124854,-0.266185,0
1,4.419696,7.029675,5.210082,3.634725,-5.124854,-0.266185,2528035
2,4.419696,7.029675,5.210082,3.634725,-5.124854,-0.266185,4522535
3,4.419696,7.029675,5.210082,3.634725,-5.124854,-0.266185,6522868
4,4.419696,7.029675,5.210082,3.634725,-5.124854,-0.266185,8523118


In [6]:
# Row count of the first reading (readings are not all the same length, hence the trimming step further down)
len(X[0])

2868

In [7]:
# Row count of the second reading, for comparison with the first
len(X[1])

2842

In [8]:
# Mean number of rows per reading across all loaded DataFrames
row_counts = [len(frame) for frame in X]
total_length = sum(row_counts)
average_length = total_length / len(row_counts)

print(average_length)

2859.882


In [9]:
# Row count of the shortest reading in X; no reading can contribute more rows than this to a common window
min_length = len(min(X, key=len))
print(min_length)

2719


In [10]:
# Trim every reading to the same fixed window of rows so that all flattened
# feature vectors end up with identical length.
TRIM_START = 10     # leading rows discarded from every reading
WINDOW_ROWS = 2700  # rows kept per reading, starting at TRIM_START

# Fail here with a clear message instead of producing ragged feature vectors
# that would only blow up later, inside the classifier.
assert all(len(df) >= TRIM_START + WINDOW_ROWS for df in X), (
    f"every reading needs at least {TRIM_START + WINDOW_ROWS} rows"
)

standardized_X = [df.iloc[TRIM_START:TRIM_START + WINDOW_ROWS, :] for df in X]

len(standardized_X[0])

2700

In [11]:
# Turn each (rows x columns) reading into one flat, row-major feature vector (a plain Python list)
flattened_X = [frame.to_numpy().ravel().tolist() for frame in standardized_X]

In [12]:
# First 7 values of the first feature vector; going by the columns shown by X[0].head(),
# that is one full row: 6 sensor values followed by relative_time
flattened_X[0][:7]

[5.3800707, 7.266701, 5.0329113, 3.6347246, -5.124854, -0.26618454, 20523962.0]

In [13]:
# Split data into training and testing sets (20% test, 80% train).
# random_state is fixed so the same input order yields the same split.
# NOTE(review): the split is not stratified (no stratify=y), so per-class counts in the
# test set can be uneven; consider stratify=y if balanced per-class evaluation matters.
X_train, X_test, y_train, y_test = train_test_split(flattened_X, y, test_size=0.2, random_state=42)

In [14]:
# Initialize Random Forest Classifier: 100 trees, fixed seed so training is repeatable
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model: each sample is one flattened reading, each target the label of its source folder
clf.fit(X_train, y_train)

In [15]:
# Predict a label for every held-out test reading; y_pred feeds all metrics computed below
y_pred = clf.predict(X_test)

## Evaluate the Model

### Confusion Matrix

In [16]:
# scikit-learn convention: rows are true labels, columns are predicted labels, in sorted label order
confusion_mat = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", confusion_mat)

Confusion Matrix:
 [[27  0  0  1  0]
 [ 0 14  0  0  0]
 [ 0  0 10  0  0]
 [ 0  0  0 24  0]
 [ 0  0  0  0 24]]


### Accuracy

In [17]:
# Fraction of test readings whose predicted label equals the true label
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.99


### Precision

In [18]:
# Per-class precision (average=None), then the unweighted mean over classes (macro)
precision_none, precision_macro = (
    precision_score(y_test, y_pred, average=mode) for mode in (None, 'macro')
)
print("Precision (None):", precision_none)
print("Precision (macro):", precision_macro)

Precision (None): [1.   1.   1.   0.96 1.  ]
Precision (macro): 0.992


### Recall

In [19]:
# Per-class recall (average=None), then the unweighted mean over classes (macro)
recall_none, recall_macro = (
    recall_score(y_test, y_pred, average=mode) for mode in (None, 'macro')
)
print("Recall (None):", recall_none)
print("Recall (macro):", recall_macro)

Recall (None): [0.96428571 1.         1.         1.         1.        ]
Recall (macro): 0.9928571428571429


### F1 Score

In [20]:
# Per-class F1 (average=None), then the unweighted mean over classes (macro)
f1_none, f1_macro = (
    f1_score(y_test, y_pred, average=mode) for mode in (None, "macro")
)
print("F1 Score (None):", f1_none)
print("F1 Score (macro):", f1_macro)

F1 Score (None): [0.98181818 1.         1.         0.97959184 1.        ]
F1 Score (macro): 0.9922820037105751


## Prediction Function

In [21]:
def predict_new_gesture(csv_file_path: str, clf: object, start_row: int = 10, n_rows: int = 2700) -> int:

    """
    Read one CSV file of IMU sensor readings and predict which user produced it.

    The file is preprocessed the same way as the training data: "Timestamp" is
    replaced by "relative_time" (offset from the first row), a fixed window of
    rows is kept, and the window is flattened row by row into a single feature
    vector. One label is predicted for the whole file, not one per row.

    Parameters:
        - csv_file_path (str): Path to the CSV containing the IMU sensor readings
        - clf (object): Trained classifier exposing predict(); it must have been
          trained on feature vectors built with the same start_row / n_rows window
        - start_row (int): Number of leading rows to discard (default 10, as in training)
        - n_rows (int): Number of rows to keep after start_row (default 2700, as in training)

    Returns:
        - result (int): predicted user

    Raises:
        - ValueError: if the file has fewer than start_row + n_rows rows
        - KeyError: if the file has no "Timestamp" column
    """
    df = pd.read_csv(csv_file_path)

    # Check the length up front: a short file would otherwise yield a feature vector
    # of the wrong size and fail inside the classifier with a less helpful message.
    required_rows = start_row + n_rows
    if len(df) < required_rows:
        raise ValueError(
            f"{csv_file_path} has {len(df)} rows, but at least {required_rows} are required"
        )

    # Relative time of each row compared to the 1st row of the file
    # (vectorized and positional, so it does not depend on index labels)
    timestamps = df['Timestamp']
    df['relative_time'] = timestamps - timestamps.iloc[0] if len(timestamps) else timestamps
    df = df.drop(columns=['Timestamp'])  # absolute time is not a feature

    # Standardize the length of the dataframe, then flatten it row by row
    window = df.iloc[start_row:required_rows, :]
    flattened_list = window.to_numpy().flatten().tolist()

    # predict() expects a 2-D input: a list holding the single sample
    result_list = clf.predict([flattened_list])
    return result_list[0]

## Test Cases

In [22]:
# Expected output: 2
# NOTE(review): this file sits under the same "Gesture" folder that was loaded for training,
# so it may have been part of the training split; treat this as a smoke test of the
# prediction function rather than evidence of generalization.
print(predict_new_gesture("Gesture/2/1677688805233.csv", clf))

2


In [23]:
# Expected output: 3
# NOTE(review): the file comes from the "Gesture" training folder, so it may have been
# seen during training; this checks the prediction pipeline, not generalization.
print(predict_new_gesture("Gesture/3/1677689464170.csv", clf))

3


In [24]:
# Expected output: 5
# NOTE(review): the file comes from the "Gesture" training folder, so it may have been
# seen during training; this checks the prediction pipeline, not generalization.
print(predict_new_gesture("Gesture/5/1678081679872.csv", clf))

5


## Export the Model

In [25]:
# Save the trained model to a file in the current working directory (binary pickle format).
# NOTE(review): pickled estimators are presumably only safe to reload with the same
# scikit-learn version that wrote them; confirm the version used by the consumer.
filename = 'gesture_random_forest_classifier_model_integral.pkl'
with open(filename, 'wb') as file:
    pickle.dump(clf, file)

In [26]:
# Load the trained model from the file; this rebinds clf to the unpickled copy.
# SECURITY: pickle.load can execute arbitrary code while loading, so only open
# model files that you created yourself or otherwise trust.
filename = 'gesture_random_forest_classifier_model_integral.pkl'
with open(filename, 'rb') as file:
    clf = pickle.load(file)