In [1]:
from typing import Tuple
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from joblib import dump, load
import os




In [8]:
# Helper that loads the pose data from disk and splits it into training and test sets
def load_and_split_data(data_folder: str, test_size: float = 0.2, random_state: int = 42) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Load pose data from a specified folder and split it into training and testing sets.

    Each immediate subfolder of ``data_folder`` is treated as one class: the
    subfolder name becomes the label of every ``.npy`` file found directly
    inside it. Files that fail to load, or that do not deserialize to a NumPy
    array, are reported with ``print`` and skipped.

    Directory listings are sorted before use. ``os.listdir`` returns entries in
    arbitrary order, so without sorting the sample order (and therefore the
    split obtained for a given ``random_state``) could differ between machines.

    Parameters:
    - data_folder (str): Path to the folder containing pose data.
    - test_size (float, optional): The proportion of the dataset to include in the test split.
    - random_state (int, optional): Seed used by the random number generator for reproducibility.

    Returns:
    - Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
      (X_train, X_test, y_train, y_test)

    Raises:
    - ValueError: If no valid ``.npy`` sample is found under ``data_folder``.
    """
    data = []
    labels = []

    # Walk the class subfolders in a deterministic (sorted) order
    for folder in sorted(os.listdir(data_folder)):
        folder_path = os.path.join(data_folder, folder)

        # Only directories represent classes; skip stray files at the top level
        if not os.path.isdir(folder_path):
            continue

        for file in sorted(os.listdir(folder_path)):
            # Only NumPy files are treated as pose samples
            if not file.endswith('.npy'):
                continue

            file_path = os.path.join(folder_path, file)
            try:
                # SECURITY NOTE: allow_pickle=True lets np.load unpickle arbitrary
                # objects, which can execute code. Only point this function at
                # data you trust. It is kept because the files may have been
                # saved as pickled/object arrays.
                pose_data = np.load(file_path, allow_pickle=True)
            except Exception as e:
                # Best-effort loading: report the unreadable file and keep going
                print(f"Error loading file {file_path}: {str(e)}")
                continue

            # With pickling enabled np.load may return a non-array object
            if isinstance(pose_data, np.ndarray):
                data.append(pose_data)
                labels.append(folder)
            else:
                print(f"Invalid numpy array in file: {file_path}")

    # Fail early with a clear message instead of letting train_test_split
    # complain about an empty input
    if not data:
        raise ValueError(f"No valid .npy pose files found under {data_folder!r}")

    # Convert lists to numpy arrays
    X = np.array(data)
    y = np.array(labels)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    return X_train, X_test, y_train, y_test


In [3]:
# Load the pose dataset and split it into train and test sets.
# The dataset location can be overridden with the POSE_DATA_DIR environment
# variable so the notebook also runs on machines other than the author's;
# when the variable is unset the original path is used.
DATA_FOLDER = os.environ.get('POSE_DATA_DIR', '/Users/kecichilala/Desktop/actions')
X_train, X_test, y_train, y_test = load_and_split_data(DATA_FOLDER, test_size=0.2, random_state=42)


In [4]:
# Echo the training array as the cell output to inspect the loaded pose data
X_train

array([[[ 0.01727152, -0.64834183, -0.16870694,  0.99104768],
        [ 0.02348802, -0.68450934, -0.15438059,  0.98798066],
        [ 0.02408011, -0.68517637, -0.15385909,  0.98862737],
        ...,
        [-0.09192229,  0.75708461,  0.22625235,  0.73771971],
        [ 0.09876573,  0.85107976,  0.03616261,  0.98374546],
        [-0.11502071,  0.84598893,  0.11303246,  0.97059816]],

       [[ 0.09178978, -0.58637238, -0.30197707,  0.99975485],
        [ 0.09928286, -0.62373847, -0.28285247,  0.99952638],
        [ 0.09971283, -0.62438381, -0.28197637,  0.99944645],
        ...,
        [-0.38175559,  0.52911276,  0.08904778,  0.95360029],
        [ 0.43095887,  0.62348497,  0.01781835,  0.9688006 ],
        [-0.46991715,  0.59813875,  0.00688588,  0.98351359]],

       [[-0.0356063 , -0.57199425, -0.37085268,  0.99962008],
        [-0.03796387, -0.60769457, -0.36410475,  0.99891007],
        [-0.03753307, -0.60822988, -0.36324757,  0.99881208],
        ...,
        [ 0.01608677,  0.70

In [5]:
# --- Feature preparation ---
# The SVM takes one flat feature vector per sample, so each sample is reshaped
# into a single row: the first axis (samples) is kept and every remaining axis
# is merged into the feature axis.
X_train_flatten = np.reshape(X_train, (len(X_train), -1))
X_test_flatten = np.reshape(X_test, (len(X_test), -1))

# --- Label preparation ---
# Turn the string class labels into integer codes. The encoder is fitted on the
# training labels and then reused as-is for the test labels.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# --- Training ---
# Linear-kernel support vector classifier; fit() returns the fitted estimator
svm_model = SVC(kernel='linear', C=1.0, random_state=42).fit(X_train_flatten, y_train_encoded)

# --- Prediction ---
# Predict integer codes for the test samples, then map them back to label names
predictions = svm_model.predict(X_test_flatten)
decoded_predictions = label_encoder.inverse_transform(predictions)

# --- Evaluation ---
# Compare the decoded predictions with the original (string) test labels
accuracy = accuracy_score(y_test, decoded_predictions)
report = classification_report(y_test, decoded_predictions)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", report)


Accuracy: 1.00
Classification Report:
                      precision    recall  f1-score   support

        action_down       1.00      1.00      1.00       253
      action_inside       1.00      1.00      1.00       514
         action_new       1.00      1.00      1.00       397
     action_outside       1.00      1.00      1.00       284
action_remove_block       1.00      1.00      1.00       417
action_select_block       1.00      1.00      1.00       519
      action_switch       1.00      1.00      1.00       613
          action_up       1.00      1.00      1.00       577
   block_events_ran       1.00      1.00      1.00       475
   category_control       1.00      1.00      1.00       427
    category_events       1.00      1.00      1.00       373
     category_looks       0.99      1.00      1.00       262
    category_motion       1.00      1.00      1.00       378
     category_sound       1.00      1.00      1.00       399
              dummy       1.00      0.98     

In [6]:
# Evaluate the already-trained svm_model on the held-out split, reusing the
# label_encoder that was fitted on the training labels.

# One flat feature row per test sample, as required by the SVM
X_test_flatten = np.reshape(X_test, (len(X_test), -1))

# Integer codes for the test labels
y_test_encoded = label_encoder.transform(y_test)

# Predict integer codes for the test samples and map them back to label names
test_predictions = svm_model.predict(X_test_flatten)
decoded_test_predictions = label_encoder.inverse_transform(test_predictions)

# Score the decoded predictions against the original (string) test labels
accuracy_test_data = accuracy_score(y_test, decoded_test_predictions)
report_test_data = classification_report(y_test, decoded_test_predictions)

print(f"Accuracy on Test Data: {accuracy_test_data:.2f}")
print("Classification Report on Test Data:\n", report_test_data)


Accuracy on Test Data: 1.00
Classification Report on Test Data:
                      precision    recall  f1-score   support

        action_down       1.00      1.00      1.00       253
      action_inside       1.00      1.00      1.00       514
         action_new       1.00      1.00      1.00       397
     action_outside       1.00      1.00      1.00       284
action_remove_block       1.00      1.00      1.00       417
action_select_block       1.00      1.00      1.00       519
      action_switch       1.00      1.00      1.00       613
          action_up       1.00      1.00      1.00       577
   block_events_ran       1.00      1.00      1.00       475
   category_control       1.00      1.00      1.00       427
    category_events       1.00      1.00      1.00       373
     category_looks       0.99      1.00      1.00       262
    category_motion       1.00      1.00      1.00       378
     category_sound       1.00      1.00      1.00       399
              dummy

In [7]:
# Persist the trained SVM to the current user's Desktop folder
desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
# Not every machine has a Desktop folder (e.g. a headless server); create it so
# dump() cannot fail on a missing directory
os.makedirs(desktop_path, exist_ok=True)
model_filename = os.path.join(desktop_path, 'body_scatch_svm_model.joblib')
dump(svm_model, model_filename)

# The SVM predicts integer codes, so the fitted LabelEncoder is saved alongside
# it; without it a reloaded model's predictions cannot be mapped back to the
# class names
label_encoder_filename = os.path.join(desktop_path, 'body_scatch_label_encoder.joblib')
dump(label_encoder, label_encoder_filename)

# Print the paths to the saved artifacts
print(f"Model saved at: {model_filename}")
print(f"Label encoder saved at: {label_encoder_filename}")


Model saved at: /Users/kecichilala/Desktop/body_scatch_svm_model.joblib
