In [None]:
#Import all the necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder

import pandas as pd
import numpy as np
import joblib

In [None]:
# Load the preprocessed dataset from the CSV file
df = pd.read_csv('preprocessed_dataset.csv')
# y holds the target: the values of the 'label' column
y = df['label'].values
# X holds the features: every column except 'label'
X = df.drop(columns='label').values

In [None]:
# Split the data set into training and testing sets (80% / 20%).
# stratify=y keeps the class proportions the same in both splits.
# random_state pins the shuffle so that Restart & Run All reproduces the same
# split, which keeps the scores reported below comparable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [None]:
# Standardize the features; the scaler is fitted on the training split only
scaler = StandardScaler().fit(X_train)
# Apply the same fitted scaling to both splits
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Use PCA to reduce the dimensionality of the data set.
# The scaled features are projected onto 32 principal components; the projection
# is fitted on the training split only and then applied unchanged to the test split.
# random_state makes the decomposition repeatable when scikit-learn selects a
# randomized/arpack SVD solver (it is ignored by the exact solvers).
pca = PCA(n_components=32, random_state=42)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [None]:
# Encode the string labels as integers; the encoder is fitted on the training labels
le = LabelEncoder().fit(y_train)
y_train_encoded = le.transform(y_train)

In [None]:
# Grid search (5-fold cross-validation, scored by accuracy) to determine the
# best Random Forest parameters
rf_params = {
    'n_estimators': [50, 100, 200],          # number of trees in the forest
    'max_depth': [None, 10, 20],             # max depth of the tree
    'min_samples_split': [2, 5, 10]          # minimum number of samples required to split a node
}

# random_state seeds the forests so the selected parameters are repeatable;
# n_jobs=-1 runs the candidate fits on all available CPU cores (same as the SVM search).
rf_model = GridSearchCV(
    RandomForestClassifier(random_state=42),
    rf_params,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)
rf_model.fit(X_train_pca, y_train_encoded)
print("Best RF params:", rf_model.best_params_)

Best RF params: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}


In [None]:
# Train the best Random Forest classifier.
# The parameters are read from the grid search result instead of being copied by
# hand, so this cell cannot drift from what the search actually selected.
# random_state makes the fitted forest repeatable.
clf = RandomForestClassifier(**rf_model.best_params_, random_state=42)
clf.fit(X_train_pca, y_train_encoded)

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [None]:
# Evaluate the Random Forest model on the test split
y_pred = clf.predict(X_test_pca)
# Map the predicted integers back to their string labels
y_pred_labels = le.inverse_transform(y_pred)

rf_accuracy = accuracy_score(y_test, y_pred_labels)
rf_report = classification_report(y_test, y_pred_labels)
print("Accuracy:", rf_accuracy)
print(rf_report)

Accuracy: 0.7346837944664032
              precision    recall  f1-score   support

        away       0.62      0.92      0.74       686
sitting down       0.81      0.59      0.68       208
  sitting up       0.74      0.56      0.64       204
    spinning       0.88      0.79      0.83       473
     towards       0.89      0.54      0.67       453

    accuracy                           0.73      2024
   macro avg       0.79      0.68      0.71      2024
weighted avg       0.77      0.73      0.73      2024



In [None]:
# Grid search for the SVM classifier to determine the best parameters
svm_params = dict(
    # Regularization parameter: the strength of the regularization is inversely
    # proportional to C, so a larger C means weaker regularization
    C=[0.1, 1, 10],
    # Type of kernel the algorithm uses to build the decision boundary
    kernel=['linear', 'rbf'],
    # Kernel coefficient; it affects the rbf kernel and is not used by the linear one
    gamma=['scale'],
)

# n_jobs=-1 speeds up the search by running the fits on all available CPU cores
svm_model = GridSearchCV(
    estimator=SVC(),
    param_grid=svm_params,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
)
svm_model.fit(X_train_pca, y_train_encoded)
print("Best SVM params:", svm_model.best_params_)

Best SVM params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [None]:
# Train the final SVM with the parameters reported by the grid search
best_svm_params = {'C': 10, 'kernel': 'rbf', 'gamma': 'scale'}
svm_model = SVC(**best_svm_params)
svm_model.fit(X_train_pca, y_train_encoded)

0,1,2
,C,10
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [None]:
# Evaluate the SVM model on the test split
y_pred = svm_model.predict(X_test_pca)
# Map the predicted integers back to their string labels
y_pred_labels = le.inverse_transform(y_pred)

svm_accuracy = accuracy_score(y_test, y_pred_labels)
svm_report = classification_report(y_test, y_pred_labels)
print("Accuracy:", svm_accuracy)
print(svm_report)

Accuracy: 0.6151185770750988
              precision    recall  f1-score   support

        away       0.49      0.93      0.64       686
sitting down       0.77      0.32      0.45       208
  sitting up       0.82      0.31      0.45       204
    spinning       0.91      0.67      0.77       473
     towards       0.78      0.36      0.49       453

    accuracy                           0.62      2024
   macro avg       0.75      0.52      0.56      2024
weighted avg       0.71      0.62      0.60      2024



In [None]:
# Grid search (5-fold cross-validation, scored by accuracy) to determine the
# best KNN parameters
knn_params = {
    # The range of neighbors was determined by the neighbor sweeps later in the notebook
    'n_neighbors': [1, 2, 3, 4, 5],          # number of neighbors
    'weights': ['uniform', 'distance'],      # weights for the neighbors
    # Distance metric to use. 'minkowski' is left out on purpose: with the default
    # p=2 it is the same distance as 'euclidean', so it only repeated identical fits.
    'metric': ['euclidean', 'manhattan']
}

# n_jobs=-1 runs the candidate fits on all available CPU cores (same as the SVM search)
knn_model = GridSearchCV(
    KNeighborsClassifier(),
    knn_params,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)
knn_model.fit(X_train_pca, y_train_encoded)
print("Best KNN params:", knn_model.best_params_)

Best KNN params: {'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}


In [None]:
# Train the final KNN model with the parameters reported by the grid search
best_knn_params = {'n_neighbors': 1, 'metric': 'manhattan', 'weights': 'uniform'}
knn_model = KNeighborsClassifier(**best_knn_params)
knn_model.fit(X_train_pca, y_train_encoded)

0,1,2
,n_neighbors,1
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'manhattan'
,metric_params,
,n_jobs,


In [None]:
# Evaluate the KNN model on the test split
y_pred = knn_model.predict(X_test_pca)
# Map the predicted integers back to their string labels
y_pred_labels = le.inverse_transform(y_pred)

knn_accuracy = accuracy_score(y_test, y_pred_labels)
knn_report = classification_report(y_test, y_pred_labels)
print("Accuracy:", knn_accuracy)
print(knn_report)

Accuracy: 0.7371541501976284
              precision    recall  f1-score   support

        away       0.75      0.77      0.76       686
sitting down       0.65      0.71      0.68       208
  sitting up       0.56      0.69      0.62       204
    spinning       0.84      0.85      0.85       473
     towards       0.74      0.61      0.67       453

    accuracy                           0.74      2024
   macro avg       0.71      0.72      0.71      2024
weighted avg       0.74      0.74      0.74      2024



In [None]:
# Test a coarse range of neighbor counts for the KNN model (default metric and weights).
# The loop uses its own variable names so it does not overwrite `knn_model`, the
# tuned classifier that is saved to 'pose_knn.pkl' at the end of the notebook.
# NOTE(review): every candidate is scored on the test split, so these numbers
# should not be used as the final, unbiased estimate of the chosen model.
n_neighbors = [3, 5, 10, 15, 20, 25, 50, 100]

for n in n_neighbors:
    knn_candidate = KNeighborsClassifier(n_neighbors=n)
    # Fitted on the original labels so the predictions compare directly with y_test
    knn_candidate.fit(X_train_pca, y_train)
    candidate_pred = knn_candidate.predict(X_test_pca)
    print(n)
    print("Accuracy:", accuracy_score(y_test, candidate_pred))
    print(classification_report(y_test, candidate_pred))

# The result shows that above 10 neighbors the accuracy starts to drop

3
Accuracy: 0.7233201581027668
              precision    recall  f1-score   support

        away       0.69      0.84      0.76       686
sitting down       0.61      0.69      0.65       208
  sitting up       0.58      0.55      0.56       204
    spinning       0.88      0.77      0.82       473
     towards       0.78      0.59      0.68       453

    accuracy                           0.72      2024
   macro avg       0.71      0.69      0.69      2024
weighted avg       0.73      0.72      0.72      2024

5
Accuracy: 0.716897233201581
              precision    recall  f1-score   support

        away       0.70      0.83      0.76       686
sitting down       0.61      0.74      0.67       208
  sitting up       0.56      0.58      0.57       204
    spinning       0.84      0.76      0.80       473
     towards       0.80      0.55      0.65       453

    accuracy                           0.72      2024
   macro avg       0.70      0.69      0.69      2024
weighted avg    

In [None]:
# Second neighbor sweep, restricted to the more promising range 1-10
# (default metric and weights).
# The loop uses its own variable names so it does not overwrite `knn_model`, the
# tuned classifier that is saved to 'pose_knn.pkl' at the end of the notebook.
n_neighbors = [1,2,3,4,5,6,7,8,9,10]

for n in n_neighbors:
    knn_candidate = KNeighborsClassifier(n_neighbors=n)
    # Fitted on the original labels so the predictions compare directly with y_test
    knn_candidate.fit(X_train_pca, y_train)
    candidate_pred = knn_candidate.predict(X_test_pca)
    print(n)
    print("Accuracy:", accuracy_score(y_test, candidate_pred))
    print(classification_report(y_test, candidate_pred))

# The result shows the best number of neighbors is 4 or 5
# NOTE(review): the KNN grid search reported n_neighbors=1 as best - reconcile
# that with this conclusion.

1
Accuracy: 0.7440711462450593
              precision    recall  f1-score   support

        away       0.78      0.79      0.78       686
sitting down       0.63      0.68      0.66       208
  sitting up       0.55      0.60      0.58       204
    spinning       0.83      0.83      0.83       473
     towards       0.75      0.68      0.71       453

    accuracy                           0.74      2024
   macro avg       0.71      0.72      0.71      2024
weighted avg       0.75      0.74      0.74      2024

2
Accuracy: 0.7035573122529645
              precision    recall  f1-score   support

        away       0.65      0.89      0.75       686
sitting down       0.61      0.75      0.67       208
  sitting up       0.54      0.52      0.53       204
    spinning       0.87      0.73      0.80       473
     towards       0.89      0.45      0.60       453

    accuracy                           0.70      2024
   macro avg       0.71      0.67      0.67      2024
weighted avg   

In [None]:
# Persist the fitted scaler, PCA, the three classifiers and the label encoder.
# Each entry maps the output file name to the object written to it.
artifacts = {
    'scaler.pkl': scaler,
    'pca.pkl': pca,
    'pose_random.pkl': clf,
    'pose_svm.pkl': svm_model,
    'pose_knn.pkl': knn_model,
    'label_encoder.pkl': le,
}

for filename, fitted_object in artifacts.items():
    written = joblib.dump(fitted_object, filename)

# Show the return value of the last dump as the cell output
written

['label_encoder.pkl']