#Fetch Data From Google Drive

In [None]:
# No longer necessary following location port
#from google.colab import drive
#drive.mount('/content/drive')

Mounted at /content/drive


# Import Necessary Libraries

In [1]:
import glob
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import re
import warnings
warnings.filterwarnings('ignore')

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler

#Load CSVs

In [None]:
import os

# Get the data folder  
notebook_dir = os.path.dirname(os.path.abspath("__file__"))
data_dir = os.path.join(notebook_dir, "..", "Dataset") 

# Normalize the path
data_dir = os.path.abspath(data_dir)

W100_025_df = pd.read_csv(os.path.join(data_dir, "W100_O25_Features.csv"))
W200_025_df = pd.read_csv(os.path.join(data_dir, "W200_O25_Features.csv"))
W300_025_df = pd.read_csv(os.path.join(data_dir, "W300_O25_Features.csv"))
W400_025_df = pd.read_csv(os.path.join(data_dir, "W400_O25_Features.csv"))
W500_025_df = pd.read_csv(os.path.join(data_dir, "W500_O25_Features.csv"))

W100_050_df = pd.read_csv(os.path.join(data_dir, "W100_O50_Features.csv"))
W200_050_df = pd.read_csv(os.path.join(data_dir, "W200_O50_Features.csv"))
W300_050_df = pd.read_csv(os.path.join(data_dir, "W300_O50_Features.csv"))
W400_050_df = pd.read_csv(os.path.join(data_dir, "W400_O50_Features.csv"))
W500_050_df = pd.read_csv(os.path.join(data_dir, "W500_O50_Features.csv"))

# Logistic Regression (W100_025)

Logistic Regression with GridSearch, RandomSearch, and Stratified KFold

In [3]:
# Window sizes and overlaps
window_sizes = [100, 200, 300, 400, 500]
overlaps = [0.25, 0.5]

# Logistic Regression Implementation

target_column = W100_025_df.columns[-1]
X = W100_025_df.drop(columns=[target_column])
y = W100_025_df[target_column]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features (standardize)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Stratified K-Fold Cross-Validation
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Define the Logistic Regression model
lr = LogisticRegression(max_iter=300, multi_class='multinomial', solver='lbfgs')

# Grid Search for hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10],
    'penalty': ['l2'],  # 'l1' not supported by 'lbfgs' solver
    'solver': ['lbfgs', 'saga'],
    'max_iter': [100, 200, 300]
}
grid_search = GridSearchCV(
    lr, param_grid, cv=skf, scoring='accuracy', verbose=1
)

# Fit the model using GridSearchCV
grid_search.fit(X_train, y_train)

# Best model from grid search
best_model = grid_search.best_estimator_

# Predictions and evaluation on the test set
y_pred = best_model.predict(X_test)

# Classification Report
print("GridSearch Classification Report:")
print(classification_report(y_test, y_pred))

# Additional metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Randomized Search for hyperparameter tuning
random_search = RandomizedSearchCV(
    lr, param_distributions=param_grid, n_iter=10, cv=skf, scoring='accuracy', random_state=42, verbose=1
)
random_search.fit(X_train, y_train)

# Best model from Randomized Search
best_random_model = random_search.best_estimator_

# Predictions and evaluation on the test set
y_pred_random = best_random_model.predict(X_test)

# Classification Report for Randomized Search
print("\nRandomizedSearch Classification Report:")
print(classification_report(y_test, y_pred_random))

# Additional metrics for Randomized Search model
accuracy_random = accuracy_score(y_test, y_pred_random)
precision_random = precision_score(y_test, y_pred_random, average='weighted')
recall_random = recall_score(y_test, y_pred_random, average='weighted')
f1_random = f1_score(y_test, y_pred_random, average='weighted')

print(f"Accuracy (Randomized Search): {accuracy_random}")
print(f"Precision (Randomized Search): {precision_random}")
print(f"Recall (Randomized Search): {recall_random}")
print(f"F1 Score (Randomized Search): {f1_random}")

Fitting 3 folds for each of 18 candidates, totalling 54 fits
GridSearch Classification Report:
                                   precision    recall  f1-score   support

          Browsing Using Computer       0.97      0.98      0.98       509
      Moving Around While Sitting       0.95      0.94      0.95       544
     Moving Around While Standing       0.97      0.93      0.95       580
       Moving Chair While Sitting       0.96      0.93      0.94       598
  Moving Head, Body While Sitting       0.96      0.94      0.95       541
   Picking Up Items While Sitting       0.93      0.97      0.95       455
  Picking Up Items While Standing       0.96      0.96      0.96       423
                          Running       0.98      0.94      0.96       388
       Sitting and Reading a Book       0.99      1.00      1.00       602
Sitting and Writing in a Notebook       0.96      0.99      0.97       526
            Stand up From Sitting       0.92      0.95      0.94       491
    

#Decision Trees (W100_O50)

Decision Tree with Stratified K-Fold Cross-Validation and GridSearch + RandomSearch Tuning

In [7]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler


target_column = W100_050_df.columns[-1]
X = W100_050_df.drop(columns=[target_column])
y = W100_050_df[target_column]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features (standardize)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Stratified K-Fold Cross-Validation
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Define the Decision Tree model
dt = DecisionTreeClassifier(random_state=42)

# Grid Search for hyperparameter tuning
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': [None, 'sqrt', 'log2']
}
grid_search = GridSearchCV(
    dt, param_grid, cv=skf, scoring='accuracy', verbose=1
)

# Fit the model using GridSearchCV
grid_search.fit(X_train, y_train)

# Best model from grid search
best_model = grid_search.best_estimator_

# Predictions and evaluation on the test set
y_pred = best_model.predict(X_test)

# Classification Report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Additional metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Randomized Search for hyperparameter tuning
random_search = RandomizedSearchCV(
    dt, param_distributions=param_grid, n_iter=10, cv=skf, scoring='accuracy', n_jobs=-1, random_state=42, verbose=1
)
random_search.fit(X_train, y_train)

# Best model from Randomized Search
best_random_model = random_search.best_estimator_

# Predictions and evaluation on the test set
y_pred_random = best_random_model.predict(X_test)

# Classification Report for Randomized Search
print("\nRandomized Search Classification Report:")
print(classification_report(y_test, y_pred_random))

# Additional metrics for Randomized Search model
accuracy_random = accuracy_score(y_test, y_pred_random)
precision_random = precision_score(y_test, y_pred_random, average='weighted')
recall_random = recall_score(y_test, y_pred_random, average='weighted')
f1_random = f1_score(y_test, y_pred_random, average='weighted')

print(f"Accuracy (Randomized Search): {accuracy_random}")
print(f"Precision (Randomized Search): {precision_random}")
print(f"Recall (Randomized Search): {recall_random}")
print(f"F1 Score (Randomized Search): {f1_random}")

Fitting 3 folds for each of 216 candidates, totalling 648 fits
Classification Report:
                                   precision    recall  f1-score   support

          Browsing Using Computer       1.00      1.00      1.00       817
      Moving Around While Sitting       1.00      1.00      1.00       856
     Moving Around While Standing       1.00      1.00      1.00       845
       Moving Chair While Sitting       1.00      1.00      1.00       847
  Moving Head, Body While Sitting       1.00      1.00      1.00       857
   Picking Up Items While Sitting       1.00      1.00      1.00       686
  Picking Up Items While Standing       1.00      1.00      1.00       631
                          Running       1.00      1.00      1.00       609
       Sitting and Reading a Book       1.00      1.00      1.00       883
Sitting and Writing in a Notebook       1.00      1.00      1.00       786
            Stand up From Sitting       1.00      1.00      1.00       773
             

# Random Forest (W200_O25)

Random Forest with K-Fold Cross-Validation and GridSearch + RandomSearch Tuning


In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, RandomizedSearchCV, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler
from scipy.stats import randint, uniform

target_column = W200_025_df.columns[-1]
X = W200_025_df.drop(columns=[target_column])
y = W200_025_df[target_column]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features (standardize)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# K-Fold Cross-Validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)

# Define the Random Forest model
rf = RandomForestClassifier(random_state=42)

# Reduced Hyperparameter Space for RandomizedSearchCV
param_distributions = {
    'n_estimators': randint(50, 200),  # Reduced range for faster training
    'max_depth': [None, 10, 20, 30],  # Focus on reasonable depth values
    'min_samples_split': randint(2, 10),  # Control tree growth
    'min_samples_leaf': randint(1, 5),  # Avoid extremely small leaf sizes
    'bootstrap': [True]  # Fix bootstrap to simplify search
}

# Hyperparameter tuning with RandomizedSearchCV
random_search = RandomizedSearchCV(
    rf,
    param_distributions=param_distributions,
    n_iter=10,  # Reduce the number of iterations for faster runtime
    cv=kf,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
    verbose=1
)

# Fit the model using RandomizedSearchCV
random_search.fit(X_train, y_train)

# Best model from RandomizedSearchCV
best_rf_model = random_search.best_estimator_
print(f"Best Parameters: {random_search.best_params_}")

# Predictions and evaluation on the test set
y_pred = best_rf_model.predict(X_test)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Additional metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best Parameters: {'bootstrap': True, 'max_depth': 20, 'min_samples_leaf': 2, 'min_samples_split': 4, 'n_estimators': 124}

Classification Report:
                                   precision    recall  f1-score   support

          Browsing Using Computer       1.00      0.99      1.00       287
      Moving Around While Sitting       0.97      0.90      0.93       267
     Moving Around While Standing       0.96      0.94      0.95       270
       Moving Chair While Sitting       0.99      0.99      0.99       334
  Moving Head, Body While Sitting       0.99      1.00      0.99       279
   Picking Up Items While Sitting       0.96      0.98      0.97       259
  Picking Up Items While Standing       0.97      0.97      0.97       214
                          Running       0.99      0.99      0.99       174
       Sitting and Reading a Book       1.00      1.00      1.00       269
Sitting and Writing in a Notebook       1.

# Gaussian Naive Bayes (W200_O50)

Gaussian Naive Bayes with K-Fold Cross-Validation

In [11]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler

target_column = W200_050_df.columns[-1]
X = W200_050_df.drop(columns=[target_column])
y = W200_050_df[target_column]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features (standardize)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# K-Fold Cross-Validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)

# Initialize Gaussian Naive Bayes
gnb = GaussianNB()

# Evaluate the model using K-Fold Cross-Validation
cv_scores = cross_val_score(gnb, X_train, y_train, cv=kf, scoring='accuracy')

print("K-Fold Cross-Validation Results:")
print(f"Mean Accuracy: {cv_scores.mean():.4f}")
print(f"Standard Deviation: {cv_scores.std():.4f}")

# Fit the model on the entire training data
gnb.fit(X_train, y_train)

# Predictions and evaluation on the test set
y_pred = gnb.predict(X_test)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Additional metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

K-Fold Cross-Validation Results:
Mean Accuracy: 0.7568
Standard Deviation: 0.0088

Classification Report:
                                   precision    recall  f1-score   support

          Browsing Using Computer       0.65      0.91      0.76       430
      Moving Around While Sitting       0.72      0.41      0.52       413
     Moving Around While Standing       0.71      0.35      0.47       397
       Moving Chair While Sitting       0.79      0.62      0.70       467
  Moving Head, Body While Sitting       0.68      0.55      0.61       433
   Picking Up Items While Sitting       0.59      0.89      0.71       342
  Picking Up Items While Standing       0.84      0.82      0.83       305
                          Running       0.93      0.97      0.95       261
       Sitting and Reading a Book       0.75      0.96      0.84       388
Sitting and Writing in a Notebook       0.83      0.64      0.72       406
            Stand up From Sitting       0.74      0.82      0.78    

# SVM (Support Vector Machines) (W300_O25)

Support Vector Machines with Stratified K-Fold and RandomizedSearchCV

In [12]:
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# Assuming the last column is the target variable (activity type)
target_column = W300_025_df.columns[-1]
X = W300_025_df.drop(columns=[target_column])
y = W300_025_df[target_column]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Stratified K-Fold Cross-Validation
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Hyperparameter tuning using RandomizedSearchCV
param_dist = {
    'C': np.logspace(-3, 3, 10),
    'kernel': ['linear', 'rbf', 'poly'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'],
    'class_weight': [None, 'balanced']
}

svm = SVC()

random_search = RandomizedSearchCV(
    svm,
    param_distributions=param_dist,
    n_iter=50,  # Number of parameter settings sampled
    cv=skf,
    n_jobs=-1,
    scoring='accuracy',
    verbose=1,
    random_state=42
)

# Perform Randomized Search
random_search.fit(X_train, y_train)

# Best model and parameters
best_model = random_search.best_estimator_
print(f"Best Parameters: {random_search.best_params_}")

# Evaluate on the test set
y_pred = best_model.predict(X_test)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Additional metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Fitting 3 folds for each of 50 candidates, totalling 150 fits
Best Parameters: {'kernel': 'linear', 'gamma': 'scale', 'degree': 4, 'class_weight': None, 'C': 215.44346900318823}

Classification Report:
                                   precision    recall  f1-score   support

          Browsing Using Computer       0.99      1.00      1.00       197
      Moving Around While Sitting       0.99      1.00      1.00       192
     Moving Around While Standing       1.00      0.99      1.00       193
       Moving Chair While Sitting       0.99      1.00      1.00       185
  Moving Head, Body While Sitting       1.00      0.99      0.99       176
   Picking Up Items While Sitting       1.00      1.00      1.00       150
  Picking Up Items While Standing       1.00      0.99      1.00       140
                          Running       0.99      1.00      1.00       123
       Sitting and Reading a Book       1.00      1.00      1.00       193
Sitting and Writing in a Notebook       1.00   

# KNN (K-Nearest Neighbors) (W300_O50)

K-Nearest Neighbors with Stratified K-Fold and GridSearchCV

In [13]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

target_column = W300_050_df.columns[-1]
X = W300_050_df.drop(columns=[target_column])
y = W300_050_df[target_column]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Stratified K-Fold Cross-Validation
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11],  # Number of neighbors
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'minkowski']
}

knn = KNeighborsClassifier()

grid_search = GridSearchCV(
    knn,
    param_grid=param_grid,
    cv=skf,
    scoring='accuracy',
    verbose=1
)

# Perform Grid Search
grid_search.fit(X_train, y_train)

# Best model and parameters
best_model = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")

# Evaluate on the test set
y_pred = best_model.predict(X_test)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Additional metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Fitting 3 folds for each of 30 candidates, totalling 90 fits
Best Parameters: {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}

Classification Report:
                                   precision    recall  f1-score   support

          Browsing Using Computer       0.91      0.94      0.92       279
      Moving Around While Sitting       0.76      0.75      0.75       298
     Moving Around While Standing       0.74      0.65      0.69       277
       Moving Chair While Sitting       0.79      0.80      0.79       311
  Moving Head, Body While Sitting       0.84      0.83      0.84       282
   Picking Up Items While Sitting       0.84      0.82      0.83       230
  Picking Up Items While Standing       0.82      0.83      0.83       197
                          Running       0.96      0.98      0.97       177
       Sitting and Reading a Book       0.95      0.94      0.95       266
Sitting and Writing in a Notebook       0.96      0.94      0.95       262
       

# AdaBoost (W400_O25)

AdaBoost with K-Fold and GridSearchCV

In [17]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import KFold, GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler

# Assuming the last column is the target variable (activity type)
target_column = W200_025_df.columns[-1]
X = W200_025_df.drop(columns=[target_column])
y = W200_025_df[target_column]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features (standardize)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# K-Fold Cross-Validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)

# Defining the AdaBoost model
adaboost = AdaBoostClassifier(n_estimators=100, learning_rate=1.0, random_state=42)

# Hyperparameter tuning with GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],  # Number of boosting rounds
    'learning_rate': [0.01, 0.1, 1]  # Learning rate for boosting
}
grid_search = GridSearchCV(
    adaboost, param_grid, cv=kf, scoring='accuracy', verbose=1, n_jobs=-1
)

# Fit the model using GridSearchCV
grid_search.fit(X_train, y_train)

# Best model from GridSearchCV
best_adaboost_model = grid_search.best_estimator_

# Predictions and evaluation on the test set
y_pred = best_adaboost_model.predict(X_test)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Additional metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Fitting 3 folds for each of 9 candidates, totalling 27 fits

Classification Report:
                                   precision    recall  f1-score   support

          Browsing Using Computer       0.00      0.00      0.00       287
      Moving Around While Sitting       1.00      1.00      1.00       267
     Moving Around While Standing       0.23      1.00      0.38       270
       Moving Chair While Sitting       0.55      1.00      0.71       334
  Moving Head, Body While Sitting       0.25      1.00      0.40       279
   Picking Up Items While Sitting       0.00      0.00      0.00       259
  Picking Up Items While Standing       0.00      0.00      0.00       214
                          Running       0.00      0.00      0.00       174
       Sitting and Reading a Book       1.00      1.00      1.00       269
Sitting and Writing in a Notebook       0.00      0.00      0.00       262
            Stand up From Sitting       0.00      0.00      0.00       277
               

# Gradient Boost (W400_O50)

Gradient Boosting with K-Fold and RandomizedSearchCV (GridSearch execution time was taking over 35 minutes, so I tried to reduce it by lowering splits and iterations)

In [29]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import KFold, RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from scipy.stats import uniform, randint

# Assuming the last column is the target variable (activity type)
target_column = W400_050_df.columns[-1]
X = W400_050_df.drop(columns=[target_column])
y = W400_050_df[target_column]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# K-Fold Cross-Validation
kf = KFold(n_splits=2, shuffle=True, random_state=42)

# Define the GradientBoostingClassifier
gb_clf = GradientBoostingClassifier(random_state=42)

# Hyperparameter distribution for RandomizedSearchCV
param_dist = {
    'n_estimators': randint(50, 200),  # Randomly select between 50 and 200
    'learning_rate': uniform(0.01, 0.2),  # Random float between 0.01 and 0.2
    'max_depth': randint(2, 5),  # Randomly choose depth between 2 and 4
    'subsample': uniform(0.7, 0.3),  # Random fraction between 0.7 and 1.0
    'min_samples_split': randint(2, 10)  # Randomly choose from 2 to 10
}

# Perform RandomizedSearchCV instead of GridSearchCV
random_search = RandomizedSearchCV(
    gb_clf,
    param_distributions=param_dist,
    n_iter=10,
    cv=kf,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1,
    random_state=42
)

# Perform Randomized Search
random_search.fit(X_train, y_train)

# Best model and parameters
best_model = random_search.best_estimator_
print(f"Best Parameters: {random_search.best_params_}")

# Evaluate on the test set
y_pred = best_model.predict(X_test)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Additional metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Fitting 2 folds for each of 10 candidates, totalling 20 fits
Best Parameters: {'learning_rate': 0.0849080237694725, 'max_depth': 2, 'min_samples_split': 8, 'n_estimators': 156, 'subsample': 0.9339073000818308}

Classification Report:
                                   precision    recall  f1-score   support

          Browsing Using Computer       1.00      1.00      1.00       211
      Moving Around While Sitting       1.00      1.00      1.00       235
     Moving Around While Standing       1.00      0.99      1.00       207
       Moving Chair While Sitting       1.00      1.00      1.00       207
  Moving Head, Body While Sitting       1.00      1.00      1.00       223
   Picking Up Items While Sitting       0.99      1.00      1.00       164
  Picking Up Items While Standing       1.00      1.00      1.00       161
                          Running       1.00      1.00      1.00       126
       Sitting and Reading a Book       1.00      1.00      1.00       227
Sitting and Wri

# XGBoost (W500_O25)

XGBoost with KFold and RandomizedSearchCV

In [24]:
from xgboost import XGBClassifier
from sklearn.model_selection import KFold, RandomizedSearchCV, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler
from scipy.stats import randint, uniform
from sklearn.preprocessing import LabelEncoder

target_column = W500_025_df.columns[-1]
X = W500_025_df.drop(columns=[target_column])
y = W500_025_df[target_column]

# Encode activity labels to integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Scale features (standardize)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# K-Fold Cross-Validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)

# Define the XGBoost model
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)

# Reduced Hyperparameter Space for RandomizedSearchCV
param_distributions = {
    'n_estimators': randint(50, 200),  # Number of boosting rounds
    'max_depth': randint(3, 10),  # Depth of trees
    'learning_rate': uniform(0.01, 0.3),  # Step size shrinkage
    'subsample': uniform(0.6, 0.4),  # Fraction of samples for boosting
    'colsample_bytree': uniform(0.6, 0.4),  # Fraction of features for boosting
    'gamma': uniform(0, 5),  # Minimum loss reduction
    'reg_alpha': uniform(0, 1),  # L1 regularization term
    'reg_lambda': uniform(1, 5)  # L2 regularization term
}

# Hyperparameter tuning with RandomizedSearchCV
random_search = RandomizedSearchCV(
    xgb,
    param_distributions=param_distributions,
    n_iter=10,  # Reduce the number of iterations for faster runtime
    cv=kf,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
    verbose=1
)

# Fit the model using RandomizedSearchCV
random_search.fit(X_train, y_train)

# Best model from RandomizedSearchCV
best_xgb_model = random_search.best_estimator_
print(f"Best Parameters: {random_search.best_params_}")

# Predictions and evaluation on the test set
y_pred = best_xgb_model.predict(X_test)

# Inverse transform to get original activity names
y_pred_original = label_encoder.inverse_transform(y_pred)

# Inverse transform y_test to get the original activity names
y_test_original = label_encoder.inverse_transform(y_test)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test_original, y_pred_original))

# Additional metrics
accuracy = accuracy_score(y_test_original, y_pred_original)
precision = precision_score(y_test_original, y_pred_original, average='weighted')
recall = recall_score(y_test_original, y_pred_original, average='weighted')
f1 = f1_score(y_test_original, y_pred_original, average='weighted')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best Parameters: {'colsample_bytree': 0.6727299868828402, 'gamma': 0.9170225492671691, 'learning_rate': 0.1012726728878613, 'max_depth': 8, 'n_estimators': 138, 'reg_alpha': 0.2912291401980419, 'reg_lambda': 4.059264473611897, 'subsample': 0.6557975442608167}

Classification Report:
                                   precision    recall  f1-score   support

          Browsing Using Computer       1.00      1.00      1.00       106
      Moving Around While Sitting       1.00      1.00      1.00       115
     Moving Around While Standing       1.00      1.00      1.00       115
       Moving Chair While Sitting       1.00      0.99      1.00       122
  Moving Head, Body While Sitting       1.00      1.00      1.00       109
   Picking Up Items While Sitting       1.00      1.00      1.00        79
  Picking Up Items While Standing       0.97      1.00      0.99        69
                          Running       1.00      1.00

#ANN (Artificial Neural Networks) (W500_O50)

ANN with Stratified K-Fold and RandomizedSearchCV (Similar issue with runtime, suggests my implementation is very computationally expensive)

In [27]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from scipy.stats import loguniform

# Assuming the last column is the target variable (activity type)
target_column = W500_050_df.columns[-1]
X = W500_050_df.drop(columns=[target_column])
y = W500_050_df[target_column]

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Stratified K-Fold Cross-Validation
skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)

# Hyperparameter tuning using RandomizedSearchCV
param_distributions = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 100)],  # Number of neurons in hidden layers
    'activation': ['relu', 'tanh'],  # Activation functions
    'solver': ['adam', 'sgd'],  # Optimizers
    'alpha': loguniform(1e-5, 1e-2),  # Regularization strength
    'learning_rate': ['constant', 'adaptive'],  # Learning rate schedule
    'max_iter': [200, 300, 500]  # Maximum iterations
}

mlp_clf = MLPClassifier(random_state=42)

random_search = RandomizedSearchCV(
    mlp_clf,
    param_distributions=param_distributions,
    n_iter=25,  # Number of random samples
    cv=skf,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1,
    random_state=42
)

# Perform Randomized Search
random_search.fit(X_train, y_train)

# Best model and parameters
best_model = random_search.best_estimator_
print(f"Best Parameters: {random_search.best_params_}")

# Evaluate on the test set
y_pred = best_model.predict(X_test)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Additional metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Fitting 2 folds for each of 25 candidates, totalling 50 fits
Best Parameters: {'activation': 'tanh', 'alpha': 0.0015199034037054083, 'hidden_layer_sizes': (50, 50), 'learning_rate': 'adaptive', 'max_iter': 300, 'solver': 'adam'}

Classification Report:
                                   precision    recall  f1-score   support

          Browsing Using Computer       1.00      1.00      1.00       169
      Moving Around While Sitting       1.00      0.97      0.99       155
     Moving Around While Standing       0.99      1.00      0.99       173
       Moving Chair While Sitting       0.99      0.99      0.99       186
  Moving Head, Body While Sitting       1.00      0.99      1.00       186
   Picking Up Items While Sitting       0.99      1.00      1.00       121
  Picking Up Items While Standing       0.99      0.99      0.99       126
                          Running       0.99      0.98      0.99       106
       Sitting and Reading a Book       1.00      0.98      0.99       