In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.naive_bayes import GaussianNB
import seaborn as sns
sns.set()

In [2]:
#import data

data = pd.read_csv('processed_data/fe_data.csv')
data_leg = pd.read_csv('processed_data/fe_legsensor_data.csv')
data_arm = pd.read_csv('processed_data/fe_armsensor_data.csv')

In [3]:
#drop 'index col'
data = data.iloc[:,1:]
data_leg = data_leg.iloc[:,1:]
data_arm = data_arm.iloc[:,1:]

# 1. Activity Detection - Machine Learning Approach

In [4]:
data.info()

TypeError: Cannot interpret '<attribute 'dtype' of 'numpy.generic' objects>' as a data type

In [5]:
#whole data
X_data = data.drop('activity', axis=1)
y_data = data['activity']

#leg sensor data
X_data_leg = data_leg.drop('activity', axis=1)
y_data_leg = data_leg['activity']

#arm sensor data
X_data_arm = data_arm.drop('activity', axis=1)
y_data_arm = data_arm['activity']


In [51]:
#X_data['sex']

In [6]:
#encode sensor postion and sex data
sex_encoder = LabelEncoder()
sex_encoder.fit(X_data['sex'])
X_data['sex'] =  sex_encoder.transform(X_data['sex'])
#X_data['sex']
#male:1, female:0
X_data_leg['sex'] = sex_encoder.transform(X_data_leg['sex'])
X_data_arm['sex'] = sex_encoder.transform(X_data_arm['sex'])

In [53]:
#X_data['sensor_pos']

In [7]:
pos_encoder = LabelEncoder()
pos_encoder.fit(X_data['sensor_pos'])
X_data['sensor_pos'] = pos_encoder.transform(X_data['sensor_pos'])
#X_data['sensor_pos']
#leg: 1, arm: 0
X_data_leg['sensor_pos'] = pos_encoder.transform(X_data_leg['sensor_pos'])
X_data_arm['sensor_pos'] = pos_encoder.transform(X_data_arm['sensor_pos'])

In [8]:
#Train - Test Split
X_train, X_val, y_train, y_val = train_test_split(X_data, y_data, test_size= 0.2, random_state= 24)
X_train_leg, X_val_leg, y_train_leg, y_val_leg = train_test_split(X_data_leg, y_data_leg, test_size= 0.2, random_state= 24)
X_train_arm, X_val_arm, y_train_arm, y_val_arm = train_test_split(X_data_arm, y_data_arm, test_size= 0.2, random_state= 24)

In [9]:
#standardizing into scaler

scaler = StandardScaler()
scaler.fit(X_train)
X_train_sd = scaler.transform(X_train)
X_val_sd = scaler.transform(X_val)

X_train_leg_sd = scaler.transform(X_train_leg)
X_val_leg_sd = scaler.transform(X_val_leg)

X_train_arm_sd = scaler.transform(X_train_arm)
X_val_arm_sd = scaler.transform(X_val_arm)

# Logistic Regression

In [10]:
#Logistical Regression

lr_model = LogisticRegression(random_state=21)
lr_model.fit(X_train_sd,y_train)

lr_model_leg = LogisticRegression(random_state=21)
lr_model_leg.fit(X_train_leg_sd,y_train_leg)

lr_model_arm = LogisticRegression(random_state=21)
lr_model_arm.fit(X_train_arm_sd,y_train_arm)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

LogisticRegression(random_state=21)

 - LR on whole FE dataset

In [22]:
#Train Accuracy
y_pred = lr_model.predict(X_train_sd)
print("LR Training Results: ")
print("accuracy:", accuracy_score(y_train, y_pred))
print(classification_report(y_train, y_pred))

LR Training Results: 
accuracy: 0.9692174290049859
              precision    recall  f1-score   support

         jog       0.93      0.94      0.94      1151
         run       0.94      0.93      0.94      1140
       stand       1.00      1.00      1.00      1172
        walk       1.00      1.00      1.00      1150

    accuracy                           0.97      4613
   macro avg       0.97      0.97      0.97      4613
weighted avg       0.97      0.97      0.97      4613



In [24]:
#Validation
y_pred = lr_model.predict(X_val_sd)
print("LR Validation Results: ")
print("accuracy:", accuracy_score(y_val, y_pred))
print(classification_report(y_val, y_pred))

LR Validation Results: 
accuracy: 0.9662045060658578
              precision    recall  f1-score   support

         jog       0.92      0.95      0.93       277
         run       0.95      0.92      0.93       288
       stand       1.00      1.00      1.00       311
        walk       1.00      1.00      1.00       278

    accuracy                           0.97      1154
   macro avg       0.97      0.97      0.97      1154
weighted avg       0.97      0.97      0.97      1154



- LR on Leg Sensor FE dataset

In [26]:
y_pred_leg = lr_model_leg.predict(X_train_leg_sd)
print("LR Training Results on Leg Sensor: ")
print("accuracy:", accuracy_score(y_train_leg, y_pred_leg))
print(classification_report(y_train_leg, y_pred_leg))

LR Training Results on Leg Sensor: 
accuracy: 0.9846760070052539
              precision    recall  f1-score   support

         jog       0.97      0.97      0.97       576
         run       0.97      0.97      0.97       563
       stand       1.00      1.00      1.00       567
        walk       1.00      1.00      1.00       578

    accuracy                           0.98      2284
   macro avg       0.98      0.98      0.98      2284
weighted avg       0.98      0.98      0.98      2284



In [27]:
y_pred_leg = lr_model_leg.predict(X_val_leg_sd)
print("LR Validation Results on Leg Sensor: ")
print("accuracy:", accuracy_score(y_val_leg, y_pred_leg))
print(classification_report(y_val_leg, y_pred_leg))

LR Validation Results on Leg Sensor: 
accuracy: 0.9842657342657343
              precision    recall  f1-score   support

         jog       0.96      0.97      0.97       138
         run       0.99      0.97      0.98       151
       stand       1.00      1.00      1.00       147
        walk       0.99      1.00      0.99       136

    accuracy                           0.98       572
   macro avg       0.98      0.98      0.98       572
weighted avg       0.98      0.98      0.98       572



- LR on Arm Sensor FE dataset

In [28]:
y_pred_arm = lr_model_arm.predict(X_train_arm_sd)
print("LR Training Results on Arm Sensor: ")
print("accuracy:", accuracy_score(y_train_arm, y_pred_arm))
print(classification_report(y_train_arm, y_pred_arm))

LR Training Results on Arm Sensor: 
accuracy: 0.9845360824742269
              precision    recall  f1-score   support

         jog       0.96      0.97      0.97       562
         run       0.97      0.96      0.97       563
       stand       1.00      1.00      1.00       611
        walk       1.00      1.00      1.00       592

    accuracy                           0.98      2328
   macro avg       0.98      0.98      0.98      2328
weighted avg       0.98      0.98      0.98      2328



In [29]:
y_pred_arm = lr_model_arm.predict(X_val_arm_sd)
print("LR Validation Results on Arm Sensor: ")
print("accuracy:", accuracy_score(y_val_arm, y_pred_arm))
print(classification_report(y_val_arm, y_pred_arm))

LR Validation Results on Arm Sensor: 
accuracy: 0.9725557461406518
              precision    recall  f1-score   support

         jog       0.95      0.95      0.95       152
         run       0.95      0.95      0.95       151
       stand       1.00      1.00      1.00       158
        walk       0.98      1.00      0.99       122

    accuracy                           0.97       583
   macro avg       0.97      0.97      0.97       583
weighted avg       0.97      0.97      0.97       583



# Random Forests

In [14]:
#Random Forests

clf_model = RandomForestClassifier()
clf_model.fit(X_train_sd,y_train)

clf_model_leg = RandomForestClassifier()
clf_model_leg.fit(X_train_leg_sd,y_train_leg)

clf_model_arm = RandomForestClassifier()
clf_model_arm.fit(X_train_arm_sd,y_train_arm)

RandomForestClassifier()

- LR on whole FE dataset

In [15]:
y_pred = clf_model.predict(X_val_sd)
print(classification_report(y_val, y_pred))

              precision    recall  f1-score   support

         jog       0.97      0.99      0.98       277
         run       0.99      0.98      0.98       288
       stand       1.00      1.00      1.00       311
        walk       1.00      1.00      1.00       278

    accuracy                           0.99      1154
   macro avg       0.99      0.99      0.99      1154
weighted avg       0.99      0.99      0.99      1154



- LR on Leg Sensor FE dataset

In [16]:
y_pred_leg = clf_model_leg.predict(X_val_leg_sd)
print(classification_report(y_val_leg, y_pred_leg))

              precision    recall  f1-score   support

         jog       0.99      0.99      0.99       138
         run       0.99      0.99      0.99       151
       stand       1.00      1.00      1.00       147
        walk       1.00      1.00      1.00       136

    accuracy                           0.99       572
   macro avg       0.99      0.99      0.99       572
weighted avg       0.99      0.99      0.99       572



- LR on Arm Sensor FE dataset

In [17]:
y_pred_arm = clf_model_arm.predict(X_val_arm_sd)
print(classification_report(y_val_arm, y_pred_arm))

              precision    recall  f1-score   support

         jog       0.97      0.93      0.95       152
         run       0.94      0.97      0.95       151
       stand       1.00      1.00      1.00       158
        walk       1.00      1.00      1.00       122

    accuracy                           0.97       583
   macro avg       0.98      0.98      0.98       583
weighted avg       0.97      0.97      0.97       583



# Naive Bayes Model

In [18]:
#Whole
gnb_model = GaussianNB()
gnb_model.fit(X_train_sd, y_train)

#Leg Sensor
gnb_model_leg = GaussianNB()
gnb_model_leg.fit(X_train_leg_sd, y_train_leg)

#Arm Sensor
gnb_model_arm = GaussianNB()
gnb_model_arm.fit(X_train_arm_sd, y_train_arm)

GaussianNB()

- Naive Bayes on whole FE data

In [19]:
y_pred = gnb_model.predict(X_val_sd)
print("General Naive Bayes score: ", gnb_model.score(X_val_sd,y_val))
print(classification_report(y_val, y_pred))

General Naive Bayes score:  0.8665511265164645
              precision    recall  f1-score   support

         jog       0.70      0.79      0.74       277
         run       0.80      0.75      0.77       288
       stand       1.00      0.99      1.00       311
        walk       0.98      0.92      0.95       278

    accuracy                           0.87      1154
   macro avg       0.87      0.86      0.87      1154
weighted avg       0.87      0.87      0.87      1154



- Naive Bayes on Leg Sensor FE data

In [20]:
y_pred_leg = gnb_model_leg.predict(X_val_leg_sd)
print("Leg Naive Bayes score: ", gnb_model_leg.score(X_val_leg_sd,y_val_leg))
print(classification_report(y_val_leg, y_pred_leg))

Leg Naive Bayes score:  0.9405594405594405
              precision    recall  f1-score   support

         jog       0.88      0.87      0.88       138
         run       0.92      0.91      0.92       151
       stand       1.00      1.00      1.00       147
        walk       0.96      0.98      0.97       136

    accuracy                           0.94       572
   macro avg       0.94      0.94      0.94       572
weighted avg       0.94      0.94      0.94       572



- Naive Bayes on Arm Sensor FE data

In [21]:
y_pred_arm = gnb_model_arm.predict(X_val_arm_sd)
print("Arm Naive Bayes score: ", gnb_model_arm.score(X_val_arm_sd,y_val_arm))
print(classification_report(y_val_arm, y_pred_arm))

Arm Naive Bayes score:  0.9073756432246999
              precision    recall  f1-score   support

         jog       0.81      0.84      0.82       152
         run       0.83      0.81      0.82       151
       stand       1.00      1.00      1.00       158
        walk       1.00      1.00      1.00       122

    accuracy                           0.91       583
   macro avg       0.91      0.91      0.91       583
weighted avg       0.91      0.91      0.91       583

