In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import seaborn as sns
sns.set()

In [34]:
#import data

data = pd.read_csv('processed_data/fe_data.csv')
data_leg = pd.read_csv('processed_data/fe_legsensor_data.csv')
data_arm = pd.read_csv('processed_data/fe_armsensor_data.csv')

In [35]:
#drop 'index col'
data = data.iloc[:,1:]
data_leg = data_leg.iloc[:,1:]
data_arm = data_arm.iloc[:,1:]

# 1. Activity Detection - Machine Learning Approach

In [36]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5767 entries, 0 to 5766
Data columns (total 64 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ax_mean       5767 non-null   float64
 1   ay_mean       5767 non-null   float64
 2   az_mean       5767 non-null   float64
 3   aT_mean       5767 non-null   float64
 4   ax_ad_mean    5767 non-null   float64
 5   ay_ad_mean    5767 non-null   float64
 6   az_ad_mean    5767 non-null   float64
 7   aT_ad_mean    5767 non-null   float64
 8   ax_min        5767 non-null   float64
 9   ay_min        5767 non-null   float64
 10  az_min        5767 non-null   float64
 11  aT_min        5767 non-null   float64
 12  ax_max        5767 non-null   float64
 13  ay_max        5767 non-null   float64
 14  az_max        5767 non-null   float64
 15  aT_max        5767 non-null   float64
 16  ax_std        5767 non-null   float64
 17  ay_std        5767 non-null   float64
 18  az_std        5767 non-null 

In [37]:
#whole data
X_data = data.drop('activity', axis=1)
y_data = data['activity']

#leg sensor data
X_data_leg = data_leg.drop('activity', axis=1)
y_data_leg = data_leg['activity']

#arm sensor data
X_data_arm = data_arm.drop('activity', axis=1)
y_data_arm = data_arm['activity']


In [38]:
#X_data['sex']

In [39]:
#encode sensor postion and sex data
sex_encoder = LabelEncoder()
sex_encoder.fit(X_data['sex'])
X_data['sex'] =  sex_encoder.transform(X_data['sex'])
#X_data['sex']
#male:1, female:0
X_data_leg['sex'] = sex_encoder.transform(X_data_leg['sex'])
X_data_arm['sex'] = sex_encoder.transform(X_data_arm['sex'])

In [40]:
#X_data['sensor_pos']

In [41]:
pos_encoder = LabelEncoder()
pos_encoder.fit(X_data['sensor_pos'])
X_data['sensor_pos'] = pos_encoder.transform(X_data['sensor_pos'])
#X_data['sensor_pos']
#leg: 1, arm: 0
X_data_leg['sensor_pos'] = pos_encoder.transform(X_data_leg['sensor_pos'])
X_data_arm['sensor_pos'] = pos_encoder.transform(X_data_arm['sensor_pos'])

In [42]:
activity_encoder = LabelEncoder()
activity_encoder.fit(y_data)
y_data = activity_encoder.transform(y_data)
#y_data
#stand: 1, arm: 0
y_data_leg = activity_encoder.transform(y_data_leg)
y_data_arm = activity_encoder.transform(y_data_arm)

In [43]:
#Train - Test Split
X_train, X_val, y_train, y_val = train_test_split(X_data, y_data, test_size= 0.2, random_state= 24)
X_train_leg, X_val_leg, y_train_leg, y_val_leg = train_test_split(X_data_leg, y_data_leg, test_size= 0.2, random_state= 24)
X_train_arm, X_val_arm, y_train_arm, y_val_arm = train_test_split(X_data_arm, y_data_arm, test_size= 0.2, random_state= 24)

In [44]:
#standardizing into scaler

scaler = StandardScaler()
scaler.fit(X_train)
X_train_sd = scaler.transform(X_train)
X_val_sd = scaler.transform(X_val)

X_train_leg_sd = scaler.transform(X_train_leg)
X_val_leg_sd = scaler.transform(X_val_leg)

X_train_arm_sd = scaler.transform(X_train_arm)
X_val_arm_sd = scaler.transform(X_val_arm)

In [45]:
#XG Boost

xgb_model = XGBClassifier()
xgb_model.fit(X_train_sd,y_train)

xgb_model_leg = XGBClassifier()
xgb_model_leg.fit(X_train_leg_sd,y_train_leg)

xgb_model_arm = XGBClassifier()
xgb_model_arm.fit(X_train_arm_sd,y_train_arm)





XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=12,
              num_parallel_tree=1, objective='multi:softprob', predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='exact', validate_parameters=1,
              verbosity=None)

- XGBoost on whole FE dataset

In [46]:
#Train Accuracy
y_pred = xgb_model.predict(X_train_sd)
print("XGBoost Training Results: ")
print("accuracy:", accuracy_score(y_train, y_pred))
print(classification_report(y_train, y_pred))

XGBoost Training Results: 
accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1151
           1       1.00      1.00      1.00      1140
           2       1.00      1.00      1.00      1172
           3       1.00      1.00      1.00      1150

    accuracy                           1.00      4613
   macro avg       1.00      1.00      1.00      4613
weighted avg       1.00      1.00      1.00      4613



In [47]:
#Validation
y_pred = xgb_model.predict(X_val_sd)
print("XGBoost Validation Results: ")
print("accuracy:", accuracy_score(y_val, y_pred))
print(classification_report(y_val, y_pred))

XGBoost Validation Results: 
accuracy: 0.9939341421143848
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       277
           1       0.99      0.98      0.99       288
           2       1.00      1.00      1.00       311
           3       1.00      1.00      1.00       278

    accuracy                           0.99      1154
   macro avg       0.99      0.99      0.99      1154
weighted avg       0.99      0.99      0.99      1154



- XGBoost on Leg Sensor FE dataset

In [48]:
y_pred_leg = xgb_model_leg.predict(X_train_leg_sd)
print("XGBoost Training Results on Leg Sensor: ")
print("accuracy:", accuracy_score(y_train_leg, y_pred_leg))
print(classification_report(y_train_leg, y_pred_leg))

XGBoost Training Results on Leg Sensor: 
accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       576
           1       1.00      1.00      1.00       563
           2       1.00      1.00      1.00       567
           3       1.00      1.00      1.00       578

    accuracy                           1.00      2284
   macro avg       1.00      1.00      1.00      2284
weighted avg       1.00      1.00      1.00      2284



In [49]:
y_pred_leg = xgb_model_leg.predict(X_val_leg_sd)
print("XGBoost Validation Results on Leg Sensor: ")
print("accuracy:", accuracy_score(y_val_leg, y_pred_leg))
print(classification_report(y_val_leg, y_pred_leg))

XGBoost Validation Results on Leg Sensor: 
accuracy: 0.9965034965034965
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       138
           1       0.99      1.00      1.00       151
           2       1.00      1.00      1.00       147
           3       0.99      1.00      1.00       136

    accuracy                           1.00       572
   macro avg       1.00      1.00      1.00       572
weighted avg       1.00      1.00      1.00       572



- XGBoost on Arm Sensor FE dataset

In [50]:
y_pred_arm = xgb_model_arm.predict(X_train_arm_sd)
print("XGBoost Training Results on Arm Sensor: ")
print("accuracy:", accuracy_score(y_train_arm, y_pred_arm))
print(classification_report(y_train_arm, y_pred_arm))

XGBoost Training Results on Arm Sensor: 
accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       562
           1       1.00      1.00      1.00       563
           2       1.00      1.00      1.00       611
           3       1.00      1.00      1.00       592

    accuracy                           1.00      2328
   macro avg       1.00      1.00      1.00      2328
weighted avg       1.00      1.00      1.00      2328



In [51]:
y_pred_arm = xgb_model_arm.predict(X_val_arm_sd)
print("XGBoost Validation Results on Arm Sensor: ")
print("accuracy:", accuracy_score(y_val_arm, y_pred_arm))
print(classification_report(y_val_arm, y_pred_arm))

XGBoost Validation Results on Arm Sensor: 
accuracy: 0.9862778730703259
              precision    recall  f1-score   support

           0       0.97      0.98      0.97       152
           1       0.98      0.97      0.97       151
           2       1.00      1.00      1.00       158
           3       1.00      1.00      1.00       122

    accuracy                           0.99       583
   macro avg       0.99      0.99      0.99       583
weighted avg       0.99      0.99      0.99       583



# 2. Sex Differentiation - Machine Learning Approach

In [52]:
#Whole data
sd_x_data = data.drop('sex', axis=1)
sd_y_data =  data['sex']

#Leg data
sd_x_data_leg = data_leg.drop('sex', axis=1)
sd_y_data_leg =  data_leg['sex']

#arm data
sd_x_data_arm = data_arm.drop('sex', axis=1)
sd_y_data_arm =  data_arm['sex']

In [53]:
#one hot encoding for 'activity' data
activity_encoder = OneHotEncoder()
activity_reshaped = np.array(sd_x_data['activity']).reshape(-1,1)
activity_data = activity_encoder.fit_transform(activity_reshaped)

activity_df = pd.DataFrame(activity_data.toarray(), columns=['jog', 'run', 'stand', 'walk'])
activity_df

Unnamed: 0,jog,run,stand,walk
0,0.0,0.0,1.0,0.0
1,0.0,0.0,1.0,0.0
2,0.0,0.0,1.0,0.0
3,0.0,0.0,1.0,0.0
4,0.0,0.0,1.0,0.0
...,...,...,...,...
5762,0.0,1.0,0.0,0.0
5763,0.0,1.0,0.0,0.0
5764,0.0,1.0,0.0,0.0
5765,0.0,1.0,0.0,0.0


In [54]:
#one hot encoding-> leg sensor data
activity_reshaped_leg = np.array(sd_x_data_leg['activity']).reshape(-1,1)
activity_data_leg = activity_encoder.fit_transform(activity_reshaped_leg)
activity_df_leg = pd.DataFrame(activity_data_leg.toarray(), columns=['jog', 'run', 'stand', 'walk'])
activity_df_leg

Unnamed: 0,jog,run,stand,walk
0,0.0,0.0,1.0,0.0
1,0.0,0.0,1.0,0.0
2,0.0,0.0,1.0,0.0
3,0.0,0.0,1.0,0.0
4,0.0,0.0,1.0,0.0
...,...,...,...,...
2851,0.0,1.0,0.0,0.0
2852,0.0,1.0,0.0,0.0
2853,0.0,1.0,0.0,0.0
2854,0.0,1.0,0.0,0.0


In [55]:
#one hot encoding-> arm sensor data
activity_reshaped_arm = np.array(sd_x_data_arm['activity']).reshape(-1,1)
activity_data_arm = activity_encoder.fit_transform(activity_reshaped_arm)
activity_df_arm = pd.DataFrame(activity_data_arm.toarray(), columns=['jog', 'run', 'stand', 'walk'])
activity_df_arm

Unnamed: 0,jog,run,stand,walk
0,0.0,0.0,1.0,0.0
1,0.0,0.0,1.0,0.0
2,0.0,0.0,1.0,0.0
3,0.0,0.0,1.0,0.0
4,0.0,0.0,1.0,0.0
...,...,...,...,...
2906,0.0,1.0,0.0,0.0
2907,0.0,1.0,0.0,0.0
2908,0.0,1.0,0.0,0.0
2909,0.0,1.0,0.0,0.0


In [56]:
sd_x_data = sd_x_data.drop('activity', axis=1)
sd_x_data = pd.concat([sd_x_data,activity_df], axis=1)

sd_x_data_leg = sd_x_data_leg.drop('activity', axis=1)
sd_x_data_leg = pd.concat([sd_x_data_leg,activity_df_leg], axis=1)

sd_x_data_arm = sd_x_data_arm.drop('activity', axis=1)
sd_x_data_arm = pd.concat([sd_x_data_arm,activity_df_arm], axis=1)

In [57]:
#sensor encoding
sd_x_data['sensor_pos'] = pos_encoder.transform(sd_x_data['sensor_pos'])

sd_x_data_leg['sensor_pos'] = pos_encoder.transform(sd_x_data_leg['sensor_pos'])

sd_x_data_arm['sensor_pos'] = pos_encoder.transform(sd_x_data_arm['sensor_pos'])

In [58]:
sd_x_data = sd_x_data.drop('id', axis=1)
sd_x_data_leg = sd_x_data_leg.drop('id', axis=1)
sd_x_data_arm = sd_x_data_arm.drop('id', axis=1)

In [59]:
X_train, X_val, y_train, y_val = train_test_split(sd_x_data, sd_y_data, test_size= 0.2, random_state= 24)
X_train_leg, X_val_leg, y_train_leg, y_val_leg = train_test_split(sd_x_data_leg, sd_y_data_leg, test_size= 0.2, random_state= 24)
X_train_arm, X_val_arm, y_train_arm, y_val_arm = train_test_split(sd_x_data_arm, sd_y_data_arm, test_size= 0.2, random_state= 24)

In [60]:
scaler.fit(X_train)
X_train_sd = scaler.transform(X_train)
X_val_sd = scaler.transform(X_val)

X_train_leg_sd = scaler.transform(X_train_leg)
X_val_leg_sd = scaler.transform(X_val_leg)

X_train_arm_sd = scaler.transform(X_train_arm)
X_val_arm_sd = scaler.transform(X_val_arm)

In [61]:
#XG Boost

xgb_model = XGBClassifier()
xgb_model.fit(X_train_sd,y_train)

xgb_model_leg = XGBClassifier()
xgb_model_leg.fit(X_train_leg_sd,y_train_leg)

xgb_model_arm = XGBClassifier()
xgb_model_arm.fit(X_train_leg_sd,y_train_leg)





XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=12,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

- XGBoost on whole FE dataset

In [62]:
y_pred = xgb_model.predict(X_val_sd)
print("accuracy:", accuracy_score(y_val, y_pred))
print(classification_report(y_val, y_pred))

accuracy: 0.9341421143847487
              precision    recall  f1-score   support

           F       0.93      0.94      0.93       572
           M       0.94      0.93      0.93       582

    accuracy                           0.93      1154
   macro avg       0.93      0.93      0.93      1154
weighted avg       0.93      0.93      0.93      1154



- XGBoost on Leg Sensor FE dataset

In [63]:
#leg
y_pred_leg = xgb_model_leg.predict(X_val_leg_sd)
print("accuracy:", accuracy_score(y_val_leg, y_pred_leg))
print(classification_report(y_val_leg, y_pred_leg))

accuracy: 0.965034965034965
              precision    recall  f1-score   support

           F       0.96      0.97      0.96       282
           M       0.97      0.96      0.97       290

    accuracy                           0.97       572
   macro avg       0.97      0.97      0.97       572
weighted avg       0.97      0.97      0.97       572



- XGBoost on Arm Sensor FE dataset

In [64]:
#arm
y_pred_arm = xgb_model_arm.predict(X_val_arm_sd)
print("accuracy:", accuracy_score(y_val_arm, y_pred_arm))
print(classification_report(y_val_arm, y_pred_arm))

accuracy: 0.5403087478559176
              precision    recall  f1-score   support

           F       0.53      0.62      0.57       288
           M       0.56      0.46      0.50       295

    accuracy                           0.54       583
   macro avg       0.54      0.54      0.54       583
weighted avg       0.54      0.54      0.54       583

