In [103]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import seaborn as sns
sns.set()

In [104]:
#import data

data = pd.read_csv('processed_data/fe_data.csv')
data_leg = pd.read_csv('processed_data/fe_legsensor_data.csv')
data_arm = pd.read_csv('processed_data/fe_armsensor_data.csv')

In [105]:
#drop 'index col'
data = data.iloc[:,1:]
data_leg = data_leg.iloc[:,1:]
data_arm = data_arm.iloc[:,1:]

1. Activity Detection - Machine Learning Approach

In [106]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4403 entries, 0 to 4402
Data columns (total 64 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ax_mean       4403 non-null   float64
 1   ay_mean       4403 non-null   float64
 2   az_mean       4403 non-null   float64
 3   aT_mean       4403 non-null   float64
 4   ax_ad_mean    4403 non-null   float64
 5   ay_ad_mean    4403 non-null   float64
 6   az_ad_mean    4403 non-null   float64
 7   aT_ad_mean    4403 non-null   float64
 8   ax_min        4403 non-null   float64
 9   ay_min        4403 non-null   float64
 10  az_min        4403 non-null   float64
 11  aT_min        4403 non-null   float64
 12  ax_max        4403 non-null   float64
 13  ay_max        4403 non-null   float64
 14  az_max        4403 non-null   float64
 15  aT_max        4403 non-null   float64
 16  ax_std        4403 non-null   float64
 17  ay_std        4403 non-null   float64
 18  az_std        4403 non-null 

In [107]:
#whole data
X_data = data.drop('activity', axis=1)
y_data = data['activity']

#leg sensor data
X_data_leg = data_leg.drop('activity', axis=1)
y_data_leg = data_leg['activity']

#arm sensor data
X_data_arm = data_arm.drop('activity', axis=1)
y_data_arm = data_arm['activity']


In [44]:
#X_data['sex']

In [108]:
#encode sensor postion and sex data
sex_encoder = LabelEncoder()
sex_encoder.fit(X_data['sex'])
X_data['sex'] =  sex_encoder.transform(X_data['sex'])
#X_data['sex']
#male:1, female:0
X_data_leg['sex'] = sex_encoder.transform(X_data_leg['sex'])
X_data_arm['sex'] = sex_encoder.transform(X_data_arm['sex'])

In [81]:
#X_data['sensor_pos']

In [109]:
pos_encoder = LabelEncoder()
pos_encoder.fit(X_data['sensor_pos'])
X_data['sensor_pos'] = pos_encoder.transform(X_data['sensor_pos'])
#X_data['sensor_pos']
#leg: 1, arm: 0
X_data_leg['sensor_pos'] = pos_encoder.transform(X_data_leg['sensor_pos'])
X_data_arm['sensor_pos'] = pos_encoder.transform(X_data_arm['sensor_pos'])

In [110]:
#Train - Test Split
X_train, X_val, y_train, y_val = train_test_split(X_data, y_data, test_size= 0.2, random_state= 24)
X_train_leg, X_val_leg, y_train_leg, y_val_leg = train_test_split(X_data_leg, y_data_leg, test_size= 0.2, random_state= 24)
X_train_arm, X_val_arm, y_train_arm, y_val_arm = train_test_split(X_data_arm, y_data_arm, test_size= 0.2, random_state= 24)

In [111]:
#standardizing into scaler

scaler = StandardScaler()
scaler.fit(X_train)
X_train_sd = scaler.transform(X_train)
X_val_sd = scaler.transform(X_val)

X_train_leg_sd = scaler.transform(X_train_leg)
X_val_leg_sd = scaler.transform(X_val_leg)

X_train_arm_sd = scaler.transform(X_train_arm)
X_val_arm_sd = scaler.transform(X_val_arm)

 - LR on whole FE dataset

In [112]:
#Logistical Regression
lr_model = LogisticRegression(random_state=21)
lr_model.fit(X_train_sd,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(random_state=21)

In [113]:
y_pred = lr_model.predict(X_val_sd)
print(classification_report(y_val, y_pred))

              precision    recall  f1-score   support

         jog       0.96      0.95      0.95       209
         run       0.95      0.96      0.96       202
       stand       1.00      1.00      1.00       221
        walk       1.00      1.00      1.00       249

    accuracy                           0.98       881
   macro avg       0.98      0.98      0.98       881
weighted avg       0.98      0.98      0.98       881



- LR on Leg Sensor FE dataset

In [114]:
#Logistical Regression
lr_model_leg = LogisticRegression(random_state=21)
lr_model_leg.fit(X_train_leg_sd,y_train_leg)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(random_state=21)

In [115]:
y_pred_leg = lr_model_leg.predict(X_val_leg_sd)
print(classification_report(y_val_leg, y_pred_leg))

              precision    recall  f1-score   support

         jog       1.00      1.00      1.00        77
         run       1.00      1.00      1.00        63
       stand       1.00      1.00      1.00        99
        walk       1.00      1.00      1.00        95

    accuracy                           1.00       334
   macro avg       1.00      1.00      1.00       334
weighted avg       1.00      1.00      1.00       334



- LR on Arm Sensor FE dataset

In [116]:
#Logistical Regression
lr_model_arm = LogisticRegression(random_state=21)
lr_model_arm.fit(X_train_arm_sd,y_train_arm)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(random_state=21)

In [117]:
y_pred_arm = lr_model_arm.predict(X_val_arm_sd)
print(classification_report(y_val_arm, y_pred_arm))

              precision    recall  f1-score   support

         jog       0.93      0.97      0.95       143
         run       0.96      0.93      0.95       144
       stand       1.00      1.00      1.00       117
        walk       1.00      1.00      1.00       144

    accuracy                           0.97       548
   macro avg       0.97      0.97      0.97       548
weighted avg       0.97      0.97      0.97       548



2. Sex Differentiation - Machine Learning Approach

In [118]:
#Whole data
sd_x_data = data.drop('sex', axis=1)
sd_y_data =  data['sex']

In [119]:
#one hot encoding for 'activity' data
activity_encoder = OneHotEncoder()
activity_reshaped = np.array(sd_x_data['activity']).reshape(-1,1)
activity_data = activity_encoder.fit_transform(activity_reshaped)

activity_df = pd.DataFrame(activity_data.toarray(), columns=['jog', 'run', 'stand', 'walk'])
activity_df

Unnamed: 0,jog,run,stand,walk
0,0.0,0.0,1.0,0.0
1,0.0,0.0,1.0,0.0
2,0.0,0.0,1.0,0.0
3,0.0,0.0,1.0,0.0
4,0.0,0.0,1.0,0.0
...,...,...,...,...
4398,0.0,1.0,0.0,0.0
4399,0.0,1.0,0.0,0.0
4400,0.0,1.0,0.0,0.0
4401,0.0,1.0,0.0,0.0


In [120]:
sd_x_data = sd_x_data.drop('activity', axis=1)
sd_x_data = pd.concat([sd_x_data,activity_df], axis=1)
sd_x_data

Unnamed: 0,ax_mean,ay_mean,az_mean,aT_mean,ax_ad_mean,ay_ad_mean,az_ad_mean,aT_ad_mean,ax_min,ay_min,...,ax_energy,ay_energy,az_energy,aT_energy,id,sensor_pos,jog,run,stand,walk
0,0.008389,0.002299,-0.005565,0.052762,0.018978,0.007285,0.011284,0.013785,-0.038112,-0.022203,...,0.000565,0.000092,0.000225,0.003101,5,leg,0.0,0.0,1.0,0.0
1,0.004237,0.004799,-0.003213,0.056464,0.022196,0.006363,0.012816,0.010641,-0.038112,-0.006534,...,0.000610,0.000080,0.000230,0.003373,5,leg,0.0,0.0,1.0,0.0
2,0.004794,0.004276,-0.006235,0.055496,0.018700,0.006834,0.010793,0.010963,-0.034305,-0.004757,...,0.000473,0.000081,0.000214,0.003274,5,leg,0.0,0.0,1.0,0.0
3,0.002024,0.003511,-0.001768,0.048095,0.017400,0.005763,0.007734,0.004334,-0.034305,-0.004757,...,0.000428,0.000066,0.000081,0.002338,5,leg,0.0,0.0,1.0,0.0
4,0.005332,0.002019,-0.001356,0.050343,0.021216,0.003858,0.007236,0.007080,-0.034305,-0.004757,...,0.000622,0.000024,0.000062,0.002622,5,leg,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4398,5.304296,6.992285,-7.357166,20.077278,7.788882,10.934353,4.241924,3.214296,-11.753848,-11.427540,...,103.234317,201.144825,77.771117,418.364999,6,arm,0.0,1.0,0.0,0.0
4399,1.353025,2.949142,-5.974860,20.573900,11.762721,10.300356,5.880427,2.751457,-19.944505,-11.427540,...,180.673362,154.019590,77.549418,435.116636,6,arm,0.0,1.0,0.0,0.0
4400,-3.024489,7.247215,-3.938283,19.358044,9.439023,9.714546,5.349486,3.410748,-19.944505,-11.427540,...,135.442196,175.032008,52.863801,391.718142,6,arm,0.0,1.0,0.0,0.0
4401,-7.953697,9.211947,-1.493172,18.164470,7.121034,8.891033,4.168916,2.962911,-19.944505,-4.971440,...,95.410406,142.332039,21.622441,262.217930,6,arm,0.0,1.0,0.0,0.0


In [97]:
#sample = [[1, 0, 0, 0],[0, 1, 0, 0],[0, 0, 1, 0],[0, 0, 0, 1]]
#sample_np = np.array(sample)
#print(activity_encoder.inverse_transform(sample))

[['jog']
 ['run']
 ['stand']
 ['walk']]


In [121]:
#sensor encoding
sd_x_data['sensor_pos'] = pos_encoder.transform(sd_x_data['sensor_pos'])

In [122]:
X_train, X_val, y_train, y_val = train_test_split(sd_x_data, sd_y_data, test_size= 0.2, random_state= 24)


In [124]:
scaler.fit(X_train)
X_train_sd = scaler.transform(X_train)
X_val_sd = scaler.transform(X_val)



In [125]:
#Logistical Regression
lr_model = LogisticRegression(random_state=21)
lr_model.fit(X_train_sd,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(random_state=21)

In [127]:
y_pred = lr_model.predict(X_val_sd)
print(classification_report(y_val, y_pred))

              precision    recall  f1-score   support

           F       0.81      0.80      0.80       527
           M       0.71      0.71      0.71       354

    accuracy                           0.77       881
   macro avg       0.76      0.76      0.76       881
weighted avg       0.77      0.77      0.77       881

