# Participants Classification based on Light-BGM model
by GONG ZERUI 2022.04.21

### 1. Load Libraries

In [2]:
# import libraries
import pandas as pd
import numpy as np
from numpy import pi 
from lightgbm import LGBMClassifier

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import matplotlib.pyplot as plt

### 2. Load Data

In [3]:
df_train = pd.read_csv("source/train.csv")
df_test = pd.read_csv("source/test.csv")

# combine both dataframes
df_train['Data'] = 'Train'
df_test['Data'] = 'Test'
df_total = pd.concat([df_train,df_test], axis = 0).reset_index(drop = True)
df_total['subject'] = '#' + df_total['subject'].astype(str)

# create label
activity_label = df_total.pop('Activity')

print('Shape Train:\t{}'.format(df_train.shape))
print('Shape Test:\t{}\n'.format(df_test.shape))



Shape Train:	(7352, 564)
Shape Test:	(2947, 564)



### 3. Classifying Participants data using Light-BGM model

In [6]:
activity_label_counts = activity_label.value_counts()

# iterate over each activity and print out the accuracy
for activity in activity_label_counts.index:

    activity_dataset = df_total[activity_label == activity].copy()
    activity_dataset.pop("Data")
    activity_subject = activity_dataset.pop("subject")

    # scale data and dimension reducing
    scl = StandardScaler()
    activity_dataset = scl.fit_transform(activity_dataset)

    pca = PCA(random_state=3)
    activity_dataset = pca.fit_transform(activity_dataset)

    # split train-test
    enc = LabelEncoder()
    label_encoded = enc.fit_transform(activity_subject)
    X_train, X_test, y_train, y_test = train_test_split(
        activity_dataset, label_encoded, random_state=3
    )

    # fit LBGM model
    print('Activity:'+ str(activity))
    lgbm = LGBMClassifier(n_estimators=1000, random_state = 3)
    lgbm = lgbm.fit(X_train, y_train)
    
    score = accuracy_score(y_true=y_test, y_pred=lgbm.predict(X_test))
    print('Accuracy:\t{:.8f}\n'.format(score))


Activity:LAYING
Accuracy:	0.66666667

Activity:STANDING
Accuracy:	0.49685535

Activity:SITTING
Accuracy:	0.44719101

Activity:WALKING
Accuracy:	0.94663573

Activity:WALKING_UPSTAIRS
Accuracy:	0.93005181

Activity:WALKING_DOWNSTAIRS
Accuracy:	0.88352273



We can see that it's very accurate to detect the specific person based on their activity. And the accuracy is much higher for movement. (94.6% for walking) This suggests we can find a unique movement pattern for each individual.

### Re-test base on deleted features

In [20]:
df_total_retest = df_total.copy()
df_total_retest = df_total_retest[df_total_retest.columns.drop(list(df_total_retest.filter(regex='Acc')))]
df_total_retest

Unnamed: 0,tBodyGyro-mean()-X,tBodyGyro-mean()-Y,tBodyGyro-mean()-Z,tBodyGyro-std()-X,tBodyGyro-std()-Y,tBodyGyro-std()-Z,tBodyGyro-mad()-X,tBodyGyro-mad()-Y,tBodyGyro-mad()-Z,tBodyGyro-max()-X,...,fBodyBodyGyroJerkMag-meanFreq(),fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Data
0,-0.006101,-0.031365,0.107725,-0.985310,-0.976623,-0.992205,-0.984586,-0.976353,-0.992362,-0.867044,...,-0.074323,-0.298676,-0.710304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,#1,Train
1,-0.016112,-0.083894,0.100584,-0.983120,-0.989046,-0.989121,-0.986890,-0.989038,-0.989185,-0.864904,...,0.158075,-0.595051,-0.861499,-0.732626,0.703511,-0.844788,0.180289,-0.054317,#1,Train
2,-0.031698,-0.102335,0.096127,-0.976292,-0.993552,-0.986379,-0.974922,-0.994122,-0.985786,-0.864904,...,0.414503,-0.390748,-0.760104,0.100699,0.808529,-0.848933,0.180637,-0.049118,#1,Train
3,-0.043410,-0.091386,0.085538,-0.991385,-0.992407,-0.987554,-0.991589,-0.993142,-0.989585,-0.885320,...,0.404573,-0.117290,-0.482845,0.640011,-0.485366,-0.848649,0.181935,-0.047663,#1,Train
4,-0.033960,-0.074708,0.077392,-0.985184,-0.992378,-0.987402,-0.986944,-0.992542,-0.988163,-0.870154,...,0.087753,-0.351471,-0.699205,0.693578,-0.615971,-0.847865,0.185151,-0.043892,#1,Train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10294,-0.142473,0.025443,0.202862,-0.562739,-0.526855,-0.256756,-0.570178,-0.575937,-0.330748,-0.536181,...,0.074472,-0.376278,-0.750809,0.884904,-0.698885,-0.651732,0.274627,0.184784,#24,Test
10295,0.062107,-0.043156,0.113594,-0.464037,-0.518149,-0.139629,-0.459919,-0.571732,-0.148170,-0.453271,...,0.101859,-0.320418,-0.700274,-0.657421,0.322549,-0.655181,0.273578,0.182412,#24,Test
10296,-0.123715,0.086320,0.261423,-0.421977,-0.557059,-0.197504,-0.421708,-0.576087,-0.199541,-0.453271,...,-0.066249,-0.118854,-0.467179,0.696663,0.363139,-0.655357,0.274479,0.181184,#24,Test
10297,-0.335912,0.099347,0.355058,-0.667858,-0.555166,-0.342620,-0.686728,-0.580453,-0.360587,-0.763286,...,-0.046467,-0.205445,-0.617737,0.929294,-0.008398,-0.659719,0.264782,0.187563,#24,Test


In [21]:
activity_label_counts = activity_label.value_counts()

# iterate over each activity and print out the accuracy
for activity in activity_label_counts.index:

    activity_dataset = df_total_retest[activity_label == activity].copy()
    activity_dataset.pop("Data")
    activity_subject = activity_dataset.pop("subject")

    # scale data and dimension reducing
    scl = StandardScaler()
    activity_dataset = scl.fit_transform(activity_dataset)

    pca = PCA(random_state=3)
    activity_dataset = pca.fit_transform(activity_dataset)

    # split train-test
    enc = LabelEncoder()
    label_encoded = enc.fit_transform(activity_subject)
    X_train, X_test, y_train, y_test = train_test_split(
        activity_dataset, label_encoded, random_state=3
    )

    # fit LBGM model
    print('Activity:'+ str(activity))
    lgbm = LGBMClassifier(n_estimators=1000, random_state = 3)
    lgbm = lgbm.fit(X_train, y_train)
    
    score = accuracy_score(y_true=y_test, y_pred=lgbm.predict(X_test))
    print('Accuracy:\t{:.8f}\n'.format(score))


Activity:LAYING
Accuracy:	0.59876543

Activity:STANDING
Accuracy:	0.44863732

Activity:SITTING
Accuracy:	0.48539326

Activity:WALKING
Accuracy:	0.93967517

Activity:WALKING_UPSTAIRS
Accuracy:	0.88341969

Activity:WALKING_DOWNSTAIRS
Accuracy:	0.82102273

