# Human activity recognition

머신 러닝에서 배운 내용을 토대로 Human activity dataset을 이용하여 activity 예측

In [1]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle

# Training CSV, given as a path relative to the notebook's working directory.
input_file = "train.csv"
df_train = pd.read_csv(input_file)

# Shuffle the rows with a fixed seed so the row order is the same on every run.
data_train = shuffle(df_train, random_state=42)

In [2]:
# Feature matrix: every column of the shuffled training frame except the
# "Activity" label.
# NOTE(review): the `subject` column is still part of the features (see the
# displayed frame) - confirm that a participant identifier is meant to be a
# model input.
X_train = data_train.drop(columns=["Activity"])

X_train

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject
4525,0.283203,-0.047024,-0.168986,0.384949,0.176898,-0.310332,0.381757,0.122611,-0.332984,0.465563,...,-0.730593,-0.932568,-0.034924,0.558036,0.258975,-0.854858,-0.784330,0.222960,-0.066506,22
1446,0.256904,-0.036623,-0.133856,0.201409,-0.154142,0.344183,0.092771,-0.182114,0.291899,0.586004,...,-0.032958,-0.338095,0.017986,-0.475545,0.942947,-0.567147,-0.665156,0.178975,0.243362,7
5995,0.291316,-0.001065,-0.072461,-0.336609,-0.279162,-0.303323,-0.381421,-0.253026,-0.337381,0.074430,...,-0.351997,-0.698254,-0.044663,-0.551806,-0.680774,0.161405,-0.857202,0.192929,0.046716,27
4222,0.276116,-0.010909,-0.102886,-0.992196,-0.982169,-0.981127,-0.992838,-0.981366,-0.979864,-0.931924,...,-0.623358,-0.906098,-0.177275,-0.309304,0.176515,-0.143818,-0.625215,-0.106678,-0.147469,21
6754,0.256382,0.000428,-0.113664,0.075014,0.046502,-0.369482,-0.024794,-0.085337,-0.385018,0.562230,...,0.020630,-0.316113,0.347143,0.590475,0.831084,0.465453,-0.860617,0.150598,-0.067109,29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5191,0.278897,-0.030306,-0.096043,-0.555352,-0.104055,-0.438064,-0.572530,-0.112149,-0.429688,-0.479744,...,0.417731,0.200267,-0.155279,-0.137981,0.929236,-0.583898,-0.703989,0.159112,-0.185944,25
5226,0.289183,-0.049248,-0.125083,-0.290043,-0.212102,-0.469731,-0.307317,-0.209558,-0.528635,-0.283244,...,-0.021970,-0.360982,-0.019533,-0.896005,-0.039591,0.494178,-0.670139,0.106156,-0.220746,25
5390,0.293946,-0.018341,-0.119916,-0.627198,-0.216566,-0.424764,-0.648666,-0.253814,-0.417569,-0.547082,...,0.527152,0.348736,-0.414621,-0.488542,0.095895,-0.297162,-0.856512,0.183547,-0.034939,25
860,0.280475,-0.018976,-0.113756,-0.994825,-0.985314,-0.965857,-0.995170,-0.984285,-0.963293,-0.937503,...,-0.889671,-0.984051,-0.079515,0.014291,-0.290347,0.602344,-0.839693,0.081388,-0.098270,5


In [3]:
# Target vector: an independent copy of the "Activity" label column, so later
# changes to y_train cannot write back into data_train.
y_train = data_train.loc[:, "Activity"].copy()

y_train

4525    WALKING_DOWNSTAIRS
1446    WALKING_DOWNSTAIRS
5995               WALKING
4222               SITTING
6754    WALKING_DOWNSTAIRS
               ...        
5191               WALKING
5226    WALKING_DOWNSTAIRS
5390               WALKING
860               STANDING
7270               WALKING
Name: Activity, Length: 7352, dtype: object

# ExtraTreesClassifier

In [4]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score

# Extra-Trees model #1: fixed seed, n_jobs=-1 to use all available cores.
ext_clf1 = ExtraTreesClassifier(n_jobs=-1, random_state=36774)
ext_clf1.fit(X=X_train, y=y_train)

# Mean accuracy over 10 cross-validation folds (shown as the cell output).
cross_val_score(ext_clf1, X_train, y_train, scoring="accuracy", cv=10).mean()

0.9903429089026915

In [5]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score

# Extra-Trees model #2: same settings as the other models, different seed.
ext_clf2 = ExtraTreesClassifier(n_jobs=-1, random_state=20615)
ext_clf2.fit(X=X_train, y=y_train)

# Mean accuracy over 10 cross-validation folds (shown as the cell output).
cross_val_score(ext_clf2, X_train, y_train, scoring="accuracy", cv=10).mean()

0.9897985063590653

In [6]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score

# Extra-Trees model #3: same settings as the other models, different seed.
ext_clf3 = ExtraTreesClassifier(n_jobs=-1, random_state=49330)
ext_clf3.fit(X=X_train, y=y_train)

# Mean accuracy over 10 cross-validation folds (shown as the cell output).
cross_val_score(ext_clf3, X_train, y_train, scoring="accuracy", cv=10).mean()

0.9899338213546288

In [7]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score

# Extra-Trees model #4: same settings as the other models, different seed.
ext_clf4 = ExtraTreesClassifier(n_jobs=-1, random_state=3831)
ext_clf4.fit(X=X_train, y=y_train)

# Mean accuracy over 10 cross-validation folds (shown as the cell output).
cross_val_score(ext_clf4, X_train, y_train, scoring="accuracy", cv=10).mean()

0.9897975820763089

In [8]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score

# Extra-Trees model #5: same settings as the other models, different seed.
ext_clf5 = ExtraTreesClassifier(n_jobs=-1, random_state=5946)
ext_clf5.fit(X=X_train, y=y_train)

# Mean accuracy over 10 cross-validation folds (shown as the cell output).
cross_val_score(ext_clf5, X_train, y_train, scoring="accuracy", cv=10).mean()

0.9897981366459628

# Voting

In [9]:
from sklearn.ensemble import VotingClassifier

# Hard-voting (majority-vote) ensemble over three of the Extra-Trees models.
#
# Each label now matches the estimator it names. The original attached the
# label 'ext3' to ext_clf2, so the ensemble's estimator names and repr
# misreported which model was a member. Renaming the label keeps the fitted
# models and the resulting score unchanged.
# NOTE(review): if ext_clf3 was the intended member, replace the middle entry
# with ("ext3", ext_clf3) instead - that changes the ensemble and its score.
voting_clf = VotingClassifier(
    estimators=[
        ("ext1", ext_clf1),
        ("ext2", ext_clf2),
        ("ext5", ext_clf5),
    ],
    voting="hard",
)

# Fit on the full training set; the fitted ensemble is shown as the cell output.
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('ext1',
                              ExtraTreesClassifier(n_jobs=-1,
                                                   random_state=36774)),
                             ('ext3',
                              ExtraTreesClassifier(n_jobs=-1,
                                                   random_state=20615)),
                             ('ext5',
                              ExtraTreesClassifier(n_jobs=-1,
                                                   random_state=5946))])

# Result!

In [10]:
# Mean accuracy of the voting ensemble over 10 cross-validation folds, with
# the folds evaluated in parallel (n_jobs=-1).
cross_val_score(
    voting_clf,
    X_train,
    y_train,
    scoring="accuracy",
    cv=10,
    n_jobs=-1,
).mean()

0.9917027136941734