# Human Activity Recognition using RandomForest

In [164]:
# Importing relevant packages

import os,sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier


Feature Engineering and Dataset Creation

In [165]:
# Activity labels; each one is also the name of the sub-directory that is
# scanned for that activity's recordings.
activities = ['bending1', 'bending2', 'cycling', 'lying', 'sitting', 'standing', 'walking']
# Raw signal columns read out of every recording.
features = ['avg_rss12', 'var_rss12', 'avg_rss13', 'var_rss13', 'avg_rss23', 'var_rss23']

# Time-domain summary statistics extracted from each raw time series.
extract = ["min", "max", "mean", "median"]
# Statistic name -> reducer. Driving both the column names and the computed
# values from `extract` keeps the two in the same order by construction.
stat_funcs = {"min": np.min, "max": np.max, "mean": np.mean, "median": np.median}

# Derived feature names (e.g. "min_avg_rss12"), grouped by raw feature.
feature_index = [stat + "_" + f for f in features for stat in extract]

# Empty frame carrying only the derived feature names.
table = pd.DataFrame(columns=feature_index)

# Column names of the final dataset: derived features followed by the target.
# Built as a new list so that `feature_index` is not mutated through an alias.
table_activity_index = feature_index + ['activity']
new_list = []

# Read every recording and reduce it to one row of summary statistics.
for activity in activities:
    mypath = "Projectenv/activityrecognition/{0}".format(activity)
    # os.listdir gives no ordering guarantee; sorting makes the row order (and
    # therefore the seeded train/test split downstream) reproducible.
    for fname in sorted(os.listdir(mypath)):
        # Skips the first 4 lines and keeps columns 1..6.
        # NOTE(review): presumably 4 preamble lines precede the header and
        # column 0 is the timestamp - confirm against the data files.
        df = pd.read_csv(os.path.join(mypath, fname), skiprows=4, usecols=range(1, 7))
        # One value per (raw feature, statistic), in the same order as feature_index.
        ext = [stat_funcs[stat](df[f]) for f in features for stat in extract]
        ext.append(activity)  # target label for this recording
        new_list.append(ext)

# Final dataset: one row per recording.
# The original referenced an undefined name (`table_index`) here, which raised
# NameError on a fresh kernel.
dataset = pd.DataFrame(new_list, columns=table_activity_index)
dataset

Unnamed: 0,min_avg_rss12,max_avg_rss12,mean_avg_rss12,median_avg_rss12,min_var_rss12,max_var_rss12,mean_var_rss12,median_var_rss12,min_avg_rss13,max_avg_rss13,...,median_var_rss13,min_avg_rss23,max_avg_rss23,mean_avg_rss23,median_avg_rss23,min_var_rss23,max_var_rss23,mean_var_rss23,median_var_rss23,activity
0,37.25,45.00,40.624792,40.500,0.0,1.30,0.358604,0.430,4.00,29.50,...,0.500,27.25,38.25,34.311292,35.00,0.0,1.92,0.570583,0.430,bending1
1,38.00,45.67,42.812812,42.500,0.0,1.22,0.372437,0.470,2.00,29.50,...,0.500,27.67,38.50,33.024583,33.00,0.0,3.11,0.571083,0.430,bending1
2,35.00,47.40,43.954500,44.330,0.0,1.70,0.426250,0.470,6.50,29.75,...,0.430,29.00,38.50,35.588458,36.00,0.0,1.79,0.493292,0.430,bending1
3,33.00,47.75,42.179812,43.500,0.0,3.00,0.696042,0.500,8.50,30.00,...,0.830,20.00,38.67,33.493917,35.00,0.0,2.18,0.613521,0.500,bending1
4,33.00,45.75,41.678063,41.750,0.0,2.83,0.535979,0.500,3.00,28.25,...,0.500,23.67,37.50,29.857083,30.00,0.0,1.79,0.383292,0.430,bending1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,20.75,46.25,34.763333,35.290,0.0,12.68,4.223792,3.900,5.00,23.75,...,3.000,6.00,25.50,16.299563,16.25,0.0,9.39,3.288271,3.270,walking
84,21.50,51.00,34.935812,35.500,0.0,12.21,4.115750,3.845,6.50,23.33,...,2.870,6.33,25.00,15.997229,16.25,0.0,10.21,3.280021,3.015,walking
85,18.33,47.67,34.333042,34.750,0.0,12.48,4.396958,3.900,7.67,23.33,...,2.930,6.67,24.00,15.864437,16.00,0.0,8.01,3.261583,2.980,walking
86,18.33,45.75,34.599875,35.125,0.0,15.37,4.398833,4.025,5.50,24.00,...,3.015,6.50,23.25,16.055563,16.00,0.0,8.86,3.289542,3.015,walking


Pre-Processing

In [166]:
# Count the missing entries in each column (stored for inspection, not displayed)
null_sum = dataset.isna().sum()

# Keep only the rows that have no missing value in any column
dataset = dataset.dropna(axis="index")

Splitting into training and testing sets

In [167]:

# Target is the activity label; every remaining column is a predictor
Y = dataset['activity']
X = dataset.drop(columns='activity')

# Hold out 20% of the rows for testing; the seed is fixed so the split is repeatable
train_x, test_x, train_y, test_y = train_test_split(X, Y, test_size=0.2, random_state=0)

# Report the shape of each partition: train X, train y, test X, test y
for part in (train_x, train_y, test_x, test_y):
    print(part.shape)

(69, 24)
(69,)
(18, 24)
(18,)


Performing classification using RandomForest 

In [168]:

# Creating the model
# random_state is pinned so the forest's randomness is repeatable; without it
# the reported accuracy changes on every run of the notebook.
model = RandomForestClassifier(n_estimators=100,
                               bootstrap=True,
                               max_features='sqrt',
                               class_weight="balanced",
                               random_state=0)

# Fit on training data
rf_fit = model.fit(train_x, train_y)

# Checking accuracy on the held-out test set
accuracy = rf_fit.score(test_x, test_y)
print("\n Accuracy of the classifier is :", accuracy)


 Accuracy of the classifier is : 0.8888888888888888
