In [2]:
from os.path import join
from os import listdir

import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error

Data preparation

In [3]:
# Names of the statistical features, one per (statistic, axis) pair.
# The statistic order mirrors the order in which get_stat_features
# concatenates its values: ten pandas aggregates, then MAE and RMSE.
features = [
    statistic + '_' + axis
    for statistic in (
        'skew', 'kurt', 'max', 'min', 'mean', 'std',
        'var', 'median', 'idxmax', 'idxmin', 'mae', 'rmse',
    )
    for axis in ('x', 'y', 'z')
]

In [4]:
def get_stat_features(frame):
    """Build the statistical feature vector for one recording.

    For every column of ``frame`` ten aggregates are computed, in this
    order: skew, kurtosis, max, min, mean, std, var, median, idxmax,
    idxmin. They are followed by the mean absolute error and the root mean
    squared error of the ``accelerometer_X``, ``accelerometer_Y`` and
    ``accelerometer_Z`` columns measured against their own column mean.
    For a frame holding exactly those three columns the result has
    10 * 3 + 6 = 36 values.

    The input frame is left untouched, so calling the function repeatedly
    on the same frame always yields the same vector.

    Parameters
    ----------
    frame : pandas.DataFrame
        Recording with at least the three ``accelerometer_*`` columns.

    Returns
    -------
    numpy.ndarray
        One-dimensional feature vector.

    Raises
    ------
    KeyError
        If one of the ``accelerometer_*`` columns is missing.
    ValueError
        If an ``accelerometer_*`` column contains NaN or infinite values
        (the error metrics are undefined for such input).
    """
    # Computed once and reused for both the "mean" features and the
    # deviation-based metrics below.
    column_means = frame.mean(axis=0)

    aggregates = [
        frame.skew(axis=0),
        frame.kurt(axis=0),
        frame.max(axis=0),
        frame.min(axis=0),
        column_means,
        frame.std(axis=0),
        frame.var(axis=0),
        frame.median(axis=0),
        frame.idxmax(axis=0),
        frame.idxmin(axis=0),
    ]

    axis_columns = ['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']
    readings = np.asarray(frame[axis_columns], dtype=float)
    if not np.isfinite(readings).all():
        raise ValueError(
            "accelerometer columns contain NaN or infinite values"
        )

    # Deviation of every sample from its column mean. Working on a NumPy
    # copy avoids adding helper columns to the caller's DataFrame.
    deviations = readings - np.asarray(column_means[axis_columns], dtype=float)
    mae = np.abs(deviations).mean(axis=0)
    rmse = np.sqrt(np.square(deviations).mean(axis=0))

    parts = [series.values for series in aggregates]
    parts.append(mae)
    parts.append(rmse)

    # A single concatenation instead of growing the vector step by step.
    return np.concatenate(parts, axis=0)

In [5]:
def class_data_stat_prepare(class_name, class_number):
    """Load every recording of one class and turn it into feature rows.

    Each file found in ``./data/<class_name>`` is read with
    ``pandas.read_csv`` and converted to a feature vector by
    ``get_stat_features``.

    Parameters
    ----------
    class_name : str
        Name of the sub-directory of ``./data`` holding the recordings.
    class_number : int
        Label assigned to every recording of this class.

    Returns
    -------
    X : numpy.ndarray
        One row of features per recording.
    y : numpy.ndarray
        ``class_number`` repeated once per recording.
    """
    path = join('./data', class_name)

    # listdir() returns entries in arbitrary order. Sorting keeps the row
    # order stable, so a seeded train/test split downstream selects the
    # same rows on every machine. Hidden entries (for example Jupyter's
    # `.ipynb_checkpoints` directory) are not recordings and are skipped.
    file_names = sorted(
        name for name in listdir(path) if not name.startswith('.')
    )

    X = np.array([
        get_stat_features(pd.read_csv(join(path, name)))
        for name in file_names
    ])
    y = np.array([class_number] * len(file_names))

    return X, y

In [6]:
def create_dataset(class_prepare):
    """Assemble the complete feature matrix and label vector.

    ``class_prepare`` is called as ``class_prepare(class_name, class_number)``
    for the four activity classes, in the order idle (0), walking (1),
    stairs (2), running (3), and must return an ``(X, y)`` pair each time.
    The per-class results are stacked in that same order.

    Returns
    -------
    X : numpy.ndarray
        Feature rows of all classes, concatenated along axis 0.
    Y : numpy.ndarray
        Labels aligned with the rows of ``X``.
    """
    labelled_classes = (
        ('idle', 0),
        ('walking', 1),
        ('stairs', 2),
        ('running', 3),
    )

    feature_blocks = []
    label_blocks = []
    for class_name, class_number in labelled_classes:
        class_X, class_y = class_prepare(class_name, class_number)
        feature_blocks.append(class_X)
        label_blocks.append(class_y)

    X = np.concatenate(feature_blocks, axis=0)
    Y = np.concatenate(label_blocks)

    return X, Y

In [7]:
# Build the feature matrix and labels from the per-class data folders.
X, Y = create_dataset(class_prepare=class_data_stat_prepare)

# Hold out 30% of the rows for evaluation; the seed fixes which rows are
# selected for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

# The scaler learns its statistics from the training rows only and is then
# applied unchanged to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Random Forest Classifier

In [8]:
# Seed the forest: bootstrap sampling and feature selection are random, so
# without a fixed random_state the accuracy printed below changes on every
# run. 42 matches the seed used for the train/test split.
random_forest_model = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest_model.fit(X_train_scaled, y_train)

# Evaluate on the held-out rows.
y_pred = random_forest_model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.998968540484786


Support Vector Machine Classifier

In [11]:
# NOTE(review): probability=True is not needed for the accuracy computed
# below; it is kept in case a later cell calls predict_proba - confirm.
svc_model = svm.SVC(
    probability=True,
    decision_function_shape='ovo',
)

# 10-fold cross-validation on the already-scaled training rows.
# NOTE(review): the scaler was fitted on all of X_train before the folds
# were formed, so each validation fold contributed to the scaling statistics.
scores = cross_val_score(
    estimator=svc_model,
    X=X_train_scaled,
    y=y_train,
    cv=10,
)
print("Accuracy:", scores.mean())

Accuracy: 0.9920397937056789
