In [1]:
import numpy as np
import pandas as pd
import pickle

from os import listdir
from os.path import join, isdir
from sklearn import model_selection, svm
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier

In [2]:
datapath = "data"

In [3]:
activities = [d for d in listdir(datapath) if isdir(join(datapath, d))]
activities

['running', 'idle', 'walking', 'stairs']

In [4]:
for act in activities:
    path = join(datapath, act)
    frames = listdir(path)
    print(f"{act}: {len(frames)}")

running: 3408
idle: 1039
walking: 1850
stairs: 165


In [5]:
frms = listdir(join(datapath, "stairs"))
frame = pd.read_csv(join(join(datapath, "stairs"), frms[2]))
frame.head(5)

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z
0,5.923255,-18.195932,10.486604
1,2.595315,-7.762002,-10.606314
2,-6.287173,-20.60929,-38.08696
3,1.49877,-14.719552,-5.975927
4,4.936844,-19.177555,17.492037


#### Підготовка даних

In [6]:
def get_stat_features(frame):
    features = np.array([])
    features = np.concatenate((features, frame.skew(axis=0).values), axis=0)
    features = np.concatenate((features, frame.kurt(axis=0).values), axis=0)
    features = np.concatenate((features, frame.max(axis=0).values), axis=0)
    features = np.concatenate((features, frame.min(axis=0).values), axis=0)
    features = np.concatenate((features, frame.mean(axis=0).values), axis=0)
    features = np.concatenate((features, frame.std(axis=0).values), axis=0)
    features = np.concatenate((features, frame.var(axis=0).values), axis=0)
    features = np.concatenate((features, frame.median(axis=0).values), axis=0)
    features = np.concatenate((features, frame.idxmax(axis=0).values), axis=0)
    features = np.concatenate((features, frame.idxmin(axis=0).values), axis=0)
    correlations = frame.corr()
    corr = np.array([correlations['accelerometer_X']['accelerometer_Y'], correlations['accelerometer_X']['accelerometer_Z'], correlations['accelerometer_Y']['accelerometer_Z']])
    features = np.concatenate((features, corr), axis=0)

    frame['mean_X'] = frame.mean(axis=0)['accelerometer_X']
    frame['mean_Y'] = frame.mean(axis=0)['accelerometer_Y']
    frame['mean_Z'] = frame.mean(axis=0)['accelerometer_Z']
    
    mae_X = mean_absolute_error(frame['accelerometer_X'], frame['mean_X'])
    mae_Y = mean_absolute_error(frame['accelerometer_Y'], frame['mean_Y'])
    mae_Z = mean_absolute_error(frame['accelerometer_Z'], frame['mean_Z'])
    
    rmse_x = np.sqrt(mean_squared_error(frame['accelerometer_X'], frame['mean_X']))
    rmse_y = np.sqrt(mean_squared_error(frame['accelerometer_Y'], frame['mean_Y']))
    rmse_z = np.sqrt(mean_squared_error(frame['accelerometer_Z'], frame['mean_Z']))

    metrics = np.array([mae_X, mae_Y, mae_Z, rmse_x, rmse_y, rmse_z])
    features = np.concatenate((features, metrics), axis=0)

    return features

In [7]:
len(get_stat_features(frame))

39

#### Визначення кількості характеристик

In [8]:
new_frame = frame

In [9]:
new_frame['mean_X'] = frame.mean(axis=0)['accelerometer_X']
new_frame['mean_Y'] = frame.mean(axis=0)['accelerometer_Y']
new_frame['mean_Z'] = frame.mean(axis=0)['accelerometer_Z']

new_frame.head(3)

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,mean_X,mean_Y,mean_Z
0,5.923255,-18.195932,10.486604,1.269406,-9.126697,-2.280078
1,2.595315,-7.762002,-10.606314,1.269406,-9.126697,-2.280078
2,-6.287173,-20.60929,-38.08696,1.269406,-9.126697,-2.280078


In [10]:
mean_absolute_error(frame['accelerometer_X'], new_frame['mean_X'])

2.251411582222223

In [11]:
np.sqrt(mean_squared_error(frame['accelerometer_X'], new_frame['mean_X']))

2.904224753121146

#### Підготовка класу

In [12]:
def class_data_stat_prepare(class_name, class_number):
    path = join(datapath, class_name)
    X = []
    for item in listdir(path):
        frame = pd.read_csv(join(path, item))
        features = get_stat_features(frame)
        X.append(features)

    y = [class_number]*len(X)

    X = np.array(X)
    y = np.array(y)

    return X, y

#### Створюємо DataSet

In [13]:
def create_dataset(class_prepare):
    X_idle, y_idle = class_prepare('idle', 0)
    X_walking, y_walking = class_prepare('walking', 1)
    X_stairs, y_stairs = class_prepare('stairs', 2)
    X_running, y_running = class_prepare('running', 3)

    X = np.concatenate((X_idle, X_walking), axis=0)
    X = np.concatenate((X, X_stairs), axis=0)
    X = np.concatenate((X, X_running), axis=0)

    Y = np.concatenate((y_idle, y_walking), axis=0)
    Y = np.concatenate((Y, y_stairs), axis=0)
    Y = np.concatenate((Y, y_running), axis=0)

    return X, Y

In [14]:
X, y = create_dataset(class_data_stat_prepare)

In [15]:
def save_data(prefix, X, y):
    with open(f'{prefix}_X.pickle', 'wb') as f:
        pickle.dump(X, f)
    with open(f'{prefix}_y.pickle', 'wb') as f:
        pickle.dump(y, f)

In [16]:
save_data('data', X, y)

In [17]:
def load_data(prefix):
    with open(f'{prefix}_X.pickle', 'rb') as f:
        X = pickle.load(f)
    with open(f'{prefix}_y.pickle', 'rb') as f:
        y = pickle.load(f)
    return X, y

In [18]:
X, y = load_data('data')

In [19]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=0.3)

#### порівнння VM Classifier VS RandomForest Classifier

In [20]:
cls_ovo = svm.SVC(decision_function_shape='ovo', kernel='rbf', gamma=0.005, probability=True).fit(X_train, y_train)
cls_ovr = svm.SVC(decision_function_shape='ovr', kernel='rbf', gamma=0.005, probability=True).fit(X_train, y_train)

cls_forest = RandomForestClassifier().fit(X_train, y_train)

In [21]:
svm_ovo_pred = cls_ovo.predict(X_test)
svm_ovr_pred = cls_ovr.predict(X_test)

forest_pred = cls_forest.predict(X_test)

In [22]:
svm_ovo_accuracy = accuracy_score(y_test, svm_ovo_pred)
svm_ovr_accuracy = accuracy_score(y_test, svm_ovr_pred)

forest_ovr_accuracy = accuracy_score(y_test, forest_pred)

print("accuracy SVM ovo: ", svm_ovo_accuracy)
print("accuracy SVM ovr: ", svm_ovr_accuracy)
print("accuracy RandomForest: ", forest_ovr_accuracy)

accuracy SVM ovo:  0.898762157382847
accuracy SVM ovr:  0.898762157382847
accuracy RandomForest:  0.99447391688771


In [23]:
svm_ovo_report = classification_report(y_test, svm_ovo_pred)
print("SVM ovo report: ")
print(svm_ovo_report)

SVM ovo report: 
              precision    recall  f1-score   support

           0       1.00      0.88      0.93       736
           1       0.98      0.76      0.86      1292
           2       1.00      0.45      0.62       107
           3       0.84      1.00      0.92      2389

    accuracy                           0.90      4524
   macro avg       0.96      0.77      0.83      4524
weighted avg       0.91      0.90      0.90      4524



In [24]:
svm_ovr_report = classification_report(y_test, svm_ovr_pred)
print("SVM ovr report: ")
print(svm_ovr_report)

SVM ovr report: 
              precision    recall  f1-score   support

           0       1.00      0.88      0.93       736
           1       0.98      0.76      0.86      1292
           2       1.00      0.45      0.62       107
           3       0.84      1.00      0.92      2389

    accuracy                           0.90      4524
   macro avg       0.96      0.77      0.83      4524
weighted avg       0.91      0.90      0.90      4524



In [25]:
forest_report = classification_report(y_test, forest_pred)
print("RandomForest report: ")
print(forest_report)


RandomForest report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       736
           1       0.98      1.00      0.99      1292
           2       0.96      0.80      0.87       107
           3       1.00      1.00      1.00      2389

    accuracy                           0.99      4524
   macro avg       0.98      0.95      0.97      4524
weighted avg       0.99      0.99      0.99      4524

