In [4]:
import numpy as np
import pandas as pd
import os
import pickle
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Prepare Data

In [5]:
features = [
    'min_x',
    'min_y',
    'min_z',
    'max_x',
    'max_y',
    'max_z',
    'mean_x',
    'mean_y',
    'mean_z',
    'std_x',
    'std_y',
    'std_z',
    'median_x',
    'median_y',
    'median_z',
    'index_min_x',
    'index_min_y',
    'index_min_z',
    'index_max_x',
    'index_max_y',
    'index_max_z',
    'skew_x',
    'skew_y',
    'skew_z',
    'kurtosis_x',
    'kurtosis_y',
    'kurtosis_z',
    'variance_x',
    'variance_y',
    'variance_z',
    'correlation_x_y',
    'correlation_x_z',
    'correlation_y_z',
]

In [368]:
def prepare_data_features(data):
    res = np.array([])
    res = np.concatenate((res, data.min(axis=0).values), axis=0)
    res = np.concatenate((res, data.max(axis=0).values), axis=0)
    res = np.concatenate((res, data.mean(axis=0).values), axis=0)
    res = np.concatenate((res, data.std(axis=0).values), axis=0)
    res = np.concatenate((res, data.median(axis=0).values), axis=0)
    res = np.concatenate((res, data.idxmin(axis=0).values), axis=0)
    res = np.concatenate((res, data.idxmax(axis=0).values), axis=0)
    res = np.concatenate((res, data.skew(axis=0).values), axis=0)
    res = np.concatenate((res, data.kurt(axis=0).values), axis=0)
    res = np.concatenate((res, data.var(axis=0).values), axis=0)
    corr = data.corr()
    corr['accelerometer_X']['accelerometer_Y'], corr['accelerometer_X']['accelerometer_Z'], corr['accelerometer_Y']['accelerometer_Z']
    res = np.concatenate((res, [corr['accelerometer_X']['accelerometer_Y'], corr['accelerometer_X']['accelerometer_Z'], corr['accelerometer_Y']['accelerometer_Z']]), axis=0)
    return res

In [380]:
def prepare_data_features_part(data):
    res = np.array([])
    res = np.concatenate((res, data.min(axis=0).values), axis=0)
    res = np.concatenate((res, data.max(axis=0).values), axis=0)
    res = np.concatenate((res, data.mean(axis=0).values), axis=0)
    res = np.concatenate((res, data.std(axis=0).values), axis=0)
    res = np.concatenate((res, data.median(axis=0).values), axis=0)
    return res

In [390]:
def read_data(path):
    activities = os.listdir(path)
    X = []
    y = []
    for i in range(len(activities)):
        dir_name = os.path.join(path, activities[i])
        X_p = []
        for file in os.listdir(dir_name):
            if file.split('.')[-1] == 'csv':
                data = pd.read_csv(os.path.join(dir_name, file))
                X_p.append(data.values[0]) 
        y_p =[i]*len(X_p)
        y = [*y, *y_p]
        X = [*X, *X_p]
    return np.array(X), np.array(y)

In [391]:
def read_data_features(path):
    activities = os.listdir(path)
    X = []
    y = []
    for i in range(len(activities)):
        dir_name = os.path.join(path, activities[i])
        X_p = []
        for file in os.listdir(dir_name):
            if file.split('.')[-1] == 'csv':
                data = pd.read_csv(os.path.join(dir_name, file))
                X_p.append(prepare_data_features(data)) 
        y_p =[i]*len(X_p)
        y = [*y, *y_p]
        X = [*X, *X_p]
    return np.array(X), np.array(y)

In [392]:
def read_data_features_part(path):
    activities = os.listdir(path)
    X = []
    y = []
    for i in range(len(activities)):
        dir_name = os.path.join(path, activities[i])
        X_p = []
        for file in os.listdir(dir_name):
            if file.split('.')[-1] == 'csv':
                data = pd.read_csv(os.path.join(dir_name, file))
                X_p.append(prepare_data_features_part(data)) 
        y_p =[i]*len(X_p)
        y = [*y, *y_p]
        X = [*X, *X_p]
    return np.array(X), np.array(y)

In [393]:
def save_data(prefix, X, y):
    with open(f'data_X_{prefix}.pickle', 'wb') as f:
        pickle.dump(X, f)
    with open(f'data_y_{prefix}.pickle', 'wb') as f:
        pickle.dump(y, f)

In [7]:
def load_data(prefix):
    with open(f'data_X_{prefix}.pickle', 'rb') as f:
        X = pickle.load(f)
    with open(f'data_y_{prefix}.pickle', 'rb') as f:
        y = pickle.load(f)
    return X, y

In [395]:
X, y = read_data('data')
save_data('simple', X, y)
X, y = read_data_features_part('data')
save_data('feature_half', X, y)
X, y = read_data_features('data')
save_data('feature', X, y)

In [8]:
X, y = load_data('simple')
X_test, X_train, y_test, y_train = train_test_split(X, y, train_size=0.2, random_state=42)
X, y = load_data('feature_half')
X_test_fh, X_train_fh, y_test_fh, y_train_fh = train_test_split(X, y, train_size=0.2, random_state=42)
X, y = load_data('feature')
X_test_f, X_train_f, y_test_f, y_train_f = train_test_split(X, y, train_size=0.2, random_state=42)

# SVM Model

In [9]:
def models(X_test, X_train, y_test, y_train):
    svm_ovo_rbf = SVC(decision_function_shape='ovo', kernel='rbf').fit(X_train, y_train)
    svm_ovo_rbf_gamma = SVC(decision_function_shape='ovo', kernel='rbf', gamma=0.005).fit(X_train, y_train)
    svm_ovo_linear = SVC(decision_function_shape='ovo', kernel='linear').fit(X_train, y_train)
    svm_ovr_rbf = SVC(decision_function_shape='ovr', kernel='rbf').fit(X_train, y_train)
    svm_ovr_rbf_gamma= SVC(decision_function_shape='ovr', kernel='rbf', gamma=0.005).fit(X_train, y_train)
    svm_ovr_linear = SVC(decision_function_shape='ovr', kernel='linear').fit(X_train, y_train)
    forest = RandomForestClassifier().fit(X_train, y_train)
    
    score_test_ovo_rbf = svm_ovo_rbf.score(X_test, y_test)
    score_test_ovo_rbf_gamma = svm_ovo_rbf_gamma.score(X_test, y_test)
    score_test_ovo_linear = svm_ovo_linear.score(X_test, y_test)
    score_test_ovr_rbf = svm_ovr_rbf.score(X_test, y_test)
    score_test_ovr_rbf_gamma = svm_ovr_rbf_gamma.score(X_test, y_test)
    score_test_ovr_linear = svm_ovr_linear.score(X_test, y_test)
    score_test_forest = forest.score(X_test, y_test)
    
    return [
        score_test_ovo_rbf, 
        score_test_ovo_rbf_gamma, 
        score_test_ovo_linear, 
        score_test_ovr_rbf, 
        score_test_ovr_rbf_gamma, 
        score_test_ovr_linear,
        score_test_forest
    ]

# Check score

In [30]:
result_scores = [
    models(X_test, X_train, y_test, y_train),
    models(X_test_fh, X_train_fh, y_test_fh, y_train_fh),
    models(X_test_f[:,21:], X_train_f[:, 21:], y_test_f, y_train_f),
    models(X_test_f, X_train_f, y_test_f, y_train_f)
]

In [31]:
print('Rows (features):')
print('    1 - accelerometer_X, accelerometer_Y, accelerometer_Z')
print('    2 - ', ', '.join(features[:15]))
print('    3 - ', ', '.join(features[21:]))
print('    4 - ', ', '.join(features))
print()
print('       ovo rbf     ', 
      'ovo rbf gamma(0.005)', 
      '    ovo linear     ', 
      '     ovr rbf       ', 
      'ovr rbf gamma(0.005)', 
      '    ovr linear     ',
      '   random forest   ')
print()
for el in result_scores:
    print(el)
    print()

Rows (features):
    1 - accelerometer_X, accelerometer_Y, accelerometer_Z
    2 -  min_x, min_y, min_z, max_x, max_y, max_z, mean_x, mean_y, mean_z, std_x, std_y, std_z, median_x, median_y, median_z
    3 -  skew_x, skew_y, skew_z, kurtosis_x, kurtosis_y, kurtosis_z, variance_x, variance_y, variance_z, correlation_x_y, correlation_x_z, correlation_y_z
    4 -  min_x, min_y, min_z, max_x, max_y, max_z, mean_x, mean_y, mean_z, std_x, std_y, std_z, median_x, median_y, median_z, index_min_x, index_min_y, index_min_z, index_max_x, index_max_y, index_max_z, skew_x, skew_y, skew_z, kurtosis_x, kurtosis_y, kurtosis_z, variance_x, variance_y, variance_z, correlation_x_y, correlation_x_z, correlation_y_z

       ovo rbf      ovo rbf gamma(0.005)     ovo linear           ovr rbf        ovr rbf gamma(0.005)     ovr linear         random forest   

[0.8746130030959752, 0.8761609907120743, 0.8134674922600619, 0.8746130030959752, 0.8761609907120743, 0.8134674922600619, 0.8839009287925697]

[0.988390