In [1]:
path = "D:\moduls\hw5\homework\data"

In [3]:
from os import listdir
from os.path import isfile, join
import numpy as np
import pandas as pd 

activities = listdir(path)

In [9]:
activities

['idle', 'running', 'stairs', 'walking']

In [12]:
for act in activities:
    mypath = join(path, act)
    frame = listdir(mypath)
    print(act, ": ", len(frame))

idle :  1039
running :  3408
stairs :  165
walking :  1850


In [15]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.fftpack import fft, rfft
from sklearn import preprocessing

In [27]:
features = [
    "skew_X",
    "skew_Y",
    "skew_Z",
    "kurtosis_X",
    "kurtosis_Y",
    "kurtosis_Z",
    "max_X",
    "max_Y",
    "max_Z",
    "mean_X",
    "mean_Y",
    "mean_Z",
    "std_X",
    "std_Y",
    "std_Z",
    "variance_X",
    "variance_Y",
    "variance_Z",
    "median_X",
    "median_Y",
    "median_Z",
    "index_max_X",
    "index_max_Y",
    "index_max_Z",
    "index_min_X",
    "index_min_Y",
    "index_min_Z",
    "correlation_X_Y",
    "correlation_X_Z",
    "correlation_Y_Z",
    "mae_X",
    "mae_Y",
    "mae_Z",
    "rmse_X",
    "rmse_Y",
    "rmse_Z"
]

In [31]:
# Шлях до папки з файлами


def create_fetures(class_name, class_number):
    
    folder_path = join(path, class_name)
    
    features_df = pd.DataFrame(columns=features)

    # Отримання списку файлів у папці
    file_list = listdir(folder_path)

    # Проходження по кожному файлу та обчислення ознак
    for file_name in file_list:

        file_path = join(folder_path, file_name)
        data = pd.read_csv(file_path)

        # Обчислення ознак
        features_values = []

        # Обчислення skewness
        skew_x = data['accelerometer_X'].skew()
        skew_y = data['accelerometer_Y'].skew()
        skew_z = data['accelerometer_Z'].skew()
        features_values.extend([skew_x, skew_y, skew_z])

            # Обчислення kurtosis
        kurtosis_x = data['accelerometer_X'].kurtosis()
        kurtosis_y = data['accelerometer_Y'].kurtosis()
        kurtosis_z = data['accelerometer_Z'].kurtosis()
        features_values.extend([kurtosis_x, kurtosis_y, kurtosis_z])

            # Обчислення максимального значення
        max_x = data['accelerometer_X'].max()
        max_y = data['accelerometer_Y'].max()
        max_z = data['accelerometer_Z'].max()
        features_values.extend([max_x, max_y, max_z])

            # Обчислення середнього значення
        mean_x = data['accelerometer_X'].mean()
        mean_y = data['accelerometer_Y'].mean()
        mean_z = data['accelerometer_Z'].mean()
        features_values.extend([mean_x, mean_y, mean_z])

            # Обчислення стандартного відхилення
        std_x = data['accelerometer_X'].std()
        std_y = data['accelerometer_Y'].std()
        std_z = data['accelerometer_Z'].std()
        features_values.extend([std_x, std_y, std_z])

            # Обчислення дисперсії
        variance_x = data['accelerometer_X'].var()
        variance_y = data['accelerometer_Y'].var()
        variance_z = data['accelerometer_Z'].var()
        features_values.extend([variance_x, variance_y, variance_z])

            # Обчислення медіани
        median_x = data['accelerometer_X'].median()
        median_y = data['accelerometer_Y'].median()
        median_z = data['accelerometer_Z'].median()
        features_values.extend([median_x, median_y, median_z])

            # Знаходження індексу максимального значення
        index_max_x = data['accelerometer_X'].idxmax()
        index_max_y = data['accelerometer_Y'].idxmax()
        index_max_z = data['accelerometer_Z'].idxmax()
        features_values.extend([index_max_x, index_max_y, index_max_z])

            # Знаходження індексу мінімального значення
        index_min_x = data['accelerometer_X'].idxmin()
        index_min_y = data['accelerometer_Y'].idxmin()
        index_min_z = data['accelerometer_Z'].idxmin()
        features_values.extend([index_min_x, index_min_y, index_min_z])

            # Обчислення кореляції між вимірами
        correlation_x_y = data['accelerometer_X'].corr(data['accelerometer_Y'])
        correlation_x_z = data['accelerometer_X'].corr(data['accelerometer_Z'])
        correlation_y_z = data['accelerometer_Y'].corr(data['accelerometer_Z'])
        features_values.extend([correlation_x_y, correlation_x_z, correlation_y_z])

            # Обчислення MAE
        mae_x = np.mean(np.abs(data['accelerometer_X'] - data['accelerometer_X'].mean()))
        mae_y = np.mean(np.abs(data['accelerometer_Y'] - data['accelerometer_Y'].mean()))
        mae_z = np.mean(np.abs(data['accelerometer_Z'] - data['accelerometer_Z'].mean()))
        features_values.extend([mae_x, mae_y, mae_z])

            # Обчислення RMSE
        rmse_x = np.sqrt(np.mean(np.square(data['accelerometer_X'] - data['accelerometer_X'].mean())))
        rmse_y = np.sqrt(np.mean(np.square(data['accelerometer_Y'] - data['accelerometer_Y'].mean())))
        rmse_z = np.sqrt(np.mean(np.square(data['accelerometer_Z'] - data['accelerometer_Z'].mean())))
        features_values.extend([rmse_x, rmse_y, rmse_z])

            # Додавання значень ознак до DataFrame
        features_df.loc[len(features_df)] = features_values
        X = features_df
        y = [class_number] * len(X)
        
        
    return X, y


In [38]:
def preapare_class():
    X_idle, y_idle = create_fetures("idle", 0)
    X_running, y_running = create_fetures("running", 1)
    X_stairs, y_stairs = create_fetures("stairs", 2)
    X_walking, y_walking = create_fetures("walking", 3)
    
    X = np.concatenate((X_idle, X_running), axis=0)
    X = np.concatenate((X, X_stairs), axis=0)
    X = np.concatenate((X, X_walking), axis=0)
    
    Y = np.concatenate((y_idle, y_running))
    Y = np.concatenate((Y, y_stairs))
    Y = np.concatenate((Y, y_walking))
    
    return X, Y
    


In [39]:
X, Y = preapare_class()

In [43]:
import pickle
with open("data_X", "wb") as f:
    pickle.dump(X, f)
    
with open("data_Y", "wb") as f:
    pickle.dump(Y, f)

In [67]:
from sklearn import model_selection
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
X_train, X_test, y_train, y_test = model_selection.train_test_split(X,Y, test_size=0.3)

In [72]:
from sklearn import svm
clf = svm.SVC(decision_function_shape="ovo", kernel="rbf", gamma=0.01, probability=True)
clf.fit(X_train, y_train)

In [73]:
prediction_csv_ovo = clf.predict(X_test)
acuracy = accuracy_score(y_test, prediction_csv_ovo)
acuracy

0.887055183084064

In [77]:
clf_ovr = svm.SVC(kernel="linear", probability=True)
clf_ovr.fit(X_train, y_train)

In [78]:
prediction_csv_ovr = clf_ovr.predict(X_test)
acuracy = accuracy_score(y_test, prediction_csv_ovr)
acuracy

0.9938112429087158

In [80]:
score = cross_val_score(clf_ovr, X_train, y_train, cv=10)
score.mean()

0.994694660962316

In [84]:
from sklearn.ensemble import RandomForestClassifier
clf_forest = RandomForestClassifier(n_estimators=15, max_depth=None)

In [85]:
clf_forest.fit(X_train, y_train)

In [87]:
scores = cross_val_score(clf_forest, X_train, y_train, cv=10)
scores.mean()

0.9969046084119635

In [88]:
y_pred_forest= clf_forest.predict(X_test)
acuracy_forest = accuracy_score(y_test, y_pred_forest)
acuracy_forest

0.9963898916967509