In [13]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from scipy.stats import skew, kurtosis



In [4]:
folder_path = '/Users/ALONA/Desktop/Go_IT/HW/Data Science/Data_Science/HW_5/data'
dfs = []

for root, dirs, files in os.walk(folder_path):
    for file in files:
        if file.endswith('.csv'):
            file_path = os.path.join(root, file)
            df = pd.read_csv(file_path)
            folder_name = os.path.basename(os.path.dirname(file_path))
            df['activity'] = folder_name
            dfs.append(df)

combined_df = pd.concat(dfs, ignore_index=True)

combined_df

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,1.757344,-5.875371,-3.878607,running
1,2.461239,12.986150,1.762132,running
2,22.682667,22.922087,-1.738190,running
3,11.875240,4.611233,-8.389283,running
4,-5.295974,4.783615,-1.273715,running
...,...,...,...,...
193855,-4.137180,-4.429273,-1.517924,stairs
193856,1.829170,-9.313444,-0.330400,stairs
193857,-7.350199,-12.670115,-1.460463,stairs
193858,3.969586,-6.320692,9.255983,stairs


In [16]:
def calculate_basic_time_domain_features(df):
    features = {}
    features['mean_X'] = df['accelerometer_X'].mean()
    features['mean_Y'] = df['accelerometer_Y'].mean()
    features['mean_Z'] = df['accelerometer_Z'].mean()
    features['std_X'] = df['accelerometer_X'].std()
    features['std_Y'] = df['accelerometer_Y'].std()
    features['std_Z'] = df['accelerometer_Z'].std()
    features['max_X'] = df['accelerometer_X'].max()
    features['max_Y'] = df['accelerometer_Y'].max()
    features['max_Z'] = df['accelerometer_Z'].max()
    features['min_X'] = df['accelerometer_X'].min()
    features['min_Y'] = df['accelerometer_Y'].min()
    features['min_Z'] = df['accelerometer_Z'].min()
    return pd.Series(features)

In [18]:
def calculate_extended_time_domain_features(df):
    features = calculate_basic_time_domain_features(df).to_dict()
    features['median_X'] = df['accelerometer_X'].median()
    features['median_Y'] = df['accelerometer_Y'].median()
    features['median_Z'] = df['accelerometer_Z'].median()
    features['skew_X'] = skew(df['accelerometer_X'])
    features['skew_Y'] = skew(df['accelerometer_Y'])
    features['skew_Z'] = skew(df['accelerometer_Z'])
    features['kurt_X'] = kurtosis(df['accelerometer_X'])
    features['kurt_Y'] = kurtosis(df['accelerometer_Y'])
    features['kurt_Z'] = kurtosis(df['accelerometer_Z'])
    return pd.Series(features)

In [19]:
# Визначення розміру вікна та кроку
window_size = 100  # Розмір вікна
step_size = 50    # Крок вікна

segments_basic = []
segments_extended = []
labels = []

# Сегментація за допомогою ковзного вікна
for start in range(0, len(combined_df) - window_size, step_size):
    end = start + window_size
    segment = combined_df.iloc[start:end]
    if len(segment) == window_size:
        segments_basic.append(calculate_basic_time_domain_features(segment))
        segments_extended.append(calculate_extended_time_domain_features(segment))
        labels.append(segment['activity'].mode()[0])  # Найбільш частий клас у вікні


In [20]:
X_basic = pd.DataFrame(segments_basic)
X_extended = pd.DataFrame(segments_extended)
y = pd.Series(labels)

# Розділення на тренувальні та тестові дані
X_train_basic, X_test_basic, y_train, y_test = train_test_split(X_basic, y, test_size=0.3, random_state=42)
X_train_extended, X_test_extended, _, _ = train_test_split(X_extended, y, test_size=0.3, random_state=42)

# Стандартизація ознак
scaler_basic = StandardScaler()
X_train_basic = scaler_basic.fit_transform(X_train_basic)
X_test_basic = scaler_basic.transform(X_test_basic)

scaler_extended = StandardScaler()
X_train_extended = scaler_extended.fit_transform(X_train_extended)
X_test_extended = scaler_extended.transform(X_test_extended)

# Моделі SVM та випадкового лісу для базових фіч
svm_model_basic = SVC(kernel='linear')
svm_model_basic.fit(X_train_basic, y_train)
y_pred_svm_basic = svm_model_basic.predict(X_test_basic)

rf_model_basic = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model_basic.fit(X_train_basic, y_train)
y_pred_rf_basic = rf_model_basic.predict(X_test_basic)

# Моделі SVM та випадкового лісу для розширених фіч
svm_model_extended = SVC(kernel='linear')
svm_model_extended.fit(X_train_extended, y_train)
y_pred_svm_extended = svm_model_extended.predict(X_test_extended)

rf_model_extended = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model_extended.fit(X_train_extended, y_train)
y_pred_rf_extended = rf_model_extended.predict(X_test_extended)

# Оцінка моделей
print("SVM with Basic Features - Classification Report")
print(classification_report(y_test, y_pred_svm_basic))

print("Random Forest with Basic Features - Classification Report")
print(classification_report(y_test, y_pred_rf_basic))

print("SVM with Extended Features - Classification Report")
print(classification_report(y_test, y_pred_svm_extended))

print("Random Forest with Extended Features - Classification Report")
print(classification_report(y_test, y_pred_rf_extended))

SVM with Basic Features - Classification Report
              precision    recall  f1-score   support

        idle       1.00      0.99      1.00       180
     running       1.00      1.00      1.00       628
      stairs       1.00      1.00      1.00        28
     walking       1.00      1.00      1.00       327

    accuracy                           1.00      1163
   macro avg       1.00      1.00      1.00      1163
weighted avg       1.00      1.00      1.00      1163

Random Forest with Basic Features - Classification Report
              precision    recall  f1-score   support

        idle       1.00      0.99      1.00       180
     running       1.00      1.00      1.00       628
      stairs       1.00      1.00      1.00        28
     walking       1.00      1.00      1.00       327

    accuracy                           1.00      1163
   macro avg       1.00      1.00      1.00      1163
weighted avg       1.00      1.00      1.00      1163

SVM with Extended Featur