In [18]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, mean_squared_error
from scipy.stats import skew, kurtosis, iqr



In [4]:
folder_path = '/Users/ALONA/Desktop/Go_IT/HW/Data Science/Data_Science/HW_5/data'
dfs = []

for root, dirs, files in os.walk(folder_path):
    for file in files:
        if file.endswith('.csv'):
            file_path = os.path.join(root, file)
            df = pd.read_csv(file_path)
            folder_name = os.path.basename(os.path.dirname(file_path))
            df['activity'] = folder_name
            dfs.append(df)

combined_df = pd.concat(dfs, ignore_index=True)

combined_df

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,1.757344,-5.875371,-3.878607,running
1,2.461239,12.986150,1.762132,running
2,22.682667,22.922087,-1.738190,running
3,11.875240,4.611233,-8.389283,running
4,-5.295974,4.783615,-1.273715,running
...,...,...,...,...
193855,-4.137180,-4.429273,-1.517924,stairs
193856,1.829170,-9.313444,-0.330400,stairs
193857,-7.350199,-12.670115,-1.460463,stairs
193858,3.969586,-6.320692,9.255983,stairs


In [5]:
features = ['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']
scaler = StandardScaler()
data_to_normalize = combined_df[features]
# Нормалізація даних
normalized_data = scaler.fit_transform(data_to_normalize)
# Заміна вихідних даних нормалізованими даними
ndata = combined_df.copy()
ndata[features] = normalized_data
ndata


Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,-0.019775,-0.599143,-0.790301,running
1,0.063974,0.912923,-0.005946,running
2,2.469899,1.709454,-0.492672,running
3,1.184042,0.241533,-1.417518,running
4,-0.858972,0.255353,-0.428086,running
...,...,...,...,...
193855,-0.721099,-0.483214,-0.462044,stairs
193856,-0.011229,-0.874762,-0.296916,stairs
193857,-1.103381,-1.143855,-0.454054,stairs
193858,0.243435,-0.634843,1.036086,stairs


In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    ndata[features], ndata["activity"],
    test_size=0.3,
    stratify=ndata["activity"],
)



In [9]:
model_svm = SVC()
model_svm.fit(X_train, y_train)
y_pred_svm = model_svm.predict(X_test)
print("SVM  - Classification Report")
print(classification_report(y_test, y_pred_svm))

SVM with Basic Features - Classification Report
              precision    recall  f1-score   support

        idle       0.96      0.98      0.97      9351
     running       0.93      0.91      0.92     30672
      stairs       1.00      0.01      0.01      1485
     walking       0.80      0.90      0.85     16650

    accuracy                           0.89     58158
   macro avg       0.92      0.70      0.69     58158
weighted avg       0.90      0.89      0.88     58158



In [11]:
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
print("Random Forest - Classification Report")
print(classification_report(y_test, y_pred_rf))

Random Forest - Classification Report
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      9351
     running       1.00      1.00      1.00     30672
      stairs       1.00      0.99      1.00      1485
     walking       1.00      1.00      1.00     16650

    accuracy                           1.00     58158
   macro avg       1.00      1.00      1.00     58158
weighted avg       1.00      1.00      1.00     58158



In [12]:
svm_accuracy = accuracy_score(y_test, y_pred_svm)
rf_accuracy = accuracy_score(y_test, y_pred_rf)

print("Model accuracy SVM:", svm_accuracy)
print("Model accuracy Random Forest:", rf_accuracy)

Model accuracy SVM: 0.8929468000962895
Model accuracy Random Forest: 0.9996561092197118


In [21]:
def entropy(signal):
    """Розрахунок ентропії сигналу."""
    probability_distribution, _ = np.histogram(signal, bins=10, density=True)
    probability_distribution = probability_distribution[probability_distribution > 0]
    return -np.sum(probability_distribution * np.log2(probability_distribution))

def mean_absolute_deviation(signal):
    """Розрахунок середнього абсолютного відхилення сигналу."""
    return np.mean(np.abs(signal - np.mean(signal)))

def calculate_features(segment):
    features = {}
    
    # Характеристики для осі X
    features['max_x'] = segment['accelerometer_X'].max()
    features['min_x'] = segment['accelerometer_X'].min()
    features['entropy_x'] = entropy(segment['accelerometer_X'])
    features['iqr_x'] = iqr(segment['accelerometer_X'])
    
    # Характеристики для осі Y
    features['max_y'] = segment['accelerometer_Y'].max()
    features['index_min_y'] = segment['accelerometer_Y'].idxmin()
    features['mean_abs_dev_y'] = mean_absolute_deviation(segment['accelerometer_Y'])
    features['median_y'] = segment['accelerometer_Y'].median()
    features['skew_y'] = skew(segment['accelerometer_Y'])
    features['std_y'] = segment['accelerometer_Y'].std()
    features['rmse_y'] = np.sqrt(mean_squared_error(segment['accelerometer_Y'], np.zeros(len(segment['accelerometer_Y']))))
    
    # Характеристики для осі Z
    features['skew_z'] = skew(segment['accelerometer_Z'])
    
    return pd.Series(features)

In [23]:
# Визначення розміру вікна та кроку
window_size = 100  # Розмір вікна
step_size = 50    # Крок вікна

segments = []
labels = []

for start in range(0, len(ndata) - window_size, step_size):
    end = start + window_size
    segment = ndata.iloc[start:end]
    if len(segment) == window_size:
        segments.append(calculate_features(segment))
        labels.append(segment['activity'].mode()[0])

# Перетворення списків на DataFrame
X = pd.DataFrame(segments)
y = pd.Series(labels)


In [26]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Стандартизація ознак
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [27]:
# Модель SVM
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

# Модель випадкового лісу
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# Оцінка моделей
print("SVM Classification Report")
print(classification_report(y_test, y_pred_svm))

print("Random Forest Classification Report")
print(classification_report(y_test, y_pred_rf))

SVM Classification Report
              precision    recall  f1-score   support

        idle       1.00      0.99      1.00       180
     running       1.00      1.00      1.00       628
      stairs       1.00      1.00      1.00        28
     walking       1.00      1.00      1.00       327

    accuracy                           1.00      1163
   macro avg       1.00      1.00      1.00      1163
weighted avg       1.00      1.00      1.00      1163

Random Forest Classification Report
              precision    recall  f1-score   support

        idle       1.00      0.99      1.00       180
     running       1.00      1.00      1.00       628
      stairs       1.00      1.00      1.00        28
     walking       1.00      1.00      1.00       327

    accuracy                           1.00      1163
   macro avg       1.00      1.00      1.00      1163
weighted avg       1.00      1.00      1.00      1163

