In [27]:
import os
import pandas as pd
import numpy as np
from scipy.stats import entropy, iqr, skew
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [13]:
from google.colab import drive

# Монтируем Google Диск
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
# Функция для вычисления всех необходимых статистических показателей
def calculate_metrics(df, filename):
    metrics = {}

    short_filename = filename.split('-')[0]
    metrics['activity'] = short_filename

    # Преобразование всех столбцов в числовой формат
    df = df.apply(pd.to_numeric, errors='coerce')

    # Для X
    metrics['max_x'] = df['accelerometer_X'].max()
    metrics['min_x'] = df['accelerometer_X'].min()
    metrics['entropy_x'] = entropy(df['accelerometer_X'].value_counts(normalize=True))
    metrics['iqr_x'] = iqr(df['accelerometer_X'])

    # Для Y
    metrics['max_y'] = df['accelerometer_Y'].max()
    metrics['min_y'] = df['accelerometer_Y'].min()
    metrics['index_min_y'] = df['accelerometer_Y'].idxmin()
    metrics['mad_y'] = df['accelerometer_Y'].apply(lambda x: np.mean(np.abs(x - np.mean(df['accelerometer_Y']))))
    metrics['median_y'] = df['accelerometer_Y'].median()
    metrics['skewness_y'] = skew(df['accelerometer_Y'])
    metrics['std_y'] = df['accelerometer_Y'].std()
    metrics['rmse_y'] = np.sqrt(np.mean(df['accelerometer_Y']**2))

    # Для Z
    metrics['skewness_z'] = skew(df['accelerometer_Z'])

    return metrics

all_metrics = []


root_dir = '/content/drive/My Drive/data'

for subdir, _, files in os.walk(root_dir):
    for file in files:
        if file.endswith('.csv'):
            file_path = os.path.join(subdir, file)
            df = pd.read_csv(file_path)
            metrics = calculate_metrics(df, file)
            all_metrics.append(metrics)


result_df = pd.DataFrame(all_metrics)


result_df

Unnamed: 0,activity,max_x,min_x,entropy_x,iqr_x,max_y,min_y,index_min_y,mad_y,median_y,skewness_y,std_y,rmse_y,skewness_z
0,walking,4.486734,-13.225570,3.401197,3.373430,4.951209,-26.307487,27,0 8.212910 1 3.199452 2 2.00554...,-9.880871,-0.208826,6.158232,10.880074,-0.233282
1,walking,3.117251,-8.858546,3.401197,2.874239,-1.173159,-15.538369,23,0 2.651499 1 2.721090 2 1.495259 3...,-8.712500,0.472556,3.693687,9.717184,-0.625911
2,walking,8.475474,-11.669339,3.401197,6.896498,-0.316035,-27.911604,26,0 0.294008 1 9.253111 2 5.71448...,-11.207258,-0.367831,6.810540,13.509717,1.742867
3,walking,2.327164,-8.303091,3.401197,3.375824,-2.284068,-17.597382,29,0 1.704352 1 2.652456 2 3.696967 3...,-9.198522,0.365660,3.587683,10.326881,-1.458849
4,walking,7.527370,-9.504981,3.401197,3.732560,3.423709,-23.376986,26,0 2.182555 1 9.891884 2 0.50852...,-9.531318,0.062554,6.662437,12.094256,-0.053557
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6467,running,34.620155,-5.348646,3.401197,11.247959,27.193342,-12.765883,20,0 9.037472 1 2.272736 2 1.20013...,8.647857,-0.303988,12.310168,14.736389,0.749560
6468,running,26.647465,-4.007893,3.401197,6.538564,27.461494,-11.420342,6,0 8.742029 1 16.114573 2 2.08135...,8.578425,-0.150668,10.838412,13.870738,0.192030
6469,running,28.644230,-4.443638,3.401197,10.577583,28.926744,-11.990162,12,0 17.038255 1 7.698635 2 2.82882...,7.462727,0.042162,12.250552,14.601490,0.428806
6470,running,28.720844,-9.466674,3.401197,14.632164,37.047880,-19.756952,6,0 5.821900 1 5.410098 2 1.33676...,10.177751,-0.021111,16.108921,18.299163,-0.205158


In [28]:
# Преобразование категориального признака в числовой
result_df['activity'] = result_df['activity'].astype('category').cat.codes

# Убедитесь, что все признаки числовые
X = result_df.drop(columns=['activity'])
X = X.apply(pd.to_numeric, errors='coerce')
X = X.fillna(0)

y = result_df['activity']

# Разделение данных на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [29]:
# Обучение модели SVM
svm_model = SVC()
svm_model.fit(X_train, y_train)

In [30]:
# Обучение модели Random Forest
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

In [31]:
# Предсказание на тестовой выборке
svm_predictions = svm_model.predict(X_test)
rf_predictions = rf_model.predict(X_test)

In [34]:
# Оценка моделей
svm_report = classification_report(y_test, svm_predictions, target_names=['idle', 'running', 'stairs', 'walking'], zero_division=0)
rf_report = classification_report(y_test, rf_predictions, target_names=['idle', 'running', 'stairs', 'walking'], zero_division=0)

print("SVM Classification Report:")
print(svm_report)

print("Random Forest Classification Report:")
print(rf_report)

SVM Classification Report:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       300
     running       1.00      1.00      1.00      1021
      stairs       0.00      0.00      0.00        46
     walking       0.93      1.00      0.96       575

    accuracy                           0.98      1942
   macro avg       0.73      0.75      0.74      1942
weighted avg       0.95      0.98      0.96      1942

Random Forest Classification Report:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       300
     running       1.00      1.00      1.00      1021
      stairs       1.00      0.91      0.95        46
     walking       0.99      1.00      1.00       575

    accuracy                           1.00      1942
   macro avg       1.00      0.98      0.99      1942
weighted avg       1.00      1.00      1.00      1942

