In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

Reading and Merging Data from Multiple Sources

In [2]:
dataset_path = Path('homework/data')
csv_files = dataset_path.glob('**/*.csv')

dataframes = []

for file_path in csv_files:
    activity = file_path.parent.name
    df = pd.read_csv(file_path)
    df['activity'] = activity
    dataframes.append(df)
combined_df = pd.concat(dataframes, ignore_index=True)

combined_df

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,4.721366,32.326510,12.157756,running
1,1.302446,19.325996,36.913803,running
2,5.128380,-3.936067,-2.073379,running
3,-6.799533,6.502652,1.091756,running
4,9.610326,1.053449,-0.225055,running
...,...,...,...,...
193945,-0.593762,9.988609,4.654328,walking
193946,-1.125275,-13.326127,-0.177171,walking
193947,-21.571756,-26.168625,-20.369867,walking
193948,-2.470816,-11.061212,-0.790087,walking


Normilize data

In [4]:
columns_to_normalize = ['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']

scaler = MinMaxScaler()
combined_df[columns_to_normalize] = scaler.fit_transform(combined_df[columns_to_normalize])

combined_df

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,0.560239,0.912451,0.655120,running
1,0.516618,0.746579,0.970980,running
2,0.565433,0.449780,0.473546,running
3,0.413245,0.582967,0.513930,running
4,0.622617,0.513441,0.497129,running
...,...,...,...,...
193945,0.492424,0.627444,0.559384,walking
193946,0.485643,0.329973,0.497739,walking
193947,0.224768,0.166117,0.240103,walking
193948,0.468475,0.358871,0.489919,walking


Data separation

In [5]:
## Defining the features and target variable
X = combined_df[['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']]  # Ознаки
y = combined_df['activity']

## Separation into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

time domain features

In [6]:
X_train['time_mean'] = X_train.mean(axis=1)
X_test['time_mean'] = X_test.mean(axis=1)

In [7]:
# Creating SVM model
svm_model = SVC()
svm_model.fit(X_train, y_train)

In [8]:
# Creating RF(random forest) model
rf_model = RandomForestClassifier(n_jobs=-1)
rf_model.fit(X_train, y_train)

Comparing the accuracy of models

In [9]:
score_svm = svm_model.score(X_test, y_test)
score_rf = rf_model.score(X_test, y_test)

print(f"Accuracy of the SVM model:", score_svm)
print(f"Accuracy of the RF model:", score_rf)

Accuracy of the SVM model: 0.8924465068316576
Accuracy of the RF model: 0.9992953510354903


In [10]:
# Model Evaluation

## SVM predictions
svm_predictions = svm_model.predict(X_test)

## RF predicitons
rf_predictions = rf_model.predict(X_test)

print("SVM Classification Report:")
print(classification_report(y_test, svm_predictions))

print("Random Forest Classification Report:")
print(classification_report(y_test, rf_predictions))

SVM Classification Report:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                    precision    recall  f1-score   support

.ipynb_checkpoints       0.00      0.00      0.00        27
              idle       0.96      0.99      0.97      9297
           running       0.93      0.90      0.92     30574
            stairs       1.00      0.00      0.00      1470
           walking       0.80      0.91      0.85     16817

          accuracy                           0.89     58185
         macro avg       0.74      0.56      0.55     58185
      weighted avg       0.90      0.89      0.88     58185

Random Forest Classification Report:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                    precision    recall  f1-score   support

.ipynb_checkpoints       0.00      0.00      0.00        27
              idle       1.00      1.00      1.00      9297
           running       1.00      1.00      1.00     30574
            stairs       1.00      0.99      1.00      1470
           walking       1.00      1.00      1.00     16817

          accuracy                           1.00     58185
         macro avg       0.80      0.80      0.80     58185
      weighted avg       1.00      1.00      1.00     58185



  _warn_prf(average, modifier, msg_start, len(result))
