<a href="https://colab.research.google.com/github/OleksiiLozovyi/goit-ds-hw-08/blob/main/Hw4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import pandas as pd
import numpy as np
import os
from google.colab import files
import zipfile
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from scipy.stats import skew, kurtosis
from scipy.signal import butter, filtfilt
from sklearn.impute import SimpleImputer

uploaded = files.upload()
zip_file = list(uploaded.keys())[0]
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    zip_ref.extractall('/content/data')

def load_data_from_folders(base_path):
    data_frames = []
    for root, dirs, files in os.walk(base_path):
        for file in files:
            if file.endswith('.csv'):
                file_path = os.path.join(root, file)
                activity = os.path.basename(root)
                temp_df = pd.read_csv(file_path)
                temp_df['activity'] = activity
                data_frames.append(temp_df)
    return pd.concat(data_frames, ignore_index=True)

def butter_filter(data, cutoff, fs, btype='low', order=5):
    nyquist = 0.5 * fs
    normal_cutoff = cutoff / nyquist
    b, a = butter(order, normal_cutoff, btype=btype, analog=False)
    return filtfilt(b, a, data)

def calculate_time_features(row):
    features = {}
    for col, value in row.items():
        if col != 'activity' and not pd.isnull(value):
            features[f'{col}_mean'] = value
            features[f'{col}_std'] = 0
            features[f'{col}_max'] = value
            features[f'{col}_min'] = value
            features[f'{col}_skew'] = 0
            features[f'{col}_kurtosis'] = 0
            features[f'{col}_energy'] = value**2
            features[f'{col}_signal_magnitude_area'] = abs(value)
    return pd.Series(features)


base_path = '/content/data'
raw_data = load_data_from_folders(base_path)

for axis in ['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']:
    if axis in raw_data.columns:
        raw_data[axis] = butter_filter(raw_data[axis], cutoff=1, fs=50, btype='low')

time_features = raw_data.apply(calculate_time_features, axis=1)
time_features['activity'] = raw_data['activity']

imputer = SimpleImputer(strategy='mean')
X = time_features.drop('activity', axis=1)
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)
y = time_features['activity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

svm = SVC(kernel='rbf')
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

print("Classification Report for SVM:")
print(classification_report(y_test, y_pred_svm))

print("Classification Report for Random Forest:")
print(classification_report(y_test, y_pred_rf))

Saving data.zip to data (3).zip
Classification Report for SVM:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      9410
     running       1.00      1.00      1.00     30640
      stairs       0.87      0.28      0.43      1499
     walking       0.94      1.00      0.97     16609

    accuracy                           0.98     58158
   macro avg       0.95      0.82      0.85     58158
weighted avg       0.98      0.98      0.98     58158

Classification Report for Random Forest:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      9410
     running       1.00      1.00      1.00     30640
      stairs       0.82      0.59      0.69      1499
     walking       0.96      0.99      0.98     16609

    accuracy                           0.99     58158
   macro avg       0.95      0.89      0.92     58158
weighted avg       0.98      0.99      0.98     58158

