In [1]:
import os
import pandas as pd
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

#### Dataset creation

In [2]:
hw5_data = 'data'

In [3]:
all_data = pd.DataFrame()

In [4]:
for activity_folder in os.listdir(hw5_data):
    activity_path = os.path.join(hw5_data, activity_folder)
    if not os.path.isdir(activity_path):
        continue
    
    for file_name in os.listdir(activity_path):
        file_path = os.path.join(activity_path, file_name)
        if not file_name.endswith('.csv'):
            continue
        
        df = pd.read_csv(file_path)
        
        df['activity'] = activity_folder
        
        all_data = pd.concat([all_data, df], ignore_index=True)

In [5]:
all_data = all_data.sample(frac=1).reset_index(drop=True)
all_data.head()

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,3.77805,-17.06587,-22.409727,walking
1,5.774815,-2.930503,-0.296881,running
2,-2.485181,-2.049437,2.140417,walking
3,1.35033,-6.861782,-2.42772,running
4,-0.258574,-6.019023,-0.842759,walking


In [6]:
all_data.shape

(193860, 4)

In [7]:
X = all_data.drop('activity', axis=1)
X.head()

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z
0,3.77805,-17.06587,-22.409727
1,5.774815,-2.930503,-0.296881
2,-2.485181,-2.049437,2.140417
3,1.35033,-6.861782,-2.42772
4,-0.258574,-6.019023,-0.842759


In [8]:
y = all_data['activity']
y.head()

0    walking
1    running
2    walking
3    running
4    walking
Name: activity, dtype: object

#### Create test data

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#### Time features

In [10]:
X_train['time_mean'] = X_train.mean(axis=1)
X_test['time_mean'] = X_test.mean(axis=1)

#### Classifications

In [11]:
svm_model = SVC()
rfc_model = RandomForestClassifier()

#### SVM training

In [12]:
%%time
svm_model.fit(X_train, y_train)

CPU times: user 8min 1s, sys: 2.9 s, total: 8min 4s
Wall time: 8min 8s


SVC()

#### RFC training

In [13]:
%%time
rfc_model.fit(X_train, y_train)

CPU times: user 17.6 s, sys: 111 ms, total: 17.7 s
Wall time: 17.9 s


RandomForestClassifier()

#### Prediction on test data

In [14]:
%%time
svm_prediction = svm_model.predict(X_test)

CPU times: user 3min 7s, sys: 737 ms, total: 3min 7s
Wall time: 3min 8s


In [15]:
%%time
rfc_prediction = rfc_model.predict(X_test)

CPU times: user 533 ms, sys: 2.91 ms, total: 536 ms
Wall time: 535 ms


#### Evaluating the results

In [16]:
svm_accuracy = accuracy_score(y_test, svm_prediction)
rfc_accuracy = accuracy_score(y_test, rfc_prediction)

print("Accuracy (SVM):", svm_accuracy)
print("Accuracy (Random Forest):", rfc_accuracy)

Accuracy (SVM): 0.8931187454864336
Accuracy (Random Forest): 0.9997936655318271


In [17]:
svm_report = classification_report(y_test, svm_prediction)
print("SVM Results:")
print(svm_report)

rfc_report = classification_report(y_test, rfc_prediction)
print("Random Forest Results:")
print(rfc_report)

SVM Results:
              precision    recall  f1-score   support

        idle       0.96      0.99      0.97      6187
     running       0.93      0.90      0.92     20609
      stairs       1.00      0.00      0.00       985
     walking       0.80      0.90      0.85     10991

    accuracy                           0.89     38772
   macro avg       0.92      0.70      0.68     38772
weighted avg       0.90      0.89      0.88     38772

Random Forest Results:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      6187
     running       1.00      1.00      1.00     20609
      stairs       1.00      1.00      1.00       985
     walking       1.00      1.00      1.00     10991

    accuracy                           1.00     38772
   macro avg       1.00      1.00      1.00     38772
weighted avg       1.00      1.00      1.00     38772



#### Conclusion

Based on the calculation timings: 
- SVC model training took 8 minutes 8 seconds
- Random Forest training time was 17.9 seconds. 

Predicting the test data took:
- for the SVC model - 3 minutes 8 seconds
- for the Random Forest - less than a second. 

Also, it should be noted that according to the metrics, Random Forest gives more accurate forecasting results.