In [22]:
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')
import os
import time
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Dataset preparation**

In [23]:
path_to_data = '/content/drive/My Drive/data'
activities = os.listdir(path_to_data)
print(activities)

['running', 'walking', 'idle', 'stairs']


In [24]:
table = pd.DataFrame()

In [25]:
for folder in os.listdir(path_to_data):
    activity = os.path.join(path_to_data, folder)
    if not os.path.isdir(activity):
        continue

    for filename in os.listdir(activity):
        file = os.path.join(activity, filename)
        if not filename.endswith('.csv'):
            continue

        df = pd.read_csv(file)

        df['activity'] = folder

        table = pd.concat([table, df], ignore_index=True)

table = table.sample(frac=1).reset_index(drop=True)
table.head(10)

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,7.254431,-13.52724,-7.398083,running
1,-0.502782,6.406884,6.579266,idle
2,-2.063802,-8.729259,1.412579,walking
3,-3.830723,-0.924162,5.12838,walking
4,-3.217807,6.986281,3.409343,running
5,-3.53863,-15.547946,-3.768473,walking
6,-6.560112,-15.08347,-1.359906,walking
7,4.237737,5.650316,4.194642,running
8,-5.93762,5.305551,1.565808,running
9,-0.071826,4.323929,8.714894,idle


In [26]:
X = table.drop('activity', axis=1)
y = table['activity']

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

# **Time signs**

In [28]:
X_train['mean'] = X_train.mean(axis=1)
X_test['mean'] = X_test.mean(axis=1)

# **Random Forest Model**

In [29]:
time_rf_1 = time.perf_counter()
random_forest_model = RandomForestClassifier()
random_forest_model.fit(X_train, y_train)
time_rf_2 = time.perf_counter()

*Runtime*

In [30]:
time_taken_to_run_rf = time_rf_2 - time_rf_1

# **SVM Model**

In [31]:
time_svm_1 = time.perf_counter()
svm_model = SVC()
svm_model.fit(X_train, y_train)
time_svm_2 = time.perf_counter()

*Runtime*

In [32]:
time_taken_to_run_svm = time_svm_2 - time_svm_1

# **Predictions**

***SVM***

In [33]:
time_svm_1_pred = time.perf_counter()
svm_prediction = svm_model.predict(X_test)
time_svm_2_pred = time.perf_counter()

*Runtime*

In [34]:
time_taken_to_run_svm_pred = time_svm_2_pred - time_svm_1_pred

***Random*** ***Forest***

In [35]:
time_rf_1_pred = time.perf_counter()
random_forest_prediction = random_forest_model.predict(X_test)
time_rf_2_pred = time.perf_counter()

*Runtime*

In [36]:
time_taken_to_run_rf_pred = time_rf_2_pred - time_rf_1_pred

# **Accuracy**

In [37]:
svm_accuracy = accuracy_score(y_test, svm_prediction)
random_forest_accuracy = accuracy_score(y_test, random_forest_prediction)

# **Classification Report**

In [38]:
svm_report = classification_report(y_test, svm_prediction)
print(f'''SVM Classification Report:
      {svm_report}''')

random_forest_report = classification_report(y_test, random_forest_prediction)
print(f'''Random Forest Classification Report:
      {random_forest_report}''')

SVM Classification Report: 
                    precision    recall  f1-score   support

        idle       0.96      0.98      0.97      6294
     running       0.93      0.90      0.92     20628
      stairs       1.00      0.01      0.01       990
     walking       0.79      0.90      0.85     11160

    accuracy                           0.89     39072
   macro avg       0.92      0.70      0.69     39072
weighted avg       0.90      0.89      0.88     39072

Random Forest Classification Report:
                    precision    recall  f1-score   support

        idle       1.00      1.00      1.00      6294
     running       1.00      1.00      1.00     20628
      stairs       1.00      0.99      1.00       990
     walking       1.00      1.00      1.00     11160

    accuracy                           1.00     39072
   macro avg       1.00      1.00      1.00     39072
weighted avg       1.00      1.00      1.00     39072



# **Total** **Time**

In [39]:
total_time_svm = time_taken_to_run_svm_pred + time_taken_to_run_svm
total_time_rf = time_taken_to_run_rf_pred + time_taken_to_run_rf

# **Final Comparison**

In [40]:
final = pd.DataFrame()
final['Models'] = ('SVM', 'Random Forest')
final['Accuracy'] = (svm_accuracy, random_forest_accuracy)
final['Total Runtime'] = (total_time_svm, total_time_rf)

In [41]:
final

Unnamed: 0,Models,Accuracy,Total Runtime
0,SVM,0.891431,476.774103
1,Random Forest,0.999744,23.634956


As per final report total runtime of SVM method was approx 20 times longer than Random forest method. In addition accuracy of Random Forest method was much more better in comparison with SVM method.
So as a final decision we can say that Random Forest method is more effective in our current case. SVM is much more time- and resource-intensive.