In [146]:
from os import listdir
from os.path import isfile, join, isdir
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, classification_report
from sklearn import preprocessing
from scipy.fftpack import fft, rfft


# **Dataset preparation (row data)**

In [147]:
path_to_data = '/content/drive/My Drive/data'
activities = listdir(path_to_data)
print(activities)

['running', 'walking', 'idle', 'stairs']


In [148]:
table = pd.DataFrame()

In [149]:
for folder in listdir(path_to_data):
    activity = join(path_to_data, folder)
    if not isdir(activity):
        continue

    for filename in listdir(activity):
        file = join(activity, filename)
        if not filename.endswith('.csv'):
            continue

        df = pd.read_csv(file)

        df['activity'] = folder

        table = pd.concat([table, df], ignore_index=True)

table = table.sample(frac=1).reset_index(drop=True)
table.head(10)

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,-5.343858,-8.384495,-1.278504,walking
1,9.634268,10.721235,-3.845088,running
2,5.046977,-0.852336,-1.853112,running
3,-3.53863,5.027824,-3.356671,running
4,0.229843,18.033127,-3.62961,running
5,15.959748,25.603594,4.826711,running
6,-4.146757,-7.005434,0.905008,running
7,23.357832,17.951727,29.41995,running
8,11.492168,5.525818,15.710752,running
9,4.127604,-12.157756,-7.896077,walking


In [150]:
X = table.drop('activity', axis=1)
y = table['activity']

In [151]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# **Random Forest Model**

In [152]:
time_rf_1 = time.perf_counter()
random_forest_model = RandomForestClassifier()
random_forest_model.fit(X_train, y_train)
time_rf_2 = time.perf_counter()

*Runtime*

In [153]:
time_taken_to_run_rf = time_rf_2 - time_rf_1

# **SVM Model**

In [154]:
time_svm_1 = time.perf_counter()
svm_model = SVC()
svm_model.fit(X_train, y_train)
time_svm_2 = time.perf_counter()

*Runtime*

In [155]:
time_taken_to_run_svm = time_svm_2 - time_svm_1

# **Predictions**

**SVM**

In [156]:
time_svm_1_pred = time.perf_counter()
svm_prediction = svm_model.predict(X_test)
time_svm_2_pred = time.perf_counter()

*Runtime*

In [157]:
time_taken_to_run_svm_pred = time_svm_2_pred - time_svm_1_pred

**Random** **Forest**

In [158]:
time_rf_1_pred = time.perf_counter()
random_forest_prediction = random_forest_model.predict(X_test)
time_rf_2_pred = time.perf_counter()

*Runtime*

In [159]:
time_taken_to_run_rf_pred = time_rf_2_pred - time_rf_1_pred

# **Accuracy**

In [160]:
svm_accuracy = accuracy_score(y_test, svm_prediction)
random_forest_accuracy = accuracy_score(y_test, random_forest_prediction)

# **Classification Report**

In [161]:
svm_report = classification_report(y_test, svm_prediction)
print(f'''SVM Classification Report:
      {svm_report}''')

random_forest_report = classification_report(y_test, random_forest_prediction)
print(f'''Random Forest Classification Report:
      {random_forest_report}''')

SVM Classification Report:
                    precision    recall  f1-score   support

        idle       0.95      0.99      0.97      6255
     running       0.93      0.90      0.92     20597
      stairs       1.00      0.01      0.01       964
     walking       0.80      0.90      0.85     11256

    accuracy                           0.89     39072
   macro avg       0.92      0.70      0.69     39072
weighted avg       0.90      0.89      0.88     39072

Random Forest Classification Report:
                    precision    recall  f1-score   support

        idle       1.00      1.00      1.00      6255
     running       1.00      1.00      1.00     20597
      stairs       1.00      0.99      0.99       964
     walking       1.00      1.00      1.00     11256

    accuracy                           1.00     39072
   macro avg       1.00      1.00      1.00     39072
weighted avg       1.00      1.00      1.00     39072



# **Total** **Time**

In [162]:
total_time_svm = time_taken_to_run_svm_pred + time_taken_to_run_svm
total_time_rf = time_taken_to_run_rf_pred + time_taken_to_run_rf

# **Final** **Comparison**

In [163]:
final = pd.DataFrame()
final['Models'] = ('SVM', 'Random Forest')
final['Accuracy'] = (svm_accuracy, random_forest_accuracy)
final['Total Runtime'] = (total_time_svm, total_time_rf)

In [164]:
final

Unnamed: 0,Models,Accuracy,Total Runtime
0,SVM,0.893581,480.88162
1,Random Forest,0.999616,18.449155


As per final report total runtime of SVM method was approx 5 times longer than Random forest method. In addition accuracy of Random Forest method was much more better in comparison with SVM method. So as a final decision we can say that Random Forest method is more effective in our current case. SVM is much more time- and resource-intensive.

# **Dataset preparation (manually added features)**

In [165]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [166]:
path_to_data = '/content/drive/My Drive/data'
activities = listdir(path_to_data)
print(activities)

['running', 'walking', 'idle', 'stairs']


In [167]:
for act in activities:
  path = join(path_to_data, act)
  frames = listdir(path)
  print(act, ': ', len(frames))

running :  3438
walking :  1860
idle :  1049
stairs :  165


In [168]:
def get_features(frame):
  features = []
  features = np.array(features)

  features = np.concatenate((features, frame.min(axis=0).values), axis=0)
  features = np.concatenate((features, frame.max(axis=0).values), axis=0)
  features = np.concatenate((features, frame.mean(axis=0).values), axis=0)
  features = np.concatenate((features, frame.skew(axis=0).values), axis=0)
  features = np.concatenate((features, frame.kurt(axis=0).values), axis=0)
  features = np.concatenate((features, frame.var(axis=0).values), axis=0)
  features = np.concatenate((features, frame.median(axis=0).values), axis=0)
  features = np.concatenate((features, frame.std(axis=0).values), axis=0)
  features = np.concatenate((features, frame.idxmax(axis=0).values), axis=0)
  features = np.concatenate((features, frame.idxmin(axis=0).values), axis=0)

  frame['mean_x'] = frame.mean(axis=0)['accelerometer_X']
  frame['mean_y'] = frame.mean(axis=0)['accelerometer_Y']
  frame['mean_z'] = frame.mean(axis=0)['accelerometer_Z']

  mae_x = mean_absolute_error(frame['accelerometer_X'], frame['mean_x'])
  mae_y = mean_absolute_error(frame['accelerometer_Y'], frame['mean_y'])
  mae_z = mean_absolute_error(frame['accelerometer_Z'], frame['mean_z'])

  rmse_x = np.sqrt(mean_squared_error(frame['accelerometer_X'], frame['mean_x']))
  rmse_y = np.sqrt(mean_squared_error(frame['accelerometer_Y'], frame['mean_y']))
  rmse_z = np.sqrt(mean_squared_error(frame['accelerometer_Z'], frame['mean_z']))

  metrics = np.array([mae_x, mae_y, mae_z, rmse_x, rmse_y, rmse_z])

  features = np.concatenate((features, metrics), axis=0)

  correlations = frame.corr()
  corr = np.array([correlations['accelerometer_X']['accelerometer_Y'],
                     correlations['accelerometer_X']['accelerometer_Z'],
                     correlations['accelerometer_Y']['accelerometer_Z']])

  features = np.concatenate((features, metrics), axis=0)

  return features




In [169]:
def class_data_stat_prepare(class_name, class_number):
    path = join(path_to_data, class_name)
    X = []
    for item in listdir(path):
        frame = pd.read_csv(join(path, item))
        features = get_features(frame)
        X.append(features)

    y = [class_number]*len(X)

    X = np.array(X)
    y = np.array(y)

    return X, y

In [170]:
def create_dataset(class_prepare):
    X_idle, y_idle = class_prepare('idle', 0)
    X_walking, y_walking = class_prepare('walking', 1)
    X_stairs, y_stairs = class_prepare('stairs', 2)
    X_running, y_running = class_prepare('running', 3)

    X = np.concatenate((X_idle, X_walking), axis=0)
    X = np.concatenate((X, X_stairs), axis=0)
    X = np.concatenate((X, X_running), axis=0)

    Y = np.concatenate((y_idle, y_walking), axis=0)
    Y = np.concatenate((Y, y_stairs), axis=0)
    Y = np.concatenate((Y, y_running), axis=0)

    return X, Y

In [171]:
X, y = create_dataset(class_data_stat_prepare)

In [172]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# **Random Forest Model**

In [173]:
time_rf_1 = time.perf_counter()
random_forest_model = RandomForestClassifier()
random_forest_model.fit(X_train, y_train)
time_rf_2 = time.perf_counter()

*Runtime*

In [174]:
time_taken_to_run_rf = time_rf_2 - time_rf_1

# **SVM Model**

In [175]:
time_svm_1 = time.perf_counter()
svm_model = SVC()
svm_model.fit(X_train, y_train)
time_svm_2 = time.perf_counter()

*Runtime*

In [176]:
time_taken_to_run_svm = time_svm_2 - time_svm_1

# **Predictions**

***SVM***

In [177]:
time_svm_1_pred = time.perf_counter()
svm_prediction = svm_model.predict(X_test)
time_svm_2_pred = time.perf_counter()

*Runtime*

In [178]:
time_taken_to_run_svm_pred = time_svm_2_pred - time_svm_1_pred

***Random*** ***Forest***

In [179]:
time_rf_1_pred = time.perf_counter()
random_forest_prediction = random_forest_model.predict(X_test)
time_rf_2_pred = time.perf_counter()

*Runtime*

In [180]:
time_taken_to_run_rf_pred = time_rf_2_pred - time_rf_1_pred

# **Accuracy**

In [181]:
svm_accuracy = accuracy_score(y_test, svm_prediction)
random_forest_accuracy = accuracy_score(y_test, random_forest_prediction)

# **Classification Report**

In [182]:
svm_report = classification_report(y_test, svm_prediction)
print(f'''SVM Classification Report:
      {svm_report}''')

random_forest_report = classification_report(y_test, random_forest_prediction)
print(f'''Random Forest Classification Report:
      {random_forest_report}''')

SVM Classification Report:
                    precision    recall  f1-score   support

           0       1.00      1.00      1.00       212
           1       0.93      1.00      0.96       362
           2       0.00      0.00      0.00        27
           3       1.00      1.00      1.00       702

    accuracy                           0.98      1303
   macro avg       0.73      0.75      0.74      1303
weighted avg       0.96      0.98      0.97      1303

Random Forest Classification Report:
                    precision    recall  f1-score   support

           0       1.00      1.00      1.00       212
           1       1.00      1.00      1.00       362
           2       1.00      0.96      0.98        27
           3       1.00      1.00      1.00       702

    accuracy                           1.00      1303
   macro avg       1.00      0.99      0.99      1303
weighted avg       1.00      1.00      1.00      1303



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# **Total** **Time**

In [183]:
total_time_svm = time_taken_to_run_svm_pred + time_taken_to_run_svm
total_time_rf = time_taken_to_run_rf_pred + time_taken_to_run_rf

# **Final Comparison**

In [184]:
final = pd.DataFrame()
final['Models'] = ('SVM', 'Random Forest')
final['Accuracy'] = (svm_accuracy, random_forest_accuracy)
final['Total Runtime in sec'] = (total_time_svm, total_time_rf)

In [185]:
final

Unnamed: 0,Models,Accuracy,Total Runtime in sec
0,SVM,0.979279,0.261219
1,Random Forest,0.999233,1.784172


As per final report total runtime was pretty fast with good accuracy rate for both methods.
