In [None]:
# Shoaib DataSet Link : https://www.utwente.nl/en/eemcs/ps/dataset-folder/activity-recognition-dataset-shoaib.rar

# 1 Data Collection Procedure 

# We used four Samsung Galaxy S2 smartphones in our experiments for data collection. Using these smartphones, we collected data for six different physical activities: walking, running, sitting, standing, walking upstairs, and walking downstairs. We asked four participants to perform these six activities for a few minutes. Because these are repetitive activities, the amount of time for each activity was kept between 3 and 5 minutes per participant, which gave us enough examples for our evaluations. The activities were carried out indoors in one of our university buildings. For walking and running, the department's corridor was used. For the sitting activity, the participants' offices were used while they were using their computers. For the standing activity, the coffee area was used, where some participants stood for a few minutes while involved in a conversation with others. For walking upstairs and downstairs, 5-floor stairs were used. It is important to mention that these stairs had short walks at each floor for switching between stairs (2-3 steps), but there were only four such switches in the whole walking upstairs and downstairs activities. We could not find one single continuous stair from the first to the 5th floor. Each of these participants was provided with four smartphones on four body positions: right jeans pocket, belt, arm, and wrist. The data was recorded for all four positions at the same time for each activity. All four participants were male, between the ages of 25 and 30. They signed a consent form before taking part in the activities.

# 2 Datafile Formats 
# Arm.xlsx (Sensor data on arm position for all four participants)
# Belt.xlsx (Sensor data on Belt position for all four participants)
# Wrist.xlsx (Sensor data on Wrist position for all four participants)
# Pocket.xlsx (Sensor data on Pocket position for all four participants)

# 3 Raw Data Columns within these files: 

# Timestamp 
# Ax (Accelerometer X-axis) 
# Ay (Accelerometer Y-axis)
# Az (Accelerometer Z-axis)
# Gx (Gyroscope X-axis)
# Gy (Gyroscope Y-axis)
# Gz (Gyroscope Z-axis)
# Mx (Magnetometer X-axis)
# My (Magnetometer Y-axis)
# Mz (Magnetometer Z-axis)
# Activity Label 

# 4 Data Labelling 
# All data samples in these files are properly labelled with our six activities. 

# Citation: This dataset can only be used with citing the following paper. 
# Shoaib, Muhammad,  "Human Activity Recognition Using Hetrogenious Sensors" Appears in the Adjunct Proceedings of UbiComp  2013.


In [None]:
## Library prerequisites
# Installs the packages this notebook needs into the running environment.
# NOTE(review): scikit-learn is installed without a version pin while numpy is
# pinned to 1.16.1 -- confirm that the scikit-learn release pip resolves is
# compatible with this numpy version and with the imports used further down.
!pip install scikit-learn
!pip install numpy==1.16.1

In [None]:
# Mount Google Drive under /content/drive so the dataset files stored there
# can be reached through the regular filesystem. Relies on the google.colab
# package, so this cell is only expected to work inside Colab.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
os.chdir("/content/drive/")
!ls
import os
os.chdir("My Drive/Computer_Vision_Masters/Wearable_Sensors_Code/")

In [None]:
## Libraries
import numpy as np
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
# The metric functions are imported from the public ``sklearn.metrics``
# namespace. The former ``sklearn.metrics.classification`` path was a private
# module that newer scikit-learn releases no longer provide, and the
# prerequisites cell installs scikit-learn without a version pin.
from sklearn.metrics import accuracy_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
import scipy.stats as st
import sys
import warnings

# Silence all warnings for the rest of the notebook.
warnings.filterwarnings('ignore')
# Seed NumPy's global random state so repeated runs start from the same state.
np.random.seed(12227)

In [None]:
# Classical Machine Learning Algos
def train_j48(X, y):
    """Create the decision-tree member of the ensemble.

    The estimator is returned *unfitted* with scikit-learn's default
    settings; ``X`` and ``y`` are accepted but not used here, so fitting
    is left to whoever receives the returned object.

    Returns:
        A fresh ``sklearn.tree.DecisionTreeClassifier`` instance.
    """
    from sklearn.tree import DecisionTreeClassifier

    return DecisionTreeClassifier()

def train_mlp(X, y):
    """Create the multi-layer-perceptron member of the ensemble (unfitted).

    The single hidden layer is sized with Weka's default ``'a'`` rule:
    ``(number of attributes + number of classes) / 2``, rounded down.

    The class count is taken as the number of *distinct* labels in ``y``.
    Using the largest label value instead under-counts by one when labels
    start at 0 and is arbitrary when labels are not contiguous.

    Args:
        X: 2-D feature container exposing ``shape``; ``X.shape[1]`` is the
            number of attributes.
        y: 1-D collection of class labels.

    Returns:
        An unfitted ``sklearn.neural_network.MLPClassifier``.
    """
    from sklearn.neural_network import MLPClassifier

    n_features = X.shape[1]
    n_classes = len(np.unique(y))
    # Never request an empty hidden layer, even for degenerate inputs.
    hidden_units = max(1, (n_features + n_classes) // 2)

    # learning_rate_init / momentum / max_iter mirror Weka's defaults.
    # NOTE(review): scikit-learn documents learning_rate_init and momentum as
    # applying to the 'sgd'/'adam' solvers -- confirm whether they have any
    # effect with solver='lbfgs'.
    return MLPClassifier(
        solver='lbfgs',
        alpha=1e-5,
        hidden_layer_sizes=(hidden_units,),
        learning_rate_init=0.3,
        momentum=0.2,
        max_iter=500,
    )

def train_logistic_regression(X, y):
    """Create the one-vs-rest logistic-regression member of the ensemble.

    The one-vs-rest scheme is expressed by wrapping a default
    ``LogisticRegression`` in ``OneVsRestClassifier`` rather than passing
    ``multi_class='ovr'``, which recent scikit-learn releases deprecate in
    favour of this wrapper. The wrapper exposes ``predict_proba``, so the
    result can still take part in soft voting.

    ``X`` and ``y`` are accepted but not used; the estimator is returned
    unfitted.

    Returns:
        An unfitted ``OneVsRestClassifier`` around ``LogisticRegression()``.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.multiclass import OneVsRestClassifier

    return OneVsRestClassifier(LogisticRegression())

In [None]:
# Integer code assigned to each activity name in the 'Activity_Label' column.
_ACTIVITY_CODES = {
  'Downstairs': 0,
  'Running': 1,
  'Sitting': 2,
  'Standing': 3,
  'Upstairs': 4,
  'Walking': 5,
}

def DataPreparation(data_input_file):
  """Load one Excel file and return an 80/20 train/test split.

  The nine sensor columns (Ax..Mz) become the feature matrix and the
  'Activity_Label' column is converted to integer codes through
  ``_ACTIVITY_CODES``. The conversion builds a new Series, so the loaded
  frame is never modified.

  Args:
    data_input_file: path of the Excel workbook handed to ``pd.read_excel``.

  Returns:
    ``(X_train, Y_train, X_test, Y_test)`` -- note the order differs from
    the one ``train_test_split`` itself returns.

  Raises:
    ValueError: if a label is neither a known activity name nor
      convertible to an integer.
  """
  print('Shoaib Human Activity DataSet using Catal_2015 Approach {}'.format(data_input_file))
  df = pd.read_excel(data_input_file)
  X = df.loc[:, ['Ax', 'Ay', 'Az', 'Gx', 'Gy', 'Gz', 'Mx', 'My', 'Mz']]

  labels = df['Activity_Label']
  # Known names are translated; anything else is passed through untouched so
  # that labels which are already numeric keep working.
  encoded = labels.map(lambda name: _ACTIVITY_CODES.get(name, name))
  try:
    Y = encoded.astype('int')
  except ValueError as err:
    unknown = [repr(v) for v in labels.unique() if v not in _ACTIVITY_CODES]
    raise ValueError(
      'Unrecognised activity label(s) in {}: {}'.format(data_input_file, ', '.join(unknown))
    ) from err

  X_train, X_test, Y_train, Y_test = train_test_split(X, np.array(Y), test_size=0.2, random_state=42)
  return X_train, Y_train, X_test, Y_test

In [None]:
def RunCatal2015Model(X_train, Y_train, X_test, Y_test):
    """Fit the three-member soft-voting ensemble and print its test scores.

    A decision tree, an MLP and a logistic regression are combined in a
    ``VotingClassifier`` with ``voting='soft'``, fitted on the training
    split and scored on the test split. Accuracy, macro-averaged recall and
    macro-averaged F1 are printed; nothing is returned.
    """
    # Build the (name, estimator) pairs in the same order as the ensemble
    # lists them: tree, MLP, logistic regression.
    members = [
        ('dt', train_j48(X_train, Y_train)),
        ('mlp', train_mlp(X_train, Y_train)),
        ('lr', train_logistic_regression(X_train, Y_train)),
    ]

    ensemble = VotingClassifier(estimators=members, voting='soft')
    ensemble.fit(X_train, Y_train)
    predictions = ensemble.predict(X_test)

    accuracy = accuracy_score(Y_test, predictions)
    macro_recall = recall_score(Y_test, predictions, average='macro')
    macro_f1 = f1_score(Y_test, predictions, average='macro')

    print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}]'.format(accuracy, macro_recall, macro_f1))
    print('________________________________________________________________')
In [None]:
def Run(data_input_file):
  """Prepare the split for one data file and evaluate the ensemble on it."""
  # DataPreparation yields (X_train, Y_train, X_test, Y_test), which is the
  # positional order RunCatal2015Model expects.
  splits = DataPreparation(data_input_file)
  RunCatal2015Model(*splits)

In [None]:
# Evaluate the ensemble on the arm-position recordings.
Run('data/SHOAIB/Arm.xlsx')

In [None]:
# Evaluate the ensemble on the belt-position recordings.
Run('data/SHOAIB/Belt.xlsx')

In [None]:
# Evaluate the ensemble on the wrist-position recordings.
Run('data/SHOAIB/Wrist.xlsx')

In [None]:
# Evaluate the ensemble on the pocket-position recordings.
Run('data/SHOAIB/Pocket.xlsx')