In [None]:
# Important note: In Section 3 (data collection) of our paper, the term "running" is mistakenly used for jogging, so please consider only the jogging activity.

# In the data collection experiments, we collected data for seven physical activities. These are walking, sitting, standing, jogging, biking, walking upstairs and walking downstairs, which are mainly used in the related studies and they are the basic motion activities in daily life. There were ten participants involved in our data collection experiment who performed each of these activities for 3-4 minutes. All ten participants were male, between the ages of 25 and 30. The experiments were carried out indoors in one of the university buildings, except biking. For walking, and jogging, the department corridor was used. For walking upstairs and downstairs, a 5-floor building with stairs was used. Each of these participants was equipped with five smartphones on five body positions: 
# 1.	One in their right jeans pocket. 
# 2.	One in their left jeans pocket.
# 3.	One on belt position towards the right leg using a belt clipper.
# 4.	One on the right upper arm. 
# 5.	One on the right wrist. 
# The first three positions are commonly used by people carrying smartphones. The fourth position is usually used when activities like jogging are performed. However, we used this position for all activities to see its role on the performance.  A smart-watch was simulated with the fifth position as smart-watches are coming into the market these days.  For these experiments, we used Samsung Galaxy SII (i9100) smartphones. 
#   The orientation of the smartphones was portrait for the upper arm, wrist, and two pockets, and landscape for the belt position. The data was recorded for all five positions at the same time for each activity and it was collected at a rate of 50 samples per second. This sampling rate (50 samples per second) is enough to recognize human physical activities, as we show in our previous study . Moreover, in the state of the art, frequencies lower than 50 samples per second have been shown to be sufficient for activity recognition.
# For data collection, we adapted our own data collection app from our previous study by adding the linear acceleration sensor. The data was collected for an accelerometer, a gyroscope, a magnetometer, and a linear acceleration sensor. 

# Each excel file contains data for each participant's seven physical activities for all five positions. 

# Notation in these files: 

# Accelerometer (Ax = x-axis, Ay = y-axis, Az = z-axis)
# Linear Acceleration Sensor (Lx = x-axis, Ly = y-axis, Lz = z-axis)
# Gyroscope (Gx = x-axis, Gy = y-axis, Gz = z-axis)
# Magnetometer (Mx = x-axis, My = y-axis, Mz = z-axis)

In [None]:
## Library prerequisites
# scikit-learn is installed unpinned, while numpy is pinned to 1.16.1.
# NOTE(review): confirm that this numpy pin is still installable in the target
# runtime and compatible with whichever scikit-learn version pip resolves.
!pip install scikit-learn
!pip install numpy==1.16.1

In [None]:
# Colab-only cell: mount Google Drive at /content/drive so later cells can read from it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Move into the mounted Drive, list its contents, then descend into the project
# folder. Later cells use relative 'FUSMPA-2014/...' paths, which are resolved
# against the working directory set here.
import os
os.chdir("/content/drive/")
!ls
import os
# Relative path: resolved against /content/drive/ set by the chdir above.
os.chdir("My Drive/Computer_Vision_Masters/Wearable_Sensors_Code/")

In [None]:
## Libraries
import numpy as np
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
# Import the metrics from the public package. The private module path
# `sklearn.metrics.classification` no longer exists in newer scikit-learn
# releases, while `sklearn.metrics` exposes the same functions in old and new ones.
from sklearn.metrics import accuracy_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
import scipy.stats as st
import sys
import warnings
# Silences every warning for the rest of the session (including deprecation
# and convergence warnings raised by the libraries above).
warnings.filterwarnings('ignore')
# Seed NumPy's global random number generator.
np.random.seed(12227)

In [None]:
# Classical Machine Learning Algos
def train_j48(X, y):
    """Return an unfitted scikit-learn decision tree with default settings.

    The name presumably refers to Weka's J48 tree learner. ``X`` and ``y``
    are accepted but not used: no fitting happens here, so the caller is
    responsible for calling ``fit`` (directly or through an ensemble).
    """
    from sklearn.tree import DecisionTreeClassifier

    return DecisionTreeClassifier()

def train_mlp(X, y):
    """Return an unfitted single-hidden-layer MLP sized with Weka's default rule.

    The hidden layer gets ``(n_features + n_classes) / 2`` units (truncated),
    which is the Weka default the original settings refer to.

    Parameters
    ----------
    X : array-like with a ``shape`` attribute
        Feature matrix; only ``X.shape[1]`` (the number of features) is read.
    y : array-like
        Class labels; only the number of distinct values is used.

    Returns
    -------
    MLPClassifier
        A configured but unfitted classifier; the caller must call ``fit``.
    """
    from sklearn.neural_network import MLPClassifier

    n_features = X.shape[1]
    # Count the distinct labels. The previous `np.amax(y)` returned the largest
    # label value, which is one less than the class count for 0-based labels.
    n_classes = len(np.unique(y))
    hidden_units = max(1, int((n_features + n_classes) / 2))

    # NOTE(review): per the scikit-learn docs, `learning_rate_init` and
    # `momentum` only take effect with the 'sgd'/'adam' solvers, so with
    # solver='lbfgs' these Weka-style values are accepted but not used.
    return MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(hidden_units,),
                         learning_rate_init=0.3, momentum=0.2, max_iter=500)

def train_logistic_regression(X, y):
    """Return an unfitted one-vs-rest logistic regression classifier.

    ``X`` and ``y`` are accepted but not used: the estimator is only
    constructed here and must be fitted by the caller.
    """
    from sklearn.linear_model import LogisticRegression

    return LogisticRegression(multi_class='ovr')

In [None]:
def DataPreparation(data_input_file):
    """Load one participant CSV and split it into train and test parts.

    Parameters
    ----------
    data_input_file : str
        Path to a CSV file holding the 60 sensor columns listed below plus an
        ``Activity_Label`` column of activity names.

    Returns
    -------
    tuple
        ``(X_train, Y_train, X_test, Y_test)`` produced by an 80/20
        ``train_test_split`` with ``random_state=42``. Labels are integer
        class ids.

    Raises
    ------
    ValueError
        If ``Activity_Label`` contains a name with no class id assigned.
    """
    # Column prefixes presumably stand for left pocket (LP), right pocket (RP),
    # wrist (W), upper arm (UA) and belt (B); the mixed letter case is kept
    # exactly as written because the names must match the CSV header.
    feature_columns = [
        'LPAx', 'LPAy', 'LPAz', 'LPLx', 'LPLy', 'LPLz', 'LPGx', 'LPGy', 'LPGz', 'LPMx', 'LPMy', 'LPMz',
        'RPAx', 'RPAy', 'RPAz', 'RPLx', 'RPLy', 'RPLz', 'RPGx', 'RPGy', 'RPGz', 'RPMx', 'RPMy', 'RPMz',
        'Wax', 'Way', 'Waz', 'Wlx', 'Wly', 'Wlz', 'Wgx', 'Wgy', 'Wgz', 'Wmx', 'WMy', 'Wmz',
        'UAAx', 'UAAy', 'UAAz', 'UALx', 'UALy', 'UALz', 'UAGx', 'UAGy', 'UAGz', 'UAMx', 'UAMy', 'UAMz',
        'Bax', 'Bay', 'Baz', 'Blx', 'Bly', 'BLz', 'Bgx', 'Bgy', 'Bgz', 'BMx', 'Bmy', 'Bmz',
    ]
    # One distinct id per activity. 'downstairs' used to share id 5 with
    # 'upstairs', which silently merged two of the seven activities.
    activity_to_class = {
        'walking': 0,
        'standing': 1,
        'jogging': 2,
        'sitting': 3,
        'biking': 4,
        'upstairs': 5,
        'downstairs': 6,
    }

    print('FUSMPA 2014 Activity DataSet using Catal_2015 Approach {}'.format(data_input_file))
    df = pd.read_csv(data_input_file)
    X = df.loc[:, feature_columns]

    # Map on a new Series instead of assigning through boolean masks on a
    # column taken from the frame (chained assignment).
    activity_names = df['Activity_Label']
    class_ids = activity_names.map(activity_to_class)
    unmapped = class_ids.isna()
    if unmapped.any():
        unknown = sorted(set(activity_names[unmapped].astype(str)))
        raise ValueError('Unknown activity label(s) in {}: {}'.format(data_input_file, unknown))
    Y = class_ids.astype('int')

    X_train, X_test, Y_train, Y_test = train_test_split(X, np.array(Y), test_size=0.2, random_state=42)
    return X_train, Y_train, X_test, Y_test

In [None]:
def DataPreparationLeaveOneOut(test_participant=10, data_dir='FUSMPA-2014/'):
    """Build a leave-one-participant-out split from the ten participant CSVs.

    Parameters
    ----------
    test_participant : int, default 10
        Number (1-10) of the participant whose file becomes the test set; the
        other nine files are concatenated, in ascending participant order,
        into the training set. The default reproduces the original
        hard-coded split (train on 1-9, test on 10).
    data_dir : str, default 'FUSMPA-2014/'
        Folder containing ``Participant_<n>.csv`` files.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, Y_train, X_test, Y_test)``; X arrays hold the 60 sensor
        columns per row, Y arrays hold integer class ids.

    Raises
    ------
    ValueError
        If ``test_participant`` is outside 1-10, or a file contains an
        activity name with no class id assigned.
    """
    import os

    # Column prefixes presumably stand for left pocket (LP), right pocket (RP),
    # wrist (W), upper arm (UA) and belt (B); the mixed letter case is kept
    # exactly as written because the names must match the CSV header.
    feature_columns = [
        'LPAx', 'LPAy', 'LPAz', 'LPLx', 'LPLy', 'LPLz', 'LPGx', 'LPGy', 'LPGz', 'LPMx', 'LPMy', 'LPMz',
        'RPAx', 'RPAy', 'RPAz', 'RPLx', 'RPLy', 'RPLz', 'RPGx', 'RPGy', 'RPGz', 'RPMx', 'RPMy', 'RPMz',
        'Wax', 'Way', 'Waz', 'Wlx', 'Wly', 'Wlz', 'Wgx', 'Wgy', 'Wgz', 'Wmx', 'WMy', 'Wmz',
        'UAAx', 'UAAy', 'UAAz', 'UALx', 'UALy', 'UALz', 'UAGx', 'UAGy', 'UAGz', 'UAMx', 'UAMy', 'UAMz',
        'Bax', 'Bay', 'Baz', 'Blx', 'Bly', 'BLz', 'Bgx', 'Bgy', 'Bgz', 'BMx', 'Bmy', 'Bmz',
    ]
    # One distinct id per activity. 'downstairs' used to share id 5 with
    # 'upstairs', which silently merged two of the seven activities.
    activity_to_class = {
        'walking': 0,
        'standing': 1,
        'jogging': 2,
        'sitting': 3,
        'biking': 4,
        'upstairs': 5,
        'downstairs': 6,
    }
    participant_ids = range(1, 11)

    if test_participant not in participant_ids:
        raise ValueError('test_participant must be between 1 and 10, got {!r}'.format(test_participant))

    def load_participant(participant_id):
        """Read one participant file and return (features, class_ids) arrays."""
        file_name = 'Participant_{}.csv'.format(participant_id)
        df = pd.read_csv(os.path.join(data_dir, file_name))
        activity_names = df['Activity_Label']
        class_ids = activity_names.map(activity_to_class)
        unmapped = class_ids.isna()
        if unmapped.any():
            unknown = sorted(set(activity_names[unmapped].astype(str)))
            raise ValueError('Unknown activity label(s) in {}: {}'.format(file_name, unknown))
        return np.asarray(df.loc[:, feature_columns]), np.asarray(class_ids.astype('int'))

    print('FUSMPA 2014 Activity DataSet using Catal_2015 Approach')

    # Concatenate whole per-file arrays once instead of appending row by row.
    train_parts = [load_participant(pid) for pid in participant_ids if pid != test_participant]
    X_train = np.concatenate([features for features, _ in train_parts])
    Y_train = np.concatenate([class_ids for _, class_ids in train_parts])

    X_test, Y_test = load_participant(test_participant)
    return X_train, Y_train, X_test, Y_test

In [None]:
def RunCatal2015Model(X_train, Y_train, X_test, Y_test):
    """Fit the three-model soft-voting ensemble and print its test scores.

    A decision tree, an MLP and a logistic regression are built by the
    ``train_*`` helpers and combined in a ``VotingClassifier`` with
    ``voting='soft'``. The ensemble is fitted on the training data, used to
    predict the test data, and accuracy, macro recall and macro F1 are
    printed. Nothing is returned.
    """
    # Build the base estimators in the same order as their ensemble names.
    base_learners = [
        ('dt', train_j48(X_train, Y_train)),
        ('mlp', train_mlp(X_train, Y_train)),
        ('lr', train_logistic_regression(X_train, Y_train)),
    ]

    ensemble = VotingClassifier(estimators=base_learners, voting='soft')
    ensemble.fit(X_train, Y_train)
    predictions = ensemble.predict(X_test)

    accuracy = accuracy_score(Y_test, predictions)
    macro_recall = recall_score(Y_test, predictions, average='macro')
    macro_f1 = f1_score(Y_test, predictions, average='macro')

    print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}]'.format(accuracy, macro_recall, macro_f1))
    print('________________________________________________________________')

In [None]:
def Run(data_input_file):
    """Evaluate the ensemble on a single participant file.

    The file is loaded and split by ``DataPreparation``; the four resulting
    parts are passed to ``RunCatal2015Model``, which prints the scores.
    """
    train_x, train_y, test_x, test_y = DataPreparation(data_input_file)
    RunCatal2015Model(train_x, train_y, test_x, test_y)

In [None]:
def RunLeaveOneParticipantOut():
    """Evaluate the ensemble on the split from ``DataPreparationLeaveOneOut``.

    Prints the shapes of the training and test arrays, then hands all four
    arrays to ``RunCatal2015Model``, which prints the scores.
    """
    train_x, train_y, test_x, test_y = DataPreparationLeaveOneOut()

    # Report (features shape, labels shape) for the train part, then the test part.
    for features, labels in ((train_x, train_y), (test_x, test_y)):
        print(features.shape, labels.shape)

    RunCatal2015Model(train_x, train_y, test_x, test_y)

In [None]:
# Cross-participant evaluation: train on participants 1-9, test on participant 10.
RunLeaveOneParticipantOut()

In [None]:
# Within-participant evaluation: one 80/20 hold-out run per participant file,
# in participant order 1-10. A single loop replaces ten copy-pasted cells.
for participant_id in range(1, 11):
    Run('FUSMPA-2014/Participant_{}.csv'.format(participant_id))