In [None]:
## Catal et al. 2015 paper
## DOI: https://doi.org/10.1016/j.asoc.2015.01.025

In [None]:
## Library prerequisites
!pip install scikit-learn
!pip install numpy==1.16.1

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so the data files
# referenced by the later cells can be reached through the file system.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
# Make the Drive mount point the working directory; the `!ls` that follows lists it.
os.chdir("/content/drive/")
!ls

In [None]:
import os
# Absolute path, so re-running this cell (or running it without the preceding
# chdir cell) still lands in the project folder instead of raising FileNotFoundError.
# All 'data/...' paths used further down are relative to this directory.
os.chdir("/content/drive/My Drive/Computer_Vision_Masters/Wearable_Sensors_Code/")

In [None]:
## Libraries
import numpy as np
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics.classification import accuracy_score, recall_score, f1_score
import scipy.stats as st
import sys
import warnings
warnings.filterwarnings('ignore')
np.random.seed(12227)

In [None]:
## Catal_2015 Paper

In [None]:
def RunOpportunity():
  """Load the Opportunity train/test arrays and evaluate the ensemble on them.

  The four arrays come from ``OpportunityDataSetAnalysis`` and are handed,
  unchanged and in the same order, to ``TrainAndTestOpportunity``.
  """
  train_X, train_y, test_X, test_y = OpportunityDataSetAnalysis()
  TrainAndTestOpportunity(train_X, train_y, test_X, test_y)

In [None]:
def DataSetAnalysis(data_input_file):
  """Print an exploratory summary of one benchmark ``.npz`` archive.

  The archive must expose three arrays:
    * ``X``     - indexed here as a 4-D array; index 0 of its second axis is kept.
    * ``y``     - 2-D; its second dimension is reported as the number of classes
                  (presumably one-hot labels - confirm against the data files).
    * ``folds`` - sequence of ``(train_indices, test_indices)`` pairs; at least
                  two folds are required because fold 1 is printed as a sample.

  Nothing is returned: the function only prints sizes, shapes and samples,
  ending with the feature matrix extracted from the training part of fold 0.

  Args:
    data_input_file: Path of the ``.npz`` file to inspect.
  """
  # allow_pickle is spelled out because NumPy >= 1.16.3 defaults it to False,
  # which makes object arrays (presumably how the ragged `folds` is stored)
  # unreadable. NOTE: unpickling executes code - only load trusted archives.
  # The `with` block closes the archive's file handle once the arrays are read.
  with np.load(data_input_file, allow_pickle=True) as data:
    X = data['X']
    Y = data['y']
    folds = data['folds']

  # --- X: raw windows -------------------------------------------------------
  print("Length of X: ",len(X))
  print("Size of X[0]: ",X[0].size)
  print("X Shape before reshape: ",X.shape)
  X = X[:, 0, :, :]  # drop the second axis by keeping its first entry
  print("X Shape after reshape: ",X.shape)

  # --- Y: labels ------------------------------------------------------------
  print("Length of Y: ", len(Y))
  print("First Element Y size: ",Y[0].size)
  # This line prints the label vector itself, so it is labelled accordingly.
  print("First Element of Y: " ,Y[0])

  # --- folds: cross-validation index pairs ----------------------------------
  print("Length of Folds: ",len(folds))
  print("Sample of Train Data: ",folds[1][0])
  print("Sample of Test Data: ",folds[1][1])
  print("Length Sample of Train Data: ",len(folds[1][0]))
  print("Length Sample of Test Data: ",len(folds[1][1]))
  classes_number = Y.shape[1]
  print("Number of Classes: ", classes_number)

  # --- features of the training part of the first fold ----------------------
  train_idx = folds[0][0]
  X_train = feature_extraction(X[train_idx])
  print('Train X element 0: ', X_train[0])
  print("Train X Index shape: ", X_train.shape)
  print("Train Y Index shape: ", Y[train_idx].shape)
  

In [None]:
def TrainDataAnalysis(data_input_file='data/LOSO/USCHAD.npz'):
  """Print index and feature shapes for the first fold of a benchmark archive.

  The function loads ``X`` and ``folds`` from ``data_input_file`` itself
  instead of relying on notebook-level variables of those names, which no
  cell defines.

  Args:
    data_input_file: Path of the ``.npz`` archive (keys ``X`` and ``folds``).
      Defaults to the archive inspected by the ``DataSetAnalysis`` cell.
  """
  # allow_pickle is explicit because NumPy >= 1.16.3 defaults it to False and
  # `folds` is presumably an object array. NOTE: only load trusted archives.
  with np.load(data_input_file, allow_pickle=True) as data:
    X = data['X'][:, 0, :, :]  # same axis drop as DataSetAnalysis / DataPreparation
    folds = data['folds']

  train_idx = folds[0][0]
  test_idx = folds[0][1]

  print("Train Index shape: ", train_idx.shape)
  print("Test Index shape: ", test_idx.shape)

  X_train = X[train_idx]
  X_test = X[test_idx]

  print("Train shape before Feature Extraction: ", X_train.shape)
  print("Test shape before Feature Extraction: ", X_test.shape)

  X_train = feature_extraction(X_train)
  X_test = feature_extraction(X_test)

  print("Train shape after Feature Extraction: ", X_train.shape)
  print("Test shape after Feature Extraction: ", X_test.shape)

In [None]:
# NOTE: DataSetAnalysis calls feature_extraction, which is defined in a later cell;
# on a fresh kernel that cell has to be executed before this one.
DataSetAnalysis('data/LOSO/USCHAD.npz')

In [None]:
# NOTE: TrainDataAnalysis calls feature_extraction, which is defined in a later cell;
# on a fresh kernel that cell has to be executed before this one.
TrainDataAnalysis()

In [None]:
def A(sample):
    """Average (A) feature: the mean of each column of ``sample``.

    ``sample`` is indexed as a 2-D array (presumably time steps x channels).
    Returns a list with one value per column, in column order.
    """
    return [np.average(sample[:, col]) for col in range(sample.shape[1])]

def SD(sample):
    """Standard Deviation (SD) feature: ``np.std`` of each column of ``sample``.

    Uses NumPy's default (population, ``ddof=0``) standard deviation.
    Returns a list with one value per column, in column order.
    """
    return [np.std(sample[:, col]) for col in range(sample.shape[1])]

def AAD(sample):
    """Average Absolute Difference (AAD) feature, computed per column.

    For every column the mean absolute deviation from that column's own mean
    is computed. Returns a list with one value per column, in column order.
    """
    def mean_abs_deviation(column):
        centred = column - np.mean(column)
        return np.mean(np.absolute(centred))

    return [mean_abs_deviation(sample[:, col]) for col in range(sample.shape[1])]

def ARA(sample):
    """Average Resultant Acceleration (ARA) feature.

    For every row the Euclidean norm over *all* columns is taken (the square
    root of the sum of the squared column values) and the norms are averaged.
    Returns a one-element list holding that average. The input array is not
    modified.
    """
    squared = np.power(sample, 2)
    # Accumulate column by column, starting from 0, to get the per-row sum of squares.
    row_totals = 0
    for col in range(squared.shape[1]):
        row_totals = row_totals + squared[:, col]

    resultant = np.sqrt(row_totals)
    return [np.average(resultant)]

def TBP(sample):
    """Locate peaks in every column of ``sample`` (Time Between Peaks helper).

    Returns a list that holds, for each column, the peak indices reported by
    ``scipy.signal.find_peaks_cwt`` for the wavelet widths 1, 2 and 3.

    NOTE(review): despite the name, no inter-peak interval is computed here;
    the raw peak positions are returned, so entries can differ in length.
    ``feature_extraction`` does not currently use this function.
    """
    from scipy import signal
    return [
        signal.find_peaks_cwt(sample[:, col], np.arange(1,4))
        for col in range(sample.shape[1])
    ]

In [None]:
## Feature Extraction

def feature_extraction(X):
    """Build the Catal et al. 2015 feature vector for every sample in ``X``.

    For each sample (a 2-D array with C columns) the features are stacked in
    this order:
      * Average (A)                         - C values
      * Standard Deviation (SD)             - C values
      * Average Absolute Difference (AAD)   - C values
      * Average Resultant Acceleration (ARA) - 1 value
    Time Between Peaks (TBP) is not included.

    Each feature group appears exactly once, so a row has ``3 * C + 1`` values
    (the averages are not stacked a second time).

    Args:
      X: Iterable of 2-D samples.

    Returns:
      ``np.ndarray`` with one feature row per sample.
    """
    rows = [
        np.hstack((A(sample), SD(sample), AAD(sample), ARA(sample)))
        for sample in X
    ]
    return np.array(rows)

In [None]:
# Classical Machine Learning Algos
def train_j48(X, y):
    """Create the decision-tree member of the ensemble (named after Weka's J48).

    The classifier is built with scikit-learn's default settings and returned
    *unfitted*; ``X`` and ``y`` are accepted but not used here.
    """
    from sklearn.tree import DecisionTreeClassifier
    return DecisionTreeClassifier()

def train_mlp(X, y):
    """Create the multilayer-perceptron member of the ensemble.

    The single hidden layer is sized like Weka's default ``'a'`` setting:
    ``(number of features + number of classes) / 2``. The number of classes is
    the count of distinct labels in ``y`` (the largest label value would be
    one short for 0-based labels).

    Args:
      X: 2-D feature matrix; only ``X.shape[1]`` is read.
      y: 1-D array of class labels.

    Returns:
      An *unfitted* ``MLPClassifier``.
    """
    from sklearn.neural_network import MLPClassifier
    n_features = X.shape[1]
    n_classes = len(np.unique(y))
    hidden_units = int((n_features + n_classes) / 2)
    # learning_rate_init / momentum mirror Weka's defaults; per the scikit-learn
    # documentation they only take effect with the 'sgd' (and, for the learning
    # rate, 'adam') solver, so with 'lbfgs' they are effectively informational.
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(hidden_units,),
                        learning_rate_init=0.3, momentum=0.2, max_iter=500)
    return clf

def train_logistic_regression(X, y):
    """Create the logistic-regression member of the ensemble.

    A one-vs-rest (``multi_class='ovr'``) ``LogisticRegression`` is returned
    *unfitted*; ``X`` and ``y`` are accepted but not used here.
    """
    from sklearn.linear_model import LogisticRegression
    return LogisticRegression(multi_class='ovr')

In [None]:
def DataPreparation(data_input_file):
  """Load one benchmark ``.npz`` archive for cross-validated training.

  Args:
    data_input_file: Path of an archive with the keys ``X``, ``y`` and ``folds``.

  Returns:
    ``(X, Y, folds)`` where
      * ``X`` is ``data['X'][:, 0, :, :]`` (the second axis is dropped),
      * ``Y`` holds integer class indices (``argmax`` over axis 1 of ``y``),
      * ``folds`` is returned as stored; ``Train`` reads ``folds[i][0]`` as
        training indices and ``folds[i][1]`` as test indices.
  """
  print('Catal et al. 2015 {}'.format(data_input_file))
  # allow_pickle is explicit because NumPy >= 1.16.3 defaults it to False,
  # which makes object arrays (presumably how the ragged `folds` is stored)
  # unreadable. NOTE: unpickling executes code - only load trusted archives.
  # The `with` block closes the archive's file handle once the arrays are read.
  with np.load(data_input_file, allow_pickle=True) as data:
    X = data['X'][:, 0, :, :]
    Y = np.argmax(data['y'], axis=1)
    folds = data['folds']
  return X, Y, folds

In [None]:
def Train(X,Y,folds):
  """Cross-validate the soft-voting ensemble over the given folds.

  For every fold the raw windows are turned into features, a fresh
  ``VotingClassifier`` (decision tree + MLP + logistic regression,
  ``voting='soft'``) is fitted on the training part and scored on the test
  part. One summary line is printed per fold.

  Args:
    X: Array of raw samples, indexable by the fold index arrays.
    Y: 1-D array of integer class labels aligned with ``X``.
    folds: Sequence whose items expose training indices at ``[0]`` and test
      indices at ``[1]``.

  Returns:
    Three lists with one entry per fold: accuracies, macro-averaged recalls
    and macro-averaged F1 scores.
  """
  fold_accuracies = []
  fold_recalls = []
  fold_f1_scores = []

  for fold_number, fold in enumerate(folds, start=1):
    train_idx, test_idx = fold[0], fold[1]

    train_features = feature_extraction(X[train_idx])
    test_features = feature_extraction(X[test_idx])
    train_labels = Y[train_idx]

    # The three members are created unfitted; the ensemble fits them together.
    members = [
        ('dt', train_j48(train_features, train_labels)),
        ('mlp', train_mlp(train_features, train_labels)),
        ('lr', train_logistic_regression(train_features, train_labels)),
    ]
    ensemble = VotingClassifier(estimators=members, voting='soft')
    ensemble.fit(train_features, train_labels)
    predictions = ensemble.predict(test_features)

    test_labels = Y[test_idx]
    accuracy = accuracy_score(test_labels, predictions)
    fold_accuracies.append(accuracy)

    recall = recall_score(test_labels, predictions, average='macro')
    fold_recalls.append(recall)

    f1 = f1_score(test_labels, predictions, average='macro')
    fold_f1_scores.append(f1)

    print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}] at fold[{}]'.format(accuracy, recall, f1, fold_number))
    print('________________________________________________________________')

  return fold_accuracies, fold_recalls, fold_f1_scores

In [None]:
def ReportAccuracies(avg_acc, avg_recall,avg_f1):
  """Print the mean and a 90% Student-t interval for each metric.

  Each argument is a sequence of per-fold scores. For every metric the mean is
  printed together with the interval ``st.t.interval(0.9, n - 1, loc=mean,
  scale=standard error)`` (labelled "IC" in the output).
  """
  def interval_90(scores):
    return st.t.interval(0.9, len(scores) - 1, loc=np.mean(scores), scale=st.sem(scores))

  # All three intervals are computed before anything is printed.
  report = [
      ('Mean Accuracy[{:.4f}] IC [{:.4f}, {:.4f}]', avg_acc, interval_90(avg_acc)),
      ('Mean Recall[{:.4f}] IC [{:.4f}, {:.4f}]', avg_recall, interval_90(avg_recall)),
      ('Mean F1[{:.4f}] IC [{:.4f}, {:.4f}]', avg_f1, interval_90(avg_f1)),
  ]
  for template, scores, bounds in report:
    print(template.format(np.mean(scores), bounds[0], bounds[1]))

In [None]:
def OpportunityDataSetAnalysis():
  """Load the pre-split Opportunity arrays from ``data/Opportunity/``.

  Every file is an ``.npz`` archive whose payload is stored under ``arr_0``.
  The label arrays are reduced to integer class indices with ``argmax`` over
  axis 1.

  Returns:
    ``(X_train, Y_train, X_test, Y_test)``.
  """
  def load_first_array(path):
    # np.load keeps the .npz file open until the archive is closed; the
    # context manager releases the handle as soon as the array has been read.
    with np.load(path) as archive:
      return archive['arr_0']

  X_train = load_first_array('data/Opportunity/Opportunity_train_X.npz')
  X_test = load_first_array('data/Opportunity/Opportunity_test_X.npz')
  # NOTE(review): the two label files differ in case ("train_y" vs "test_Y");
  # confirm both names match the files on disk.
  Y_train = load_first_array('data/Opportunity/Opportunity_train_y.npz')
  Y_test = load_first_array('data/Opportunity/Opportunity_test_Y.npz')
  Y_train = np.argmax(Y_train, axis=1)
  Y_test = np.argmax(Y_test, axis=1)
  return X_train,Y_train,X_test,Y_test

In [None]:
def UCIHeterognityDataSetAnalysis():
  """Load the ``UCI_HHAR_*_2`` arrays and split them 80/20 into train and test.

  Returns:
    The four arrays in the order ``train_test_split`` produces them, i.e.
    ``(X_train, X_test, Y_train, Y_test)``. ``RunUCIHeterognity`` unpacks the
    result in exactly this order.
  """
  features = np.load('data/UCI_Heterognity/UCI_HHAR_X_2.npz')['arr_0']
  labels = np.load('data/UCI_Heterognity/UCI_HHAR_Y_2.npz')['arr_0']
  # NOTE(review): argmax is applied to the feature array as well as to the
  # labels, which replaces the values along axis 1 by the index of their
  # maximum - confirm this is intended and not carried over from the label line.
  features = np.argmax(features, axis=1)
  labels = np.argmax(labels, axis=1)
  from sklearn.model_selection import train_test_split
  X_train, X_test, Y_train, Y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
  return X_train, X_test, Y_train, Y_test

In [None]:
def UCIHeterognityDataSetAnalysisNonOverlapping():
  """Load ``UCI_HHAR_X`` / ``UCI_HHAR_Y`` and split them 80/20 into train and test.

  Returns:
    The four arrays in the order ``train_test_split`` produces them, i.e.
    ``(X_train, X_test, Y_train, Y_test)``. ``RunUCIHeterognityNonOverlapping``
    unpacks the result in exactly this order.
  """
  features = np.load('data/UCI_Heterognity/UCI_HHAR_X.npz')['arr_0']
  labels = np.load('data/UCI_Heterognity/UCI_HHAR_Y.npz')['arr_0']
  # NOTE(review): argmax is applied to the feature array as well as to the
  # labels, which replaces the values along axis 1 by the index of their
  # maximum - confirm this is intended and not carried over from the label line.
  features = np.argmax(features, axis=1)
  labels = np.argmax(labels, axis=1)
  from sklearn.model_selection import train_test_split
  X_train, X_test, Y_train, Y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
  return X_train, X_test, Y_train, Y_test

In [None]:
def RunCatal(data_input_file):
  """Run the full pipeline on one archive: load, cross-validate, report.

  Args:
    data_input_file: Path of the ``.npz`` archive passed to ``DataPreparation``.
  """
  samples, labels, folds = DataPreparation(data_input_file)
  accuracies, recalls, f1_scores = Train(samples, labels, folds)
  ReportAccuracies(accuracies, recalls, f1_scores)

In [None]:
def TrainAndTestOpportunity(X_train,Y_train,X_test,Y_test):
    """Fit the soft-voting ensemble on one train split and score it on one test split.

    Both sample arrays go through ``feature_extraction`` first. Accuracy and the
    macro-averaged recall and F1 of the test predictions are printed; nothing
    is returned.
    """
    train_features = feature_extraction(X_train)
    test_features = feature_extraction(X_test)

    # The three members are created unfitted; the ensemble fits them together.
    members = [
        ('dt', train_j48(train_features, Y_train)),
        ('mlp', train_mlp(train_features, Y_train)),
        ('lr', train_logistic_regression(train_features, Y_train)),
    ]
    ensemble = VotingClassifier(estimators=members, voting='soft')
    ensemble.fit(train_features, Y_train)
    predictions = ensemble.predict(test_features)

    scores = (
        accuracy_score(Y_test, predictions),
        recall_score(Y_test, predictions, average='macro'),
        f1_score(Y_test, predictions, average='macro'),
    )
    print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}]'.format(*scores))
    print('________________________________________________________________')

In [None]:
def TrainAndTestUCI(X_train,Y_train,X_test,Y_test):
    """Fit the soft-voting ensemble on one train split and score it on one test split.

    The arrays are passed to the classifiers as given; ``feature_extraction``
    is not applied in this function. Accuracy and the macro-averaged recall and
    F1 of the test predictions are printed; nothing is returned.
    """
    # The three members are created unfitted; the ensemble fits them together.
    members = [
        ('dt', train_j48(X_train, Y_train)),
        ('mlp', train_mlp(X_train, Y_train)),
        ('lr', train_logistic_regression(X_train, Y_train)),
    ]
    ensemble = VotingClassifier(estimators=members, voting='soft')
    ensemble.fit(X_train, Y_train)
    predictions = ensemble.predict(X_test)

    scores = (
        accuracy_score(Y_test, predictions),
        recall_score(Y_test, predictions, average='macro'),
        f1_score(Y_test, predictions, average='macro'),
    )
    print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}]'.format(*scores))
    print('________________________________________________________________')

In [None]:
def RunUCIHeterognity():
  """Split the ``UCI_HHAR_*_2`` data, print the split shapes and evaluate the ensemble.

  The loader's result is unpacked as (train X, test X, train Y, test Y) and
  then re-ordered into the argument order ``TrainAndTestUCI`` expects.
  """
  train_features, test_features, train_labels, test_labels = UCIHeterognityDataSetAnalysis()
  print(train_features.shape, train_labels.shape)
  print(test_features.shape, test_labels.shape)
  TrainAndTestUCI(train_features, train_labels, test_features, test_labels)

In [None]:
def RunUCIHeterognityNonOverlapping():
  """Split the ``UCI_HHAR_X`` / ``UCI_HHAR_Y`` data, print the shapes and evaluate.

  The loader's result is unpacked as (train X, test X, train Y, test Y) and
  then re-ordered into the argument order ``TrainAndTestUCI`` expects.
  """
  train_features, test_features, train_labels, test_labels = UCIHeterognityDataSetAnalysisNonOverlapping()
  print(train_features.shape, train_labels.shape)
  print(test_features.shape, test_labels.shape)
  TrainAndTestUCI(train_features, train_labels, test_features, test_labels)

In [None]:
# Evaluate the ensemble on an 80/20 split of UCI_HHAR_X.npz / UCI_HHAR_Y.npz.
RunUCIHeterognityNonOverlapping()

In [None]:
# Evaluate the ensemble on an 80/20 split of UCI_HHAR_X_2.npz / UCI_HHAR_Y_2.npz.
RunUCIHeterognity()

In [None]:
# Evaluate the ensemble on the pre-split Opportunity train/test files.
RunOpportunity()

In [None]:
# Sub-directories of data/ (one per evaluation protocol) and the benchmark
# archives available in each of them. Edit these tuples to run a subset.
CATAL_PROTOCOLS = ('LOSO', 'SNOW', 'LOTO', 'FNOW')
CATAL_DATASETS = ('MHEALTH', 'USCHAD', 'UTD-MHAD1_1s', 'UTD-MHAD2_1s', 'WHARF', 'WISDM')

# One RunCatal call per protocol/dataset pair: 24 runs, protocol by protocol,
# datasets in the order listed above. RunCatal prints the file it is working on.
for protocol in CATAL_PROTOCOLS:
  for dataset in CATAL_DATASETS:
    RunCatal('data/{}/{}.npz'.format(protocol, dataset))