In [1]:
import importlib
import dlm
from collections import Counter
import numpy as np
import pandas as pd
import warnings
from sklearn.metrics import classification_report,f1_score, precision_score, recall_score, accuracy_score
warnings.filterwarnings("ignore")

In [1]:
# Activity ID -> human-readable activity name.
activities_names = {
    1: "lying",
    2: "sitting",
    3: "standing",
    4: "walking",
    5: "running",
    6: "cycling",
    7: "Nordic walking",
    9: "watching TV",
    10: "computer work",
    11: "car driving",
    12: "ascending stairs",
    13: "descending stairs",
    16: "vacuum cleaning",
    17: "ironing",
    18: "folding laundry",
    19: "house cleaning",
    20: "playing soccer",
    24: "rope jumping",
}

# Activity IDs grouped into three intensity levels.
low_intense_acts = [1, 2, 3, 4, 17]
middle_intense_acts = [13, 16, 7]
high_intense_acts = [5, 6, 24, 12]

# Show which activities make up the low-intensity group.
[activities_names[code] for code in low_intense_acts]

['lying', 'sitting', 'standing', 'walking', 'ironing']

In [2]:
# Show which activities make up the middle-intensity group.
[activities_names[code] for code in middle_intense_acts]

['descending stairs', 'vacuum cleaning', 'Nordic walking']

In [3]:
# Show which activities make up the high-intensity group.
[activities_names[code] for code in high_intense_acts]

['running', 'cycling', 'rope jumping', 'ascending stairs']

In [3]:
def agregate_similar_acts(df,acts, intensity):
    """Build train/test feature frames for a group of activities sharing one label.

    For every activity ID in ``acts`` that occurs in column ``1`` of ``df``,
    the rows of that activity are reduced to two features and split in row
    order: the first half of the rows becomes training data, the remainder
    test data. The per-activity pieces are then concatenated in the order
    given by ``acts``.

    Features:
        ``"3D_acc_norm"``  Euclidean norm of columns ``4``, ``5``, ``6``.
        ``"3D_gyr_norm"``  Euclidean norm of columns ``10``, ``11``, ``12``.
    (Presumably the accelerometer and gyroscope axes of one sensor -- confirm
    against the dataset's column description.)

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with integer column labels; column ``1`` holds the activity
        ID. The frame is not modified.
    acts : iterable
        Activity IDs to collect. IDs that do not occur in ``df`` are skipped.
    intensity
        Label attached to every returned row.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` where the ``X_*`` values are
        DataFrames with the two feature columns and the ``y_*`` values are
        NumPy arrays filled with ``intensity``, one entry per feature row.

    Raises
    ------
    ValueError
        If none of the requested activities occurs in ``df``.
    """

    def _norm_features(act_rows):
        # Build a fresh frame instead of adding columns to ``act_rows``: it is
        # a filtered slice of the caller's data, and assigning into such a
        # slice is the chained-assignment pattern pandas warns about.
        return pd.DataFrame({
            "3D_acc_norm": np.sqrt(act_rows[4]**2 + act_rows[5]**2 + act_rows[6]**2),
            "3D_gyr_norm": np.sqrt(act_rows[10]**2 + act_rows[11]**2 + act_rows[12]**2),
        })

    train_parts, test_parts = [], []
    train_labels, test_labels = [], []

    for act in acts:
        act_rows = df[df[1] == act]
        if len(act_rows) == 0:
            # Not every recording contains every activity.
            continue

        features = _norm_features(act_rows)

        # Split in row order: first half for training, the rest for testing.
        half = len(features) // 2
        train_parts.append(features.iloc[:half])
        test_parts.append(features.iloc[half:])

        train_labels.append([intensity] * half)
        test_labels.append([intensity] * (len(features) - half))

    if not train_parts:
        raise ValueError(
            f"none of the requested activities {list(acts)} occur in the data"
        )

    return (
        pd.concat(train_parts),
        np.concatenate(train_labels),
        pd.concat(test_parts),
        np.concatenate(test_labels),
    )



def train_test_agregated(number,acts, intensity):
    """Load one subject's recording and split the chosen activities into train/test sets.

    Reads ``subject10{number}.dat`` (relative to the current working
    directory) as a space-separated table without a header row, fills missing
    values with ``DataFrame.interpolate()``, keeps the rows whose activity ID
    (column ``1``) is in ``acts`` and hands them to ``agregate_similar_acts``.

    Parameters
    ----------
    number
        Subject number; it is interpolated into the file name after ``10``.
    acts : list
        Activity IDs to keep.
    intensity
        Label passed through to ``agregate_similar_acts``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` as produced by
        ``agregate_similar_acts``.
    """
    recording = pd.read_csv(f"subject10{number}.dat", sep=" ", header=None).interpolate()

    # Drop every row that belongs to an activity outside the requested group.
    selected = recording[recording[1].isin(acts)]

    return agregate_similar_acts(selected, acts, intensity)


def MET_activities_aggregation(userID, merge = False):
    """Assemble the three-level intensity dataset for one subject.

    The activities are grouped into low, middle and high intensity. Each
    group is loaded through ``train_test_agregated`` and labelled with its
    position in that order (``0``, ``1``, ``2``).

    Parameters
    ----------
    userID
        Subject number, forwarded to ``train_test_agregated``.
    merge : bool, default False
        When truthy, the per-group feature frames are concatenated into a
        single frame; otherwise they are returned as a list with one frame
        per intensity group.

    Returns
    -------
    tuple
        ``(df_train, label_train, df_test, label_test)``. The label values
        are always NumPy arrays concatenated in group order. ``df_train`` and
        ``df_test`` are lists of three frames unless ``merge`` is truthy, in
        which case each is a single DataFrame.
    """
    low_intense_acts = [1,2,3,4,17]
    middle_intense_acts = [13,16,7]
    high_intense_acts = [5,6,24,12]

    all_acts = [low_intense_acts, middle_intense_acts, high_intense_acts]

    df_train, label_train = [], []
    df_test, label_test = [], []

    # The position of a group in ``all_acts`` doubles as its class label.
    for intensity, acts in enumerate(all_acts):
        X_train, y_train, X_test, y_test = train_test_agregated(userID, acts, intensity)

        df_train.append(X_train)
        label_train.append(y_train)
        df_test.append(X_test)
        label_test.append(y_test)

    label_train = np.concatenate(label_train)
    label_test = np.concatenate(label_test)

    # Plain truthiness so that flags such as ``numpy.True_`` or ``1`` are
    # honoured; an identity check against ``True`` would silently ignore them.
    if merge:
        df_train = pd.concat(df_train)
        df_test = pd.concat(df_test)

    return df_train, label_train, df_test, label_test


In [4]:
def report_results(subjectID):
    """Fit the BDLM ensemble on one subject and score it on that subject's test split.

    The per-intensity frames from ``MET_activities_aggregation`` (unmerged)
    are passed to ``dlm.ensemble_BDLM(n_acts = 3).fit``. Each of the three
    test frames is then predicted separately and the predictions are joined
    in group order, which is the same order in which ``label_test`` was
    concatenated.

    Parameters
    ----------
    subjectID
        Subject number, forwarded to ``MET_activities_aggregation``.

    Returns
    -------
    dict
        Keys ``"macro_precision"``, ``"macro_recall"`` and ``"accuracy"``.
    """
    df_train, _, df_test, label_test = MET_activities_aggregation(subjectID, merge = False)

    bdlm = dlm.ensemble_BDLM(n_acts = 3)
    bdlm.fit(df_train)

    # One prediction run per intensity group, stitched together in group order.
    y_hat = np.concatenate([
        bdlm.predict(df_test[act], return_probs=False, restart=False)
        for act in range(3)
    ])

    return {
        "macro_precision": precision_score(label_test, y_hat, average = "macro"),
        "macro_recall": recall_score(label_test, y_hat, average = "macro"),
        "accuracy": accuracy_score(label_test, y_hat),
    }

In [5]:
# Evaluate the BDLM ensemble on subjects 1..8, keeping one metrics dict per subject.
subjects_report = []

for i in range(1, 9):
    stats = report_results(subjectID = i)
    subjects_report += [stats]
    print(f"Finished Training subject {i}")


Finished Training subject 1
Finished Training subject 2
Finished Training subject 3
Finished Training subject 4
Finished Training subject 5
Finished Training subject 6
Finished Training subject 7
Finished Training subject 8


In [6]:
# Per-subject metric vectors, in the order the subjects were evaluated.
macro_precision = np.array([report["macro_precision"] for report in subjects_report])
macro_recall = np.array([report["macro_recall"] for report in subjects_report])
accuracy = np.array([report["accuracy"] for report in subjects_report])

# Each entry is [mean, standard deviation] across subjects, rounded to
# 4 decimals (``ndarray.std`` with its default settings).
model_report = {
    "PRECISION": [round(macro_precision.mean(), 4), round(macro_precision.std(), 4)],
    "RECALL": [round(macro_recall.mean(), 4), round(macro_recall.std(), 4)],
    "ACCURACY": [round(accuracy.mean(), 4), round(accuracy.std(), 4)],
}

model_report

{'PRECISION': [0.5725, 0.0897],
 'RECALL': [0.5453, 0.1216],
 'ACCURACY': [0.5473, 0.0692]}

In [18]:
# One row per subject with the BDLM metrics, written out for later comparison.
subject_labels = [f'subject{i}' for i in range(1, 9)]

df_reports = pd.DataFrame(
    {
        'Subject': subject_labels,
        'Accuracy': accuracy,
        'Macro Precision': macro_precision,
        'Macro Recall': macro_recall,
    }
)

df_reports.to_csv("user_dependent_pamap2.csv", index=False)

In [8]:
def fit_baseline(model):
    """Train and score ``model`` separately on each of subjects 1..8.

    For every subject the merged train/test split from
    ``MET_activities_aggregation`` is used: ``model.fit`` is called on the
    training part and ``model.predict`` on the test part. The same ``model``
    object is refitted for each subject.

    Parameters
    ----------
    model
        Object exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    dict
        Keys ``"PRECISION"``, ``"RECALL"`` and ``"ACCURACY"``; each value is
        ``[mean, std]`` of the per-subject scores, rounded to 4 decimals.
        Precision and recall are macro-averaged.
    """
    per_subject = {"precision": [], "recall": [], "accuracy": []}

    for subject in range(1, 9):
        X_train, y_train, X_test, y_test = MET_activities_aggregation(subject, merge = True)

        model.fit(X_train, y_train)
        y_hat = model.predict(X_test)

        per_subject["precision"].append(precision_score(y_test, y_hat, average = "macro"))
        per_subject["recall"].append(recall_score(y_test, y_hat, average = "macro"))
        per_subject["accuracy"].append(accuracy_score(y_test, y_hat))

    def _mean_std(scores):
        # Summarise one metric across subjects.
        scores = np.array(scores)
        return [round(scores.mean(), 4), round(scores.std(), 4)]

    return {
        "PRECISION": _mean_std(per_subject["precision"]),
        "RECALL": _mean_std(per_subject["recall"]),
        "ACCURACY": _mean_std(per_subject["accuracy"]),
    }

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.naive_bayes import GaussianNB

# Baseline classifiers evaluated with the same per-subject protocol as the BDLM.
# NOTE: RandomForestClassifier is created without a random_state, so its
# scores can differ slightly between runs.
baselines = [GaussianNB(),KNN(),RFC(n_jobs=-1),LogisticRegression(n_jobs=-1)]

# Maps a model name to a one-element list holding that model's summary dict.
report_baselines = {}

for baseline in baselines:

    result = fit_baseline(baseline)

    # Key by class name. Stripping "()" from the estimator's repr only trims
    # the ends of the string, which leaves keys such as
    # "RandomForestClassifier(n_jobs=-1" for estimators built with arguments.
    # (Two estimators of the same class would share a key; the list above
    # contains one of each.)
    report_baselines[type(baseline).__name__] = [result]

    print(f"training finished to {baseline}")
    print(result)

training finished to GaussianNB()
{'PRECISION': [0.5522, 0.0478], 'RECALL': [0.4517, 0.0322], 'ACCURACY': [0.6036, 0.0501]}
training finished to KNeighborsClassifier()
{'PRECISION': [0.5441, 0.0181], 'RECALL': [0.505, 0.0241], 'ACCURACY': [0.6178, 0.0511]}
training finished to RandomForestClassifier(n_jobs=-1)
{'PRECISION': [0.5364, 0.0142], 'RECALL': [0.5154, 0.0246], 'ACCURACY': [0.615, 0.0507]}
training finished to LogisticRegression(n_jobs=-1)
{'PRECISION': [0.4209, 0.0917], 'RECALL': [0.39, 0.0389], 'ACCURACY': [0.5656, 0.0678]}


In [13]:
# Names of the evaluated baselines, in the order they were trained.
baselines = list(report_baselines.keys())

# Summaries in table order: the BDLM first, then every baseline. A separate
# name is used so that ``model_report`` keeps holding the BDLM summary;
# rebinding it to baseline reports would make a second run of this cell
# record a baseline's numbers in the BDLM row.
reports_in_order = [model_report] + [report_baselines[name][0] for name in baselines]

# Each summary entry is [mean, std]; split them into parallel columns.
means_precision = [report['PRECISION'][0] for report in reports_in_order]
means_recall = [report['RECALL'][0] for report in reports_in_order]
means_accuracy = [report['ACCURACY'][0] for report in reports_in_order]

stds_precision = [report['PRECISION'][1] for report in reports_in_order]
stds_recall = [report['RECALL'][1] for report in reports_in_order]
stds_accuracy = [report['ACCURACY'][1] for report in reports_in_order]
        

In [17]:
# Comparison table: one row per model (BDLM first), mean and std of each metric.
summary_columns = {
    "model": ["BDLM"] + baselines,
    "means_precision": means_precision,
    "stds_precision": stds_precision,
    "means_recall": means_recall,
    "stds_recall": stds_recall,
    "means_accuracy": means_accuracy,
    "stds_accuracy": stds_accuracy,
}

df = pd.DataFrame(summary_columns)
df.to_csv("user_specific_pamap2.csv", index=False)