In [1]:
import numpy as np
import matplotlib.pylab as plt
import pandas as pd
import gc
import warnings
from sklearn.metrics import classification_report,f1_score, precision_score, recall_score, accuracy_score
warnings.filterwarnings("ignore")

In [2]:
# Load the dataset from the Parquet file "PAMAP2.parquet" (path is relative, so it is
# resolved against the current working directory). Code further down reads the
# columns "subject", "1", "3D_acc_norm" and "3D_gyr_norm" from this frame.
df = pd.read_parquet("PAMAP2.parquet")

In [3]:
def return_train_test(df_train,df_test):
    """Turn a train frame and a test frame into features and intensity labels.

    Rows are grouped by the value in column "1" (presumably the PAMAP2
    activity id -- confirm against the dataset) into three intensity classes:
    0 = low, 1 = middle, 2 = high. Rows whose value belongs to none of the
    groups are dropped.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Frames containing the columns "1", "3D_acc_norm" and "3D_gyr_norm".

    Returns
    -------
    X_train, X_test : pandas.DataFrame
        The "3D_acc_norm" and "3D_gyr_norm" columns, with rows ordered
        low, then middle, then high intensity (original index preserved).
    y_train, y_test : numpy.ndarray
        Integer class labels aligned row-by-row with X_train / X_test.
    """
    feature_columns = ["3D_acc_norm", "3D_gyr_norm"]

    # Values of column "1" per intensity class; the list position is the label.
    intensity_groups = [
        [1, 2, 3, 4, 17],   # low intensity    -> label 0
        [13, 16, 7],        # middle intensity -> label 1
        [5, 6, 24, 12],     # high intensity   -> label 2
    ]

    def return_data_merged(frame):
        """Return one feature frame per intensity class, in label order."""
        return [
            frame[frame["1"].isin(acts)][feature_columns]
            for acts in intensity_groups
        ]

    def build_labels(parts):
        """Return the label array matching the concatenation of ``parts``."""
        # An explicit integer dtype keeps the labels integral even when a class
        # has no rows; concatenating an empty Python list instead would
        # silently promote the whole label array to float64.
        return np.concatenate(
            [np.full(len(part), label, dtype=int) for label, part in enumerate(parts)]
        )

    X_train = return_data_merged(df_train)
    X_test = return_data_merged(df_test)

    y_train = build_labels(X_train)
    y_test = build_labels(X_test)

    return pd.concat(X_train), pd.concat(X_test), y_train, y_test




def fit_baseline(model, data=None, subjects=None):
    """Score ``model`` with leave-one-subject-out cross-validation.

    For each subject id the model is fitted on the rows of every other
    subject and evaluated on the held-out subject's rows, using the features
    and labels produced by ``return_train_test``.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. The same instance is
        refitted for every fold, so on return it is fitted on the last fold.
    data : pandas.DataFrame, optional
        Frame with a ``subject`` column plus the columns ``return_train_test``
        reads. Defaults to the notebook-level ``df``, looked up at call time.
    subjects : iterable, optional
        Subject ids to hold out, one fold per id. Defaults to ``range(1, 9)``.

    Returns
    -------
    dict
        Keys "PRECISION", "RECALL" and "ACCURACY", each mapped to
        ``[mean, std]`` across folds, rounded to 4 decimals. Precision and
        recall are macro-averaged.
    """
    if data is None:
        # Fall back to the notebook global so existing one-argument calls keep working.
        data = df
    if subjects is None:
        subjects = range(1, 9)

    fold_scores = {"PRECISION": [], "RECALL": [], "ACCURACY": []}

    for number in subjects:
        df_train = data[data.subject != number]
        df_test = data[data.subject == number]

        X_train, X_test, y_train, y_test = return_train_test(df_train, df_test)

        model.fit(X_train, y_train)
        y_hat = model.predict(X_test)

        fold_scores["PRECISION"].append(precision_score(y_test, y_hat, average="macro"))
        fold_scores["RECALL"].append(recall_score(y_test, y_hat, average="macro"))
        fold_scores["ACCURACY"].append(accuracy_score(y_test, y_hat))

    model_report = {}
    for metric, values in fold_scores.items():
        values = np.array(values)
        model_report[metric] = [round(values.mean(), 4), round(values.std(), 4)]

    return model_report


In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.naive_bayes import GaussianNB

# Candidate baseline classifiers; each one is evaluated in turn by fit_baseline.
baselines = [
    GaussianNB(),
    RFC(n_jobs=-1),
    LogisticRegression(n_jobs=-1),
    KNN(),
]

# Maps each estimator's string form to a one-element list holding its report dict.
report_baselines = {}

for baseline in baselines:
    model = baseline
    result = fit_baseline(model)
    report_baselines[str(baseline)] = [result]

    print(f"training finished to {baseline}")
    print(result)
    # gc.collect() returns the number of unreachable objects it found; print it
    # after each model as a rough view of what was freed.
    print(gc.collect())

training finished to GaussianNB()
{'PRECISION': [0.5208, 0.0761], 'RECALL': [0.4484, 0.0376], 'ACCURACY': [0.5951, 0.0365]}
0
training finished to RandomForestClassifier(n_jobs=-1)
{'PRECISION': [0.4936, 0.0254], 'RECALL': [0.4847, 0.0164], 'ACCURACY': [0.576, 0.0245]}
130
training finished to LogisticRegression(n_jobs=-1)
{'PRECISION': [0.4286, 0.0798], 'RECALL': [0.3839, 0.0322], 'ACCURACY': [0.5519, 0.0432]}
95
training finished to KNeighborsClassifier()
{'PRECISION': [0.4962, 0.0287], 'RECALL': [0.4721, 0.0176], 'ACCURACY': [0.5808, 0.0313]}
182


In [6]:
# Flatten report_baselines into parallel lists, one entry per model, kept in the
# same order as `baselines` so the lists line up row-by-row in a table.
means_precision = []
means_recall = []
means_accuracy = []

stds_precision = []
stds_recall = []
stds_accuracy = []

# Model names in the insertion order of report_baselines.
baselines = list(report_baselines.keys())

# Iterate over the names themselves rather than a hard-coded range(4) with an
# `i-1` index: that index started at the last model and shifted every
# statistic onto the wrong model name.
for model_name in baselines:
    # Each value is a one-element list wrapping the report dict.
    model_report = report_baselines[model_name][0]

    # Every report entry is a [mean, std] pair.
    means_precision.append(model_report['PRECISION'][0])
    means_recall.append(model_report['RECALL'][0])
    means_accuracy.append(model_report['ACCURACY'][0])

    stds_precision.append(model_report['PRECISION'][1])
    stds_recall.append(model_report['RECALL'][1])
    stds_accuracy.append(model_report['ACCURACY'][1])
        

In [8]:
# Assemble the per-model summary lists into one table (one row per model) and
# export it as CSV without the index column.
# NOTE(review): this rebinds `df`, the name fit_baseline falls back to for the
# dataset; re-running the training cell after this one needs the data reloaded.
# NOTE(review): the file name is spelled "banchmarks.csv"; kept unchanged in case
# other code reads that path -- confirm before renaming.
benchmark_columns = {
    "model": baselines,
    "means_precision": means_precision,
    "stds_precision": stds_precision,
    "means_recall": means_recall,
    "stds_recall": stds_recall,
    "means_accuracy": means_accuracy,
    "stds_accuracy": stds_accuracy,
}
df = pd.DataFrame(benchmark_columns)
df.to_csv("banchmarks.csv", index=False)