In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import bootstrap

In [2]:
# Test Dateset Preparation

test_data_folder = "./data/AReM/"
test_df_list = []


for dir_name in os.listdir(test_data_folder):
    activity_path = os.path.join(test_data_folder, dir_name)

    if os.path.isdir(activity_path):
        if dir_name in ["bending1", "bending2"]:
            selected_files = ["dataset1.csv", "dataset2.csv"]
        else:
            selected_files = ["dataset1.csv", "dataset2.csv", "dataset3.csv"]
            
        for file_name in selected_files:
            file_path = os.path.join(activity_path, file_name)

            if os.path.exists(file_path):
                df = pd.read_csv(file_path, delimiter=",", skiprows=4)
                df["Activity"] = dir_name
                test_df_list.append(df)


if test_df_list:
    test_df = pd.concat(test_df_list, ignore_index=False, axis=0)
    test_df.reset_index(drop=True, inplace=True)
    test_df = test_df.rename(columns = {'# Columns: time':'Time'})

test_df.head()

Unnamed: 0,Time,avg_rss12,var_rss12,avg_rss13,var_rss13,avg_rss23,var_rss23,Activity
0,0,39.25,0.43,22.75,0.43,33.75,1.3,bending1
1,250,39.25,0.43,23.0,0.0,33.0,0.0,bending1
2,500,39.25,0.43,23.25,0.43,33.0,0.0,bending1
3,750,39.5,0.5,23.0,0.71,33.0,0.0,bending1
4,1000,39.5,0.5,24.0,0.0,33.0,0.0,bending1


In [4]:
#training dataset

train_data_folder = "./data/AReM/"
train_df_list = []

for dir_name in os.listdir(train_data_folder):
    activity_path = os.path.join(train_data_folder, dir_name)

    if os.path.isdir(activity_path):
        
        if dir_name in ["bending1", "bending2"]:
            test_files = {"dataset1.csv", "dataset2.csv"}
        else:
            test_files = {"dataset1.csv", "dataset2.csv", "dataset3.csv"}

        train_files = [f for f in os.listdir(activity_path) if f.endswith('.csv') and f not in test_files]

        for file_name in train_files:
            file_path = os.path.join(activity_path, file_name)
            if os.path.exists(file_path):
                df = pd.read_csv(file_path, delimiter=",", skiprows=4)
                df["Activity"] = dir_name
                train_df_list.append(df)


if train_df_list:
    train_df = pd.concat(train_df_list, ignore_index=False, axis=0)
    train_df.reset_index(drop=True, inplace=True)
    train_df = train_df.rename(columns = {'# Columns: time':'Time'})


train_df.head()

Unnamed: 0,Time,avg_rss12,var_rss12,avg_rss13,var_rss13,avg_rss23,var_rss23,Activity
0,0,42.0,0.0,18.5,0.5,12.0,0.0,bending1
1,250,42.0,0.0,18.0,0.0,11.33,0.94,bending1
2,500,42.75,0.43,16.75,1.79,18.25,0.43,bending1
3,750,42.5,0.5,16.75,0.83,19.0,1.22,bending1
4,1000,43.0,0.82,16.25,0.83,18.0,0.0,bending1


In [5]:
data_folder = "./data/AReM/"
df_list = []

for dir_name in os.listdir(data_folder):
    activity_path = os.path.join(data_folder, dir_name)

    if os.path.isdir(activity_path):
        for csv_file in os.listdir(activity_path):
            if csv_file.endswith('.csv'):
                data_path_csv = os.path.join(activity_path, csv_file)
                df = pd.read_csv(data_path_csv, delimiter=",", skiprows=4)
                df_list.append(df)

len(df_list)

88

In [6]:
feature_list = []
for i, df in enumerate(df_list):  
    features = {"Instance": i + 1}
    
    for col_i, col in enumerate(df.columns[1:], start=1):
        features[f"min_{col_i}"] = np.min(df[col])
        features[f"max_{col_i}"] = np.max(df[col])
        features[f"mean_{col_i}"] = np.mean(df[col])
        features[f"median_{col_i}"] = np.median(df[col])
        features[f"STD_{col_i}"] = np.std(df[col])
        features[f"q1_{col_i}"] = np.percentile(df[col], 25)
        features[f"q3_{col_i}"] = np.percentile(df[col], 75)
    
    feature_list.append(features)

feature_df = pd.DataFrame(feature_list)
feature_df = feature_df.drop(columns=['Instance'])
print(feature_df.shape)
feature_df.head()

(88, 42)


Unnamed: 0,min_1,max_1,mean_1,median_1,STD_1,q1_1,q3_1,min_2,max_2,mean_2,...,STD_5,q1_5,q3_5,min_6,max_6,mean_6,median_6,STD_6,q1_6,q3_6
0,36.25,48.0,43.969125,44.5,1.616677,43.31,44.67,0.0,1.5,0.413125,...,3.314843,20.5,23.75,0.0,2.96,0.555312,0.49,0.487318,0.0,0.83
1,37.0,48.0,43.454958,43.25,1.384653,42.5,45.0,0.0,1.58,0.378083,...,2.486268,22.25,24.0,0.0,5.26,0.679646,0.5,0.621885,0.43,0.87
2,33.0,47.75,42.179812,43.5,3.66684,39.15,45.0,0.0,3.0,0.696042,...,3.845436,30.4575,36.33,0.0,2.18,0.613521,0.5,0.523771,0.0,1.0
3,33.0,45.75,41.678063,41.75,2.241152,41.33,42.75,0.0,2.83,0.535979,...,2.408514,28.4575,31.25,0.0,1.79,0.383292,0.43,0.388759,0.0,0.5
4,37.25,45.0,40.624792,40.5,1.475428,39.25,42.0,0.0,1.3,0.358604,...,2.186168,33.0,36.0,0.0,1.92,0.570583,0.43,0.582308,0.0,1.3


In [7]:
std_feature_df = feature_df.describe()
std_feature_df = std_feature_df.loc[['std']].T
std_feature_df

Unnamed: 0,std
min_1,9.569975
max_1,4.394362
mean_1,5.335718
median_1,5.440054
STD_1,1.770306
q1_1,6.15359
q3_1,5.138925
min_2,0.0
max_2,5.062729
mean_2,1.574164


In [8]:
n_trials = 1000
ci_dict = {}
for col in feature_df.columns:
    col_values = feature_df[col].dropna().values
    bootstrap_std = bootstrap((feature_df[col].values,), np.std, confidence_level=0.9, n_resamples=n_trials, method='percentile')
    ci_dict[col] = (bootstrap_std.confidence_interval.low, bootstrap_std.confidence_interval.high)

ci_df = pd.DataFrame(ci_dict, index=['Lower Bound', 'Upper Bound']).T
std_feature_df.join(ci_df)

Unnamed: 0,std,Lower Bound,Upper Bound
min_1,9.569975,8.222084,10.710577
max_1,4.394362,3.325934,5.244509
mean_1,5.335718,4.674034,5.847397
median_1,5.440054,4.77401,6.034362
STD_1,1.770306,1.564938,1.936584
q1_1,6.15359,5.56069,6.612223
q3_1,5.138925,4.307651,5.82714
min_2,0.0,0.0,0.0
max_2,5.062729,4.596498,5.370562
mean_2,1.574164,1.388451,1.699318
