### Imports


In [1]:
import os
import json
from envyaml import EnvYAML
import pandas as pd
import tsfel
import datetime
import numpy as np


### Accessing datasets


In [2]:
def read_csv(path):
    """Load the CSV file at ``path`` into a DataFrame.

    ``path`` is handed to ``pandas.read_csv`` unchanged and pandas' default
    parsing options are used.
    """
    frame = pd.read_csv(filepath_or_buffer=path)
    return frame


def to_csv(df, path):
    """Write ``df`` to ``path`` as CSV, leaving the DataFrame index out of the file.

    Returns ``None``.
    """
    df.to_csv(path_or_buf=path, index=False)


### E4 dataset directories


In [3]:
# Project configuration; the keys read below are "dataset.path",
# "dataset.version" and "dataset.participants".
VAR_ENV = EnvYAML("../../../../env.yaml")

# One input directory per participant, numbered P01, P02, ... up to
# VAR_ENV["dataset.participants"].
# NOTE(review): the sub-folder here is "EARBUDS" and the root is "../../../..",
# while the processing cells below write their results under "E4" with a
# "../../.." root -- confirm which folder and root are intended.
path_list = list(
    map(
        lambda participant: os.path.join(
            "../../../..",
            VAR_ENV["dataset.path"],
            VAR_ENV["dataset.version"],
            f"P{participant:02d}",
            "EARBUDS",
        ),
        range(1, VAR_ENV["dataset.participants"] + 1),
    )
)


### Dataframes


In [None]:
def _central_moving_average(values, window):
    """Return the moving average of every column of ``values``, aligned to the window centre.

    The mean of the ``window`` consecutive rows starting at position ``i`` is
    stored at position ``i + window // 2 - 1``. Rows without a complete window
    around them (the first ``window // 2 - 1`` and the last ``window // 2``
    rows), and rows whose window contains a missing value, are set to 0.0.

    Parameters
    ----------
    values : pandas.DataFrame
        Numeric samples, one row per sample.
    window : int
        Number of consecutive rows averaged together.

    Returns
    -------
    pandas.DataFrame
        Same shape, index and column names as ``values``.
    """
    # rolling() is right-aligned: the value at position p averages rows p-window+1 .. p.
    trailing_mean = values.rolling(window).mean()
    # Shifting up by window // 2 moves every mean onto the middle row of its window.
    return trailing_mean.shift(-(window // 2)).fillna(0.0)


# NOTE(review): this cell uses a "../../.." root and an "E4" output folder, whereas
# path_list (the input directories) is built with "../../../.." and "EARBUDS".
# The paths are kept exactly as they were -- confirm which ones are intended.
study_information = pd.read_csv(os.path.join("../../..", VAR_ENV["dataset.path"], VAR_ENV["dataset.version"],"Study_Information.csv"))

# Moving-window length in samples.
# Presumably 60 s at 32 Hz -- TODO confirm against the frequency stored in ACC.csv.
moving_window_size = 32 * 60

for participant_index, path in enumerate(path_list):
    # ACC.csv layout as read here: the header row holds the initial Unix time,
    # the first data row holds the sampling frequency, the rest are x/y/z samples.
    raw_acc = pd.read_csv(os.path.join(path, 'ACC.csv'))
    initial_time = float(raw_acc.columns[0])
    frequency = float(raw_acc.iloc[0, 0])
    df_temp = raw_acc.iloc[1:].reset_index(drop=True)
    df_temp.columns = ["acc_x", "acc_y", "acc_z"]

    # adding timestamps (UTC), one per sample, spaced 1/frequency seconds apart
    timestamps = [
        datetime.datetime.fromtimestamp(initial_time + (sample / frequency), datetime.timezone.utc)
        for sample in range(len(df_temp))
    ]
    df_temp.insert(0, "timestamp", timestamps, True)

    # remove rows until the session starts according to the study information.
    # Timestamps are compared as naive UTC wall-clock times truncated to whole
    # seconds, which is the comparison the per-row string parsing used to perform.
    # NOTE(review): this assumes "Start_Sit" is a naive ISO timestamp in UTC -- confirm.
    session_start = datetime.datetime.fromisoformat(study_information["Start_Sit"].iloc[participant_index])
    wall_clock = pd.to_datetime(df_temp["timestamp"], utc=True).dt.tz_localize(None).dt.floor("s")
    has_started = (wall_clock >= session_start).to_numpy()
    first_kept = int(has_started.argmax()) if has_started.any() else len(df_temp)
    # Resetting the index keeps row labels and row positions identical, so the
    # averages computed below end up on the rows they belong to.
    df_temp = df_temp.iloc[first_kept:].reset_index(drop=True)

    # transform (1/64)g to g
    acc_in_g = df_temp[["acc_x", "acc_y", "acc_z"]] / 64

    # central moving averages; rows without a full window keep the 0 placeholder
    # that the "Remove unnecessary rows" cell trims away afterwards
    acc_averaged = _central_moving_average(acc_in_g, moving_window_size).add_suffix("_avg")

    # keep only the timestamp and the averaged columns
    df_temp = pd.concat([df_temp[["timestamp"]], acc_averaged], axis=1)

    # saving
    to_csv(df_temp, os.path.join("../../..",
           VAR_ENV["dataset.path"], VAR_ENV["dataset.version"], f"P{participant_index + 1:02d}", "E4", "modified_ACC.csv"))


### Remove unnecessary rows

In [None]:
# For every participant, cut rows off both ends of the averaged accelerometer
# file and store the remainder in a new CSV.
# Presumably the removed rows are the ones whose moving average was left at the
# 0 placeholder by the previous cell -- verify.
# NOTE(review): the input is read from the path_list directories, whereas the
# previous cell writes modified_ACC.csv under "../../.." / ... / "E4" -- confirm
# that both refer to the same location.
for participant_number, path in enumerate(path_list, start=1):
    averaged = pd.read_csv(os.path.join(path, 'modified_ACC.csv'))

    # Drop the first moving_window_size // 2 - 1 rows ...
    rows_cut_at_start = moving_window_size // 2 - 1
    without_head = averaged.iloc[rows_cut_at_start:]

    # ... then the last moving_window_size // 2 + 1 rows of what is left.
    tail_start = len(without_head) - moving_window_size // 2 - 1
    df_temp = without_head.iloc[:tail_start]

    output_file = os.path.join(
        "../../..",
        VAR_ENV["dataset.path"],
        VAR_ENV["dataset.version"],
        f"P{participant_number:02d}",
        "E4",
        "modified_0_rows_removed_ACC.csv",
    )
    to_csv(df_temp, output_file)

1


KeyError: 1