### Imports


In [2]:
import os
import json
from envyaml import EnvYAML
import pandas as pd
import tsfel
import datetime
import numpy as np
import glob

### Accessing datasets


In [3]:
def read_csv(path):
    return pd.read_csv(path)


def to_csv(df, path):
    df.to_csv(path, index=False)


E4 dataset directories


In [4]:
VAR_ENV = EnvYAML("../../../../env.yaml")
path_list = [os.path.join("../../../..", VAR_ENV["dataset.path"], VAR_ENV["dataset.version"],
                          f"P{i:02d}", "EARBUDS") for i in range(1, VAR_ENV["dataset.participants"] + 1)]


### Get all CSV files to same type

In [25]:
study_information = pd.read_csv(os.path.join("../../../..", VAR_ENV["dataset.path"], VAR_ENV["dataset.version"],"Study_Information.csv"))

for path in path_list:

    # dataset manipulation

    removing_files = glob.glob(os.path.join(path, '*_initial.csv'), recursive=True)

    for file_path in removing_files:
        os.remove(file_path)

    file_path_list = glob.glob(os.path.join(path, f'P{path_list.index(path)+1:02d}-imu-*.csv'), recursive=True)

    for file_path in file_path_list:
        print(file_path)

        # deciding the side of the earbud
        side = 'right' if 'right' in file_path else 'left'

        # read file
        df_temp = pd.read_csv(file_path)

        if(path_list.index(path) == 0 or path_list.index(path) == 5):

            # get start time from the first row as a unix millisecond timestamp
            starting_time = datetime.datetime.strptime(df_temp.loc[0, 'timestamp'], "%Y-%m-%d %H:%M:%S").timestamp() * 1000

            # drop first row
            df_temp.drop(df_temp.index[0], inplace=True)

            # change data type of the timestamp column to numeric
            df_temp['timestamp'] = pd.to_numeric(df_temp['timestamp'])

            # add start time to every other row's timestamp
            df_temp['timestamp'] +=  starting_time

            # convert to datetime
            timestamps = [datetime.datetime.fromtimestamp(df_temp.loc[i+1, 'timestamp']/1000) for i in range(len(df_temp))]

        else:
            # convert to datetime
            timestamps = [datetime.datetime.fromtimestamp(df_temp.loc[i, 'timestamp']/1000, datetime.timezone(datetime.timedelta(hours=0))) for i in range(len(df_temp))]
        df_temp.drop('timestamp', axis=1, inplace=True)
        df_temp.insert(0, 'timestamp', timestamps, True)

        # change data type of the timestamp column to numeric
        df_temp['timestamp'] = pd.to_datetime(df_temp['timestamp'])

        # sort whole dataframe by time stamp
        df_temp = df_temp.sort_values(by=['timestamp'])

        # remove rows until sessions starting according to study information
        breaking_index = 0
        for i in range (1,len(df_temp)):
            if (datetime.datetime.fromisoformat(str(df_temp["timestamp"][i]).split('.')[0].split('+')[0]) < datetime.datetime.fromisoformat(study_information["Start_Sit"][path_list.index(path)])):
                breaking_index += 1
            else:
                break
        df_temp.drop(df_temp.index[:breaking_index], inplace=True)
            
        # saving
        to_csv(df_temp, os.path.join("../../../..", VAR_ENV["dataset.path"], VAR_ENV["dataset.version"], f"P{path_list.index(path)+1:02d}", "EARBUDS", f'{file_path.split("/")[-1].split(".")[0]}_initial.csv'))

../../../../data/v1/P01/EARBUDS/P01-imu-left.csv


  df_temp = pd.read_csv(file_path)


../../../../data/v1/P01/EARBUDS/P01-imu-right.csv


  df_temp = pd.read_csv(file_path)


../../../../data/v1/P02/EARBUDS/P02-imu-right.csv
../../../../data/v1/P03/EARBUDS/P03-imu-right.csv
../../../../data/v1/P04/EARBUDS/P04-imu-right.csv
../../../../data/v1/P05/EARBUDS/P05-imu-right.csv
../../../../data/v1/P06/EARBUDS/P06-imu-right.csv


  df_temp = pd.read_csv(file_path)


../../../../data/v1/P07/EARBUDS/P07-imu-right.csv
../../../../data/v1/P07/EARBUDS/P07-imu-left.csv
../../../../data/v1/P08/EARBUDS/P08-imu-right.csv
../../../../data/v1/P08/EARBUDS/P08-imu-left.csv
../../../../data/v1/P09/EARBUDS/P09-imu-left.csv
../../../../data/v1/P09/EARBUDS/P09-imu-right.csv
../../../../data/v1/P10/EARBUDS/P10-imu-right.csv
../../../../data/v1/P10/EARBUDS/P10-imu-left.csv
../../../../data/v1/P11/EARBUDS/P11-imu-right.csv
../../../../data/v1/P11/EARBUDS/P11-imu-left.csv
../../../../data/v1/P12/EARBUDS/P12-imu-right.csv
../../../../data/v1/P12/EARBUDS/P12-imu-left.csv
../../../../data/v1/P13/EARBUDS/P13-imu-left.csv
../../../../data/v1/P13/EARBUDS/P13-imu-right.csv
../../../../data/v1/P14/EARBUDS/P14-imu-left.csv
../../../../data/v1/P14/EARBUDS/P14-imu-right.csv
../../../../data/v1/P15/EARBUDS/P15-imu-right.csv
../../../../data/v1/P15/EARBUDS/P15-imu-left.csv
../../../../data/v1/P16/EARBUDS/P16-imu-right.csv
../../../../data/v1/P16/EARBUDS/P16-imu-left.csv
../../../.

### Dataframes


In [32]:
g_multiplier = 0.061/1000
dps_multiplier = 17.5/1000

moving_window_size = 100 * 60
for path in path_list[:1]:

    # dataset manipulation

    file_path_list = glob.glob(os.path.join(path, '*_initial.csv'), recursive=True)

    for file_path in file_path_list:
        print(file_path)
        # deciding the side of the earbud
        side = 'right' if 'right' in file_path else 'left'

        # read file
        df_temp = pd.read_csv(file_path)

        # transform device accelerometer output to g
        df_temp["acc_x"] = df_temp["ax"]*g_multiplier
        df_temp["acc_y"] = df_temp["ay"]*g_multiplier
        df_temp["acc_z"] = df_temp["az"]*g_multiplier

        # transform device gyroscope output to dps
        df_temp["gyro_x"] = df_temp["gx"]*dps_multiplier
        df_temp["gyro_y"] = df_temp["gy"]*dps_multiplier
        df_temp["gyro_z"] = df_temp["gz"]*dps_multiplier

        # initialize new columns for central moving averages
        df_temp['acc_x_avg'] = 0
        df_temp['acc_y_avg'] = 0
        df_temp['acc_z_avg'] = 0
        # df_temp['gyro_x_avg'] = 0
        # df_temp['gyro_y_avg'] = 0
        # df_temp['gyro_z_avg'] = 0

        # calculate moving windows
        for i in range(0, len(df_temp)-moving_window_size):
            df_temp.loc[i+moving_window_size//2, 'acc_x_avg'] = df_temp.iloc[i:i+moving_window_size]['acc_x'].sum()/moving_window_size
            df_temp.loc[i+moving_window_size//2, 'acc_y_avg'] = df_temp.iloc[i:i+moving_window_size]['acc_y'].sum()/moving_window_size
            df_temp.loc[i+moving_window_size//2, 'acc_z_avg'] = df_temp.iloc[i:i+moving_window_size]['acc_z'].sum()/moving_window_size
            print(f'{i*100/len(df_temp)}%')

        # drop previous columns
        df_temp.drop('acc_x', inplace=True, axis=1)
        df_temp.drop('acc_y', inplace=True, axis=1)
        df_temp.drop('acc_z', inplace=True, axis=1)
        df_temp.drop('ax', inplace=True, axis=1)
        df_temp.drop('ay', inplace=True, axis=1)
        df_temp.drop('az', inplace=True, axis=1)
        df_temp.drop('gx', inplace=True, axis=1)
        df_temp.drop('gy', inplace=True, axis=1)
        df_temp.drop('gz', inplace=True, axis=1)

        print(df_temp.iloc[2999])

        # # saving
        to_csv(df_temp, os.path.join("../../../..",
               VAR_ENV["dataset.path"], VAR_ENV["dataset.version"], f"P{path_list.index(path)+1:02d}", "EARBUDS", f"modified_imu_{side}.csv"))


../../../../data/v1/P01/EARBUDS/P01-imu-left_initial.csv


KeyboardInterrupt: 

### Remove unnecessary rows

In [None]:
for path in path_list[:1]:
    # dataset manipulation

    file_path_list = glob.glob(os.path.join(path, '*_initial.csv'), recursive=True)

    for file_path in file_path_list:
        # deciding the side of the earbud
        side = 'right' if 'right' in file_path else 'left'

        df_temp = pd.read_csv(file_path)
        df_temp.drop(df_temp.index[:moving_window_size//2-1], inplace=True)
        df_temp.drop(df_temp.index[len(df_temp)-moving_window_size//2-1:], inplace=True)

        to_csv(df_temp, os.path.join("../../../..",
              VAR_ENV["dataset.path"], VAR_ENV["dataset.version"], f"P{path_list.index(path)+1:02d}", "EARBUDS", f"modified_0_rows_removed_IMU_ACC_{side}.csv"))

1


KeyError: 1