## Takes 2 whole eSense sensor files and cuts them according to timestamps

This program takes the two eSense sensor Excel files recorded during one study (and iterates over several studies) and extracts the sensor readings of each single trial into new Excel files.

notes: 
- The script is adapted to work for 2 different sets of studies with slightly different setups. 
- The timestamps are gathered from the audio recorded by the eSense devices (after synchronizing them with the sensor readings). [see SynchronySplit.ipynb] 
- Cutting the data might render the gyroscope readings unusable depending on application. 

In [1]:
# imports

import matplotlib.pyplot as plt
import os 
import numpy as np
import pandas as pd
from datetime import datetime

In [2]:
# methods

# Converts the minute, second and microsecond fields of a datetime into
# a single millisecond count. Hour and date fields are not used.
# takes: a datetime-like object (or a null value such as NaT / None)
# returns: milliseconds as integer, or 0 if a null value was given
def to_integer(dt_time):
    # missing timestamps are mapped to 0
    if pd.isnull(dt_time):
        return 0
    minutes_ms = dt_time.minute * 60000
    seconds_ms = dt_time.second * 1000
    # fractional milliseconds are truncated by int() below
    micros_ms = dt_time.microsecond / 1000
    return int(minutes_ms + seconds_ms + micros_ms)

# Builds the excel file name for a trial number.
# takes: integer (should be between 0 and 99)
# returns: string '<num>.xls', prefixed with a single '0' if num is below 10
def create_file_name(num):
    # pad one-digit numbers so file names sort as 01, 02, ..., 10, 11
    prefix = '0' if num < 10 else ''
    return prefix + str(num) + '.xls'

In [13]:
# Cuts the synchronized sensor recordings of both eSense devices into one
# excel file per trial, for every study of the selected study set.
#
# change value here depending on which study set should be used:
# 1: for first study set
# 2: for second study set
# any other value prints an error and nothing is processed
study_set = 1
process_continue = True

if study_set == 1:
    start_num = 1
    end_num = 4
    folder_name = '#study_23_06_20'
elif study_set == 2:
    start_num = 5
    end_num = 9
    folder_name = '#study_27_07_20'
else:
    print('Wrong Study Number! Program terminated')
    process_continue = False

if process_continue:
    # paths and settings that do not change between studies
    base_dir = os.path.join('C:\\', 'Users', 'Sabrina', 'Documents', 'master_thesis')
    timeline_dir = os.path.join(base_dir, 'timeline')
    # timeline rows (= trial numbers) that are kept: 1-20 for the first
    # study set, 1-14 for the second one
    last_trial = 20 if study_set == 1 else 14

    # note: end_num is exclusive, studies start_num .. end_num-1 are processed
    for j in range(start_num, end_num):
        # file paths (trial files are saved next to the input data)
        study_name = '00' + str(j)
        data_dir = os.path.join(base_dir, folder_name, study_name)
        saving_dir = data_dir

        # open sensor data files
        open_name_1 = os.path.join(data_dir, 'eSense_0237', 'synchronized_whole.xls')
        open_name_2 = os.path.join(data_dir, 'eSense_0308', 'synchronized_whole.xls')
        df_1 = pd.read_excel(io=open_name_1, sheet_name="Experiment")
        df_2 = pd.read_excel(io=open_name_2, sheet_name="Experiment")
        sensor_data = (('eSense_0237', df_1), ('eSense_0308', df_2))

        # open timeline data file
        timeline_name = study_name + '_timeline.xlsx'
        df_tl = pd.read_excel(io=os.path.join(timeline_dir, timeline_name))

        # shift the index so that it matches the trial numbers (1, 2, ...),
        # then trim to the needed rows/columns; copy() so the column
        # assignments below operate on an independent frame and not on a
        # slice of the original
        df_tl.index = df_tl.index + 1
        df_tl = df_tl.loc[1:last_trial, ['start', 'end']].copy()

        # transform timeline column values to datetime objects
        # ('minutes.seconds+fraction'); values that do not match the
        # format become NaT because of errors='coerce'
        for column in ('start', 'end'):
            df_tl[column] = pd.to_datetime(df_tl[column].astype(str), format='%M.%S%f', errors='coerce')

        # convert datetime minute, sec and microsec to millisec
        # a missing start marks the whole trial as not recorded, so the
        # end is forced to 0 as well in that case
        df_tl['start_ms'] = [to_integer(start) for start in df_tl['start']]
        df_tl['end_ms'] = [
            0 if pd.isnull(start) else to_integer(end)
            for start, end in zip(df_tl['start'], df_tl['end'])
        ]

        # make sure the output folders exist, otherwise to_excel fails
        for device, _ in sensor_data:
            os.makedirs(os.path.join(saving_dir, device, 'trials'), exist_ok=True)

        # extract single trials from sensor data files by taking
        # timestamps from timeline data
        # note: if timeline data is zero, no trial has been recorded.
        # NOTE(review): writing '.xls' relies on the xlwt writer, which newer
        # pandas releases (2.0+) no longer ship - confirm the pandas version.
        for i, start_ms, end_ms in zip(df_tl.index, df_tl['start_ms'], df_tl['end_ms']):
            for device, df_sensor in sensor_data:
                save_name = os.path.join(saving_dir, device, 'trials', create_file_name(i))
                if start_ms == 0:
                    # trial was not recorded: create an empty file that
                    # only contains the column titles
                    df_trial = pd.DataFrame(columns=df_sensor.columns.values)
                else:
                    # find the rows whose time is closest to the start and
                    # end timestamps of the trial
                    start_idx = (df_sensor['time'] - start_ms).abs().idxmin()
                    end_idx = (df_sensor['time'] - end_ms).abs().idxmin()
                    # label-based slice, both ends included
                    df_trial = df_sensor.loc[start_idx:end_idx]
                df_trial.to_excel(save_name, sheet_name="Experiment", index=False)
    print('Process finished successfully.')

Process finished successfully.
