### Imports

In [1]:
import os, glob
import pandas as pd

### Google Drive prerequisite

In [2]:
# Mount Google Drive at /content/drive so the files stored there can be read.
# Only works within Google Colab
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Generating the list of HRV files to load

*   HRV simple sensor 1
*   HRV simple sensor 2
*   HRV garmin sensor

In [3]:
# Make the folder that holds the HRV files the current working directory.
# The relative "*.txt" glob pattern used to list the files depends on this.
path_to_hrv_files = '/content/drive/MyDrive/core simulateur/data/'
os.chdir(path_to_hrv_files)

In [4]:
# List every file of the current working directory (the "data" folder) ending with ".txt".
# glob returns names in an arbitrary, filesystem-dependent order, so the names are
# sorted to make the processing order reproducible from one run to the next.
list_hrv_files = sorted(glob.glob("*.txt"))

In [5]:
# Partition the HRV file names by sensor type: names containing 'garmin' belong to
# the garmin sensor, every other name belongs to a simple sensor.
list_hrv_files_garmin = [name for name in list_hrv_files if 'garmin' in name]
list_hrv_files_simple_sensor = [name for name in list_hrv_files if 'garmin' not in name]

In [6]:
# File-name prefixes '25_10' through '30_10', needed to split the simple sensor
# list in two because the simple sensor files do not all share the same output layout.
# NOTE(review): the prefix looks like <day>_<month> of the recording - confirm.
key_list = [str(day) + '_10' for day in range(25, 31)]

In [7]:
# Split the simple sensor files in two groups using the first five characters of the
# file name: a name whose prefix is in key_list goes to group 2, any other to group 1.
list_hrv_files_simple_sensor_1 = [
    name for name in list_hrv_files_simple_sensor if name[:5] not in key_list
]
list_hrv_files_simple_sensor_2 = [
    name for name in list_hrv_files_simple_sensor if name[:5] in key_list
]

In [8]:
# Column names for each type of HRV file. The files are read without a header row
# (header=None in list_dataframes), so these lists supply the column names.
# NOTE(review): the meaning of 'TBD' and 'TBD2' is not documented - confirm what
# these columns hold.
headers_list_simple_sensor = ['Timestamp1', 'Timestamp2', 'Device_id', 'TBD', 'TBD2']
headers_list_garmin_sensor = ['Timestamp1', 'Timestamp2', 'TBD2']
headers_list_simple_sensor_2 = ['Timestamp1', 'Device_id', 'TBD', 'TBD2']

In [9]:
# Pair each group of HRV files with the column names of its layout. The two lists are
# parallel: the headers at a given position apply to every file of the sensor list
# found at the same position.
dict_hrv_files = {
    'sensors_list': [
        list_hrv_files_simple_sensor_1,
        list_hrv_files_simple_sensor_2,
        list_hrv_files_garmin,
    ],
    'headers_type': [
        headers_list_simple_sensor,
        headers_list_simple_sensor_2,
        headers_list_garmin_sensor,
    ],
}

In [10]:
# Create the list of dataframes of all the HRV files
def list_dataframes(path_to_hrv_files, dict_hrv_files):
    """Read every HRV file into its own dataframe.

    Parameters
    ----------
    path_to_hrv_files : str or path-like
        Folder that contains the HRV files. A trailing separator is optional.
    dict_hrv_files : dict
        Mapping with two parallel lists: 'sensors_list' (lists of file names) and
        'headers_type' (column names used for the files of the list at the same
        position).

    Returns
    -------
    list of pandas.DataFrame
        One dataframe per file, in the order the files are listed. Each one has the
        columns named by its headers plus 'file_name' (name of the source file) and
        'Garmin' (1 when the file name contains 'garmin', 0 otherwise).

    Raises
    ------
    IndexError
        If a non-empty sensor list has no headers at the same position.
    """
    df_list = []

    for position, sensor_list in enumerate(dict_hrv_files['sensors_list']):
        for sensor in sensor_list:
            # Headers are looked up by position because the two lists are parallel.
            headers = dict_hrv_files['headers_type'][position]
            # os.path.join adds the separator only when it is missing, so the folder
            # can be given with or without a trailing slash.
            file_path = os.path.join(path_to_hrv_files, sensor)
            # The files are ";"-separated and have no header row.
            df = pd.read_csv(file_path, sep=";", header=None, names=headers)
            df['file_name'] = sensor
            df['Garmin'] = int('garmin' in sensor)
            df_list.append(df)

    return df_list

In [11]:
# Read every listed HRV file: one dataframe per file
dataframes = list_dataframes(path_to_hrv_files, dict_hrv_files)

### Combine all HRV dataframes into a main one (HRV_df)

In [12]:
# Create a main dataframe containing all HRV dataframes combined
def combine_df(df_list):
    """Stack dataframes vertically into a single new dataframe.

    Parameters
    ----------
    df_list : iterable of pandas.DataFrame
        Dataframes to combine, in the order their rows should appear.

    Returns
    -------
    pandas.DataFrame
        A new dataframe holding the rows of every input. With two or more inputs the
        index is renumbered from 0; with a single input the result is a copy of it.

    Raises
    ------
    ValueError
        If df_list is empty.
    """
    frames = list(df_list)
    if not frames:
        raise ValueError("combine_df needs at least one dataframe to combine")
    if len(frames) == 1:
        # A single frame is returned as a copy with its index left untouched.
        return frames[0].copy()
    # One concat over the whole list avoids re-copying the accumulated rows
    # at every step, which a pairwise concat in a loop would do.
    return pd.concat(frames, axis=0, ignore_index=True)

In [13]:
# Build the main dataframe from the per-file dataframes
HRV_df = combine_df(dataframes)

### Formatting

In [14]:
# Format the Timestamp features
def timestamp_formatting(df, *args):
    """Convert millisecond timestamp columns to pandas datetimes, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to update. It is modified in place.
    *args : str
        Names of the columns to convert. Their values are interpreted with
        unit="ms"; missing values become NaT.

    Returns
    -------
    pandas.DataFrame
        The same dataframe object that was passed in.

    Raises
    ------
    KeyError
        If a named column does not exist in df.
    """
    for timestamp in args:
        # Vectorised conversion of the whole column instead of building one
        # pd.Timestamp per row; it also gives a datetime dtype on empty columns.
        df[timestamp] = pd.to_datetime(df[timestamp], unit="ms")
    return df

In [15]:
# Allow to drop useless columns
def drop_columns(df, *args):
    """Remove the named columns from a dataframe, in place.

    All columns are dropped in a single operation, so when one of the names does
    not exist the dataframe is left unchanged instead of partially modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to update. It is modified in place.
    *args : label or list of labels
        Columns to remove. Each argument is either one column label or a list
        of labels.

    Returns
    -------
    pandas.DataFrame
        The same dataframe object that was passed in.

    Raises
    ------
    KeyError
        If any of the requested columns does not exist in df.
    """
    labels = []
    for column in args:
        # Strings and tuples are single labels; other list-likes hold several labels.
        if isinstance(column, (str, tuple)) or not pd.api.types.is_list_like(column):
            labels.append(column)
        else:
            labels.extend(column)
    df.drop(columns=labels, inplace=True)
    return df

In [16]:
# Convert both timestamp columns (values in milliseconds) to pandas timestamps,
# then remove the 'Device_id' column
HRV_df = timestamp_formatting(HRV_df, 'Timestamp1', 'Timestamp2')
HRV_df = drop_columns(HRV_df, 'Device_id')

In [17]:
# Display the combined dataframe (the rendering is truncated to the first and last rows)
HRV_df

Unnamed: 0,Timestamp1,Timestamp2,TBD,TBD2,file_name,Garmin
0,2021-11-05 09:50:25.764,2021-11-05 09:50:26.650,57.0,[],05_11_2021_10_44 ed2.txt,0
1,2021-11-05 09:50:25.794,2021-11-05 09:50:26.681,70.0,[2272],05_11_2021_10_44 ed2.txt,0
2,2021-11-05 09:50:25.996,2021-11-05 09:50:26.881,56.0,[],05_11_2021_10_44 ed2.txt,0
3,2021-11-05 09:50:26.475,2021-11-05 09:50:27.367,56.0,[],05_11_2021_10_44 ed2.txt,0
4,2021-11-05 09:50:26.602,2021-11-05 09:50:27.499,70.0,[],05_11_2021_10_44 ed2.txt,0
...,...,...,...,...,...,...
1363841,2021-11-23 14:27:02.968,2021-11-23 14:27:01.824,,830,22_11_2021_15_38_garmin eb0.txt,1
1363842,2021-11-23 14:27:02.976,2021-11-23 14:27:01.833,,830,22_11_2021_15_38_garmin eb0.txt,1
1363843,2021-11-23 14:27:02.983,2021-11-23 14:27:01.840,,830,22_11_2021_15_38_garmin eb0.txt,1
1363844,2021-11-23 14:27:02.990,2021-11-23 14:27:01.846,,830,22_11_2021_15_38_garmin eb0.txt,1
