# E4FileFormatter
The E4FileFormatter is for longitudinal studies that use the Empatica E4 wearable sensor. It collects the .csv files from recordings of varying lengths and merges them into one complete .csv per sensor covering the duration of the study.
***

##### **Input:** Unzipped files of raw .csv files downloaded from Empatica. (You only need to specify files)
##### **Output:** Properly formatted .csv files compiled from all recordings with correct datetime stamps
***

##### Format of input: 
Configuration of folders/files: 
  > Folder for each participant ->
  > Folder named Empatica ->
  > Downloaded all folders (originally zipped) containing csv files from Empatica session
            
***

**Check:** 
* Time Zone Correction - you may need to change this depending on the time zone in which the data from the watch was uploaded via the E4 Portal

**Sources:**
* Empatica Timestamp Explanation: https://support.empatica.com/hc/en-us/articles/202800715-Session-start-time-format-and-synchronization-
* GitHub with helpful ideas on E4 Data here: https://github.com/Ev4ngelos/EmpaticaBiophysicalSync/blob/master/E4BioSync.py

***
***
### First, you need to Unzip files: 
Using Bash/Terminal:
* cd to directory then:
* find -name '*.zip' -exec sh -c 'unzip -d "${1%.*}" "$1"' _ {} \;

***

## User-defined Input:

In [None]:
# Subject ID number; it is also the name of that participant's folder inside `filesource`.
theid = "00000"
# Source folder that contains all participant folders. Paths are built by plain
# string concatenation, so keep the trailing slash.
filesource = "C:/Users/X/X/"

***

In [None]:
import csv
import datetime
import math
import time
import collections
from collections import OrderedDict
import os.path
import pandas as pd
import glob

## Import & Format EDA, TEMP, HR, BVP
Functions: 
* readFile() - reads file into dictionary and corrects for time zone
* formatfile() - formats into dataframe with time as timestamp using datetime (ISO8601), formats sensor values to float, writes to .csv
* importandexport() - finds all files of sensor type in participant folder and runs formatfile for each input file

In [None]:
def readFile(file, tz_offset_hours=-4):
    """Read a single-channel sensor file into a timestamp -> value mapping.

    The first non-blank line is the session start time in epoch seconds, the
    second is the sampling rate in hertz, and every following line is one
    sample.

    Args:
        file: Path of the raw .csv file to read.
        tz_offset_hours: Hours added to the start time as the time zone
            correction. The default of -4 keeps the previous fixed behaviour;
            change it to match the time zone of the upload.

    Returns:
        OrderedDict mapping the corrected epoch-second timestamp (float) of
        each sample to the sample's text exactly as read from the file.

    Raises:
        ValueError: If the start time or sampling rate line is not numeric.
    """
    samples = OrderedDict()
    start = None
    hertz = None
    count = 0

    with open(file, 'rt') as csvfile:
        for line in csvfile:
            value = line.strip()
            if not value:
                # Blank lines carry no sample; skip them instead of failing.
                continue
            if start is None:
                start = float(value) + 3600 * tz_offset_hours
            elif hertz is None:
                hertz = float(value)
            else:
                # Derive each timestamp from the sample index so rounding
                # error does not build up over a long recording.
                samples[start + count / hertz] = value
                count += 1
    return samples

In [None]:
def formatfile(file, idd, typed):
    """Format one single-channel recording and append it to the combined .csv.

    Reads `file` with readFile(), converts the epoch-second keys to datetimes,
    casts the sensor values to float and appends the rows (without a header)
    to ``filesource + idd + '/' + typed + '.csv'``.

    Args:
        file: Path of the raw .csv file for one recording.
        idd: Participant folder name under `filesource`.
        typed: Sensor name; used as the column name and the output file name.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S.%f'

    readings = readFile(file=file)
    # Render the epoch seconds on the UTC clock so the machine's local zone is
    # never applied; fromtimestamp(..., utc) replaces the deprecated
    # utcfromtimestamp and produces the same text.
    readings = {
        datetime.datetime.fromtimestamp(k, datetime.timezone.utc).strftime(stamp_format): v
        for k, v in readings.items()
    }
    frame = pd.DataFrame.from_dict(readings, orient='index', columns=[typed])
    frame[typed] = frame[typed].astype(float)

    frame['Datetime'] = frame.index
    # Parse with the same format the strings were written in (space separator).
    frame['Datetime'] = pd.to_datetime(frame['Datetime'], format=stamp_format)
    frame = frame.set_index('Datetime')

    out_filename = (filesource + idd + '/' + typed + '.csv')
    # Append mode: every recording of this sensor ends up in one file.
    frame.to_csv(out_filename, mode='a', header=False)
    print('Done')

In [None]:
def importandexport(idd, typed):
    """Find every recording of one sensor for a participant and format each.

    Args:
        idd: Participant folder name under `filesource`.
        typed: Sensor name; matches ``<typed>.csv`` in each session folder
            under ``<filesource><idd>/Empatica/``.
    """
    # glob returns matches in arbitrary order; sort so recordings are appended
    # to the combined file in a stable, folder-name order.
    configfiles = sorted(glob.glob(filesource + idd + '/Empatica/*/' + typed + '.csv'))
    print(configfiles)

    for file in configfiles:
        formatfile(file, idd, typed)
    print(('Completed Import and Export of:' + typed))

In [None]:
# Sensors processed with importandexport(), one combined .csv per entry.
listtyped = ['EDA', 'TEMP', 'HR', 'BVP']
for typed in listtyped:
    importandexport(theid, typed)

## Import & Format ACC
Functions: 
* processAcceleration() - converts 3 axis to float values
* readAccFile() - reads file into dictionary and corrects for time zone
* formatAccfile() - formats into dataframe with time as timestamp using datetime (ISO8601), formats sensor values to float, writes to .csv
* importandexportAcc() - finds all files of sensor type 'ACC' in participant folder and runs formatAccfile() for each input file

In [None]:
def processAcceleration(x,y,z):
    """Return the three axis readings as floats in a dict keyed 'x', 'y', 'z'."""
    axis_names = ('x', 'y', 'z')
    return {axis: float(raw) for axis, raw in zip(axis_names, (x, y, z))}

In [None]:
def readAccFile(file, tz_offset_hours=-4):
    """Read a three-axis accelerometer file into a timestamp -> axes mapping.

    The first non-empty row holds the session start time in epoch seconds
    (first field), the second holds the sampling rate in hertz (first field),
    and every following row is one x, y, z sample.

    Args:
        file: Path of the raw .csv file to read.
        tz_offset_hours: Hours added to the start time as the time zone
            correction. The default of -4 keeps the previous fixed behaviour.

    Returns:
        OrderedDict mapping the corrected epoch-second timestamp (float) of
        each sample to the dict returned by processAcceleration().
    """
    samples = OrderedDict()
    start = None
    hertz = None
    count = 0

    # newline='' lets the csv module handle line endings itself.
    with open(file, 'rt', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                # Blank lines produce empty rows; skip them instead of failing.
                continue
            if start is None:
                start = float(row[0]) + 3600 * tz_offset_hours
            elif hertz is None:
                hertz = float(row[0])
            else:
                # Derive each timestamp from the sample index so rounding
                # error does not build up over a long recording.
                samples[start + count / hertz] = processAcceleration(row[0], row[1], row[2])
                count += 1
    return samples

In [None]:
def formatAccfile(file, idd, typed):
    """Format one accelerometer recording and append it to the combined .csv.

    Reads `file` with readAccFile(), converts the epoch-second keys to
    datetimes, casts the x, y, z columns to float and appends the rows
    (without a header) to ``filesource + idd + '/' + typed + '.csv'``.

    Args:
        file: Path of the raw .csv file for one recording.
        idd: Participant folder name under `filesource`.
        typed: Sensor name; used as the output file name.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S.%f'
    axes = ['x', 'y', 'z']

    readings = readAccFile(file=file)
    # Render the epoch seconds on the UTC clock so the machine's local zone is
    # never applied; fromtimestamp(..., utc) replaces the deprecated
    # utcfromtimestamp and produces the same text.
    readings = {
        datetime.datetime.fromtimestamp(k, datetime.timezone.utc).strftime(stamp_format): v
        for k, v in readings.items()
    }
    frame = pd.DataFrame.from_dict(readings, orient='index', columns=axes)

    for axis in axes:
        frame[axis] = frame[axis].astype(float)

    frame['Datetime'] = frame.index
    # Parse with the same format the strings were written in (space separator).
    frame['Datetime'] = pd.to_datetime(frame['Datetime'], format=stamp_format)
    frame = frame.set_index('Datetime')

    out_filename = (filesource + idd + '/' + typed + '.csv')
    # Append mode: every recording of this sensor ends up in one file.
    frame.to_csv(out_filename, mode='a', header=False)
    print('Done')

In [None]:
def importandexportAcc(idd, typed):
    """Find every accelerometer recording for a participant and format each.

    Args:
        idd: Participant folder name under `filesource`.
        typed: Sensor name; matches ``<typed>.csv`` in each session folder
            under ``<filesource><idd>/Empatica/``.
    """
    # glob returns matches in arbitrary order; sort so recordings are appended
    # to the combined file in a stable, folder-name order.
    configfiles = sorted(glob.glob(filesource + idd + '/Empatica/*/' + typed + '.csv'))
    print(configfiles)

    for file in configfiles:
        formatAccfile(file, idd, typed)
    print(('Completed Import and Export of:' + typed))

In [None]:
# Compile every 'ACC' recording of the configured participant.
importandexportAcc(idd=theid, typed='ACC')

## Import & Format IBI
Functions: 
* importIBI() - reads file into dataframe and corrects for time zone, formats time as timestamp using datetime (ISO8601), formats sensor values to float, writes to .csv
* importandexportIBI() - finds all files of sensor type 'IBI' in participant folder and runs importIBI() for each input file

In [None]:
def importIBI(file, idd, typed, tz_offset_hours=-4):
    """Format one IBI recording and append it to the combined .csv.

    The first row of `file` holds the session start time in epoch seconds;
    the first column of every later row is added to that start time to get
    the row's timestamp. Rows are appended (without a header) to
    ``filesource + idd + '/' + typed + '.csv'``.

    Args:
        file: Path of the raw .csv file for one recording.
        idd: Participant folder name under `filesource`.
        typed: Sensor name; used as the output file name.
        tz_offset_hours: Hours added to the start time as the time zone
            correction. The default of -4 keeps the previous fixed behaviour.
    """
    try:
        IBI = pd.read_csv(file, header=None)
    except pd.errors.EmptyDataError:
        # A completely empty file has nothing to append; skip it so the
        # remaining recordings are still processed.
        print('Skipped empty file: ' + file)
        return

    timestampstart = float(IBI[0][0]) + 3600 * tz_offset_hours
    # Rows after the first hold offsets in seconds from the start time.
    IBI[0] = IBI[0][1:].astype(float) + timestampstart
    IBI = IBI.drop([0])
    # Render on the UTC clock so the machine's local zone is never applied;
    # fromtimestamp(..., utc) replaces the deprecated utcfromtimestamp.
    IBI[0] = IBI[0].apply(
        lambda x: datetime.datetime.fromtimestamp(x, datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S.%f')
    )
    IBI = IBI.set_index(0)

    out_filename = (filesource + idd + '/' + typed + '.csv')
    # Append mode: every recording of this sensor ends up in one file.
    IBI.to_csv(out_filename, mode='a', header=False)
    print('Done')

In [None]:
def importandexportIBI(idd, typed):
    """Find every IBI recording for a participant and format each.

    Args:
        idd: Participant folder name under `filesource`.
        typed: Sensor name; matches ``<typed>.csv`` in each session folder
            under ``<filesource><idd>/Empatica/``.
    """
    # glob returns matches in arbitrary order; sort so recordings are appended
    # to the combined file in a stable, folder-name order.
    configfiles = sorted(glob.glob(filesource + idd + '/Empatica/*/' + typed + '.csv'))
    print(configfiles)

    for file in configfiles:
        importIBI(file, idd, typed)
    print(('Completed Import and Export of:' + typed))

In [None]:
# Compile every 'IBI' recording of the configured participant.
importandexportIBI(idd=theid, typed='IBI')