In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import *
import datetime 
from pytz import timezone
import os
import sys
import json
import rowingdata 
import mne
import re

## COMMON

In [42]:
def processed_output(user_input, watch_choice, readcsv,
                    process_df,get_filenames, preprocess_empatica, add_time_empatica, output):
    """Route a raw export to the handler for the chosen device.

    ``watch_choice`` is lower-cased and checked for the keywords below, in
    order; the first keyword found in it selects the handler. The remaining
    arguments are forwarded to that handler unchanged. If no keyword is found
    nothing is called. The function always returns ``None``.
    """
    choice = watch_choice.lower()

    def run_biovotion():
        # The device id is requested only when this handler is selected.
        device_id = input("Enter Device ID: ")
        biovotion(user_input, device_id, get_filenames, output, choice)

    # Thunks keep handler lookup lazy: only the selected handler name is resolved.
    handlers = (
        ('apple', lambda: apple(readcsv, user_input, process_df, choice)),
        ('fitbit', lambda: fitbit(readcsv, user_input, process_df, output, choice)),
        ('garmin', lambda: garmin(user_input, process_df, output, choice)),
        ('miband', lambda: miband(user_input, process_df, output, choice)),
        ('ecg', lambda: ecg(user_input, process_df, output, choice)),
        ('biovotion', run_biovotion),
        ('empatica', lambda: empatica(user_input, get_filenames, preprocess_empatica,
                                      add_time_empatica, output, choice)),
    )
    for keyword, handler in handlers:
        if keyword in choice:
            handler()
            break

In [3]:
def readcsv(user_input, watch_choice=None):
    """Read a raw CSV export into a DataFrame.

    Parameters
    ----------
    user_input : str or path-like
        Anything ``pd.read_csv`` accepts as its first argument.
    watch_choice : str, optional
        Device name. When omitted, the module-level ``watch_choice`` is used
        if it exists (this keeps existing one-argument callers working);
        if it does not exist the file is read as a regular CSV with a header.

    Returns
    -------
    pandas.DataFrame
        Read with ``header=None`` when ``'apple'`` occurs in the device name,
        otherwise with the first row as header.
    """
    if watch_choice is None:
        # Fall back to the notebook global instead of failing with NameError
        # when this cell runs before the global has been defined.
        watch_choice = globals().get('watch_choice') or ''
    if 'apple' in watch_choice:
        return pd.read_csv(user_input, header = None)
    return pd.read_csv(user_input)

In [16]:
def apple(readcsv, user_input, process_df, watch_choice):
    """Convert an Apple Watch export into a CSV of samples plus a JSON of metadata.

    The raw frame (read without a header) is split at the first row whose
    column 0 contains "Time": rows above it become a metadata dict, the row
    itself becomes the sample header and the rows below are the samples.
    An ``Actual_time`` column is added as workout start + elapsed seconds.

    Parameters
    ----------
    readcsv : callable(user_input) -> DataFrame
    user_input : path handed to ``readcsv``
    process_df : callable(df, watch_choice) -> DataFrame
    watch_choice : str, forwarded to ``process_df``

    Side effects
    ------------
    Writes ``Patient_<n>_df_apple.csv`` and ``Patient_<n>_dict_apple.json``
    into the current working directory.

    Raises
    ------
    ValueError
        If no row of column 0 contains "Time".
    """

    def main(df, watch_choice):
        dictdf, df = dict_df(df)
        # strip() accepts the value with or without surrounding blanks.
        start_time = datetime.datetime.strptime(
            str(dictdf['Workout date'][0]).strip(), '%Y-%m-%d %H:%M:%S')
        df = pre_process_apple(df)
        df = process_df(df, watch_choice)
        df = add_time(df, start_time)
        return dictdf, df

    def dict_df(df):
        """Split the raw frame into (metadata dict, sample rows incl. header row)."""
        header_rows = df.index[df[0].str.contains("Time", na=False)]
        if len(header_rows) == 0:
            raise ValueError('No row containing "Time" found in column 0')
        in_strip = header_rows[0]
        dictdf = df.loc[:(in_strip-1), :].set_index(0).T.to_dict('list')
        return dictdf, df.loc[in_strip:, :]

    def pre_process_apple(df):
        """Promote the first remaining row to column header."""
        df = df.reset_index(drop=True)
        new_header = df.iloc[0]
        # copy() so later column assignments do not target a slice of the raw frame
        df = df.iloc[1:].copy()
        df.columns = new_header
        return df

    def add_time(df, start_time):
        """Add ``Actual_time`` = start_time + 'Time_(seconds)' for every row."""
        seconds = [float(str(value).replace(" ", "")) for value in df['Time_(seconds)']]
        stamps = [start_time + datetime.timedelta(seconds=s) for s in seconds]
        # object dtype keeps plain datetime values, as the row-wise loop produced
        df['Actual_time'] = pd.Series(stamps, index=df.index, dtype=object)
        return df

    def rename_cols(df):
        # NOTE(review): this helper is never called, so the output keeps the
        # 'Time_(seconds)' column name — confirm whether the rename is wanted.
        df = df.rename(columns ={'Time_(seconds)' : 'Elapse_time_(sec)'})
        return df

    def output_dict(dictdf,df, no_patient, watch_choice):
        """Write the sample CSV and metadata JSON once per patient number."""
        for number in range(1, no_patient + 1):
            stem = "Patient_" + str(number)
            df.to_csv(stem + '_df_apple.csv')
            # context manager closes the file even if json.dump fails
            with open(stem + '_dict_apple.json', "w") as out_file:
                json.dump(dictdf, out_file)

    no_patient = 1
    apple_df = readcsv(user_input)
    dictdf, df = main(apple_df, watch_choice)
    output_dict(dictdf, df,no_patient, watch_choice)

In [21]:
def fitbit(readcsv, user_input, process_df, output, watch_choice):
    """Normalise a Fitbit CSV export and hand it to ``output``.

    Parameters
    ----------
    readcsv : callable(user_input) -> DataFrame
    user_input : path handed to ``readcsv`` (also printed)
    process_df : callable(df, watch_choice) -> DataFrame
    output : callable(df, no_patient, watch_choice)
    watch_choice : str

    The frame must contain a ``Time`` column formatted ``%H:%M:%S``. An
    ``Elapsed_time_(sec)`` column (seconds since the first row) is added, then
    ``Time`` is renamed to ``Actual_time`` and ``Heart_Rate`` to
    ``Rate_(beats_per_minute)``.
    """

    def main(df, watch_choice):
        df = process_df(df, watch_choice)
        df = add_time(df)
        df = rename_cols(df)
        return df

    def add_time(df):
        """Add seconds elapsed since the first row, computed from ``Time``."""
        # Works on the frame it is given (not on the enclosing fitbit_df) and
        # addresses the column by name rather than by position.
        clock = pd.to_datetime(df['Time'], format='%H:%M:%S')
        if len(clock) == 0:
            df['Elapsed_time_(sec)'] = []
            return df
        # NOTE(review): only time-of-day is available, so a recording that
        # crosses midnight yields negative values — confirm whether that can occur.
        df['Elapsed_time_(sec)'] = (clock - clock.iloc[0]).dt.total_seconds()
        return df

    def rename_cols(df):
        df = df.rename(columns ={'Time' : 'Actual_time', 'Heart_Rate' : 'Rate_(beats_per_minute)'})
        return df

    no_patient = 1
    print(user_input)
    fitbit_df = readcsv(user_input)
    df = main(fitbit_df, watch_choice)
    output(df, no_patient, watch_choice)

In [50]:
def garmin(user_input, process_df, output, watch_choice):
    """Normalise a Garmin TCX export and hand it to ``output``.

    The TCX file is parsed with ``rowingdata.TCXParser``, written to
    ``garmin_df.csv`` in the current working directory and read back as a
    DataFrame. After ``process_df`` the frame must contain ``TimeStamp_(sec)``
    and ``index`` columns.

    Parameters
    ----------
    user_input : path of the TCX file
    process_df : callable(df, watch_choice) -> DataFrame
    output : callable(df, no_patient, watch_choice)
    watch_choice : str
    """

    garmin_df = rowingdata.TCXParser(user_input)
    garmin_df.write_csv("garmin_df.csv")
    garmin_df = pd.read_csv('./garmin_df.csv')

    time_format = '%Y-%m-%d %H:%M:%S'

    def main(df, watch_choice):
        df = process_df(df, watch_choice)
        df = add_time(df)
        df = rename_cols(df)
        df = df.drop('index', axis = 1)
        return df

    def add_time(df):
        """Add ``Actual_time`` (UTC, whole seconds, as text) and elapsed seconds."""
        df['Actual_time'] = df['TimeStamp_(sec)'].apply(
            lambda a : datetime.datetime.fromtimestamp(float(a), datetime.timezone.utc).strftime(time_format))
        # Elapsed time is derived from the formatted (whole-second) text, and the
        # column is addressed by name instead of a fixed position.
        parsed = pd.to_datetime(df['Actual_time'], format=time_format)
        if len(parsed) == 0:
            df['Elapsed_time_(sec)'] = []
            return df
        df['Elapsed_time_(sec)'] = (parsed - parsed.iloc[0]).dt.total_seconds()
        return df

    def rename_cols(df):
        df = df.rename(columns ={'Hrcur_(bpm)' : 'Rate_(beats_per_minute)',
                                 'Distancemeters' : 'Distance_(meters)',
                                 'Strokedistance_(meters)' : 'Stroke_distance_(meters)',
                                 'Stroke500mpace_(sec/500m)' : 'Stroke_500m_pace_(sec/500m)',
                                })
        return df

    no_patient = 1
    df = main(garmin_df, watch_choice)
    output(df, no_patient, watch_choice)

In [7]:
def miband(user_input, process_df, output, watch_choice):
    """Normalise a Mi Band Excel export and hand it to ``output``.

    The sheet is read with ``pd.read_excel``. After ``process_df`` it must
    contain ``Date`` and ``Time`` columns that can be joined as text and
    parsed by ``pd.to_datetime``.

    Parameters
    ----------
    user_input : path of the Excel file
    process_df : callable(df, watch_choice) -> DataFrame
    output : callable(df, no_patient, watch_choice)
    watch_choice : str
    """
    mi_df = pd.read_excel(user_input)

    def main(df, watch_choice):
        df = process_df(df, watch_choice)
        df = add_time(df)
        df = rename_cols(df)
        return df

    def add_time(df):
        """Add ``Actual_time`` and seconds elapsed since the first row."""
        df['Actual_time'] = pd.to_datetime(df['Date'] + ' ' + df['Time'])
        if len(df) == 0:
            df['Elapsed_time_(sec)'] = []
            return df
        # Column addressed by name: its position depends on how many columns
        # the sheet has, so a fixed position is not reliable.
        start_time = df['Actual_time'].iloc[0]
        df['Elapsed_time_(sec)'] = (df['Actual_time'] - start_time).dt.total_seconds()
        return df

    def rename_cols(df):
        df = df.rename(columns ={'Heart_rate' : 'Rate_(beats_per_minute)'})
        return df

    no_patient = 1
    df = main(mi_df, watch_choice)
    output(df, no_patient, watch_choice)

In [33]:
def ecg(user_input, process_df, output, watch_choice):
    """Normalise an EDF recording and hand it to ``output``.

    The recording is opened with ``mne.io.read_raw_edf``, dumped to
    ``ECG.csv`` in the current working directory (one column per channel) and
    read back as a DataFrame. The ``Marker`` and ``HRV`` columns are dropped,
    time columns are added and ``#_ecg`` is renamed to ``ECG_(mV)``.

    Parameters
    ----------
    user_input : path of the EDF file
    process_df : callable(df, watch_choice) -> DataFrame
    output : callable(df, no_patient, watch_choice)
    watch_choice : str

    Raises
    ------
    ValueError
        If the recording's ``info['meas_date']`` is ``None``.
    """
    data = mne.io.read_raw_edf(user_input)

    def get_data(data):
        """Round-trip the signal through ECG.csv; return (frame, info)."""
        header = ','.join(data.ch_names)
        np.savetxt('ECG.csv', data.get_data().T, delimiter=',', header=header)
        ECG_df = pd.read_csv("./ECG.csv")
        return ECG_df, data.info

    def main(info, df, watch_choice):
        df = pre_process_ECG(df)
        df = process_df(df, watch_choice)
        df = add_time(df, info)
        df = rename_cols(df)
        return df

    def pre_process_ECG(df):
        return df.drop(['Marker', 'HRV'], axis = 1)

    def add_time(df, info):
        """Add elapsed seconds (row / sfreq) and the matching absolute time."""
        start_time = info['meas_date']
        if start_time is None:
            raise ValueError("Recording has no measurement date (info['meas_date'] is None)")
        # Each offset is computed directly from the row number. Accumulating a
        # timedelta of 1/sfreq rounds the step to whole microseconds and the
        # error grows with every sample (e.g. at 256 Hz).
        elapsed = np.arange(len(df)) / info['sfreq']
        df['Elapsed_time_(sec)'] = elapsed
        df['Actual_time'] = pd.Timestamp(start_time) + pd.to_timedelta(elapsed, unit='s')
        return df

    def rename_cols(df):
        df = df.rename(columns ={'#_ecg' : 'ECG_(mV)'})
        return df

    no_patient = 1
    df, info = get_data(data)
    df = main(info, df, watch_choice)
    output(df, no_patient, watch_choice)

In [45]:
def biovotion(user_input, device_id, get_filenames, output, watch_choice):
    """Merge the per-signal CSV files of a Biovotion export and hand the result to ``output``.

    Parameters
    ----------
    user_input : folder containing the CSV files
    device_id : device identifier used to locate the signal name in each file name
    get_filenames : callable(user_input) -> list of file names
    output : callable(df, no_patient, watch_choice)
    watch_choice : str, forwarded to ``output``

    Only files whose name contains ``'bop'`` are read. The first four columns
    of the last such file are kept as shared columns; in every file the
    columns whose name contains one of the shared markers are dropped and the
    rest are suffixed with the signal name extracted from the file name.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    shared_markers = ('Device Instance ID', 'Counter', 'Timestamp', 'Timezone')

    def main(user_input, device_id, watch_choice):
        filenames = get_filenames(user_input)
        colnames = extract_names(device_id, filenames)
        dataframes, commoncols = read_data(filenames, colnames)
        df = create_df_final(dataframes, commoncols)
        df = add_time(df)
        return df

    def extract_names(device_id, filenames):
        """Extract the signal name from each file name after the first."""
        colnames = []
        # NOTE(review): filenames[0] is skipped here but NOT in read_data, so the
        # suffixes only line up when the first entry is not a 'bop' file — confirm.
        for filename in filenames[1:]:
            if 'bop' not in filename:
                # NOTE(review): returns None, which read_data cannot handle — verify intent.
                return
            if 'VitalSign' not in filename:
                match = re.search('515_(.*)_' + str(device_id), filename)
            else:
                match = re.search('n_(.*)_' + str(device_id), filename)
            colnames.append(match.group(0).split('_')[1])
        return colnames

    def read_data(filenames, colnames):
        """Read every 'bop' file; return (signal frames, shared columns)."""
        dataframes = []
        for filename in filenames:
            if 'bop' in filename:
                # os.path.join instead of a hard-coded backslash keeps this portable
                dataframe = pd.read_csv(os.path.join(user_input, filename))
                common_cols = pd.DataFrame(dataframe.iloc[:,:4])
                for marker in shared_markers:
                    dataframe = dataframe[dataframe.columns[~dataframe.columns.str.contains(marker)]]
                dataframes.append(dataframe)
        for i in range(len(colnames)):
            dataframes[i].columns = [str(col) + "_" + str(colnames[i]) for col in dataframes[i].columns]
        return dataframes, common_cols

    def create_df_final(dataframes, commoncols):
        # sort takes a bool; the string 'False' is not a valid value
        df_concat = pd.concat(dataframes, axis = 1, sort = False)
        df_bio = pd.concat([df_concat, commoncols], axis = 1, sort = False)
        return df_bio

    def add_time(df):
        """Add ``Actual_time`` (UTC, whole seconds, as text) and elapsed seconds."""
        df['Actual_time'] = df['Timestamp'].apply(
            lambda a : datetime.datetime.fromtimestamp(float(a), datetime.timezone.utc).strftime(time_format))
        parsed = pd.to_datetime(df['Actual_time'], format=time_format)
        if len(parsed) == 0:
            df['Elapsed_time_(sec)'] = []
            return df
        df['Elapsed_time_(sec)'] = (parsed - parsed.iloc[0]).dt.total_seconds()
        return df

    no_patient = 1
    df = main(user_input, device_id, watch_choice)
    output(df, no_patient, watch_choice)

In [48]:
def empatica(user_input, get_filenames, preprocess_empatica, add_time_empatica, output, watch_choice):
    """Merge the CSV files of an Empatica export and hand the result to ``output``.

    Parameters
    ----------
    user_input : folder containing the CSV files
    get_filenames : callable(user_input) -> list of file names
    preprocess_empatica : callable(frame) -> (frame, time, freq)
    add_time_empatica : callable(frame, time, freq, label) -> frame
    output : callable(df, no_patient, watch_choice)
    watch_choice : str, stored in the ``Watch_type`` column

    Each file is read without a header; the upper-cased file stem decides how
    it is treated (ACC, BVP, EDA, HR, IBI, TEMP, TAGS). All resulting frames
    are concatenated side by side.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    def _utc_whole_second(epoch):
        """Naive UTC datetime for ``epoch``, truncated to whole seconds."""
        moment = datetime.datetime.fromtimestamp(float(epoch), datetime.timezone.utc)
        return moment.replace(tzinfo=None, microsecond=0)

    def _add_acc_times(dataframe, time, freq):
        """Add Timestamp/Actual_time/Elapsed_time columns for the x, y and z axes."""
        axes = ('x', 'y', 'z')
        # Placeholders are created first so the column order of the output is stable.
        for axis in axes:
            dataframe['Actual_time_' + axis] = np.nan
        for position, axis in enumerate(axes):
            dataframe['Timestamp_' + axis] = time[position]
        for position, axis in enumerate(axes):
            start = _utc_whole_second(time[position])
            step = datetime.timedelta(seconds = 1/freq[position])
            dataframe['Actual_time_' + axis] = [start + i * step for i in range(len(dataframe))]
            # Every axis uses its OWN start time and sample times for the elapsed column.
            dataframe['Elapsed_time' + axis + '_(sec)'] = dataframe['Actual_time_' + axis].apply(
                lambda x, start=start : (x - start).total_seconds())
        return dataframe

    def read_data(user_input, filenames, preprocess_empatica, add_time_empatica):
        dataframes = []
        for file in filenames:
            # os.path.join instead of a hard-coded backslash keeps this portable
            dataframe = pd.read_csv(os.path.join(user_input, str(file)), header = None)
            file = file.split(".")[0]
            file = file.upper()

            if 'ACC' in file:
                dataframe, time, freq = preprocess_empatica(dataframe)
                dataframe = dataframe.rename(columns = {0 : 'x_acc', 1: 'y_acc', 2: 'z_acc'})
                dataframe = _add_acc_times(dataframe, time, freq)

            if 'BVP' in file:
                dataframe, time, freq = preprocess_empatica(dataframe)
                dataframe = dataframe.rename(columns = {0 : 'BVP'})
                dataframe = add_time_empatica(dataframe, time, freq, 'BVP')

            if 'EDA' in file:
                dataframe, time, freq = preprocess_empatica(dataframe)
                dataframe = dataframe.rename(columns = {0 : 'EDA(microsiemens)'})
                dataframe = add_time_empatica(dataframe, time, freq, 'EDA')

            if 'HR' in file:
                dataframe, time, freq = preprocess_empatica(dataframe)
                dataframe = dataframe.rename(columns = {0 : 'HR'})
                dataframe = add_time_empatica(dataframe, time, freq, 'HR')

            if 'IBI' in file:
                # The first row is discarded; the remaining rows are the samples.
                dataframe = dataframe.iloc[1:]
                dataframe = dataframe.rename(columns = {0 : 'Time(sec)', 1: 'IBI'})
                dataframe = dataframe.reset_index(drop = True)

            if 'TEMP' in file:
                dataframe, time, freq = preprocess_empatica(dataframe)
                dataframe = dataframe.rename(columns = {0 : 'Temp(celsius)'})
                dataframe = add_time_empatica(dataframe, time, freq, 'Temp')

            if 'TAGS' in file:
                dataframe = dataframe.rename(columns = {0 : 'Timestamp_buttonpress'})
                dataframe['Time'] = dataframe['Timestamp_buttonpress'].apply(
                    lambda a : datetime.datetime.fromtimestamp(float(a), datetime.timezone.utc).strftime(time_format))
            dataframes.append(dataframe)
        return dataframes

    def all_dfs(dataframes):
        df = pd.concat(dataframes, axis = 1)
        return df

    def main(user_input, preprocess_empatica, add_time_empatica, all_dfs, watch_choice):
        filenames = get_filenames(user_input)
        dataframes = read_data(user_input, filenames, preprocess_empatica, add_time_empatica)
        df = all_dfs(dataframes)
        df['Watch_type'] = watch_choice
        return df

    no_patient = 1
    df = main(user_input, preprocess_empatica, add_time_empatica, all_dfs, watch_choice)
    output(df,no_patient, watch_choice)

In [10]:
"""
Common
"""
def process_df(df, watch_choice):
    """Tidy the column names of ``df`` in place and tag it with the device name.

    Leading spaces are removed from every column name and the remaining
    spaces become underscores; a ``Watch_type`` column holding
    ``watch_choice`` is then added. The same (mutated) frame is returned.
    """
    df.columns = df.columns.str.lstrip(' ').str.replace(' ','_')
    df['Watch_type'] = watch_choice
    return df

In [11]:
def output(df, no_patient, watch_choice):
    """Write ``df`` to ``Patient_<n>_df_output_<watch_choice>.csv`` for n = 1..no_patient.

    Files are written to the current working directory. Nothing is written
    when ``no_patient`` is 0.
    """
    # Direct numbering replaces the former names[j-1] lookup, which only reached
    # every name through negative-index wraparound.
    for patient_number in range(1, no_patient + 1):
        df.to_csv("Patient_" + str(patient_number) + '_df_output_' + str(watch_choice) + '.csv')

In [12]:
def get_filenames(user_input):
    """List the entries of the folder ``user_input`` whose name ends with '.csv'.

    Names are returned without the folder prefix, in the order
    ``os.listdir`` yields them.
    """
    return [entry for entry in os.listdir(user_input) if entry.endswith('.csv')]

In [47]:
def preprocess_empatica(dataframe):
    """Separate the two leading rows of a raw frame from the rows below them.

    Returns ``(samples, time, freq)``: ``time`` is the first row and ``freq``
    the second row (both as arrays); ``samples`` holds the remaining rows,
    re-indexed from 0.
    """
    leading = dataframe.values
    time, freq = leading[0], leading[1]
    samples = dataframe.iloc[2:].reset_index().drop('index', axis = 1)
    return samples, time, freq

In [14]:
def add_time_empatica(dataframe, time, freq, filename):
    """Add timestamp columns, suffixed with ``_<filename>``, to ``dataframe``.

    ``Timestamp_<filename>`` holds ``time[0]`` on every row.
    ``Actual_time_<filename>`` starts at ``time[0]`` (read as UTC, truncated
    to whole seconds) and advances by ``1/freq[0]`` seconds per row.
    ``Elapsed_time_(sec)_<filename>`` is the offset of each row from that
    start, in seconds. The frame is modified in place and returned.
    """
    suffix = '_' + str(filename)
    actual_col = 'Actual_time' + suffix

    dataframe['Timestamp' + suffix] = time[0]
    step = datetime.timedelta(seconds = 1/freq[0])
    # Dropping the microseconds is what the text round-trip through
    # '%Y-%m-%d %H:%M:%S' amounts to.
    origin = datetime.datetime.utcfromtimestamp(time[0]).replace(microsecond = 0)

    stamps = [origin]
    stamps.extend(origin + k * step for k in range(1, len(dataframe)))
    dataframe[actual_col] = stamps
    dataframe['Elapsed_time_(sec)' + suffix] = dataframe[actual_col].apply(
        lambda moment : (moment - origin).total_seconds())
    return dataframe

In [51]:
"""
To process multiple files at once, path of the folder where the files are stored
"""

print("For Biovotion and Empatica, please enter the path of the folder where all the files are stored, for the rest you can select a file")
watch_choice = input("Choose the type of watch: ")
# strip() so a stray leading/trailing blank does not make the choice unrecognisable
watch_choice = watch_choice.strip().lower()

if watch_choice in ('apple', 'fitbit', 'garmin', 'miband', 'ecg'):
    # Single-file devices: let the user pick the file in a dialog.
    # Imported here so folder-based devices do not need a GUI toolkit.
    from tkinter import Tk
    from tkinter import filedialog

    root = Tk()
    root.withdraw()  # hide the empty root window; only the dialog is shown
    try:
        selected_files = filedialog.askopenfilenames()
    finally:
        root.destroy()

    if selected_files:
        # Only the first selected file is processed.
        user_input = selected_files[0]
        processed_output(user_input, watch_choice, readcsv,
                        process_df, get_filenames, preprocess_empatica, add_time_empatica, output)
    else:
        # The dialog returns an empty selection when it is cancelled.
        print("No file selected; nothing was processed.")

elif watch_choice in ('biovotion', 'empatica'):
    # Folder-based devices: the folder path is typed in.
    user_input = input("Enter the path of the folder of raw files: ")
    processed_output(user_input, watch_choice, readcsv,
                    process_df,get_filenames, preprocess_empatica, add_time_empatica, output)

else:
    print("Unrecognised watch type: " + repr(watch_choice))

For Biovotion and Empatica, please enter the path of the folder where all the files are stored, for the rest you can select a file
Choose the type of watch: garmin
