In [1]:
import numpy as np
import os
import pandas as pd
import copy 
import datetime 
# Dataset root, relative to this notebook; the raw CSVs are read from its
# 'raw_data' sub-folder.
data_path = '../../../code_data/arises/'
# NOTE(review): not referenced by any cell visible here — presumably a tag
# for the processing variant; confirm before removing.
process_file = 'coldstart_fl'

# Participant ids 1001 .. 1012 (inclusive).
pid_list = list(range(1001, 1013))


In [2]:
# Load each participant's raw CSV into a DataFrame indexed by timestamp.
# Result: pid2regular_df maps participant id -> float64 DataFrame sorted by `ts`.
pid2regular_df = {}
names = [
    'ts', 'glucose_level', 'isRealBGL', 'basal', 'meal',
    'bolus', 'correction_bolus', 'finger_stick', 'hypoStart',
    'EDA', 'SCL', 'SCR', 'HR', 'TEMP', 'ACC', 'RMSSD', 'SDNN',
    'medianNNI', 'CVNNI', 'CVSD', 'pNNX',
    'meanHR', 'minHR', 'maxHR', 'VLF', 'LF', 'HF', 'LHR',
]
# Read exactly as many leading columns as there are names (28), so the two
# can never drift apart.
usecols = list(range(len(names)))
for pid in pid_list:
    load_path = os.path.join(data_path, 'raw_data', f'dc_{pid}_data.csv')
    # NOTE(review): `header=1` together with `names` makes pandas treat file
    # line 1 as the header and start the data after it, so the first two
    # lines of the file are not loaded — confirm the CSVs really carry two
    # header lines.
    df = pd.read_csv(load_path, names=names, usecols=usecols, header=1)

    # Timestamps that cannot be parsed become NaT instead of raising.
    df['ts'] = pd.to_datetime(df['ts'], dayfirst=True, errors='coerce')
    df = df.set_index('ts').sort_index()

    # Keep a glucose value only where the row is flagged `isRealBGL`; every
    # other row gets NaN. Vectorised equivalent of a per-row check; the
    # explicit `== True` is deliberate so that non-boolean flags (e.g. NaN)
    # count as "not real".
    df['glucose_level'] = df['glucose_level'].where(df['isRealBGL'] == True)  # noqa: E712
    df = df.drop(['hypoStart', 'isRealBGL'], axis=1)
    df = df.astype(np.float64)
    # Exact zeros are treated as missing values in every remaining column.
    df = df.replace(0.0, np.nan)
    pid2regular_df[pid] = df
    print(f'{pid}, total_points:{len(df)}, start:{df.index[0]}, end:{df.index[-1]}')


1001, total_points:15831, start:2019-02-28 12:43:28, end:2019-04-24 11:51:50
1002, total_points:13215, start:2019-03-04 14:18:20, end:2019-04-19 11:25:42
1003, total_points:14896, start:2019-03-04 19:17:53, end:2019-04-25 12:31:25
1004, total_points:11517, start:2019-03-05 13:47:53, end:2019-04-14 13:25:57
1005, total_points:14064, start:2019-03-07 13:48:01, end:2019-04-25 09:41:10
1006, total_points:13464, start:2019-03-08 14:15:27, end:2019-04-24 08:08:52
1007, total_points:13066, start:2019-04-29 13:35:19, end:2019-06-13 22:18:07
1008, total_points:14307, start:2019-04-29 16:00:45, end:2019-06-18 08:08:44
1009, total_points:14201, start:2019-04-30 12:41:20, end:2019-06-18 19:59:44
1010, total_points:12911, start:2019-04-30 16:12:50, end:2019-06-14 12:01:44
1011, total_points:13018, start:2019-05-07 12:41:15, end:2019-06-21 17:25:02
1012, total_points:15797, start:2019-05-07 16:05:46, end:2019-07-01 12:23:44


In [3]:
# add time features

def add_time_attributes(pid2data):
    """Add time-derived feature columns to every DataFrame in ``pid2data``.

    Each frame must be indexed by datetimes. The frames are modified in
    place and the same mapping object is returned.

    Columns added to each frame:
        day_of_week : day of the week as float (Monday = 0.0).
        hour        : hour of the day as float.
        minute      : minute of the hour as float.
        timestamp   : seconds elapsed since midnight as float.
        sin_time    : sine of the time of day mapped onto a 24 h circle.
        cos_time    : cosine of the time of day mapped onto a 24 h circle.
        datastamp   : calendar date encoded as the integer YYYYMMDD.

    Args:
        pid2data: mapping of participant id -> DataFrame with a datetime index.

    Returns:
        The ``pid2data`` mapping that was passed in.
    """
    seconds_in_day = 24 * 60 * 60

    for data in pid2data.values():
        stamps = data.index

        data['day_of_week'] = stamps.dayofweek.astype(np.float64)
        data['hour'] = stamps.hour.astype(np.float64)
        data['minute'] = stamps.minute.astype(np.float64)

        seconds_since_midnight = stamps.hour * 3600 + stamps.minute * 60 + stamps.second
        data['timestamp'] = seconds_since_midnight.astype(np.float64)

        # Cyclical encoding: 23:59 and 00:00 end up next to each other.
        angle = 2 * np.pi * data['timestamp'] / seconds_in_day
        # The casts are assigned back; calling .astype() without keeping the
        # result would have no effect.
        data['sin_time'] = np.sin(angle).astype(np.float64)
        data['cos_time'] = np.cos(angle).astype(np.float64)

        # Integer date key (e.g. 20190304); left as an integer column.
        data['datastamp'] = stamps.year * 10000 + stamps.month * 100 + stamps.day

    return pid2data
# Attach the time features, then keep only the rows that carry a glucose
# reading (rows whose glucose_level is NaN are dropped).
pid2regular_df = add_time_attributes(pid2regular_df)
for pid, data in pid2regular_df.items():
    data = data.loc[data['glucose_level'].notna()]
    pid2regular_df[pid] = data

In [4]:
def f(x):
    """Return 1.509 * (ln(x) ** 1.084 - 5.381).

    NOTE(review): the constants look like the symmetrising blood-glucose
    risk transform for readings in mg/dL — confirm the unit of the input.
    """
    log_x = np.log(x)
    return 1.509 * (log_x ** 1.084 - 5.381)

def cal_LBGI(x):
    """Return 10 * f(x)**2 when f(x) <= 0, otherwise 0.

    Only readings on the non-positive side of the transform ``f``
    contribute; everything else scores zero.
    """
    transformed = f(x)
    if transformed <= 0:
        return 10 * transformed ** 2
    return 0

def cal_HBGI(x):
    """Return 10 * f(x)**2 when f(x) > 0, otherwise 0.

    Only readings on the positive side of the transform ``f`` contribute;
    everything else scores zero.
    """
    transformed = f(x)
    if transformed > 0:
        return 10 * transformed ** 2
    return 0
# Per-participant summary statistics of the glucose readings.
# count2count_list maps a metric name to a list holding one value per
# participant, in the iteration order of pid2regular_df. The key order below
# is also the order in which the metrics are printed later.
count2count_list = {
    metric: []
    for metric in (
        'No. of Days', 'CV', 'TIR', 'TBR', 'TAR',
        'LBGI', 'HBGI',
        'Mean of CGM data', 'SD of CGM data', 'No. of CGM records',
    )
}

# Bounds of the target glucose range (presumably mg/dL — confirm). Both
# bounds count as "in range".
target_range_min = 70
target_range_max = 180

for pid in pid2regular_df:
    participant_df = pid2regular_df[pid]
    glucose_values = participant_df['glucose_level']
    total_time_points = len(glucose_values)

    # One row per distinct `datastamp` value, i.e. per calendar date present.
    glucose_avg_by_day = participant_df[['glucose_level', 'datastamp']].groupby('datastamp').mean()
    count2count_list['No. of Days'].append(len(glucose_avg_by_day))

    # Mean, standard deviation (pandas default) and record count.
    glucose_mean = glucose_values.mean()
    glucose_std = glucose_values.std()
    count2count_list['Mean of CGM data'].append(glucose_mean)
    count2count_list['SD of CGM data'].append(glucose_std)
    count2count_list['No. of CGM records'].append(total_time_points)

    # Coefficient of variation, in percent.
    count2count_list['CV'].append((glucose_std / glucose_mean) * 100)

    # Share of records inside / below / above the target range, in percent.
    time_in_range = glucose_values.between(target_range_min, target_range_max).sum()
    tir_percentage = (time_in_range / total_time_points) * 100
    count2count_list['TIR'].append(tir_percentage)

    time_below_range = glucose_values.lt(target_range_min).sum()
    tbr_percentage = (time_below_range / total_time_points) * 100
    count2count_list['TBR'].append(tbr_percentage)

    time_above_range = glucose_values.gt(target_range_max).sum()
    tar_percentage = (time_above_range / total_time_points) * 100
    count2count_list['TAR'].append(tar_percentage)

    # Mean of the per-record low / high scores.
    LBGI = glucose_values.apply(cal_LBGI)
    HBGI = glucose_values.apply(cal_HBGI)
    count2count_list['LBGI'].append(LBGI.mean())
    count2count_list['HBGI'].append(HBGI.mean())
    


In [6]:
import xlwt

# Write the cohort-level summary to 'arises.xls': one row per metric,
# formatted as "mean(std)" across participants.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('Main', cell_overwrite_ok=True)

row_names = [
    'No. of Days',
    'No. of CGM records',
    'Mean of CGM data',
    'SD of CGM data',
    'TIR',
    'TBR',
    'TAR',
    'CV',
    'LBGI',
    'HBGI',
]

# Metrics start at sheet row 3: column 0 holds the name, column 1 the value.
for r_idx, r_name in enumerate(row_names):
    sheet.write(r_idx + 3, 0, r_name)

    per_participant = count2count_list[r_name]
    mean_value = np.mean(per_participant)
    std_value = np.std(per_participant)  # NumPy default standard deviation
    if 'No' in r_name:
        # Count-like rows ("No. of ...") are truncated to whole numbers.
        print_str = f'{int(mean_value)}({int(std_value)})'
    else:
        print_str = f'{mean_value:.2f}({std_value:.2f})'
    sheet.write(r_idx + 3, 1, print_str)
book.save('arises.xls')

# Echo the unrounded mean and standard deviation of every metric.
for k, per_participant in count2count_list.items():
    print(k, np.mean(per_participant), np.std(per_participant))

No. of Days 49.0 4.0
CV 35.14479775901872 4.469392093920178
TIR 63.29116174988036 15.997617740927524
TBR 2.9246056438832633 1.9136272259956326
TAR 33.78423260623638 17.009640422881322
LBGI 0.7837967165325365 0.4567854522895121
HBGI 7.5889000829848845 4.1665294915465205
Mean of CGM data 161.25314844059844 26.01804947466335
SD of CGM data 57.06354672290676 13.502595392607377
No. of CGM records 13324.833333333334 1081.1200853230362
