In [7]:
import numpy as np
import pandas as pd
import os

# --- Paths and run configuration -------------------------------------------
data_path = '../../../code_data/replace-bg/raw_data/DataTables'
data_save_path = '../../../code_data/replace-bg/'
process_file = 'coldstart_fl'

# --- Load the raw CGM table (pipe-delimited text) ---------------------------
cgm_table_path = os.path.join(data_path, 'HDeviceCGM.txt')
data_cgm = pd.read_csv(cgm_table_path, sep='|')

# The file stores time as "days from enrollment" plus a clock time, so an
# absolute timestamp is rebuilt by adding both offsets to a fixed anchor date.
# NOTE(review): the anchor date looks arbitrary (only relative time is in the
# file) -- confirm nothing downstream depends on the actual calendar date.
base = pd.Timestamp('2023-10-09')
enroll_day_offset = pd.to_timedelta(data_cgm['DeviceDtTmDaysFromEnroll'], unit='D')
clock_offset = pd.to_timedelta(data_cgm['DeviceTm'])
data_cgm['ts'] = base + enroll_day_offset + clock_offset

# Use the column names the rest of the notebook works with.
data_cgm = data_cgm.rename(columns={'GlucoseValue': 'glucose_level', 'PtID': 'pid'})

# A reading of exactly 0.0 is treated as missing.
glucose_without_zeros = data_cgm['glucose_level'].replace(0.0, np.nan)
data_cgm['glucose_level'] = glucose_without_zeros
data_cgm.head()

Unnamed: 0,RecID,ParentHDeviceUploadsID,pid,SiteID,DeviceDtTmDaysFromEnroll,DeviceTm,DexInternalDtTmDaysFromEnroll,DexInternalTm,RecordType,glucose_level,ts
0,1655236,782,183,12,-6,05:35:41,-6.0,12:37:02,CGM,162.0,2023-10-03 05:35:41
1,1655237,782,183,12,-6,05:30:41,-6.0,12:32:02,CGM,164.0,2023-10-03 05:30:41
2,1655238,782,183,12,-6,05:25:41,-6.0,12:27:02,CGM,168.0,2023-10-03 05:25:41
3,1655239,782,183,12,-6,05:20:41,-6.0,12:22:02,CGM,169.0,2023-10-03 05:20:41
4,1655240,782,183,12,-6,05:15:41,-6.0,12:17:02,CGM,170.0,2023-10-03 05:15:41


In [8]:
import copy
import datetime
# Unique participant ids found in the CGM table.
pid_list = list(set(data_cgm['pid']))

# One time-indexed frame per participant.
#
# A single groupby pass replaces the previous "boolean mask per pid" loop,
# which re-scanned the whole table once for every participant, and the
# method chain replaces `set_index(..., inplace=True)` on a filtered slice
# (chained in-place mutation that can raise SettingWithCopyWarning).
# Each frame is indexed by 'ts', sorted chronologically, and has the now
# redundant 'pid' column removed.
# Rows whose pid is missing are not assigned to any participant (groupby
# drops NaN keys by default).
pid2data = {
    pid: (
        pid_rows
        .set_index('ts')
        # mergesort is stable, so readings sharing a timestamp keep file order
        .sort_index(kind='mergesort')
        .drop(columns=['pid'])
    )
    for pid, pid_rows in data_cgm.groupby('pid', sort=False)
}

In [9]:
# add time features

def add_time_attributes(pid2data):
    """Add calendar and time-of-day feature columns to each per-participant frame.

    Every frame in ``pid2data`` is modified in place; its index must be
    datetime-like (the features are read from ``index.hour`` etc.).

    Columns added (all float64 except ``datastamp``):
        day_of_week : ``index.dayofweek`` (pandas convention, Monday == 0)
        hour, minute: clock components of the index
        timestamp   : seconds elapsed since midnight
        sin_time, cos_time : cyclic encoding of ``timestamp`` over a 24 h period
        datastamp   : calendar date packed into an integer as YYYYMMDD
                      (column name kept as-is; later cells group by it)

    Parameters
    ----------
    pid2data : dict
        Maps a participant id to a DataFrame with a datetime-like index.

    Returns
    -------
    dict
        The same ``pid2data`` object that was passed in.
    """
    seconds_in_day = 24 * 60 * 60

    for pid in pid2data:
        data = pid2data[pid]
        # Read the components straight from the index and assign plain arrays,
        # so assignment is positional and never depends on label alignment
        # (relevant when several readings share a timestamp).
        stamps = data.index

        data['day_of_week'] = np.asarray(stamps.dayofweek, dtype=np.float64)
        data['hour'] = np.asarray(stamps.hour, dtype=np.float64)
        data['minute'] = np.asarray(stamps.minute, dtype=np.float64)

        seconds_since_midnight = stamps.hour * 3600 + stamps.minute * 60 + stamps.second
        data['timestamp'] = np.asarray(seconds_since_midnight, dtype=np.float64)

        # Cyclic encoding so that 23:59 and 00:00 end up close together.
        # The float64 casts are now assigned back; previously their results
        # were discarded, which made those statements no-ops.
        data['sin_time'] = np.sin(2 * np.pi * data['timestamp'] / seconds_in_day).astype(np.float64)
        data['cos_time'] = np.cos(2 * np.pi * data['timestamp'] / seconds_in_day).astype(np.float64)

        # Integer day key, e.g. 2023-10-03 -> 20231003.
        data['datastamp'] = np.asarray(stamps.year * 10000 + stamps.month * 100 + stamps.day)

    return pid2data
# Attach the time features, then keep only rows with an actual glucose reading.
# add_time_attributes returns the dict it was given, so pid2regular_df and
# pid2data refer to the same object after this cell.
pid2regular_df = add_time_attributes(pid2data)
for pid, data in list(pid2regular_df.items()):
    has_reading = data['glucose_level'].notna()
    data = data.loc[has_reading]
    pid2regular_df[pid] = data

In [10]:
def f(x):
    """Map a glucose value to a signed risk-scale value.

    Computes ``1.509 * (ln(x) ** 1.084 - 5.381)``. Because it is built from
    NumPy operations it accepts scalars as well as arrays / Series.

    NOTE(review): the constants look like the mg/dL form of the symmetrizing
    blood-glucose risk transform used for LBGI/HBGI -- confirm the source.
    """
    log_glucose = np.log(x)
    powered = log_glucose ** 1.084
    return 1.509 * (powered - 5.381)

def cal_LBGI(x):
    """Return the low-side risk contribution of a single glucose value.

    The value is ``10 * f(x) ** 2`` when ``f(x) <= 0`` and 0 otherwise
    (including when the comparison is false because ``f(x)`` is NaN).
    Scalar input only: the branch needs a single truth value.
    """
    risk = f(x)
    if risk <= 0:
        return 10 * risk ** 2
    # Not on the low side: contributes nothing.
    return 0

def cal_HBGI(x):
    """Return the high-side risk contribution of a single glucose value.

    The value is ``10 * f(x) ** 2`` when ``f(x) > 0`` and 0 otherwise
    (including when the comparison is false because ``f(x)`` is NaN).
    Scalar input only: the branch needs a single truth value.
    """
    risk = f(x)
    if risk > 0:
        return 10 * risk ** 2
    # Not on the high side: contributes nothing.
    return 0
# Per-participant summary metrics. Each key maps to a list with one entry per
# participant; later cells aggregate these lists across the cohort.
# Key order is kept because the console summary iterates the dict in order.
count2count_list = {
    metric_name: []
    for metric_name in [
        'No. of Days', 'CV', 'TIR', 'TBR', 'TAR',
        'LBGI', 'HBGI',
        'Mean of CGM data', 'SD of CGM data', 'No. of CGM records',
    ]
}

# Target glucose range (inclusive bounds) used for TIR / TBR / TAR.
# Hoisted out of the loop: the thresholds do not change per participant.
target_range_min = 70
target_range_max = 180

for pid in pid2regular_df:
    glucose_values = pid2regular_df[pid]['glucose_level']
    total_time_points = len(glucose_values)

    # A participant without any readings would contribute NaN to every
    # metric and turn the cohort-level mean/std into NaN, so skip them.
    if total_time_points == 0:
        continue

    # Number of distinct calendar days that have at least one reading.
    count2count_list['No. of Days'].append(pid2regular_df[pid]['datastamp'].nunique())

    glucose_mean = glucose_values.mean()
    glucose_std = glucose_values.std()  # pandas default: sample SD (ddof=1)
    count2count_list['Mean of CGM data'].append(glucose_mean)
    count2count_list['SD of CGM data'].append(glucose_std)
    count2count_list['No. of CGM records'].append(total_time_points)

    # Coefficient of variation, in percent.
    count2count_list['CV'].append((glucose_std / glucose_mean) * 100)

    # Share of readings inside / below / above the target range, in percent.
    time_in_range = ((glucose_values >= target_range_min) & (glucose_values <= target_range_max)).sum()
    tir_percentage = (time_in_range / total_time_points) * 100
    count2count_list['TIR'].append(tir_percentage)

    time_below_range = (glucose_values < target_range_min).sum()
    tbr_percentage = (time_below_range / total_time_points) * 100
    count2count_list['TBR'].append(tbr_percentage)

    time_above_range = (glucose_values > target_range_max).sum()
    tar_percentage = (time_above_range / total_time_points) * 100
    count2count_list['TAR'].append(tar_percentage)

    # Low / high risk indices: apply f to the whole Series at once instead
    # of calling cal_LBGI / cal_HBGI once per reading. `where` keeps the
    # value on the matching side and writes 0 elsewhere, mirroring the
    # scalar helpers (a NaN comparison is False, so it also becomes 0).
    risk = f(glucose_values)
    LBGI = 10 * risk.where(risk <= 0, 0.0) ** 2
    HBGI = 10 * risk.where(risk > 0, 0.0) ** 2

    count2count_list['LBGI'].append(LBGI.mean())
    count2count_list['HBGI'].append(HBGI.mean())
    


In [11]:
import xlwt

# Write the cohort summary ("mean(std)" per metric) to an .xls sheet and echo
# the unrounded numbers to the console.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('Main', cell_overwrite_ok=True)

row_names = [
    'No. of Days', 'No. of CGM records', 'Mean of CGM data', 'SD of CGM data',
    'TIR', 'TBR', 'TAR', 'CV', 'LBGI', 'HBGI',
]

for r_idx, r_name in enumerate(row_names):
    sheet_row = r_idx + 3  # the table starts on the fourth sheet row
    sheet.write(sheet_row, 0, r_name)

    per_patient_values = count2count_list[r_name]
    if 'No' in r_name:
        # Count-style rows ("No. of ...") are truncated to whole numbers.
        print_str = f'{int(np.mean(per_patient_values))}({int(np.std(per_patient_values))})'
    else:
        print_str = f'{np.mean(per_patient_values):.2f}({np.std(per_patient_values):.2f})'
    sheet.write(sheet_row, 1, print_str)

book.save('replace-bg.xls')

for k, per_patient_values in count2count_list.items():
    print(k, np.mean(per_patient_values), np.std(per_patient_values))

No. of Days 251.3716814159292 39.61455768222941
CV 37.451042164881855 4.696081366181531
TIR 63.09505673771426 12.181013309607666
TBR 3.778177075381157 2.506843438844662
TAR 33.12676618690458 12.929693052922117
LBGI 1.0009831742384256 0.5844372151216393
HBGI 7.570896927300283 3.4228080658792295
Mean of CGM data 160.68664549757307 21.18223529573464
SD of CGM data 60.33265726539755 11.64624006406101
No. of CGM records 66153.36725663717 10701.420454991156
