In [1]:
import numpy as np
import pandas as pd
import os


# Locations of the raw CGM tables and of the processed output.
data_path = '../../../code_data/ctr3_cgm_only/raw_data/DataTables'
data_save_path = '../../../code_data/ctr3_cgm_only/'
process_file = 'coldstart_fl'

# Read the pipe-delimited CGM table and reduce it to three columns:
#   glucose_level -- the CGM reading, with 0.0 treated as missing (NaN)
#   ts            -- DisplayTime parsed as a datetime
#   pid           -- the DeidentID participant identifier
data_cgm = (
    pd.read_csv(os.path.join(data_path, 'CGM.txt'), sep='|')
    .assign(ts=lambda raw: pd.to_datetime(raw['DisplayTime']))
    .rename(columns={'CGM': 'glucose_level', 'DeidentID': 'pid'})
    .assign(glucose_level=lambda raw: raw['glucose_level'].replace(0.0, np.nan))
    .loc[:, ['glucose_level', 'ts', 'pid']]
)
data_cgm.head()


Unnamed: 0,glucose_level,ts,pid
0,194,2013-09-29 17:36:48,1
1,204,2013-09-29 17:41:48,1
2,201,2013-09-29 17:46:48,1
3,204,2013-09-29 17:51:48,1
4,200,2013-09-29 17:56:48,1


In [2]:
import copy
import datetime
# Split the combined CGM table into one frame per participant. Each frame is
# an independent copy, indexed by `ts`, sorted chronologically, and without
# the now-redundant `pid` column.
pid_list = list(set(data_cgm['pid']))
data_merge = data_cgm
pid2data = {}
for pid in pid_list:
    rows_for_pid = data_merge['pid'] == pid
    pid2data[pid] = (
        data_merge.loc[rows_for_pid]
        .copy(deep=True)
        .set_index('ts')
        .sort_index()
        .drop(columns=['pid'])
    )

In [4]:
# add time features

def add_time_attributes(pid2data):
    """Add calendar and time-of-day feature columns to every frame.

    Each DataFrame in ``pid2data`` must be indexed by a ``DatetimeIndex``.
    The frames are modified in place; the columns below are appended in this
    order:

    * ``day_of_week`` -- day of the week as returned by pandas
      (Monday = 0 ... Sunday = 6), float64
    * ``hour``, ``minute`` -- clock components of the index, float64
    * ``timestamp`` -- seconds elapsed since midnight, float64
    * ``sin_time``, ``cos_time`` -- sine / cosine of ``timestamp`` mapped
      onto one full turn per 24 hours, float64
    * ``datastamp`` -- calendar date packed as the integer ``YYYYMMDD``

    Parameters
    ----------
    pid2data : dict
        Mapping whose values are DataFrames with a ``DatetimeIndex``.

    Returns
    -------
    dict
        The very same ``pid2data`` object that was passed in (no copy).
    """
    seconds_in_day = 24 * 60 * 60

    for data in pid2data.values():
        # DatetimeIndex exposes the datetime fields directly, so no detour
        # through a temporary frame / Series `.dt` accessor is needed.
        idx = data.index

        # Plain arrays are assigned positionally, which also keeps frames
        # with repeated timestamps working.
        data['day_of_week'] = np.asarray(idx.dayofweek, dtype=np.float64)
        data['hour'] = np.asarray(idx.hour, dtype=np.float64)
        data['minute'] = np.asarray(idx.minute, dtype=np.float64)

        seconds_since_midnight = np.asarray(
            idx.hour * 3600 + idx.minute * 60 + idx.second,
            dtype=np.float64,
        )
        data['timestamp'] = seconds_since_midnight

        # Cyclical encoding: 00:00 and 24:00 map to the same point.
        data['sin_time'] = np.sin(2 * np.pi * seconds_since_midnight / seconds_in_day)
        data['cos_time'] = np.cos(2 * np.pi * seconds_since_midnight / seconds_in_day)

        data['datastamp'] = np.asarray(idx.year * 10000 + idx.month * 100 + idx.day)

    return pid2data
# Attach the time features, then keep only the rows that carry a glucose
# reading. add_time_attributes returns the dict it was given, so this loop
# also replaces the entries of pid2data.
pid2regular_df = add_time_attributes(pid2data)
for pid, data in list(pid2regular_df.items()):
    data = data[data['glucose_level'].notna()]
    pid2regular_df[pid] = data

In [5]:
def f(x):
    """Transform a glucose value with ``1.509 * (ln(x) ** 1.084 - 5.381)``.

    The result is negative for low readings and positive for high ones; it
    feeds ``cal_LBGI`` and ``cal_HBGI``.
    NOTE(review): the constants look like the published blood-glucose risk
    transform for readings in mg/dL -- confirm the unit of the input data.
    """
    powered_log = np.log(x) ** 1.084
    return 1.509 * (powered_log - 5.381)

def cal_LBGI(x):
    """Low-side risk of a single glucose value.

    Returns ``10 * f(x) ** 2`` when ``f(x) <= 0`` and ``0`` otherwise
    (including when ``f(x)`` is NaN, since the comparison is then false).
    """
    transformed = f(x)
    if transformed <= 0:
        return 10 * transformed ** 2
    return 0

def cal_HBGI(x):
    """High-side risk of a single glucose value.

    Returns ``10 * f(x) ** 2`` when ``f(x) > 0`` and ``0`` otherwise
    (including when ``f(x)`` is NaN, since the comparison is then false).
    """
    transformed = f(x)
    if transformed > 0:
        return 10 * transformed ** 2
    return 0
# Per-participant summary statistics: every key collects one value per pid.
count2count_list = {
    metric: []
    for metric in (
        'No. of Days', 'CV', 'TIR', 'TBR', 'TAR',
        'LBGI', 'HBGI',
        'Mean of CGM data', 'SD of CGM data', 'No. of CGM records',
    )
}

# Inclusive bounds of the target glucose range used for TIR / TBR / TAR.
target_range_min = 70
target_range_max = 180

for pid in pid2regular_df:
    glucose_values = pid2regular_df[pid]['glucose_level']
    total_time_points = len(glucose_values)

    # One row per distinct `datastamp`, i.e. per calendar day with data.
    glucose_avg_by_day = pid2regular_df[pid][['glucose_level', 'datastamp']].groupby('datastamp').mean()
    count2count_list['No. of Days'].append(len(glucose_avg_by_day))
    count2count_list['No. of CGM records'].append(total_time_points)

    # Mean, standard deviation and coefficient of variation (in percent).
    glucose_mean = glucose_values.mean()
    glucose_std = glucose_values.std()
    count2count_list['Mean of CGM data'].append(glucose_mean)
    count2count_list['SD of CGM data'].append(glucose_std)
    count2count_list['CV'].append((glucose_std / glucose_mean) * 100)

    # Share of readings inside, below and above the target range (percent).
    time_in_range = glucose_values.between(target_range_min, target_range_max).sum()
    tir_percentage = (time_in_range / total_time_points) * 100
    count2count_list['TIR'].append(tir_percentage)

    time_below_range = glucose_values.lt(target_range_min).sum()
    tbr_percentage = (time_below_range / total_time_points) * 100
    count2count_list['TBR'].append(tbr_percentage)

    time_above_range = glucose_values.gt(target_range_max).sum()
    tar_percentage = (time_above_range / total_time_points) * 100
    count2count_list['TAR'].append(tar_percentage)

    # Low / high risk indices: mean of the per-reading risk values.
    LBGI = glucose_values.apply(cal_LBGI)
    HBGI = glucose_values.apply(cal_HBGI)
    count2count_list['LBGI'].append(LBGI.mean())
    count2count_list['HBGI'].append(HBGI.mean())
    


In [6]:
import xlwt

# Write the cohort-level summary, formatted as "mean(std)" across
# participants, to a one-sheet .xls workbook.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('Main', cell_overwrite_ok=True)

row_names = [
    'No. of Days', 'No. of CGM records', 'Mean of CGM data', 'SD of CGM data',
    'TIR', 'TBR', 'TAR', 'CV', 'LBGI', 'HBGI',
]

for r_idx, r_name in enumerate(row_names):
    sheet_row = r_idx + 3  # the table starts on the fourth sheet row
    per_pid_values = count2count_list[r_name]
    cohort_mean = np.mean(per_pid_values)
    cohort_std = np.std(per_pid_values)

    # Count-type rows ("No. of ...") are truncated to integers; every other
    # row is shown with two decimals.
    if 'No' in r_name:
        print_str = f'{int(cohort_mean)}({int(cohort_std)})'
    else:
        print_str = f'{cohort_mean:.2f}({cohort_std:.2f})'

    sheet.write(sheet_row, 0, r_name)
    sheet.write(sheet_row, 1, print_str)

book.save('ctr3_cgm.xls')

# Echo the unrounded mean and standard deviation of every metric.
for k, per_pid_values in count2count_list.items():
    print(k, np.mean(per_pid_values), np.std(per_pid_values))

No. of Days 163.83333333333334 67.67475321138764
CV 36.43758981856561 3.9024563285819
TIR 69.92240482313998 7.948750149413096
TBR 3.533204629345925 2.111051304613102
TAR 26.544390547514073 8.56889698050804
LBGI 0.9707314305621149 0.47849941367742926
HBGI 5.887973785261634 1.927457961649773
Mean of CGM data 151.37040630304875 13.335475300496565
SD of CGM data 55.29414836233891 8.244232801526753
No. of CGM records 43421.2 18309.832302891253


In [7]:
# Number of participants (pid keys) held in pid2regular_df.
len(pid2regular_df)

30