In [1]:
import numpy as np
import os
import pandas as pd
import copy 
import datetime 
# Dataset root directory and the process label defined for this notebook.
data_path = '../../../code_data/abc4d/'
process_file = 'coldstart_fl_analysis'

# Participant ids 4001..4026 with 4019 left out.
pid_list = [pid for pid in range(4001, 4027) if pid != 4019]
# print(pid_list, len(pid_list))

In [2]:
# Load every participant's CGM sheet into a frame indexed by timestamp with a
# single 'glucose_level' column expressed in mg/dL.
pid2CGM = {}
cgm_read_options = dict(sheet_name='CGM', names=['ts', 'glucose_level'], skiprows=1, usecols=[0, 1])
for pid in pid_list:
    # The export is stored under either an "_I" or a "_C" suffix: try "_I" first
    # and fall back to "_C" only when that file does not exist. Any other read
    # error (bad sheet, corrupt file, missing engine) is allowed to surface
    # instead of being masked by the fallback.
    try:
        load_path = os.path.join(data_path, 'raw_data', f'ABC{pid}_CGM_6m_I.xls')
        df = pd.read_excel(load_path, **cgm_read_options)
    except FileNotFoundError:
        load_path = os.path.join(data_path, 'raw_data', f'ABC{pid}_CGM_6m_C.xls')
        df = pd.read_excel(load_path, **cgm_read_options)

    df['ts'] = pd.to_datetime(df['ts'], dayfirst=True)
    df['glucose_level'] = df['glucose_level'] * 18.0 # change mmol/l to mg/dL

    # Collapse duplicate timestamps to their last non-missing reading.
    # GroupBy.last() skips NaN, so a timestamp whose readings are all missing
    # yields NaN here instead of raising IndexError from `.iloc[-1]`.
    df = df.groupby('ts')['glucose_level'].last().to_frame()
    df = df.sort_index()
    # Series.replace returns a new Series; assign it back so that 0.0 readings
    # really become NaN (the result used to be discarded).
    df['glucose_level'] = df['glucose_level'].replace(0.0, np.nan)
    pid2CGM[pid] = df
    print(f'{pid}, total_points:{len(df)}, start:{df.index[0]}, end:{df.index[-1]}')

4001, total_points:45949, start:2015-11-26 13:06:00, end:2016-05-12 23:57:00
4002, total_points:43317, start:2015-12-11 00:00:00, end:2016-05-27 23:56:00
4003, total_points:37518, start:2015-12-11 00:00:00, end:2016-05-27 23:57:00
4004, total_points:41137, start:2015-12-18 12:19:00, end:2016-06-03 23:53:00
4005, total_points:43836, start:2016-01-18 00:04:00, end:2016-07-04 23:56:00
4006, total_points:46421, start:2016-01-14 00:02:00, end:2016-06-30 23:58:00
4007, total_points:44501, start:2016-02-12 00:01:00, end:2016-07-29 23:55:00
4008, total_points:44691, start:2016-02-12 00:02:00, end:2016-07-29 23:58:00
4009, total_points:47472, start:2016-02-12 00:03:00, end:2016-07-29 23:58:00
4010, total_points:43188, start:2016-02-23 00:00:00, end:2016-08-09 23:55:00
4011, total_points:32893, start:2016-02-23 00:01:00, end:2016-08-09 23:55:00
4012, total_points:44833, start:2016-02-23 00:01:00, end:2016-08-09 23:31:00
4013, total_points:40524, start:2016-02-29 00:01:00, end:2016-08-15 23:58:00

In [34]:
# add time features

def add_time_attributes(pid2data):
    """Add calendar and time-of-day feature columns to every frame in ``pid2data``.

    The columns are derived from each frame's datetime index and are written
    into the frames in place; the mapping that was passed in is returned.

    Added columns:
        day_of_week: ``dayofweek`` of the index as float64 (pandas numbers Monday as 0).
        hour, minute: clock hour and minute as float64.
        timestamp: seconds elapsed since midnight as float64.
        sin_time, cos_time: sine / cosine of the time of day on a 24-hour circle.
        datastamp: integer date code ``year * 10000 + month * 100 + day``.

    Args:
        pid2data: mapping from participant id to a DataFrame whose index holds
            datetime values.

    Returns:
        The same ``pid2data`` object, whose frames now carry the extra columns.
    """
    seconds_in_day = 24 * 60 * 60

    for data in pid2data.values():
        # Datetime accessor over the index values (the Series keeps the frame's index).
        when = data.index.to_frame().iloc[:, 0].dt

        data['day_of_week'] = when.dayofweek.astype(np.float64)
        data['hour'] = when.hour.astype(np.float64)
        data['minute'] = when.minute.astype(np.float64)

        seconds_since_midnight = when.hour * 3600 + when.minute * 60 + when.second
        data['timestamp'] = seconds_since_midnight.astype(np.float64)

        # Cyclical encoding of the time of day; both columns are float64 already
        # because 'timestamp' is, so no further cast is needed.
        data['sin_time'] = np.sin(2 * np.pi * data['timestamp'] / seconds_in_day)
        data['cos_time'] = np.cos(2 * np.pi * data['timestamp'] / seconds_in_day)

        data['datastamp'] = when.year * 10000 + when.month * 100 + when.day

    return pid2data
# add_time_attributes returns the mapping it was given, so pid2regular_df and
# pid2CGM are the same dict; each entry is then replaced by a version that only
# keeps rows with a glucose reading.
pid2regular_df = add_time_attributes(pid2CGM)
for pid in list(pid2regular_df):
    data = pid2regular_df[pid].dropna(subset=['glucose_level'])
    pid2regular_df[pid] = data

In [35]:
def f(x):
    """Transform a glucose value (mg/dL in this notebook) for the risk indices.

    Evaluates ``1.509 * (ln(x) ** 1.084 - 5.381)``. The result is negative for
    low readings and positive for high ones; cal_LBGI / cal_HBGI square the
    negative / positive side respectively. Works on scalars and on arrays.
    """
    scale, exponent, offset = 1.509, 1.084, 5.381
    log_glucose = np.log(x)
    return scale * (log_glucose ** exponent - offset)

def cal_LBGI(x):
    """Low blood glucose risk of a reading: ``10 * min(f(x), 0) ** 2``.

    Readings whose transformed value ``f(x)`` is positive contribute 0.
    Accepts a scalar (as used with ``Series.apply``) as well as a NumPy array
    or pandas Series, in which case the risk is computed element-wise.

    Args:
        x: glucose value(s) in the unit expected by ``f``.

    Returns:
        The low-side risk value(s); 0 where ``f(x)`` is positive or NaN.
    """
    # f(x) is evaluated once. np.fmin returns the non-NaN operand, so a NaN
    # transform still maps to 0 exactly like the former `f(x) <= 0` branch.
    low_side = np.fmin(f(x), 0.0)
    return 10 * low_side ** 2

def cal_HBGI(x):
    """High blood glucose risk of a reading: ``10 * max(f(x), 0) ** 2``.

    Readings whose transformed value ``f(x)`` is not positive contribute 0.
    Accepts a scalar (as used with ``Series.apply``) as well as a NumPy array
    or pandas Series, in which case the risk is computed element-wise.

    Args:
        x: glucose value(s) in the unit expected by ``f``.

    Returns:
        The high-side risk value(s); 0 where ``f(x)`` is non-positive or NaN.
    """
    # f(x) is evaluated once. np.fmax returns the non-NaN operand, so a NaN
    # transform still maps to 0 exactly like the former `f(x) > 0` branch.
    high_side = np.fmax(f(x), 0.0)
    return 10 * high_side ** 2
# Per-participant glycaemic summary metrics: each list receives one value per
# participant. The key order is significant because the report loop at the
# bottom prints the metrics in insertion order.
count2count_list = {
    metric: []
    for metric in (
        'No. of Days', 'CV', 'TIR', 'TBR', 'TAR',
        'LBGI', 'HBGI',
        'Mean of CGM data', 'SD of CGM data', 'No. of CGM records',
    )
}

# Target glucose range; both bounds count as "in range".
target_range_min = 70
target_range_max = 180

for pid, frame in pid2regular_df.items():
    glucose_values = frame['glucose_level']
    total_time_points = len(glucose_values)

    # Number of distinct 'datastamp' values, i.e. days that have at least one row.
    count2count_list['No. of Days'].append(frame['datastamp'].nunique())

    glucose_mean = glucose_values.mean()
    glucose_std = glucose_values.std()  # pandas default: sample SD (ddof=1)
    count2count_list['Mean of CGM data'].append(glucose_mean)
    count2count_list['SD of CGM data'].append(glucose_std)
    count2count_list['No. of CGM records'].append(total_time_points)
    count2count_list['CV'].append((glucose_std / glucose_mean) * 100)

    # Share of readings (in percent) inside, below and above the target range.
    range_masks = (
        ('TIR', glucose_values.between(target_range_min, target_range_max)),
        ('TBR', glucose_values < target_range_min),
        ('TAR', glucose_values > target_range_max),
    )
    for metric, mask in range_masks:
        count2count_list[metric].append((mask.sum() / total_time_points) * 100)

    # Mean per-reading low / high blood glucose risk.
    for metric, risk_fn in (('LBGI', cal_LBGI), ('HBGI', cal_HBGI)):
        count2count_list[metric].append(glucose_values.apply(risk_fn).mean())

# Cohort-level mean and SD of every metric (np.std defaults to ddof=0).
for k, per_pid_values in count2count_list.items():
    print(k, np.mean(per_pid_values), np.std(per_pid_values))

import xlwt

# Write the cohort summary as "mean(std)" strings into sheet 'Main' of an
# .xls workbook: metric name in column 0, formatted value in column 1,
# starting at row index 3.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('Main', cell_overwrite_ok=True)

row_names = [
    'No. of Days', 'No. of CGM records',
    'Mean of CGM data', 'SD of CGM data',
    'TIR', 'TBR', 'TAR',
    'CV', 'LBGI', 'HBGI',
]

for sheet_row, r_name in enumerate(row_names, start=3):
    cohort_mean = np.mean(count2count_list[r_name])
    cohort_std = np.std(count2count_list[r_name])

    if 'No' in r_name:
        # Count-like rows ("No. of ...") are written as integers.
        # NOTE(review): int() truncates rather than rounds (43259.6 -> 43259);
        # confirm this is the intended presentation.
        print_str = f'{int(cohort_mean)}({int(cohort_std)})'
    else:
        print_str = f'{cohort_mean:.2f}({cohort_std:.2f})'

    sheet.write(sheet_row, 0, r_name)
    sheet.write(sheet_row, 1, print_str)

book.save('abc4d.xls')

No. of Days 168.32 14.248424474305923
CV 38.40417339822355 6.22260719122595
TIR 62.53531551446589 15.57568882718213
TBR 6.013288973809913 4.1280607336301465
TAR 31.451395511724193 15.654518442093057
LBGI 1.7332229287787677 1.0229794276365125
HBGI 7.263246858249318 3.845223232750552
Mean of CGM data 156.6635061278651 24.236207136944014
SD of CGM data 60.52448594838872 14.47238096835106
No. of CGM records 43259.6 5460.833145226102
