# Heatmaps hourly glucose per person

Here I am exploring the median and the standard deviation of the glucose values in each hour of the day per person. I will look at PNP3, T2D and 10K cohort.

## PNP3

In [1]:
import pandas as pd
from LabData.DataLoaders.CGMLoader import CGMLoader
from LabData.DataLoaders.DietLoggingLoader import DietLoggingLoader

In [2]:
import datetime
%matplotlib inline

In [3]:
import plotly
import plotly.graph_objects as go

In [4]:
cgml = CGMLoader()
dll = DietLoggingLoader()

In [5]:
cgmdf = cgml.get_data(study_ids=[3,49]).df

In [6]:
cgmdf = cgml._remove_first_day_of_connections(cgmdf)

In [7]:
cgmdf = cgmdf.reset_index()

In [8]:
cgmdf['hour'] = cgmdf.Date.dt.hour

In [9]:
cgmdf = cgmdf.set_index('Date')

In [10]:
cgmdf.index = cgmdf.index.tz_localize(None)

Processing of the file from Nastya

```python
adj_gluc = pd.read_json('/net/mraid08/export/genie/LabData/Data/WIS/adjusted_glucose.json')
adj_gluc = adj_gluc.reset_index()
adj_gluc['index'] = adj_gluc['index'].str.strip('[]')
adj_gluc[['UserID', 'ConnectionID', 'Date']] = adj_gluc['index'].str.split(",", n = 2, expand = True)
adj_gluc = adj_gluc.drop(columns='index')
adj_gluc['Date'] = pd.to_numeric(adj_gluc['Date'])
adj_gluc['Date'] = pd.to_datetime(adj_gluc['Date'], unit='ms')
adj_gluc['ConnectionID'] = adj_gluc['ConnectionID'].astype(str)
adj_gluc['GlucoseAdj50N13_Mm'] = adj_gluc['GlucoseAdj50N13_Mm'].round(1)
adj_gluc = adj_gluc.set_index(['UserID', 'ConnectionID', 'Date']).reset_index()
adj_gluc.to_csv('/home/elming/Cache/PNP3_cgm.csv', index=False)
```

In [46]:
adj_gluc = pd.read_csv('/home/elming/Cache/PNP3_cgm.csv')

In [47]:
adj_gluc['ConnectionID'] = adj_gluc['ConnectionID'].astype(str)
adj_gluc['Date'] = pd.to_datetime(adj_gluc['Date'])

In [206]:
adj_gluc.head()

Unnamed: 0,UserID,ConnectionID,Date,GlucoseAdj50N13_M,GlucoseAdj50N13_Mm,GlucoseValue,PPGR,PPGRMin,PPGRMax,DaysFromT0
0,1181,1407,2017-02-09 11:44:00,102.0,104.3,97,15.0,96.285714,130.285714,-24
1,1181,1407,2017-02-09 11:59:00,102.0,104.3,97,14.25,96.285714,130.285714,-24
2,1181,1407,2017-02-09 12:14:00,112.0,114.3,107,0.0,96.285714,130.285714,-24
3,1181,1407,2017-02-09 12:29:00,124.0,126.3,119,0.0,96.285714,130.285714,-24
4,1181,1407,2017-02-09 12:44:00,128.0,130.3,123,0.0,96.285714,138.285714,-24


In [207]:
adj_gluc = adj_gluc.set_index(['ConnectionID', 'Date'])

In [208]:
cgm_adj = pd.merge(cgmdf, adj_gluc['GlucoseAdj50N13_Mm'], on=['ConnectionID', 'Date'])

In [209]:
cgm_adj = cgm_adj.rename(columns={'GlucoseAdj50N13_Mm':'GlucoseAdj'})

In [211]:
cgm_adj.head()

Unnamed: 0_level_0,RegistrationCode,ConnectionID,GlucoseValue,PPGR,hour,GlucoseAdj
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-11-07 14:28:00,111527,1926,108.0,16.75,14,106.6
2017-11-07 14:43:00,111527,1926,118.0,1.25,14,116.6
2017-11-07 14:58:00,111527,1926,119.0,2.75,14,117.6
2017-11-07 15:13:00,111527,1926,119.0,5.5,15,117.6
2017-11-07 15:28:00,111527,1926,121.0,3.25,15,119.6


In [55]:
median = pd.DataFrame(cgm_adj.groupby(['RegistrationCode','hour'], as_index=True)['GlucoseAdj'].median().rename('median'))

In [56]:
std = pd.DataFrame(cgm_adj.groupby(['RegistrationCode','hour'], as_index=True)['GlucoseAdj'].std().rename('std'))

In [40]:
median['median'] = median['median'].round(1)
std['std'] = std['std'].round(1)

In [30]:
def minmax(df):
    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()
    tmp = df.unstack(level=0)
    tmp.columns =tmp.columns.droplevel(0)
    tmp.columns.name = None
    tmp = pd.DataFrame(scaler.fit_transform(tmp), columns=tmp.columns, index = tmp.index)
    tmp = tmp.stack(level=-1).unstack(level=0)
    tmp.index.name = 'RegistrationCode'
    tmp.columns.name = None
    return tmp

In [57]:
median_scaled = minmax(median)
std_scaled = minmax(std)

In [61]:
median = median.unstack(level=-1)
std = std.unstack(level=-1)

In [62]:
median.columns =median.columns.droplevel(0)
std.columns =std.columns.droplevel(0)

In [63]:
median.columns.name = None
std.columns.name = None

In [64]:
rc_list = median.index

In [65]:
rc_list = ['rc' + item for item in rc_list]

In [66]:
hours_list = median.columns

In [67]:
hours_list = ['h' + str(item) for item in hours_list]

In [68]:
medians_list = median.values.tolist()
std_list = std.values.tolist()
medians_scaled_list = median_scaled.values.tolist()
std_scaled_list = std_scaled.values.tolist()

In [71]:
fig = go.Figure(data=go.Heatmap(
                   z=medians_list,
                   x=hours_list,
                   y=rc_list,
                   colorscale = 'Portland'))
fig.update_layout(width = 650, height = 800,
    autosize = False )
fig.update_layout(
    title={
        'text': "PNP3 median glucose",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    xaxis_title="hour of the day",
    yaxis_title="RegistrationCode"
    )
fig.write_html("/home/elming/Cache/plotly_figures/PNP3_median_glucose_unsc.html")

In [72]:
fig = go.Figure(data=go.Heatmap(
                   z=medians_scaled_list,
                   x=hours_list,
                   y=rc_list,
                   colorscale = 'Viridis'))
fig.update_layout(width = 650, height = 800,
    autosize = False )
fig.update_layout(
    title={
        'text': "PNP3 median glucose minmax scaled",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    xaxis_title="hour of the day",
    yaxis_title="RegistrationCode"
    )
fig.write_html("/home/elming/Cache/plotly_figures/PNP3_median_glucose_sc.html")

In [73]:
fig = go.Figure(data=go.Heatmap(
                   z=std_list,
                   x=hours_list,
                   y=rc_list,
                   colorscale = 'Portland'))
fig.update_layout(width = 650, height = 800,
    autosize = False )
fig.update_layout(
    title={
        'text': "PNP3 glucose std",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    xaxis_title="hour of the day",
    yaxis_title="RegistrationCode"
    )
fig.write_html("/home/elming/Cache/plotly_figures/PNP3_glucose_std_unsc.html")

In [74]:
fig = go.Figure(data=go.Heatmap(
                   z=std_scaled_list,
                   x=hours_list,
                   y=rc_list,
                   colorscale = 'Viridis'))
fig.update_layout(width = 650, height = 800,
    autosize = False )
fig.update_layout(
    title={
        'text': "PNP3 glucose std minmax scaled",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    xaxis_title="hour of the day",
    yaxis_title="RegistrationCode"
    )
fig.write_html("/home/elming/Cache/plotly_figures/PNP3_glucose_std_sc.html")

### PNP3 scaled (divide by mean)

In [75]:
# mean_adj = pd.DataFrame(cgm_adj.groupby('RegistrationCode')['GlucoseAdj'].mean().rename('mean'))

# cgm_adj = pd.merge(cgm_adj, mean_adj, on='RegistrationCode')

# cgm_adj['gluc_scaled'] = cgm_adj['GlucoseAdj'] / cgm_adj['mean']


## T2D

I do this visualization on the unadjusted glucose values because otherwise it is a big gemor

In [11]:
cgmdf.head()

Unnamed: 0_level_0,RegistrationCode,ConnectionID,GlucoseValue,PPGR,hour
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-11-07 14:28:00,111527,1926,108.0,16.75,14
2017-11-07 14:43:00,111527,1926,118.0,1.25,14
2017-11-07 14:58:00,111527,1926,119.0,2.75,14
2017-11-07 15:13:00,111527,1926,119.0,5.5,15
2017-11-07 15:28:00,111527,1926,121.0,3.25,15


In [14]:
t2d_cid = ['1551', '1552', '1608', '1609', '1607', '1553', '1554', '1605', '1606', 
           '2134', '2135', '2136', '2137', '2227', '2228', '2229', '3132', '2997', 
           '3133', '3001', '3002', '3346', '3241', '3444', '3445', '3303', '3446', 
           '3302', '3393', '3510', '3395', '3511', '3394', '3509', '3507', '3508', 
           '4286', '4042', '4287', '4116', '4452', '4229', '4230', '4449', '4448', 
           '4451', '4450', '4194', '4195', '4453', '4718', '4541', '4950', '5102']

In [15]:
t2d_cgm = cgmdf[cgmdf['ConnectionID'].isin(t2d_cid)]

In [26]:
t2d_cgm = t2d_cgm.drop(columns='PPGR')

In [27]:
t2d_cgm[t2d_cgm.isna().any(axis=1)]

Unnamed: 0_level_0,RegistrationCode,ConnectionID,GlucoseValue,hour
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [16]:
median = pd.DataFrame(t2d_cgm.groupby(['RegistrationCode','hour'], as_index=True)['GlucoseValue'].median().rename('median'))

In [17]:
std = pd.DataFrame(t2d_cgm.groupby(['RegistrationCode','hour'], as_index=True)['GlucoseValue'].std().rename('std'))

In [18]:
median['median'] = median['median'].round(1)
std['std'] = std['std'].round(1)

In [31]:
median_scaled = minmax(median)
std_scaled = minmax(std)

In [32]:
median_scaled = minmax(median)
std_scaled = minmax(std)

In [33]:
median = median.unstack(level=-1)
std = std.unstack(level=-1)

In [34]:
median.columns =median.columns.droplevel(0)
std.columns =std.columns.droplevel(0)

In [35]:
median.columns.name = None
std.columns.name = None

In [36]:
rc_list = median.index

In [37]:
rc_list = ['rc' + item for item in rc_list]

In [38]:
hours_list = median.columns

In [39]:
hours_list = ['h' + str(item) for item in hours_list]

In [40]:
medians_list = median.values.tolist()
std_list = std.values.tolist()
medians_scaled_list = median_scaled.values.tolist()
std_scaled_list = std_scaled.values.tolist()

In [44]:
fig = go.Figure(data=go.Heatmap(
                   z=medians_scaled_list,
                   x=hours_list,
                   y=rc_list,
                   colorscale = 'Viridis'))
fig.update_layout(width = 650, height = 650,
    autosize = False )
fig.update_layout(
    title={
        'text': "T2D median glucose minmax scaled",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    xaxis_title="hour of the day",
    yaxis_title="Registration Code"
    )
fig.write_html("/home/elming/Cache/plotly_figures/T2D_median_glucose_sc.html")

In [45]:
fig = go.Figure(data=go.Heatmap(
                   z=std_scaled_list,
                   x=hours_list,
                   y=rc_list,
                   colorscale = 'Viridis'))
fig.update_layout(width = 650, height = 650,
    autosize = False )
fig.update_layout(
    title={
        'text': "T2D glucose std minmax scaled",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    xaxis_title="hour of the day",
    yaxis_title="Registration Code"
    )
fig.write_html("/home/elming/Cache/plotly_figures/T2D_glucose_std_sc.html")

In [214]:
set(t2d_cid) - set(t2d_cgm.index.get_level_values('ConnectionID').unique())

{'2136', '2137', '4194', '4229', '4230', '4452', '4541', '4718'}

{'2136', '2137', '4194', '4229', '4230', '4452', '4541', '4718'}
4194 is short connection, others are 3 people who only did T2D so they didn't have T0 that's why they don't appear in the Nastya's table. I still have them in cgmdf though and can do the adjustment separately using quantile 10. Here is the code:
```python
for userid in t2d_conns.keys():
    cgm_user = cgm[cgm['ConnectionID'].isin(user_conns[userid])][['UserID', 'ConnectionID', 'GlucoseTimestamp', 'Period_start',
                                                                  'GlucoseValue']]
for ind in cgm_user.loc[cgm_user['ConnectionID']==4042].loc[cgm_user['GlucoseTimestamp']>'2018-10-13 16:36:00'].index:
    cgm_user.loc[ind, 'GlucoseValue'] = cgm_user.loc[ind, 'GlucoseValue']-27

reg_code = cgm[cgm.UserID==userid]['RegistrationCode'].values[0]
quantile_10 = cgm_user.groupby('ConnectionID')['GlucoseValue'].quantile(0.10)
cgm_user['Glucose_adjusted'] = cgm_user['GlucoseValue']+cgm_user['ConnectionID'].apply(lambda x: quantile_10.mean() - quantile_10.loc[x])
```
In Lab data I don't have short connections: {'1552', '1606', '1609', '4194', '4229'}

## 10K 

In [1]:
import pandas as pd
from LabData.DataLoaders.CGMLoader import CGMLoader
from LabData.DataLoaders.DietLoggingLoader import DietLoggingLoader

In [2]:
import datetime
%matplotlib inline

In [35]:
import plotly.graph_objects as go

In [3]:
cgml = CGMLoader()
dll = DietLoggingLoader()

In [76]:
cgmdf = cgml.get_data(study_ids=10).df

In [151]:
cgmdf.index.get_level_values('RegistrationCode').nunique()

1165

In [152]:
cgmdf.shape

(1343966, 2)

In [77]:
cgmdf = cgml._remove_first_day_of_connections(cgmdf)

In [154]:
cgmdf.shape

(1231379, 2)

In [78]:
cgmdf = cgmdf.reset_index()

In [79]:
cgmdf['hour'] = cgmdf.Date.dt.hour

In [80]:
cgmdf.head()

Unnamed: 0,RegistrationCode,ConnectionID,Date,GlucoseValue,PPGR,hour
0,10K_1007599726,1195589702,2019-07-11 12:41:00+03:00,84.6,53.325,12
1,10K_1007599726,1195589702,2019-07-11 12:56:00+03:00,93.6,33.3,12
2,10K_1007599726,1195589702,2019-07-11 13:11:00+03:00,109.8,0.0,13
3,10K_1007599726,1195589702,2019-07-11 13:26:00+03:00,136.8,0.0,13
4,10K_1007599726,1195589702,2019-07-11 13:41:00+03:00,142.2,0.0,13


In [32]:
cgmdf.ConnectionID.nunique()

1159

In [68]:
# mins = pd.DataFrame(cgmdf.groupby('RegistrationCode')['GlucoseValue'].min().rename('min'))
# maxs = pd.DataFrame(cgmdf.groupby('RegistrationCode')['GlucoseValue'].max().rename('max'))
# cgmdf = pd.merge(cgmdf, mins, on='RegistrationCode')
# cgmdf = pd.merge(cgmdf, maxs, on='RegistrationCode')
# cgmdf['gluc_minmax_scaled'] = (cgmdf['GlucoseValue'] - cgmdf['min']) / (cgmdf['max'] - cgmdf['min'])

In [88]:
median = pd.DataFrame(cgmdf.groupby(['RegistrationCode','hour'], as_index=True)['GlucoseValue'].median().rename('median'))

In [89]:
std = pd.DataFrame(cgmdf.groupby(['RegistrationCode','hour'], as_index=True)['GlucoseValue'].std().rename('std'))

In [90]:
median['median'] = median['median'].round(1)
std['std'] = std['std'].round(1)

In [84]:
medians_scaled = minmax(median)
std_scaled = minmax(std)

In [91]:
median = median.unstack(level=-1)
std = std.unstack(level=-1)

median.columns =median.columns.droplevel(0)
std.columns =std.columns.droplevel(0)

median.columns.name = None
std.columns.name = None

In [97]:
rc_list = median.index

In [93]:
hours_list = median.columns

hours_list = ['h' + str(item) for item in hours_list]

In [94]:
medians_list = median.values.tolist()
std_list = std.values.tolist()

medians_scaled_list = medians_scaled.values.tolist()
std_scaled_list = std_scaled.values.tolist()

In [95]:
fig = go.Figure(data=go.Heatmap(
                   z=medians_list,
                   x=hours_list,
                   y=rc_list,
                   colorscale = 'Portland'))
fig.update_layout(width = 600, height = 900,
    autosize = False )
fig.update_layout(
    title={
        'text': "10K median glucose",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    xaxis_title="hour of the day",
    yaxis_title="Registration Code"
    )
fig.write_html("/home/elming/Cache/plotly_figures/10K_median_glucose_unsc.html")

In [98]:
fig = go.Figure(data=go.Heatmap(
                   z=medians_scaled_list,
                   x=hours_list,
                   y=rc_list,
                   colorscale = 'Viridis'))
fig.update_layout(width = 600, height = 900,
    autosize = False )
fig.update_layout(
    title={
        'text': "10K median glucose minmax scaled",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    xaxis_title="hour of the day",
    yaxis_title="Registration Code"
    )
fig.write_html("/home/elming/Cache/plotly_figures/10K_median_glucose_sc.html")

In [99]:
fig = go.Figure(data=go.Heatmap(
                   z=std_list,
                   x=hours_list,
                   y=rc_list,
                   colorscale = 'Portland'))
fig.update_layout(width = 600, height = 900,
    autosize = False )
fig.update_layout(
    title={
        'text': "10K glucose std",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    xaxis_title="hour of the day",
    yaxis_title="Registration Code"
    )
fig.write_html("/home/elming/Cache/plotly_figures/10K_glucose_std_unsc.html")

In [100]:
fig = go.Figure(data=go.Heatmap(
                   z=std_scaled_list,
                   x=hours_list,
                   y=rc_list,
                   colorscale = 'Viridis'))
fig.update_layout(width = 600, height = 900,
    autosize = False )
fig.update_layout(
    title={
        'text': "10K glucose std minmax scaled",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    xaxis_title="hour of the day",
    yaxis_title="Registration Code"
    )
fig.write_html("/home/elming/Cache/plotly_figures/10K_glucose_std_sc.html")