# Continuous Glucose Monitoring

This notebook is an introduction to working with continuous glucose monitoring (CGM) data stored in the Open mHealth format. It opens one participant's file and makes a few plots.

Information on this format may be found at https://github.com/openmhealth 


File organization is expected to follow this pattern:

pilot_data_root           
&emsp;wearable_blood_glucose    
&emsp;&emsp;manifest.tsv    
&emsp;&emsp;continuous_glucose_monitoring    
&emsp;&emsp;&emsp;dexcom_g6    
&emsp;&emsp;&emsp;&emsp;0001    
&emsp;&emsp;&emsp;&emsp;&emsp;0001_DEX.json     
&emsp;&emsp;&emsp;&emsp;0002    
&emsp;&emsp;&emsp;&emsp;&emsp;0002_DEX.json   
&emsp;&emsp;&emsp;&emsp;... etc.

In [None]:
import json
import os
from datetime import datetime, timedelta

import matplotlib.dates as mdates  # to use ConciseDateFormatter
import matplotlib.pyplot as plt  # to make plots
import pandas as pd

In [None]:
# Report the installed pandas version (pd 2.1.4 was used for dev).
print("pd version:", pd.__version__)

## custom path -- change to match your file structure

In [None]:
# Root folder of the dataset; the manifest and the CGM file paths used in the
# cells below are resolved relative to it.
data_root = "/Volumes/data/datasets/AIREADI/YEAR2/"  # change this to your own path

# Read the manifest

In [None]:
# Build the manifest path with os.path.join so it is correct whether or not
# data_root was entered with a trailing slash.
manifest_path = os.path.join(data_root, "wearable_blood_glucose", "manifest.tsv")
print(manifest_path)

In [None]:
# Load the tab-separated manifest into a DataFrame and list its columns.
dfm = pd.read_csv(manifest_path, sep="\t")
print(dfm.columns)

In [None]:
# Distinct participant IDs present in the manifest.
dfm["participant_id"].nunique()  # number of unique participants

In [None]:
# Preview the first rows of the manifest.
dfm.head()

In [None]:
# A short list of manifest columns, used to display a narrower preview
# than the full manifest.
key_columns = [
    "participant_id",
    "average_glucose_level_mg_dl",
    "glucose_sensor_sampling_duration_days",
]  # optionally view only a few columns

# Show the first two rows restricted to those columns.
dfm[key_columns].head(2)

# Select a set of data to explore

In [None]:
pid = 1005  # select a participant ID

# Look up this participant's CGM file path in the manifest. Fail with a clear
# message when the ID is absent instead of an opaque IndexError from [0].
pid_matches = dfm[dfm["participant_id"] == pid]["glucose_filepath"].values
if len(pid_matches) == 0:
    raise ValueError(f"participant_id {pid} was not found in the manifest")
pid_cgm = pid_matches[0]  # first match, as before
print(f"{pid} full path: {pid_cgm}")

In [None]:
# Join the dataset root and the manifest-relative file path. Leading slashes
# are stripped from the manifest value first: os.path.join would otherwise
# treat it as absolute and discard data_root. The result is correct whether
# or not data_root ends with a slash.
cgm_path = os.path.join(data_root, pid_cgm.lstrip("/"))
print(cgm_path)

## read and explore the data

### open the mHealth formatted data

In [None]:
# read the mHealth formatted data as json
# JSON text is UTF-8 by specification; name the encoding explicitly so the
# read does not depend on the platform's default locale encoding.
with open(cgm_path, "r", encoding="utf-8") as f:
    data = json.load(f)
print(data.keys())

In [None]:
def flatten_json(y):
    """Flatten nested dicts/lists into a single-level dict.

    Result keys are the path to each leaf joined with underscores; list
    elements contribute their positional index, e.g.
    ``{"a": {"b": [10, 20]}}`` -> ``{"a_b_0": 10, "a_b_1": 20}``.

    Args:
        y: JSON-like value (dict, list, or scalar).
    Returns:
        dict mapping the underscore-joined path of every leaf to its value.
        A bare scalar input is stored under the empty-string key; empty
        dicts and lists contribute no entries.
    """
    out = {}

    def flatten(x, name=""):
        # `name` is the path prefix accumulated so far, always ending in "_"
        # (or empty at the root).
        if isinstance(x, dict):
            for key, value in x.items():
                flatten(value, f"{name}{key}_")
        elif isinstance(x, list):
            for index, item in enumerate(x):
                flatten(item, f"{name}{index}_")
        else:
            out[name[:-1]] = x  # drop the trailing "_" separator

    flatten(y)
    return out

In [None]:
# Flatten the "header" section of the loaded file into a single-level dict
# and print it.
cgm_header = flatten_json(data["header"])
print(cgm_header)

In [None]:
# Each CGM observation under body/cgm is a nested dict; flatten every one
# into a single-level record.
list_of_body_dicts = [flatten_json(obs) for obs in data["body"]["cgm"]]

### convert to pandas data frame

In [None]:
# Build the observations table from the flattened records (one record per
# observation) and report its (rows, columns) shape.
df = pd.DataFrame.from_records(list_of_body_dicts)
print(df.shape)

In [None]:
# List the column names produced by flattening the observations.
print(f"CGM columns: {df.columns}")

In [None]:
# Give the two long flattened time-interval column names short aliases.
time_column_aliases = {
    "effective_time_frame_time_interval_start_date_time": "start_time",
    "effective_time_frame_time_interval_end_date_time": "end_time",
}
df = df.rename(columns=time_column_aliases)

In [None]:
def convert_time_string_to_datetime(t_str):
    """Parse a UTC timestamp string into a datetime; no local-time shift is applied.

    Args:
        t_str (str): UTC time string formatted like 2023-08-01T20:39:33Z
    Returns: datetime object (the trailing Z is matched literally, so no
        tzinfo is attached)
    """
    utc_format = "%Y-%m-%dT%H:%M:%SZ"  # 4 digit Year
    return datetime.strptime(t_str, utc_format)

In [None]:
# Display the current column names of the observations table.
df.columns

In [None]:
# Parse every start_time string into a datetime. Only one column is read, so
# map over that Series instead of building a row object per record with
# DataFrame.apply(axis=1).
df["start_dtime"] = df["start_time"].map(convert_time_string_to_datetime)

In [None]:
# Summarize column dtypes and non-null counts.
df.info()  # note that blood_glucose_value may be object; int is expected

In [None]:
# Preview the first rows of the observations table.
df.head()

In [None]:
# Range of the readings, ignoring the "Low"/"High" alert strings.
mask_bg_int_only = ~df["blood_glucose_value"].isin(["Low", "High"])
numeric_readings = df[mask_bg_int_only]["blood_glucose_value"]
bg_min, bg_max = numeric_readings.min(), numeric_readings.max()
print(f"Lowest blood_glucose_value: {bg_min}")
print(f"Highest blood_glucose_value: {bg_max}")

## plot blood glucose values

### handle non-numeric blood glucose values

Most values are integers, but some readings are reported as the strings "High" or "Low" instead of a number.     
The sensor reports    
 - "Low" for readings below 40     
 - "High" for readings above 400
   
These strings can be replaced with the threshold values (40, 400), or with more extreme values (0, 500) for visualization purposes.

In [None]:
# df['blood_glucose_value'].value_counts()  # peek at the value distribution
low_value = 40  # select a value to use for low
high_value = 400  # select a value to use for high


def replace_alt(val, low_value, high_value):
    """Map the "Low"/"High" alert strings to numbers; pass other values through.

    Args:
        val: a single blood_glucose_value entry.
        low_value: number substituted for "Low".
        high_value: number substituted for "High".
    Returns: low_value, high_value, or val unchanged.
    """
    if val == "Low":
        return low_value
    if val == "High":
        return high_value
    return val


# Map over the glucose column only. This is used instead of DataFrame.replace
# to avoid the FutureWarning regarding downcasting behavior in 'replace'
# (notebook dev used pd 2.1.4), and it leaves every other column untouched.
df["blood_glucose_value"] = df["blood_glucose_value"].map(
    lambda val: replace_alt(val, low_value, high_value)
)

In [None]:
# df.info()  # optional view of the Dtype for each column of data; blood glucose should be int

In [None]:
# Plot option: True lets the y-axis of the timestamp panel scale with the
# data; False pins it to [0, 505].
yaxis_adjusts_to_data = True

In [None]:
# Two stacked panels of the same series: the top one is plotted against the
# transmitter time value, the bottom one against the parsed start timestamp.
fig, ax = plt.subplots(2, 1, figsize=(8, 6))
fig.suptitle(
    f"Blood glucose vs time. Participant range: [{bg_min}, {bg_max}]\n Alerts for Low, High are mapped to {low_value}, {high_value}\n"
)

df.plot.line(
    x="transmitter_time_value", y="blood_glucose_value", ax=ax[0], legend=False
)
ax[0].set_title("Blood Glucose value vs time index")
ax[0].set_ylabel("Blood glucose (mg/dL)")

df.plot.line(x="start_dtime", y="blood_glucose_value", ax=ax[1], legend=False)
# The panel title states which y-axis mode is active.
if yaxis_adjusts_to_data:
    ax[1].set_title("Blood glucose - yaxis scales with data")
else:
    ax[1].set_title("Blood glucose - yaxis [0, 505]")
    ax[1].set_ylim(0, 505)
ax[1].set_ylabel("Blood glucose (mg/dL)")
ax[1].xaxis.set_major_locator(mdates.HourLocator(byhour=12))  # each day at noon
ax[1].xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d %H:%M"))

# plt.xticks acts on pyplot's current axes; slant the date labels so they do
# not overlap.
plt.xticks(rotation=45, ha="right")
plt.tight_layout()

In [None]:
# Marker printed when the notebook has run through to the last cell.
print("Done")