In [1]:
import argparse
import json
import os

import numpy as np
from cgms_data_seg import CGMSDataSeg
from cnn_ohio import regressor, regressor_transfer, test_ckpt
from data_reader import DataReader

In [2]:
# Another option --epoch 150 --prediction_horizon 12
epoch = 10
ph = 6
path = "../ohio_results"

In [None]:
import xml.etree.ElementTree as ET
import pandas as pd

# Load and parse the XML file
tree = ET.parse(r'C:\Users\username\OneDrive\Desktop\BGprediction\OhioT1DM\2018\train\559-ws-training.xml')
root = tree.getroot()

# Create a list to hold the data
data = []

# Initialize a list to hold all event data
data = []


# Loop through each glucose_level element in the patient element
for glucose_level in root.findall('glucose_level'):
    for event in glucose_level.findall('event'):
        # Create a dictionary for each event
        event_data = {
            'ts': event.get('ts'),       # Get timestamp
            'value': float(event.get('value')),  # Get glucose value
            'patient_id': root.get('id'),        # Get patient ID from root
            'weight': float(root.get('weight')),  # Get weight from root
            'insulin_type': root.get('insulin_type')  # Get insulin type from root
        }
        # Add the dictionary to our data list
        data.append(event_data)


# Create a DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
df


In [4]:
# Before the loop
# ATTENTION: verify the \ or / in different system window or unix
# read in all patients data
pid_2018 = [559, 563, 570, 588, 575, 591]
# pid_2020 = [540, 552, 544, 567, 584, 596]
pid_year = {2018: pid_2018}
# pid_year = {2018: pid_2018, 2020: pid_2020}

train_data = dict()
for year in list(pid_year.keys()):
    pids = pid_year[year]
    for pid in pids:
        reader = DataReader(
            "ohio", f"C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/{year}/train/{pid}-ws-training.xml", 5
        )
        train_data[pid] = reader.read()
# add test data of 2018 patient
use_2018_test = False
standard = False  # do not use standard
test_data_2018 = []
for pid in pid_2018:
    reader = DataReader(
        "ohio", f"C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/2018/test/{pid}-ws-testing.xml", 5
    )
    test_data_2018 += reader.read()

# a dumb dataset instance
train_dataset = CGMSDataSeg(
    "ohio", "C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/2018/train/559-ws-training.xml", 5
)
sampling_horizon = 12
prediction_horizon = ph
scale = 0.01
outtype = "Same"
# train on training dataset
# k_size, nblock, nn_size, nn_layer, learning_rate, batch_size, epoch, beta
with open(os.path.join(path, "config.json")) as json_file:
    config = json.load(json_file)
argv = (
    config["k_size"],
    config["nblock"],
    config["nn_size"],
    config["nn_layer"],
    config["learning_rate"],
    config["batch_size"],
    epoch,
    config["beta"],
)
l_type = config["loss"]
# test on patients data
outdir = os.path.join(path, f"ph_{prediction_horizon}_{l_type}")
if not os.path.exists(outdir):
    os.makedirs(outdir)
all_errs = []

Reading 44 segments


In [None]:
# Loop
use_2018_test = False
all_errs = []
for pid in pid_2018:
    train_pids = set(pid_2018) - set([pid])
    local_train_data = []
    if use_2018_test:
        local_train_data += test_data_2018
    for k in train_pids:
        local_train_data += train_data[k]
    print(f"Pretrain data: {sum([sum(x) for x in local_train_data])}")


    train_dataset.data = local_train_data
    train_dataset.set_cutpoint = -1
    train_dataset.reset(
        sampling_horizon,
        prediction_horizon,
        scale,
        100,
        False,
        outtype,
        1,
        standard,
    )
    regressor(train_dataset, *argv, l_type, outdir)
    # Fine-tune and test
    target_test_dataset = CGMSDataSeg(
        "ohio", f"C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/2018/test/{pid}-ws-testing.xml",5
    )
    target_test_dataset.set_cutpoint = 1
    target_test_dataset.reset(
        sampling_horizon,
        prediction_horizon,
        scale,
        0.01,
        False,
        outtype,
        1,
        standard,
    )
    target_train_dataset = CGMSDataSeg(
        "ohio", f"C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/2018/test/{pid}-ws-testing.xml", 5
    )
    target_train_dataset.set_cutpoint = -1
    target_train_dataset.reset(
        sampling_horizon,
        prediction_horizon,
        scale,
        100,
        False,
        outtype,
        1,
        standard,
    )
    err, labels = test_ckpt(target_test_dataset, outdir)
    errs = [err]
    transfer_res = [labels]
    for i in range(1, 2):
        err, labels = regressor_transfer(
            target_train_dataset,
            target_test_dataset,
            config["batch_size"],
            epoch,
            outdir,
            i,
        )
        errs.append(err)
        transfer_res.append(labels)
    transfer_res = np.concatenate(transfer_res, axis=1)
    np.savetxt(
        f"{outdir}/{pid}.txt",
        transfer_res,
        fmt="%.4f %.4f %.4f %.4f",
    )
    all_errs.append([pid] + errs)
all_errs = np.array(all_errs)
np.savetxt(f"{outdir}/errors.txt", all_errs, fmt="%d %.4f %.4f")
# label pair:(groundTruth, y_pred)

In [6]:
all_errs

array([[5.59000000e+02, 1.96096092e-01, 1.95441991e-01],
       [5.63000000e+02, 1.86664909e-01, 1.89901307e-01],
       [5.70000000e+02, 1.64349169e-01, 1.58961207e-01],
       [5.88000000e+02, 1.89570844e-01, 1.85944960e-01],
       [5.75000000e+02, 2.38005698e-01, 2.33785048e-01],
       [5.91000000e+02, 2.12123454e-01, 2.10404932e-01]])

In [7]:
# Convert the second and third columns to floats
second_column = all_errs[:, 1].astype(float)
third_column = all_errs[:, 2].astype(float)

# Calculate the average
average_second_column = np.mean(second_column)
average_third_column = np.mean(third_column)

print("Average of the second column:", average_second_column)
print("Average of the third column:", average_third_column)

Average of the second column: 0.19780169427394867
Average of the third column: 0.19573990752299628


In [7]:
# Convert the second and third columns to floats
second_column = all_errs[:, 1].astype(float)
third_column = all_errs[:, 2].astype(float)

# Calculate the average
average_second_column = np.mean(second_column)
average_third_column = np.mean(third_column)

print("Average of the second column:", average_second_column)
print("Average of the third column:", average_third_column)

Average of the second column: 0.20091557254393896
Average of the third column: 0.19819297641515732


In [16]:
target_train_dataset

<cgms_data_seg.CGMSDataSeg at 0x26982710f10>

# Look at it before fine tuning

In [None]:
use_2018_test = False
all_errs = []
for pid in pid_2018:
    train_pids = set(pid_2018) - set([pid])
    local_train_data = []
    if use_2018_test:
        local_train_data += test_data_2018
    for k in train_pids:
        local_train_data += train_data[k]
    print(f"Pretrain data: {sum([sum(x) for x in local_train_data])}")

    train_dataset.data = local_train_data
    train_dataset.set_cutpoint = -1
    train_dataset.reset(
        sampling_horizon,
        prediction_horizon,
        scale,
        100,
        False,
        outtype,
        1,
        standard,
    )
    regressor(train_dataset, *argv, l_type, outdir)

    # Test before fine-tuning
    target_test_dataset = CGMSDataSeg(
        "ohio", f"C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/2018/test/{pid}-ws-testing.xml", 5
    )
    target_test_dataset.set_cutpoint = 1
    target_test_dataset.reset(
        sampling_horizon,
        prediction_horizon,
        scale,
        0.01,
        False,
        outtype,
        1,
        standard,
    )

    # Record error before fine-tuning
    pre_fine_tune_err, pre_fine_tune_labels = test_ckpt(target_test_dataset, outdir)
    errs = [pre_fine_tune_err]  # Initialize the error list with the error before fine-tuning
    transfer_res = [pre_fine_tune_labels]

    # Fine-tune and test
    target_train_dataset = CGMSDataSeg(
        "ohio", f"C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/2018/train/{pid}-ws-training.xml", 5
    )
    target_train_dataset.set_cutpoint = -1
    target_train_dataset.reset(
        sampling_horizon,
        prediction_horizon,
        scale,
        100,
        False,
        outtype,
        1,
        standard,
    )

    for i in range(1, 2):
        err, labels = regressor_transfer(
            target_train_dataset,
            target_test_dataset,
            config["batch_size"],
            epoch,
            outdir,
            i,
        )
        errs.append(err)
        transfer_res.append(labels)

    transfer_res = np.concatenate(transfer_res, axis=1)
    np.savetxt(
        f"{outdir}/{pid}.txt",
        transfer_res,
        fmt="%.4f %.4f %.4f %.4f",
    )
    all_errs.append([pid] + errs)

all_errs = np.array(all_errs)
np.savetxt(f"{outdir}/no_fine_tune_errors.txt", all_errs, fmt="%d %.4f %.4f")


In [12]:
all_errs

array([[5.59000000e+02, 2.07258314e-01, 2.07258314e-01, 1.96519971e-01],
       [5.63000000e+02, 1.88313693e-01, 1.88313693e-01, 1.96036845e-01],
       [5.70000000e+02, 2.06858590e-01, 2.06858590e-01, 1.70068905e-01],
       [5.88000000e+02, 1.91546440e-01, 1.91546440e-01, 1.88959986e-01],
       [5.75000000e+02, 2.41410896e-01, 2.41410896e-01, 2.40480691e-01],
       [5.91000000e+02, 2.20461741e-01, 2.20461741e-01, 2.15062574e-01]])

In [7]:
# Convert the second and third columns to floats
second_column = all_errs[:, 1].astype(float)
third_column = all_errs[:, 2].astype(float)

# Calculate the average
average_second_column = np.mean(second_column)
average_third_column = np.mean(third_column)

print("Average of the second column:", average_second_column)
print("Average of the third column:", average_third_column)

Average of the second column: 0.20499620338280997
Average of the third column: 0.19652444124221802


# Specifically looking at the input

In [4]:
# Before the loop
# ATTENTION: verify the \ or / in different system window or unix
# read in all patients data
pid_2018 = [559, 563] # , 570, 588, 575, 591
# pid_2020 = [540, 552, 544, 567, 584, 596]
pid_year = {2018: pid_2018}
# pid_year = {2018: pid_2018, 2020: pid_2020}

train_data = dict()
for year in list(pid_year.keys()):
    pids = pid_year[year]
    for pid in pids:
        reader = DataReader(
            "ohio", f"C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/{year}/train/{pid}-ws-training.xml", 6
        )
        train_data[pid] = reader.read()
# add test data of 2018 patient
use_2018_test = False
standard = False  # do not use standard
test_data_2018 = []
for pid in pid_2018:
    reader = DataReader(
        "ohio", f"C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/2018/test/{pid}-ws-testing.xml", 6
    )
    test_data_2018 += reader.read()

# a dumb dataset instance
train_dataset = CGMSDataSeg(
    "ohio", "C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/2018/train/559-ws-training.xml", 6
)
sampling_horizon = 6
prediction_horizon = ph
scale = 0.01
outtype = "Same"
# train on training dataset
# k_size, nblock, nn_size, nn_layer, learning_rate, batch_size, epoch, beta
with open(os.path.join(path, "config.json")) as json_file:
    config = json.load(json_file)
argv = (
    config["k_size"],
    config["nblock"],
    config["nn_size"],
    config["nn_layer"],
    config["learning_rate"],
    config["batch_size"],
    epoch,
    config["beta"],
)
l_type = config["loss"]
# test on patients data
outdir = os.path.join(path, f"ph_{prediction_horizon}_{l_type}")
if not os.path.exists(outdir):
    os.makedirs(outdir)
all_errs = []

Reading 43 segments


In [7]:
train_dataset

<cgms_data_seg.CGMSDataSeg at 0x243ff637a90>

In [None]:
# Loop

all_errs = []
for pid in pid_2018:
    print(pid)
    train_pids = set(pid_2018) - set([pid])
    local_train_data = []
    if use_2018_test:
        local_train_data += test_data_2018
    for k in train_pids:
        local_train_data += train_data[k]
    print(f"Pretrain data: {sum([sum(x) for x in local_train_data])}")

    train_dataset.data = local_train_data
    train_dataset.set_cutpoint = -1
    train_dataset.reset(
        sampling_horizon,
        prediction_horizon,
        scale,
        100,
        False,
        outtype,
        1,
        standard,
    )
    regressor(train_dataset, *argv, l_type, outdir)
    # Fine-tune and test
    target_test_dataset = CGMSDataSeg(
        "ohio", f"C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/2018/test/{pid}-ws-testing.xml",6
    )
    target_test_dataset.set_cutpoint = 1
    target_test_dataset.reset(
        sampling_horizon,
        prediction_horizon,
        scale,
        0.01,
        False,
        outtype,
        1,
        standard,
    )
    target_train_dataset = CGMSDataSeg(
        "ohio", f"C:/Users/username/OneDrive/Desktop/BGprediction/OhioT1DM/2018/train/{pid}-ws-training.xml", 5
    )
    target_train_dataset.set_cutpoint = -1
    target_train_dataset.reset(
        sampling_horizon,
        prediction_horizon,
        scale,
        100,
        False,
        outtype,
        1,
        standard,
    )
    err, labels = test_ckpt(target_test_dataset, outdir)
    errs = [err]
    transfer_res = [labels]
    for i in range(1, 2): 
        err, labels = regressor_transfer(
            target_train_dataset,
            target_test_dataset,
            config["batch_size"],
            epoch,
            outdir,
            i,
        )
        errs.append(err)
        transfer_res.append(labels)
    transfer_res = np.concatenate(transfer_res, axis=1)
    # np.savetxt(
    #     f"{outdir}/{pid}.txt",
    #     transfer_res,
    #     fmt="%.4f %.4f %.4f %.4f",
    # )
    all_errs.append([pid] + errs)
all_errs = np.array(all_errs)

# The first error represents the output after fine-tuned, the second error represents the output from transfer learning

# np.savetxt(f"{outdir}/errors.txt", all_errs, fmt="%d %.4f %.4f")
# label pair:(groundTruth, y_pred)

In [6]:
all_errs

array([[5.59000000e+02, 2.08077967e-01, 2.08703712e-01],
       [5.63000000e+02, 1.90867111e-01, 1.93375528e-01]])

In [23]:
all_errs

array([[5.59000000e+02, 3.16456079e-01, 2.21673578e-01]])

In [None]:
target_train_dataset.data