We'll use a Long Short-Term Memory (LSTM) network, which is well-suited for time series prediction tasks like this one.

In [3]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam


In [5]:
def load_and_preprocess_data(folder_path, sequence_length=12, prediction_horizon=6):
    all_data = []
    
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            file_path = os.path.join(folder_path, filename)
            df = pd.read_csv(file_path)
            
            features = ['cbg', 'finger', 'basal', 'hr', 'gsr', 'carbInput', 'bolus']
            data = df[features].values
            
            # Check for and remove any rows with NaN or infinite values
            data = data[~np.isnan(data).any(axis=1)]
            data = data[~np.isinf(data).any(axis=1)]
            
            all_data.append(data)
    
    combined_data = np.concatenate(all_data, axis=0)
    
    scaler = MinMaxScaler(feature_range=(-1, 1))  # Change scale to (-1, 1)
    data_scaled = scaler.fit_transform(combined_data)
    
    X, y = [], []
    for i in range(len(data_scaled) - sequence_length - prediction_horizon):
        X.append(data_scaled[i:(i + sequence_length)])
        y.append(data_scaled[i + sequence_length + prediction_horizon, 0])
    
    return np.array(X), np.array(y), scaler

def create_model(input_shape):
    model = Sequential([
        LSTM(64, activation='tanh', input_shape=input_shape, return_sequences=True),
        BatchNormalization(),
        LSTM(32, activation='tanh'),
        BatchNormalization(),
        Dense(16, activation='relu'),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001, clipnorm=1.0), loss='mse')
    return model

def train_model(X_train, y_train, X_val, y_val, epochs=20, batch_size=32):
    model = create_model((X_train.shape[1], X_train.shape[2]))
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                        validation_data=(X_val, y_val), verbose=1, callbacks=[early_stopping])
    return model, history


In [9]:
# Load and preprocess the data
train_folder = r'C:\Users\alessia\Documents\00professional\school\04_unibe\semester3\diabetes_management\Ohio Data\Ohio2018_processed\train'
# Split the training data into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.2, random_state=42)

# Train the model
model, history = train_model(X_train, y_train, X_val, y_val)

NameError: name 'BatchNormalization' is not defined

In [8]:

# Evaluate the model on the validation set
val_loss = model.evaluate(X_val, y_val)
print(f"Validation Loss: {val_loss}")

NameError: name 'BatchNormalization' is not defined

In [1]:
# Evaluate the model
val_loss = model.evaluate(X_val, y_val)
print(f"Validation Loss: {val_loss}")

NameError: name 'model' is not defined

In [None]:
# Function to simulate finger stick samples
def simulate_finger_stick_samples(X, n_samples):
    mask = np.zeros_like(X)
    mask[:, ::n_samples, 0] = 1  # Set every nth CGM value to 1
    return X * mask

# Experiment with different numbers of finger stick samples
for n_samples in [4, 6, 8, 12]:
    X_sampled = simulate_finger_stick_samples(X, n_samples)
    X_train_sampled, X_val_sampled, _, _ = train_test_split(X_sampled, y, test_size=0.2, random_state=42)
    
    model_sampled, _ = train_model(X_train_sampled, y_train, X_val_sampled, y_val)
    val_loss_sampled = model_sampled.evaluate(X_val_sampled, y_val)
    print(f"Validation Loss with {n_samples} finger stick samples: {val_loss_sampled}")

# Note: For testing, you would use a similar approach with the test folder
# test_folder = 'data/test'
# X_test, y_test, _ = load_and_preprocess_data(test_folder)
# test_loss = model.evaluate(X_test, y_test)
# print(f"Test Loss: {test_loss}")