#### Surrogate Model

In [1]:
import pandas as pd
# Load the prediction dataset from the working directory; the "Prediction"
# column is used as the surrogate model's target in the following cell.
df = pd.read_csv('updated_glucose_predictions.csv')
# Quick sanity check on the loaded frame: (rows, columns)
print(df.shape)

(30724, 9)


In [2]:
# Target (y): the "Prediction" column the surrogate will learn to reproduce.
# Features (X): every remaining column of the frame.
y = df["Prediction"]
X = df.drop("Prediction", axis=1)

# Show which columns ended up as model inputs
print("Features:", list(X.columns))


Features: ['Glucose_Level_1', 'Glucose_Level_2', 'Glucose_Level_3', 'Glucose_Level_4', 'Glucose_Level_5', 'Glucose_Level_6', 'HbA1C', 'Gender']


In [3]:
from sklearn.model_selection import train_test_split

# Split data: 70% training, 30% testing (test_size=0.3); random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Report how many rows landed in each split
print(f"Training samples: {X_train.shape[0]}")
print(f"Testing samples: {X_test.shape[0]}")


Training samples: 21506
Testing samples: 9218


In [4]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Initialize the surrogate: a 100-tree random forest regressor with a fixed seed
surrogate_model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the surrogate on the training split
surrogate_model.fit(X_train, y_train)

# Predict on the held-out test split
y_pred = surrogate_model.predict(X_test)

# Evaluation (MSE / MAE / R2 on both splits) is performed in the next cell.


In [6]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the fitted surrogate on both splits
y_train_pred = surrogate_model.predict(X_train)
y_test_pred = surrogate_model.predict(X_test)

# Apply the same three metrics to each split, in (MSE, MAE, R2) order
train_mse, train_mae, train_r2 = (
    score(y_train, y_train_pred)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)
test_mse, test_mae, test_r2 = (
    score(y_test, y_test_pred)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

# Report; note the training MSE is shown with 3 decimals, the test MSE with 2
print("Training Set Performance:")
print(f"MSE: {train_mse:.3f}, MAE: {train_mae:.2f}, R2: {train_r2:.2f}")
print("\nTest Set Performance:")
print(f"MSE: {test_mse:.2f}, MAE: {test_mae:.2f}, R2: {test_r2:.2f}")

Training Set Performance:
MSE: 0.004, MAE: 0.03, R2: 1.00

Test Set Performance:
MSE: 0.04, MAE: 0.09, R2: 1.00


#### Interpretability


In [None]:
import shap

# Initialize the SHAP explainer around the fitted random-forest surrogate
explainer = shap.TreeExplainer(surrogate_model)

# Compute SHAP values for every row of the test split
# (shap_values is consumed by the summary plot in the next cell)
shap_values = explainer.shap_values(X_test)


In [None]:
import matplotlib.pyplot as plt
# SHAP summary plot over the test split, using shortened display names for
# the eight feature columns (same order as the columns of X_test).
plt.figure(figsize=(10, 6))
shap.summary_plot(
    shap_values,
    X_test,
    feature_names=['Glucose_1', 'Glucose_2', 'Glucose_3',
                   'Glucose_4', 'Glucose_5', 'Glucose_6', 'HbA1C',
                   'Gender'],
    # summary_plot defaults to plot_size="auto", which resizes the current
    # figure and discards the figsize chosen above; None leaves it untouched.
    plot_size=None,
)



In [None]:
import lime
from lime.lime_tabular import LimeTabularExplainer
import matplotlib.pyplot as plt

# Create a LIME explainer for the regression surrogate.
# NOTE: this rebinds `explainer`, which held the SHAP TreeExplainer until now;
# re-run the SHAP cell before using `explainer` for SHAP again.
explainer = LimeTabularExplainer(
    training_data=X_train.values,
    feature_names=X_train.columns.tolist(),
    class_names=['Prediction'],
    mode='regression',
    random_state=42,  # same seed as the split/model so explanations are repeatable
)


def predict_with_feature_names(data):
    """Run the surrogate on LIME's perturbed samples.

    LIME passes a bare 2-D NumPy array, while the surrogate was fitted on a
    DataFrame. Re-attaching the training column names keeps scikit-learn from
    warning about missing feature names on every call.
    """
    return surrogate_model.predict(pd.DataFrame(data, columns=X_train.columns))


# Select instances to explain: the first `num_instances` rows of the test set.
# Indices are row positions in X_test, not labels from the original frame.
num_instances = 3
instance_indices = range(num_instances)  # Adjust the range as needed

for index in instance_indices:
    instance = X_test.iloc[index].values  # Select the instance to explain

    # Get LIME explanation over all features
    lime_explanation = explainer.explain_instance(
        instance,
        predict_with_feature_names,
        num_features=X_train.shape[1]
    )

    # as_pyplot_figure() creates its own figure, so no subplot grid is
    # pre-allocated here (doing so only renders an extra blank figure).
    fig = lime_explanation.as_pyplot_figure()
    fig.gca().set_title(f'LIME Explanation for Instance {index}')
    plt.show()  # Show each plot sequentially
    plt.close(fig)  # Release the figure before the next iteration
    lime_explanation.show_in_notebook(show_table=True, show_all=True)
