In [None]:
# Import libraries
import warnings

import numpy as np
import pandas as pd

In [None]:
# Read the two input tables: the glucose monitor readings and the
# variable -> SNOMED CT code mapping.
GLUCOSE_CSV = "glucose_monitor_data_100_patients.csv"
SNOMED_MAPPING_CSV = "snomed_ct_mapping.csv"

glucose_data = pd.read_csv(GLUCOSE_CSV)
snomed_ct_data = pd.read_csv(SNOMED_MAPPING_CSV)

In [None]:
# Build a lookup table keyed by the "Variable" column whose values come from
# the "SNOMED_CT_Code" column (a repeated variable keeps its last code).
variable_names = snomed_ct_data["Variable"]
snomed_codes = snomed_ct_data["SNOMED_CT_Code"]
snomed_dict = {name: code for name, code in zip(variable_names, snomed_codes)}
snomed_dict

In [None]:
# Work on an independent (deep) copy under a new name so the columns added
# later do not modify the originally loaded glucose_data frame.
glucose_data_mapped = glucose_data.copy(deep=True)

In [None]:
# Preview the first 5 rows. Leaving the frame as the cell's last expression
# lets the notebook render it as a rich table instead of print()'s plain text.
glucose_data_mapped.head()

In [None]:
# Apply SNOMED CT mapping to glucose monitoring data.
# Each new column is filled with a single constant: the code that snomed_dict
# holds for the corresponding variable name (None when the name is absent).
snomed_columns = {
    "Glucose_Level_SNOMED": "Glucose Level",
    "Meal_Status_SNOMED": "Meal Status",
    "Insulin_Dose_SNOMED": "Insulin Dose",
    "Exercise_Level_SNOMED": "Exercise Level",
    "HbA1c_SNOMED": "HbA1c",
}

# A variable name that does not appear in the mapping file would otherwise
# produce an all-None column without any hint; surface it explicitly.
missing_variables = [
    variable for variable in snomed_columns.values() if variable not in snomed_dict
]
if missing_variables:
    warnings.warn(
        "No SNOMED CT code found for: "
        + ", ".join(missing_variables)
        + "; the corresponding columns will be empty."
    )

for column_name, variable in snomed_columns.items():
    glucose_data_mapped[column_name] = snomed_dict.get(variable)

In [None]:
# Write the mapped frame to a CSV file, leaving out the row index.
mappedGlucoseData = "glucose_monitor_data_mapped_python.csv"
glucose_data_mapped.to_csv(path_or_buf=mappedGlucoseData, index=False)


# Data Visualization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

### Distribution / Histogram of Glucose Levels

In [None]:
# Histogram (20 bins) of glucose readings with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(
    data=glucose_data_mapped,
    x="Glucose_Level_mg_dL",
    bins=20,
    kde=True,
    ax=ax,
)
ax.set_xlabel("Glucose Level (mg/dL)")
ax.set_ylabel("Frequency")
ax.set_title("Distribution of Glucose Levels")
plt.show()

### Box Plot of HbA1c Levels by Meal Status

In [None]:
# One box per Meal_Status value, showing the spread of HbA1c_Percentage.
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(
    data=glucose_data_mapped,
    x="Meal_Status",
    y="HbA1c_Percentage",
    ax=ax,
)
ax.set_xlabel("Meal Status")
ax.set_ylabel("HbA1c (%)")
ax.set_title("HbA1c Levels by Meal Status")
plt.show()

### Time Series of Glucose Levels

In [None]:
# Parse Timestamp into datetimes so the x-axis is treated as time, not text.
glucose_data_mapped["Timestamp"] = pd.to_datetime(glucose_data_mapped["Timestamp"])

# Line plot of every glucose reading against its timestamp.
# NOTE(review): all rows are drawn as one series; if the file holds several
# patients, consider splitting by a patient identifier column — confirm schema.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(
    data=glucose_data_mapped,
    x="Timestamp",
    y="Glucose_Level_mg_dL",
    marker="o",
    ax=ax,
)
ax.set_xlabel("Time")
ax.set_ylabel("Glucose Level (mg/dL)")
ax.set_title("Glucose Levels Over Time")
plt.xticks(rotation=45)  # tilt the date labels so they do not overlap
plt.show()

### Box Plot of Glucose Level vs. Exercise Level

In [None]:
# Box plot of glucose level per exercise level, with each group's median
# written next to its median line.
exercise_levels = ["Low", "Moderate", "High"]

# Drop missing readings up front: neither Matplotlib's boxplot nor np.median
# skips NaN, so a single missing value would blank out a group's box and label.
boxplot_data = [
    glucose_data_mapped.loc[
        glucose_data_mapped["Exercise_Level"] == level, "Glucose_Level_mg_dL"
    ].dropna()
    for level in exercise_levels
]

# Create the boxplot (boxes are drawn at x positions 1..N by default)
fig, ax = plt.subplots(figsize=(8, 5))
bp = ax.boxplot(
    boxplot_data,
    patch_artist=True,
    boxprops=dict(facecolor="lightblue", color="black"),
    medianprops=dict(color="red"),
)

# Name the ticks explicitly instead of passing boxplot(labels=...), which is
# deprecated since Matplotlib 3.9; this form works on old and new versions.
ax.set_xticks(list(range(1, len(exercise_levels) + 1)))
ax.set_xticklabels(exercise_levels)

# Add median values as text; a level with no readings gets NaN and no label.
medians = [
    round(np.median(data), 1) if len(data) else np.nan for data in boxplot_data
]
for position, median in enumerate(medians, start=1):
    if np.isnan(median):
        continue
    ax.text(
        position,
        median,
        str(median),
        ha="center",
        va="bottom",
        fontsize=10,
        color="red",
        fontweight="bold",
    )

ax.set_xlabel("Exercise Level")
ax.set_ylabel("Glucose Level (mg/dL)")
ax.set_title("Box Plot: Glucose Level vs. Exercise Level (with Median Values)")
ax.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()
