In [1]:
import numpy as np
import pandas as pd

import sys
import os

# Get the parent directory path
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)
    
from src.data_loaders import (
    load_data_iso,
)


In [2]:
patient_ids = [str(i) for i in range(102, 224)]
to_remove = [
    "103",
    "108",
    "110",
    "113",
    "115",
    "122",
    "126",
    "127",
    "128",
    "130",
    "134",
    "139",
    "143",
    "144",
    "146",
    "149",
    "152",
    "155",
    "156",
    "164",
    "174",
    "175",
    "176",
    "177",
    "180",
    "185",
    "186",
    "190",
    "193",
    "194",
    "197",
    "203",
    "206",
    "211",
    "212",
    "220",
]
patient_ids = list(set(patient_ids) - set(to_remove))
all_data_dict = {}

for patient_id in patient_ids:
    all_data = load_data_iso(patient_id)
    all_data_dict[patient_id] = all_data

# patient_ids_cap = ["620"]
# patient_ids_cap1 =[
#     "627",
#     "639",
#     "652",
#     "675",
#     "CGM_007",
#     "CGM_008",
#     "CGM_009",
#     "CGM_011",
#     "CGM_013",
#     "CGM_014",
#     "CGM_017",
#     "CGM_018",
#     "CGM_020",
#     "CGM_021",
#     "CGM_022",
#     "CGM_023",
#     "CGM_024",
#     "CGM_025",
# ]
# for patient_id in patient_ids_cap:
#     all_data = load_data_cap(patient_id)
#     all_data_dict[patient_id] = all_data

# for patient_id in patient_ids_cap1:
#     all_data = load_data_cap1(patient_id)
#     all_data_dict[patient_id] = all_data
    
# patient_ids = patient_ids + patient_ids_cap + patient_ids_cap1

In [3]:
len(patient_ids)

86

In [4]:
# Output the results
for patient_id, df in all_data_dict.items():
    df['Tid'] = df['Tid'].dt.strftime('%d/%m/%Y %H:%M')
    all_data_dict[patient_id] = df

In [7]:
import numpy as np
import pandas as pd

lower_threshold = 3.9  # mmol/L (lower bound for TIR)
upper_threshold = 10.0  # mmol/L (upper bound for TIR)

# Placeholder for glucose statistics
stats = {}

for patient_id, df in all_data_dict.items():
    # Extract glucose values (cbg column)
    glucose_values = df['cbg']
    
    # Calculate mean and standard deviation
    mean_glucose = np.nanmean(glucose_values)
    std_glucose = np.nanstd(glucose_values)
    
    # Calculate time in range, below range, and above range
    total_count = len(glucose_values)
    time_in_range = np.sum((glucose_values >= lower_threshold) & (glucose_values <= upper_threshold)) / total_count * 100
    time_below_range = np.sum(glucose_values < lower_threshold) / total_count * 100
    time_above_range = np.sum(glucose_values > upper_threshold) / total_count * 100
    
    # Store statistics
    stats[patient_id] = {
        "Mean": mean_glucose,
        "SD": std_glucose,
        "TIR (%)": time_in_range,
        "TBR (%)": time_below_range,
        "TAR (%)": time_above_range,
    }

# Convert stats dictionary to DataFrame and display
stats_df = pd.DataFrame(stats).T
# Calculate summary statistics (mean and std of all metrics)
summary_stats = stats_df.mean(axis=0).to_dict()
summary_stats_std = stats_df.std(axis=0).to_dict()

# Combine mean ± std for the summary row
summary_row = {
    metric: f"{summary_stats[metric]:.2f} ± {summary_stats_std[metric]:.2f}" 
    for metric in stats_df.columns
}
summary_row["Patient ID"] = "Overall"  # Add label for the summary row

# Append summary row to DataFrame
summary_df = pd.DataFrame([summary_row]).set_index("Patient ID")
stats_df = pd.concat([stats_df, summary_df])

# Display the final DataFrame
display(stats_df)


Unnamed: 0,Mean,SD,TIR (%),TBR (%),TAR (%)
131,10.722948,1.713936,31.564626,0.0,68.435374
222,9.18018,3.175197,62.882883,2.162162,34.954955
183,5.955875,1.848268,89.817232,6.527415,3.655352
147,10.940777,1.408511,30.420712,0.0,69.579288
181,8.641943,1.36105,85.430464,0.0,14.569536
...,...,...,...,...,...
223,9.543873,3.921762,66.567164,0.0,33.313433
172,7.54694,1.475381,92.060606,0.0,6.969697
168,9.417842,2.9926,62.742283,0.0,37.042355
133,8.668757,1.62548,84.15139,0.0,15.789474


In [8]:
# Function to check intervals and count days if consistent
def check_intervals_and_count_days(data_dict):
    results = {}
    for patient_id, df in data_dict.items():
        df_copy = df.copy()
        # Convert Tid column to datetime
        df_copy['Tid'] = pd.to_datetime(df_copy['Tid'], format='%d/%m/%Y %H:%M')
        
        # Calculate time differences in minutes
        time_diffs = df_copy['Tid'].diff().dropna().dt.total_seconds() / 60
        
        # Check if all intervals are 5 minutes
        consistent = (time_diffs <= 15).all()
        
        if consistent:
            # Calculate the number of days between the first and last timestamp
            total_days = (df_copy['Tid'].max() - df_copy['Tid'].min()).total_seconds() / (24 * 3600)
            results[patient_id] = total_days
        else:
            results[patient_id] = None  # Inconsistent intervals
    return results

# Check intervals for each patient
interval_and_days_results = check_intervals_and_count_days(all_data_dict)

# Output results
print(interval_and_days_results)

{'131': 7.653472222222222, '222': 1.9236111111111112, '183': 1.3263888888888888, '147': 1.0694444444444444, '181': 1.5694444444444444, '198': 7.816666666666666, '121': 9.51388888888889, '217': 3.892361111111111, '208': 9.597222222222221, '173': 6.569444444444445, '160': 4.194444444444445, '219': 5.402777777777778, '201': 6.659722222222222, '199': 2.3993055555555554, '140': 1.7534722222222223, '179': 2.7020833333333334, '221': 2.0416666666666665, '123': 4.584027777777778, '129': 4.802083333333333, '116': 0.61875, '106': 5.520833333333333, '150': 4.847222222222222, '207': 1.9861111111111112, '105': 4.670833333333333, '119': 1.65625, '169': 1.0486111111111112, '215': 2.3506944444444446, '135': 3.0416666666666665, '205': 6.659722222222222, '196': 1.0659722222222223, '151': 3.7465277777777777, '166': 1.6111111111111112, '107': 1.0451388888888888, '153': 3.8541666666666665, '163': 2.482638888888889, '191': 1.7569444444444444, '170': 1.9513888888888888, '154': 4.663194444444445, '210': 1.6527

In [9]:
# Extract the values
values = list(interval_and_days_results.values())

# Calculate the mean
mean_value = np.mean(values)

# Calculate the standard deviation
std_value = np.std(values)

print(f"Mean: {mean_value}")
print(f"Standard Deviation: {std_value}")

Mean: 4.00828488372093
Standard Deviation: 2.3751127106518686


In [10]:
# Extract all patient IDs with None as their result
inconsistent_patients = [patient_id for patient_id, days in interval_and_days_results.items() if days is None]

# Output the inconsistent patient IDs
print(len(inconsistent_patients))


0


In [11]:
# Function to find rows where the interval is more than 5 minutes
def find_intervals_more_than_15(data_dict):
    results = {}
    for patient_id, df in data_dict.items():
        # Convert Tid column to datetime
        df_copy = df.copy()
        df_copy['Tid'] = pd.to_datetime(df_copy['Tid'], format='%d/%m/%Y %H:%M')
        
        # Calculate time differences in minutes and align the index
        time_diffs = df_copy['Tid'].diff().dt.total_seconds() / 60
        
        # Find rows where interval is more than 5 minutes
        more_than_15_minutes = df_copy[time_diffs > 15]
        # print("more_than_5_minutes", more_than_5_minutes)
        
        # Store results, if any row has more than 5 minutes interval
        if not more_than_15_minutes.empty:
            results[patient_id] = more_than_15_minutes
        else:
            results[patient_id] = None  # No intervals more than 5 minutes
    return results

# Check for intervals more than 5 minutes for each patient
intervals_more_than_15_results = find_intervals_more_than_15(all_data_dict)

# Output results
for patient_id, df in intervals_more_than_15_results.items():
    if df is not None:
        print(f"Patient {patient_id} has intervals more than 15 minutes at the following timestamps:")
        print(df[['Tid', 'minutes_elapsed']])