In [2]:
import pandas as pd
import numpy as np
from pathlib import Path

In [3]:
def load_and_clean_dexcom(file_path):
    try:
        dexcom_data = pd.read_csv(file_path)
        
        # Drop initial metadata rows by detecting where actual glucose data starts
        dexcom_data_cleaned = dexcom_data[pd.to_numeric(dexcom_data["Glucose Value (mg/dL)"], errors='coerce').notna()]
        
        # Convert timestamp column to datetime format
        dexcom_data_cleaned['Timestamp (YYYY-MM-DDThh:mm:ss)'] = pd.to_datetime(
            dexcom_data_cleaned['Timestamp (YYYY-MM-DDThh:mm:ss)'], 
            errors='coerce'
        )
        
        # Drop any remaining NaN values in timestamp
        dexcom_data_cleaned = dexcom_data_cleaned.dropna(subset=['Timestamp (YYYY-MM-DDThh:mm:ss)'])
        
        # Keep only necessary columns
        dexcom_data_cleaned = dexcom_data_cleaned[['Timestamp (YYYY-MM-DDThh:mm:ss)', 'Glucose Value (mg/dL)']]
        
        return dexcom_data_cleaned
    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return None


In [4]:
def calculate_glucose_change(meal_time, glucose_data):
    try:
        # Subset glucose data to 30 minutes before and 3 hours after the meal
        start_time = meal_time - pd.Timedelta(minutes=30)
        end_time = meal_time + pd.Timedelta(hours=3)
        
        # Filter glucose data within the timeframe
        glucose_subset = glucose_data[
            (glucose_data['Timestamp (YYYY-MM-DDThh:mm:ss)'] >= start_time) &
            (glucose_data['Timestamp (YYYY-MM-DDThh:mm:ss)'] <= end_time)
        ]
        
        if glucose_subset.empty:
            return None
        
        # Find the closest glucose value to meal time
        glucose_subset['time_diff'] = (glucose_subset['Timestamp (YYYY-MM-DDThh:mm:ss)'] - meal_time).abs()
        baseline_glucose = glucose_subset.loc[glucose_subset['time_diff'].idxmin(), 'Glucose Value (mg/dL)']
        
        # Calculate glucose change at specific time intervals
        glucose_changes = {}
        for time_interval in [30, 60, 90, 120]:
            target_time = meal_time + pd.Timedelta(minutes=time_interval)
            glucose_subset['time_diff'] = (glucose_subset['Timestamp (YYYY-MM-DDThh:mm:ss)'] - target_time).abs()
            closest_reading = glucose_subset.loc[glucose_subset['time_diff'].idxmin(), 'Glucose Value (mg/dL)']
            glucose_changes[time_interval] = closest_reading - baseline_glucose
            
        return glucose_changes
    except Exception as e:
        print(f"Error calculating glucose change for meal at {meal_time}: {str(e)}")
        return None


In [5]:
def process_patient_data(patient_number):
    try:
        # Construct file paths
        dexcom_file = f"raw-data/Dexcom_{patient_number:03d}.csv"
        food_log_file = f"raw-data/Food_Log_{patient_number:03d}.csv"
        
        # Load and clean data
        dexcom_data = load_and_clean_dexcom(dexcom_file)
        food_log = pd.read_csv(food_log_file)
        
        if dexcom_data is None or food_log.empty:
            return None
        
        # Ensure time_begin is in datetime format
        food_log['time_begin'] = pd.to_datetime(food_log['time_begin'], errors='coerce')
        
        # Calculate glucose changes for each meal
        results = []
        for idx, meal in food_log.iterrows():
            meal_time = meal['time_begin']
            glucose_changes = calculate_glucose_change(meal_time, dexcom_data)
            
            if glucose_changes:
                results.append({
                    'Patient_ID': f"P{patient_number:03d}",
                    'Meal_Time': meal_time,
                    'Meal_Name': meal['logged_food'],
                    'Total_Calories': meal.get('calorie', np.nan), 
                    'Total_Carbs': meal.get('total_carb', np.nan),  
                    'Total_Sugar': meal.get('sugar', np.nan),
                    'Total_Protein': meal.get('protein', np.nan),
                    '30_min_change': glucose_changes.get(30, np.nan),
                    '60_min_change': glucose_changes.get(60, np.nan),
                    '90_min_change': glucose_changes.get(90, np.nan),
                    '120_min_change': glucose_changes.get(120, np.nan)
                })
        
        return pd.DataFrame(results)
    except Exception as e:
        print(f"Error processing patient {patient_number}: {str(e)}")
        return None


In [6]:
def main():
    # Process data for all patients (1-16)
    all_results = []
    for patient_num in range(1, 17):
        print(f"Processing patient {patient_num}...")
        patient_results = process_patient_data(patient_num)
        if patient_results is not None:
            all_results.append(patient_results)
    
    # Combine all results
    if all_results:
        combined_results = pd.concat(all_results, ignore_index=True)
        # Sort by patient ID and meal time
        combined_results = combined_results.sort_values(['Patient_ID', 'Meal_Time'])
        # Save to CSV
        combined_results.to_csv("glucose_changes.csv", index=False)
        print(f"Successfully processed {len(all_results)} patients' data")
        print(f"Total meals analyzed: {len(combined_results)}")
    else:
        print("No valid results to process")

In [7]:
main()

Processing patient 1...
Processing patient 2...
Processing patient 3...
Error processing patient 3: 'time_begin'
Processing patient 4...
Processing patient 5...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glucose_subset['time_diff'] = (glucose_subset['Timestamp (YYYY-MM-DDThh:mm:ss)'] - meal_time).abs()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glucose_subset['time_diff'] = (glucose_subset['Timestamp (YYYY-MM-DDThh:mm:ss)'] - target_time).abs()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glu

Processing patient 6...
Processing patient 7...
Processing patient 8...
Processing patient 9...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glucose_subset['time_diff'] = (glucose_subset['Timestamp (YYYY-MM-DDThh:mm:ss)'] - meal_time).abs()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glucose_subset['time_diff'] = (glucose_subset['Timestamp (YYYY-MM-DDThh:mm:ss)'] - target_time).abs()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glu

Processing patient 10...
Processing patient 11...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glucose_subset['time_diff'] = (glucose_subset['Timestamp (YYYY-MM-DDThh:mm:ss)'] - meal_time).abs()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glucose_subset['time_diff'] = (glucose_subset['Timestamp (YYYY-MM-DDThh:mm:ss)'] - target_time).abs()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glu

Processing patient 12...
Processing patient 13...
Processing patient 14...
Processing patient 15...
Processing patient 16...
Successfully processed 15 patients' data
Total meals analyzed: 1044


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dexcom_data_cleaned['Timestamp (YYYY-MM-DDThh:mm:ss)'] = pd.to_datetime(
