# Patient Data

In [None]:
import pandas as pd
import os
import json
import csv

# Export the demographics table to JSON: one object per CSV row.

# Define file paths
csv_file_path = '../data/Demographics.csv'
output_dir = './Output/jsons/'
output_file_path = os.path.join(output_dir, 'Demographics.json')

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Read the CSV file
df = pd.read_csv(csv_file_path)

# Convert the DataFrame to a list of row dictionaries.
# pandas represents missing cells as NaN, which json.dump would emit as the
# bare token `NaN` -- not valid JSON, and rejected by strict parsers -- so
# missing values are mapped to None (serialized as `null`).
data_json = [
    {column: (None if pd.isna(value) else value) for column, value in row.items()}
    for row in df.to_dict(orient='records')
]

# Write the records to a file
with open(output_file_path, 'w') as json_file:
    json.dump(data_json, json_file, indent=4)

print(f"Data has been successfully written to {output_file_path}")

Data has been successfully written to ./Output/jsons/Demographics.json


# Glucose Data

In [13]:
# Build glucose.json: for every directory under data_dir that contains a
# Dexcom CSV, keep the EGV rows and store them as a list of
# {"timestamp", "glucose"} records keyed by the directory name.
# NOTE: this cell relies on `output_dir` (and the os / pandas / json imports)
# defined in the first code cell of the notebook.

# Define the input directory
data_dir = "../data/"

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Initialize the dictionary to store the data
data_dict = {}

# Source column names in the Dexcom export
timestamp_col = 'Timestamp (YYYY-MM-DDThh:mm:ss)'
glucose_col = 'Glucose Value (mg/dL)'

# Traverse the data directory to find the Dexcom CSV file in each directory
input_file_dir = {}
for root, dirs, files in os.walk(data_dir):
    # os.walk yields entries in filesystem order, which is arbitrary; sorting
    # makes both the traversal and the "first Dexcom file" choice reproducible.
    dirs.sort()
    for file in sorted(files):
        if file.startswith("Dexcom"):
            # The name of the directory the file is in becomes the record key
            dir_name = os.path.basename(root)

            input_file_dir[dir_name] = os.path.join(root, file)
            break  # Stop after finding the first Dexcom file in directory

# Read each Dexcom file and add its records to the dictionary
for key, value in input_file_dir.items():
    # Load the full CSV
    df = pd.read_csv(value)

    # Keep only glucose events (Event Type == 'EGV') and the two relevant
    # columns, renamed for clarity; timestamps are normalized to ISO format.
    df_egv = (
        df.loc[df['Event Type'] == 'EGV', [timestamp_col, glucose_col]]
        .rename(columns={timestamp_col: 'timestamp', glucose_col: 'glucose'})
        .reset_index(drop=True)
        .assign(
            timestamp=lambda frame: pd.to_datetime(frame['timestamp'])
            .dt.strftime('%Y-%m-%dT%H:%M:%S')
        )
    )

    # Convert the DataFrame to a list of dictionaries. Missing values (NaN)
    # become None so the file contains `null` rather than the invalid JSON
    # token `NaN`.
    data_dict[key] = [
        {name: (None if pd.isna(item) else item) for name, item in record.items()}
        for record in df_egv.to_dict(orient='records')
    ]

# Write the data to a JSON file
output_file_path = os.path.join(output_dir, 'glucose.json')
with open(output_file_path, 'w') as json_file:
    json.dump(data_dict, json_file, indent=4)

# Preliminary Food Logs

In [34]:
# Build food_log.json: concatenate the Food_Log CSV found in each directory
# under data_dir into one flat list of records, tagging every record with the
# name of the directory it came from ('ID') and an empty 'tags' field.
# NOTE: this cell relies on `output_dir` (and the os / pandas / json imports)
# defined in the first code cell of the notebook.

# Define the input directory
data_dir = "../data/"

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Initialize the list to store the data
data_list = []

# Directory IDs deliberately left out of the export.
# NOTE(review): the notebook does not record why "003" is skipped -- confirm.
excluded_ids = {"003"}

# Nutrition columns that must be numeric in the output
numeric_cols = ['calorie', 'total_carb', 'dietary_fiber', 'sugar', 'protein', 'total_fat']

# Traverse the data directory to find the Food_Log CSV file in each directory
input_file_dir = {}
for root, dirs, files in os.walk(data_dir):
    # os.walk yields entries in filesystem order, which is arbitrary; sorting
    # makes both the traversal and the "first Food_Log file" choice reproducible.
    dirs.sort()
    for file in sorted(files):
        if file.startswith("Food_Log"):
            # The name of the directory the file is in becomes the ID
            dir_name = os.path.basename(root)

            input_file_dir[dir_name] = os.path.join(root, file)
            break  # Stop after finding the first Food_Log file in directory


# Read each Food_Log file and append its records to the list
for key, value in input_file_dir.items():

    if key in excluded_ids:
        continue

    print(key)
    # Load the CSV file
    df_food_log = pd.read_csv(value)

    # Combine 'date' and 'time' into a single datetime column
    df_food_log['datetime'] = pd.to_datetime(df_food_log['date'] + ' ' + df_food_log['time'])

    # Reorder columns so 'datetime' comes first; take an explicit copy so the
    # column assignments below operate on an independent frame.
    cols = ['datetime'] + [col for col in df_food_log.columns if col != 'datetime']
    df_food_log = df_food_log[cols].copy()

    # Convert nutrition columns to numeric; unparseable entries become NaN
    df_food_log[numeric_cols] = df_food_log[numeric_cols].apply(pd.to_numeric, errors='coerce')

    # Add a column for the patient ID (the source directory name)
    df_food_log['ID'] = key

    # Add an empty column for tags
    df_food_log['tags'] = ""

    # Convert timestamp to ISO format
    df_food_log['datetime'] = df_food_log['datetime'].dt.strftime('%Y-%m-%dT%H:%M:%S')

    # Convert the DataFrame to a list of dictionaries. Missing values (NaN,
    # including the ones produced by errors='coerce' above) become None so the
    # file contains `null` rather than the invalid JSON token `NaN`.
    data_list.extend(
        {name: (None if pd.isna(item) else item) for name, item in record.items()}
        for record in df_food_log.to_dict(orient='records')
    )

# Save the data to a JSON file
output_file_path = os.path.join(output_dir, 'food_log.json')
with open(output_file_path, 'w') as json_file:
    json.dump(data_list, json_file, indent=4)


001
002
004
005
006
007
008
009
010
011
012
013
014
015
016


{'001': '../data/001\\Food_Log_001.csv',
 '002': '../data/002\\Food_Log_002.csv',
 '003': '../data/003\\Food_Log_003.csv',
 '004': '../data/004\\Food_Log_004.csv',
 '005': '../data/005\\Food_Log_005.csv',
 '006': '../data/006\\Food_Log_006.csv',
 '007': '../data/007\\Food_Log_007.csv',
 '008': '../data/008\\Food_Log_008.csv',
 '009': '../data/009\\Food_Log_009.csv',
 '010': '../data/010\\Food_Log_010.csv',
 '011': '../data/011\\Food_Log_011.csv',
 '012': '../data/012\\Food_Log_012.csv',
 '013': '../data/013\\Food_Log_013.csv',
 '014': '../data/014\\Food_Log_014.csv',
 '015': '../data/015\\Food_Log_015.csv',
 '016': '../data/016\\Food_Log_016.csv'}