# Patient Data

In [None]:
import pandas as pd
import os
import json
import csv

# Define file paths
csv_file_path = '../data/Demographics.csv'
output_dir = './Output/jsons/'
output_file_path = os.path.join(output_dir, 'Demographics.json')

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Read the CSV file
df = pd.read_csv(csv_file_path)

# Convert the DataFrame to a JSON object
data_json = df.to_dict(orient='records')

# Write the JSON object to a file
with open(output_file_path, 'w') as json_file:
    json.dump(data_json, json_file, indent=4)

print(f"Data has been successfully written to {output_file_path}")

Data has been successfully written to ./Output/jsons/Demographics.json


# Glucose Data

In [13]:
# Define the input file and output directory
data_dir = "../data/"

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Initialize the dictionary to store the data
data_dict = {}

#traverse teh data directory to find the Dexcom CSV file
input_file_dir = {}
for root, dirs, files in os.walk(data_dir):
    for file in files:
        if file.startswith("Dexcom"):
            # get name of directory file is in
            dir_name = os.path.basename(root)

            input_file_dir[dir_name] = os.path.join(root, file)
            break  # Stop after finding the first Dexcom file in directory

# read each Dexcom file and append the data to the dictionary
for key, value in input_file_dir.items():
    # Load the full CSV
    df = pd.read_csv(value)

    # Filter for glucose events (Event Type == 'EGV')
    df_egv = df[df['Event Type'] == 'EGV'].copy()

    # Convert timestamp to datetime format
    df_egv['Timestamp (YYYY-MM-DDThh:mm:ss)'] = pd.to_datetime(df_egv['Timestamp (YYYY-MM-DDThh:mm:ss)'])

    # Rename columns for clarity
    df_egv.rename(columns={
        'Timestamp (YYYY-MM-DDThh:mm:ss)': 'timestamp',
        'Glucose Value (mg/dL)': 'glucose',
    }, inplace=True)

    # Keep only relevant columns
    df_egv = df_egv[['timestamp', 'glucose']].reset_index(drop=True)

    # Convert timestamp to ISO format
    df_egv['timestamp'] = df_egv['timestamp'].dt.strftime('%Y-%m-%dT%H:%M:%S')

    # Convert Datafrsame to dictionary
    data_dict[key] = df_egv.to_dict(orient='records')

# Write the data to a JSON file
output_file_path = os.path.join(output_dir, 'glucose.json')
with open(output_file_path, 'w') as json_file:
    json.dump(data_dict, json_file, indent=4)

# Prelim Food_logs

In [34]:
# Define the input file and output directory
data_dir = "../data/"

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Initialize the list to store the data
data_list = []

#traverse teh data directory to find the Dexcom CSV file
input_file_dir = {}
for root, dirs, files in os.walk(data_dir):
    for file in files:
        if file.startswith("Food_Log"):
            # get name of directory file is in
            dir_name = os.path.basename(root)

            input_file_dir[dir_name] = os.path.join(root, file)
            break  # Stop after finding the first Dexcom file in directory


# read each Dexcom file and append the data to the dictionary
for key, value in input_file_dir.items():

    if key == "003":
        # Skip the first file
        continue

    print(key)
    # Load the CSV file
    df_food_log = pd.read_csv(value)

    # Combine 'date' and 'time' into a single datetime column
    df_food_log['datetime'] = pd.to_datetime(df_food_log['date'] + ' ' + df_food_log['time'])

    # Reorder columns
    cols = ['datetime'] + [col for col in df_food_log.columns if col != 'datetime']
    df_food_log = df_food_log[cols]

    # Convert nutrition columns to numeric
    numeric_cols = ['calorie', 'total_carb', 'dietary_fiber', 'sugar', 'protein', 'total_fat']
    df_food_log[numeric_cols] = df_food_log[numeric_cols].apply(pd.to_numeric, errors='coerce')

    # add coloumn for patiendent ID
    df_food_log['ID'] = key

    #add empty columns for tags
    df_food_log['tags'] = ""


    # Convert timestamp to ISO format
    df_food_log['datetime'] = df_food_log['datetime'].dt.strftime('%Y-%m-%dT%H:%M:%S')

    # Convert Datafrsame to dictionary
    data_list.extend(df_food_log.to_dict(orient='records'))

#save the data to a JSON file
output_file_path = os.path.join(output_dir, 'food_log.json')
with open(output_file_path, 'w') as json_file:
    json.dump(data_list, json_file, indent=4)


001
002
004
005
006
007
008
009
010
011
012
013
014
015
016


In [None]:
import json
import os

output_dir = './Output/jsons/'

input_file_path = os.path.join(output_dir, 'food_log.json')
output_file_path = os.path.join(output_dir, 'food_log_tagged.json')

# Load your JSON data (replace with your file path or variable)
with open(input_file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Tag definitions
tag_keywords = {
    'meat': [
        'chicken', 'beef', 'pork', 'turkey', 'sausage', 'bacon', 'ham', 'rib', 'steak', 'chorizo', 'nugget', 'slider', 'wings', 'jerky',
        'pepperoni', 'lamb', 'salami', 'duck', 'lamb gyro', 'duck burrito', 'duck soup with dumpling'
    ],
    'seafood': [
        'anchovies', 'fish', 'salmon', 'shrimp', 'crab', 'tuna', 'seafood', 'sardines', 'squid', 'octopus', 'shellfish', 'lobster', 'scallops',
        'tilapia', 'sushi', 'california roll'
    ],
    'drink': [
        'coca cola', 'beer', 'lemonade', 'boost', 'water', 'soda', 'coffee', 'tea', 'juice', 'milk', 'smoothie', 'shake', 'gatorade', 'powerade',
        'chocolate milk', 'hot chocolate', 'chai', 'sweet tea', 'mello yello', 'mountain dew', 'frozen pop', 'premier protein', 'protein shake',
        'moscato', 'corona', 'mojito', 'chardonnay wine', 'diet coke', 'diet pepsi', 'coke', 'vodka', 'red wine', 'latte', 'decaf latte', 'skim decaf latte',
        'jim beam bourbon', 'gray goose', 'grey goose', 'pepsi', 'diet pepsi', 'diet dr pepper', 'dr pepper', 'sweet tea', 'sweet tea vodka',
        'sweet tea with lemonade', 'sweet tea with vodka', 'sweet tea with sweet tea vodka', 'sweet tea with sweet tea and lemonade', 'sweet tea with sweet tea and vodka',
        'whey protein', 'v8', 'v8 juice', 'v8 splash', 'v8 energy', 'v8 energy drink', 'v8 energy juice', 'v8 energy smoothie', 'v8 energy shake',
    ],
    'entree': [
        'sloppy joe', 'burritos', 'salad', 'wrap', 'bowl', 'sandwich', 'burger', 'pizza', 'mac and cheese', 'ziti', 'sub', 'taco', 'baked chicken',
        'chicken and rice', 'chicken breast', 'chicken thigh', 'chicken wing', 'chicken leg', 'chicken biscuit', 'chicken chorizo', 'chicken nuggets',
        'omelet', 'egg salad', 'baked potato', 'salisbury steak', 'pot pie', 'ravioli', 'taco salad', 'chipotle', 'deluxe cheeseburger macaroni',
        'cheeseburger', 'roast beef', 'baked cheetos', 'totinos pizza', 'lean cuisine', 'lasagna', 'vegetable lasagna', 'stuffing', 'brunswick stew', 'soup'
    ],
    'snack': [
        'fries', 'chips', 'chip', 'trail mix', 'popcorn', 'cookie', 'cookies', 'crackers', 'granola', 'bar', 'jerky', 'muffin', 'nutrigrain', 'fritos',
        'chex mix', 'tootsie', 'oreo', 'fig newton', 'fig bar', 'fruit bar', 'biscotti', 'biscott', 'snickers', 'baby ruth', 'ritz', 'cheetos', 'pistachios',
        'multigrain', 'blue bunny', 'fruit bars', 'string beans', 'grits', 'grapes', 'mandarin', 'tangerine', 'banana bread', 'salsa', 'hummus', 'peanut butter',
        'peppers', 'onions', 'spinach', 'asparagus', 'cabbage', 'rice', 'beans', 'black beans', 'green bean', 'green beans', 'squash', 'corn flakes', 'cornflakes',
        'frosted flakes', 'cereal', 'pita', 'pita bread', 'babybel', 'cheese stick', 'cheese', 'creamers', 'creamer', 'sugar', 'm&ms', 'fruit smoothie',
        'pb protein whey powder', 'blueberries', 'strawberries', 'lemon loaf', 'lemon risotto', 'apple', 'peach', 'orange', 'navel orange', 'townhouse cracker',
        'mini croissants', 'pecan twirl', 'donut', 'hershey kiss', 'pretzel rod', 'mixed nuts', 'cashew nut', 'walnut', 'pecans', 'almonds', 'toffee', 'lifesavers',
        'lifesaver', 'andes creme de menthe', 'sathers caramel creams', 'goetzes caramel cremes', 'peanut m & m', 'm&m\'s', 'sweetner', 'equal', 'stevia', 'honey',
        'raisins', 'gin soaked raisins', 'angel food candy', 'frosted flake', 'sweet potato', 'small sweet potato', 'baked sweet potato', 'thin mints', 'thin mint',
        'mozarrella sticks', 'grape', 'tatziki', 'candy', 'kashi', 'peanut', 'clementine', 'clementines', 'pear', 'grapefruit', 'grapefruits', 'kiwi',
        'kiwis', 'fruit', 'berries', 'berry', 'fruit salad', 'fruit cup', 'fruit cocktail', 'fruit snacks', 'fruit roll up', 'fruit leather', 'fruit smoothie',
    ],
    'dessert': [
        'cake', 'ice cream', 'oreo', 'fig newton', 'fig bar', 'fruit bar', 'biscotti', 'biscott', 'snickers', 'baby ruth', 'ritz', 'cheesecake', 'blue bunny',
        'chocolate', 'tootsie', 'm&ms', 'banana bread', 'lemon loaf', 'fudge', 'cookie', 'cookies', 'brownie', 'pancake', 'waffle', 'cinnamon raisin bagel',
        'muffin', 'pop', 'frozen pop', 'fruit bars', 'nutrigrain', 'granola bar', 'pecan twirl', 'donut', 'hershey kiss', 'toffee', 'angel food candy', 'syrup'
    ],
    'vegetarian': [
        'butter', 'cheese', 'egg', 'eggs', 'milk', 'yogurt', 'pita', 'pita bread', 'cereal', 'corn flakes', 'cornflakes', 'frosted flakes', 'granola', 'bar',
        'muffin', 'nutrigrain', 'fritos', 'chex mix', 'tootsie', 'oreo', 'fig newton', 'fig bar', 'fruit bar', 'biscotti', 'biscott', 'snickers', 'baby ruth',
        'ritz', 'cheetos', 'pistachios', 'multigrain', 'blue bunny', 'fruit bars', 'string beans', 'grits', 'grapes', 'mandarin', 'tangerine', 'banana bread',
        'salsa', 'hummus', 'peanut butter', 'peppers', 'onions', 'spinach', 'asparagus', 'cabbage', 'rice', 'beans', 'black beans', 'green bean', 'green beans',
        'squash', 'apple', 'peach', 'orange', 'navel orange', 'salad', 'waffle', 'pancake', 'cake', 'ice cream', 'chocolate', 'fudge', 'cookies', 'cookie',
        'brownie', 'lemon loaf', 'lemon risotto', 'avocado', 'flaxseed', 'psyllium husks', 'cinnamon', 'extra virgin olive oil', 'olive oil', 'balsamic vinegar',
        'lettuce mix', 'tomatoes', 'beets', 'carrots', 'cucumber', 'red bell pepper', 'brussel sprouts', 'almond nut thins', 'sweet potato', 'stuffing', 'oatmeal',
        'half and half', 'half and half', 'creamer', 'creamers', 'sugar', 'sweetner', 'equal', 'stevia', 'honey', 'raisins', 'gin soaked raisins','mozzarella',
        'corn', 'tortilla', 'bean',  'smart balance'
    ],
    'vegan': [
        'fries', 'onion', 'celery', 'bread', 'potatoes', 'peas', 'rice', 'beans', 'black beans', 'green bean', 'green beans', 'squash', 'apple', 'mandarin',
        'tangerine', 'peach', 'orange', 'navel orange', 'salad', 'asparagus', 'cabbage', 'spinach', 'peppers', 'onions', 'grapes', 'banana', 'trail mix',
        'popcorn', 'pistachios', 'multigrain', 'hummus', 'avocado', 'flaxseed', 'psyllium husks', 'cinnamon', 'extra virgin olive oil', 'olive oil', 'balsamic vinegar',
        'lettuce mix', 'tomatoes', 'beets', 'carrots', 'cucumber', 'red bell pepper', 'brussel sprouts', 'sweet potato', 'faro', 'raisins', 'potato', 'olives', 'pear',
        'grape', 'lemon', 'vegetable', 'vegetables', 'vegetable soup', 'vegetable broth', 'vegetable stir fry', 'vegetable curry', 'vegetable lasagna', 'vegetable fried rice',
        'peanut', 'clementine', 'clementines', 'kiwi', 'kiwis', 'fruit', 'berries', 'berry', 'fruit salad', 'fruit cup', 'fruit cocktail', 'fruit snacks',
    ],
    'breakfast': [
        'egg', 'eggs', 'omelet', 'cereal', 'corn flakes', 'cornflakes', 'frosted flakes', 'muffin', 'pancake', 'waffle', 'biscuit', 'bacon', 'sausage', 'grits',
        'breakfast', 'trail mix', 'yogurt', 'banana bread', 'granola bar', 'nutrigrain', 'toast', 'coffee', 'milk', 'juice', 'oatmeal', 'bagel', 'plain bagel',
        'std bfast','quaker', 'tater tot'
    ],
    'lunch': [
        'burritos', 'wrap', 'sub', 'sandwich', 'bowl', 'salad', 'chicken nuggets', 'chicken wrap', 'chicken salad', 'chicken and rice', 'chicken breast',
        'chicken thigh', 'chicken wing', 'chicken leg', 'chicken biscuit', 'chicken chorizo', 'omelet', 'egg salad', 'baked potato', 'salisbury steak', 'pot pie',
        'ravioli', 'taco salad', 'chipotle', 'deluxe cheeseburger macaroni', 'cheeseburger', 'roast beef', 'pizza', 'mac and cheese', 'ziti', 'baked cheetos',
        'totinos pizza', 'lean cuisine', 'brunswick stew', 'slaw', 'ranch'
    ],
    'dinner': [
        'steak', 'ribs', 'baked chicken', 'chicken and rice', 'chicken breast', 'chicken thigh', 'chicken wing', 'chicken leg', 'chicken biscuit', 'chicken chorizo',
        'chicken nuggets', 'omelet', 'egg salad', 'baked potato', 'salisbury steak', 'pot pie', 'ravioli', 'taco salad', 'chipotle', 'deluxe cheeseburger macaroni',
        'cheeseburger', 'roast beef', 'pizza', 'mac and cheese', 'ziti', 'baked cheetos', 'totinos pizza', 'lean cuisine', 'salmon', 'shrimp', 'seafood', 'lasagna',
        'vegetable lasagna', 'stuffing', 'brunswick stew', 'soup'
    ],
    'spicy': [
        'spicy', 'sriracha', 'jalapeno', 'buffalo', 'hot', 'kimchi', 'chili'
    ],
    'healthy': [
        'salad', 'spinach', 'asparagus', 'broccoli', 'kale', 'fruit', 'berries', 'berry', 'vegan', 'vegetarian', 'grilled', 'lactose free', 'skimmed', 'oatmeal',
        'avocado', 'flaxseed', 'psyllium husks', 'cinnamon', 'extra virgin olive oil', 'olive oil', 'balsamic vinegar', 'lettuce mix', 'tomatoes', 'beets',
        'carrots', 'cucumber', 'red bell pepper', 'brussel sprouts', 'sweet potato', 'faro', 'beet'
    ],
    'fast food': [
        'arby', 'mcdonald', 'wendy', 'burger king', 'subway', 'taco bell', 'chick-fil-a', 'jimmy dean', 'powerade', 'gatorade', 'fritos', 'cheetos', 'totinos',
        'lean cuisine', 'hamburger helper', 'red baron', 'outback steakhouse', 'trader joe', 'smoothie king', 'slim fast', 'ensure plus'
    ],
    'supplement': [
        'protein', 'whey', 'creatine', 'bcaa', 'amino', 'pre workout', 'post workout', 'casein', 'glutamine', 'l-carnitine', 'l-arginine', 'l-tyrosine',
        'mct oil', 'collagen', 'vitamin', 'mineral', 'fish oil', 'omega-3', 'probiotic', 'greens', 'fiber', 'electrolyte', 'meal replacement', 'oil'
    ],
    'medication': [
        'insulin', 'metformin', 'glipizide', 'glyburide', 'sitagliptin', 'linagliptin', 'canagliflozin', 'dapagliflozin', 'empagliflozin', 'liraglutide',
        'ibuprofen', 'aspirin', 'naproxen', 'acetaminophen', 'hydrochlorothiazide', 'lisinopril', 'atorvastatin', 'simvastatin', 'rosuvastatin', 'pravastatin'
    ]
}

def get_tags(logged_food, searched_food):
    tags = set()
    food_str = f"{logged_food or ''} {searched_food or ''}".lower()
    # Tag assignment
    for tag, keywords in tag_keywords.items():
        for kw in keywords:
            if kw in food_str:
                tags.add(tag)
                break
    # Special rules
    # If it's a drink, don't tag as entree/snack/dessert
    if 'drink' in tags:
        tags -= {'entree', 'snack', 'dessert'}
    # If it's a dessert, don't tag as snack
    if 'dessert' in tags:
        tags -= {'snack'}
    # If Vegetarian, remove vegan
    if 'vegetarian' in tags and 'vegan' in tags:
        tags.remove('vegan')
    # If it's vegan, it's also vegetarian
    if 'vegan' in tags:
        tags.add('vegetarian')
    # If it's meat, it's not vegan/vegetarian
    if 'meat' in tags:
        tags -= {'vegan', 'vegetarian'}
    # If it's seafood, it's not meat
    if 'seafood' in tags:
        tags -= {'meat'}
    # If it's breakfast, also consider as snack if not entree
    if 'breakfast' in tags and 'entree' not in tags:
        tags.add('snack')
    return sorted(tags)

# Process each entry
for entry in data:
    logged_food = entry.get('logged_food', '')
    searched_food = entry.get('searched_food', '')
    # Only process if there is a logged food
    if logged_food or searched_food:
        entry['tags'] = get_tags(logged_food, searched_food)
        if not entry['tags']:
            print(logged_food, searched_food)
    else:
        entry['tags'] = []
        print(f"missed logs {entry}")

# Save or print the updated data
with open(output_file_path, 'w', encoding='utf-8') as f:
    json.dump(data, f, indent=4)

Smart Balance Smart Balance


In [41]:
import pandas as pd
import json
import numpy as np

output_dir = './Output/jsons/'

input_file_path = os.path.join(output_dir, 'food_log_tagged.json')
output_file_path = os.path.join(output_dir, 'food_log_tagged_grouped.json')

# Load the data
with open(input_file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Remove empty dicts (some are at the end of your file)
data = [row for row in data if row and isinstance(row, dict)]

# Convert to DataFrame
df = pd.DataFrame(data)

# Drop unwanted columns
drop_cols = ['date', 'time', 'time_begin', 'time_end', 'amount', 'unit']
df = df.drop(columns=[col for col in drop_cols if col in df.columns])

# Fill NaN tags with empty list
df['tags'] = df['tags'].apply(lambda x: x if isinstance(x, list) else [])


def apply_functions(x: pd.DataFrame):
    return_series = pd.Series()

    # sum the columns calorie total_carb dietary_fiber sugar protein total_fat
    numeric_cols = ['calorie', 'total_carb', 'dietary_fiber', 'sugar', 'protein', 'total_fat']
    for col in numeric_cols:
        return_series[col] = x[col].sum()

    # replace NaN with "" in x['logged_food']
    x['logged_food'] = x['logged_food'].replace(np.nan, '', regex=True)
    x['searched_food'] = x['searched_food'].replace(np.nan, '', regex=True)

    # combine the food names
    return_series['logged_food'] = ' and '.join(x['logged_food'])
    return_series['searched_food'] = ' and '.join(x['searched_food'])

    # combine the tags
    return_series['tags'] = [item for sublist in x['tags'] for item in sublist]

    # remove duplicates from tags
    return_series['tags'] = list(set(return_series['tags']))

    return return_series

# Group by datetime and ID
grouped = df.groupby(['datetime', 'ID'], as_index=False).apply(apply_functions, include_groups=False)
grouped = grouped.reset_index(drop=True)

# Write to JSON
grouped.to_json(output_file_path, orient='records', indent=2)

In [16]:
import pandas as pd
import json
import numpy as np

output_dir = './Output/jsons/'

input_file_path = os.path.join(output_dir, 'food_log_tagged.json')
output_file_path = os.path.join(output_dir, 'food_log_tagged_grouped.json')

# Load the data
with open(input_file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Remove empty dicts (some are at the end of your file)
data = [row for row in data if row and isinstance(row, dict)]

# Convert to DataFrame
df = pd.DataFrame(data)
# Drop unwanted columns
drop_cols = ['date', 'time', 'time_begin', 'time_end', 'amount', 'unit']
df = df.drop(columns=[col for col in drop_cols if col in df.columns])

# Fill NaN tags with empty list
df['tags'] = df['tags'].apply(lambda x: x if isinstance(x, list) else [])

df

Unnamed: 0,datetime,logged_food,searched_food,calorie,total_carb,dietary_fiber,sugar,protein,total_fat,ID,tags
0,2020-02-13T18:00:00,Berry Smoothie,Strawberry Smoothie,456.0,85.0,1.7,83.0,16.0,3.3,001,"[drink, healthy, vegan, vegetarian]"
1,2020-02-13T20:30:00,Chicken Leg,chicken leg,475.0,0.0,0.0,0.0,62.0,23.0,001,"[dinner, entree, lunch, meat]"
2,2020-02-13T20:30:00,Asparagus,Asparagus,13.0,2.5,1.2,0.8,1.4,0.1,001,"[healthy, snack, vegetarian]"
3,2020-02-14T07:10:00,Natrel Lactose Free 2 Percent,(Natrel) Lactose Free 2% Partly Skimmed Milk,120.0,9.0,,8.0,12.0,,001,"[breakfast, drink, healthy, snack, vegetarian]"
4,2020-02-14T07:10:00,Standard Breakfast,"(Kellogg's) Frosted Flakes, Cereal",110.0,26.0,,10.0,1.0,,001,"[breakfast, snack, vegetarian]"
...,...,...,...,...,...,...,...,...,...,...,...
1359,2020-02-26T18:30:00,Lemonade,,99.0,26.0,0.0,25.0,0.2,0.1,016,"[drink, vegan, vegetarian]"
1360,2020-02-27T10:30:00,Standard breakfast,,280.0,56.5,1.0,24.0,8.0,2.5,016,"[breakfast, snack]"
1361,2020-02-27T11:30:00,Plain cheese pizza,,452.0,57.0,3.9,6.1,19.0,16.0,016,"[dinner, entree, lunch, snack, vegetarian]"
1362,2020-02-27T11:30:00,cooked black eyed peas,,198.0,35.0,11.0,5.6,13.0,0.9,016,"[vegan, vegetarian]"


In [14]:
type(df['logged_food'][0])


str

In [15]:
str(None)

'None'

In [32]:
# make sample dataframe to test pd.grouby.apply
df = pd.DataFrame({
    'datetime': ['2023-10-01T12:00:00', '2023-10-01T12:00:00', '2023-10-01T13:00:00'],
    'ID': [1, 1, 2],
    'logged_food': ['apple', 'banana', 'carrot'],
    'searched_food': ['apple pie', 'banana split', 'carrot cake'],
    'tags': [['fruit'], ['happy'], ['vegetable']],
    'calorie': [100, 300, 300],
    'total_carb': [20, 30, 40],
})

df.head()


Unnamed: 0,datetime,ID,logged_food,searched_food,tags,calorie,total_carb
0,2023-10-01T12:00:00,1,apple,apple pie,[fruit],100,20
1,2023-10-01T12:00:00,1,banana,banana split,[happy],300,30
2,2023-10-01T13:00:00,2,carrot,carrot cake,[vegetable],300,40


In [None]:
#make function to add numeric tags and append tags to one lisgt and combine food name using groupby.apply
def function_apply (x):
    return_series = pd.Series()

    # sum the columns calorie total_carb dietary_fiber sugar protein total_fat
    numeric_cols = ['calorie', 'total_carb']
    for col in numeric_cols:
        return_series[col] = x[col].sum()

    # combine the food names
    return_series['logged_food'] = ' and '.join(x['logged_food'])
    return_series['searched_food'] = ' and '.join(x['searched_food'])
    print([item for sublist in x['tags'] for item in sublist])

    # combine the tags
    return_series['tags'] = [item for sublist in x['tags'] for item in sublist]

    return return_series

# Group by datetime and ID
grouped = df.groupby(['datetime', 'ID'], as_index=False).apply(function_apply, include_groups=False)
grouped = grouped.reset_index(drop=True)
grouped.head()

['fruit', 'happy']
['vegetable']


Unnamed: 0,datetime,ID,calorie,total_carb,logged_food,searched_food,tags
0,2023-10-01T12:00:00,1,400,50,apple and banana,apple pie and banana split,"[fruit, happy]"
1,2023-10-01T13:00:00,2,300,40,carrot,carrot cake,[vegetable]
