## Food LLM annotation

In [6]:
import pandas as pd
import os
import base64
import re
import numpy as np
import ollama

# Silence pandas' chained-assignment warning (SettingWithCopyWarning).
# Note: this hides the warning globally for the whole notebook session.
pd.options.mode.chained_assignment = None  # default='warn'


In [7]:
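# Manual data curation applied to the picture folders before running this notebook:
# - Picture 008 was removed for patients 004, 008, 009, 010 and 016 (duplicate).
# - Picture 021 was removed for patient 001 (duplicate).
# - Picture 003 was removed for patient 008.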

In [8]:
# Prompt sent to the vision model together with each meal picture.
# The "<Label> (g): <whole number>" lines it asks for are exactly the labels that
# parse_nutritional_info searches for in the reply, so the label wording here and
# the parser's pattern must be kept in sync.
macronutrients_instruction = '''Examine the provided meal image to analyze and estimate its nutritional content accurately. Focus on determining the amounts of simple sugars (like industrial sugar and honey), 
complex sugars (such as starch and whole grains), proteins, fats, and dietary fibers (found in fruits and vegetables), all in grams. Also estimate the total weight of the meal in grams.
To assist in accurately gauging the scale of the meal, a 1 Swiss Franc coin, which has a diameter of 23.22 mm, may be present in the picture. 
Use the size of this coin as a reference to estimate the size of the meal and the amounts of the nutrients more precisely. 
Provide your assessment of each nutritional component in grams. All estimates should be given as a single whole number. If there is no coin in the picture or the meal is covered partially, estimate anyways.
Format your response as follows:
- Simple sugars (g): 
- Complex sugars (g): 
- Proteins (g): 
- Fats (g): 
- Dietary fibers (g): 
- Weight (g): 
- Explanation: 

Example response:
Simple sugars (g): 40
Complex sugars (g): 60
Proteins (g): 25
Fats (g): 30
Dietary fibers (g): 5 
Weight (g): 750
Explanation: The pizza and cola meal, with its refined crust and toppings, is rich in carbs, fats, and proteins. The cola boosts the meal's simple sugars. 
The 1 Swiss Franc coin helps estimate the pizza at 30 cm diameter and the cola at 330 ml, indicating a significant blood sugar impact.'''

In [9]:
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 text string.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file contents, decoded to a ``str`` via UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
  
def parse_nutritional_info(text):
    """Extract the six nutrient estimates from a model reply.

    The reply is expected to contain lines such as ``Proteins (g): 25``.
    Matching is deliberately tolerant of the formatting noise chat models tend
    to add, so that a usable answer is not thrown away:

    * any capitalisation of the label (``proteins (g)``),
    * Markdown emphasis around the label or the value (``**Proteins (g):** 25``),
    * extra whitespace around the colon,
    * an approximation marker before the number (``~25``, ``about 25``).

    Args:
        text: Raw reply text. ``None`` or an empty string is treated as a reply
            that contains no values.

    Returns:
        A tuple ``(simple_sugars, complex_sugars, proteins, fats,
        dietary_fibers, weight)`` of ints. A field that is not found is 0.
        If a field appears more than once, the last occurrence wins. Only the
        leading run of digits is read, so ``12.5`` yields 12.
    """
    if not text:
        return 0, 0, 0, 0, 0, 0

    # One pattern per field, in the order of the returned tuple.
    label_patterns = (
        r'Simple\s+sugars\s*\(g\)',
        r'Complex\s+sugars\s*\(g\)',
        r'Proteins\s*\(g\)',
        r'Fats\s*\(g\)',
        r'Dietary\s+fibers\s*\(g\)',
        r'Weight\s*\(g\)',
    )
    # What may sit between a label and its number: optional Markdown emphasis
    # on either side of the colon, then an optional approximation marker.
    value_pattern = (
        r'\s*[*_]*\s*:\s*[*_]*\s*'
        r'(?:~|approximately|approx\.?|about)?\s*(\d+)'
    )

    values = []
    for label_pattern in label_patterns:
        found = re.findall(label_pattern + value_pattern, text, flags=re.IGNORECASE)
        # Last occurrence wins, matching the behaviour of the earlier dict-based parser.
        values.append(int(found[-1]) if found else 0)
    return tuple(values)

In [13]:
# Annotate every meal picture of each patient with the vision model and write
# one CSV per patient. A patient's CSV is written only if every picture was
# found on disk and annotated with at least one non-zero value.
DATA_ROOT = 'diabetes_subset_pictures-glucose-food-insulin'
OUTPUT_DIR = 'food_data/llama'
MODEL_NAME = 'llama3.2-vision'
# Upper bound on model calls per picture, so a picture the model never manages
# to annotate cannot stall the notebook forever.
MAX_ATTEMPTS = 10
NUTRIENT_COLUMNS = ['simple_sugars', 'complex_sugars', 'proteins', 'fats', 'dietary_fibers', 'weight']

os.makedirs(OUTPUT_DIR, exist_ok=True)
patients = ['001', '002', '004', '006', '007', '008']

# Loop through each patient
for patient in patients:
    print(f"Processing patient {patient}")
    food_data = pd.read_csv(f'{DATA_ROOT}/{patient}/food.csv')
    # Explicit copy so the column assignments below act on an independent frame
    # rather than on a slice of the frame returned by read_csv.
    food_data = food_data[['picture', 'datetime']].copy()
    # Initialize nutrient columns
    food_data[NUTRIENT_COLUMNS] = 0.0
    # Initialize message column
    food_data['message'] = ''

    all_successful = True  # Flag to track if all images were annotated successfully

    for i, row in food_data.iterrows():
        image_path = f"{DATA_ROOT}/{patient}/food_pictures/{row['picture']}"
        if not os.path.exists(image_path):
            print(f"Image {row['picture']} missing for patient {patient}")
            all_successful = False
            break  # Stop processing this patient if an image is missing

        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                # The image is handed to the client as a file path, so no
                # manual base64 encoding is needed here.
                res = ollama.chat(
                    model=MODEL_NAME,
                    messages=[
                        {
                            'role': 'user',
                            'content': macronutrients_instruction,
                            'images': [image_path]
                        }
                    ]
                )
                message = res['message']['content']
                parsed_info = parse_nutritional_info(message)
            except Exception as e:
                # A failed model call or malformed reply counts as one used attempt.
                print(f"Error processing picture {row['picture']} for patient {patient}: {e}. Retrying...")
                continue

            if sum(parsed_info) > 0:
                food_data.loc[i, NUTRIENT_COLUMNS] = list(parsed_info)
                food_data.loc[i, 'message'] = message
                break
            print(f"Picture {row['picture']} for patient {patient} could not be annotated - no nutritional values found. Retrying...")
        else:
            # Every attempt failed: treat it like a missing image and give up on this patient.
            print(f"Picture {row['picture']} for patient {patient} could not be annotated after {MAX_ATTEMPTS} attempts.")
            all_successful = False
            break

    if all_successful:
        # Save the dataframe if all images were annotated successfully
        food_data.to_csv(f'{OUTPUT_DIR}/{patient}.csv', index=False)
        print(f"Patient {patient}: Data saved successfully.")
    else:
        print(f"Patient {patient}: Not all images were annotated successfully. Skipping saving data.")

Processing patient 001
Picture 002.jpg for patient 001 could not be annotated - no nutritional values found. Retrying...
Picture 002.jpg for patient 001 could not be annotated - no nutritional values found. Retrying...
Picture 002.jpg for patient 001 could not be annotated - no nutritional values found. Retrying...
Picture 008.jpg for patient 001 could not be annotated - no nutritional values found. Retrying...
Picture 008.jpg for patient 001 could not be annotated - no nutritional values found. Retrying...
Picture 009.jpg for patient 001 could not be annotated - no nutritional values found. Retrying...
Picture 009.jpg for patient 001 could not be annotated - no nutritional values found. Retrying...
Picture 010.jpg for patient 001 could not be annotated - no nutritional values found. Retrying...
Picture 010.jpg for patient 001 could not be annotated - no nutritional values found. Retrying...
Picture 010.jpg for patient 001 could not be annotated - no nutritional values found. Retrying.