In [2]:
import os
import pandas as pd
from PIL import Image
import re
import json
import google.generativeai as genai


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def get_date_taken_exif(path):
    exif = Image.open(path).getexif()
    if not exif:
        raise Exception(f"Image {path} doesn't have EXIF data ")

def get_date_taken_name(path):
    dt = re.search(r"(\d{8})", path)
    if dt: # Check if a match was found
        extracted_date = dt.group(1)  # Get the captured group (the digits)
        return extracted_date

    else: return None
    print(extracted_date)

def img_details(filepath):
    img = Image.open(filepath)
    # width, height= img.size
    img_nm = filepath
    img_size = os.path.getsize(filepath)
    img_dt =get_date_taken_name(filepath)
    return img_nm, img_size, img_dt

# get_date_taken_name('Food/image(30).jpg')

In [None]:
# Load the Gemini Pro Vision model
model = genai.GenerativeModel('gemini-1.5-flash')

# Access the API key from the environment variable
def configure_api_key():
    """Prompts the user for the Gemini API key and configures it."""
    api_key = input("Please enter your Gemini API key: ")

    if api_key:
        os.environ["GEMINI_API_KEY"] = api_key  # Set the environment variable
        genai.configure(api_key=api_key)
        print("Gemini API key configured for this session.")
        return True
    else:
        print("Error: No API key provided.")
        return False

# Call the configuration function
if configure_api_key():
    print("Success")
else:
    exit()

Error: No API key provided.


: 

In [4]:
def analyze_food_image(image_path,model):
    """
    Analyzes a food image using the Gemini Pro Vision model and generates features
    based on a detailed prompt.

    Args:
        image_path (str): The path to the image file.

    Returns:
        str: The generated text response from the Gemini model.
    """
    try:
        img_nm, img_size, img_dt=img_details(image_path)
        img = Image.open(image_path)

        prompt = f"""
        You are a food analyst and you have collected a bunch of pictures on which you want to collect food intake patterns for:
        Understand Dietary Patterns and Habits: Identify and analyze dietary patterns, habits, and trends from the food image data to gain insights into eating behaviors
        Explore Factors Influencing Food Choices: Investigate factors influencing food choices, such as cuisine type, meal location, mood, and dietary restrictions
        Explore Dietary Diversity
        Investigate the Impact of Food Choices on Mood

        Goal: You want to create a dataframe for all images with each row for one image and corresponding features as columns in the dataframe with columns e.g. image_name, image_date(use it from image name or null), image_size, features e.g. Cuisine, salt, sugar etc.

        Analyze the following food image and generate the following features, adhering strictly to the specified possible values:

        *   Cuisine: (Description: The likely cuisine or culinary style of the dish. Possible Values: Thai, Indian, Italian, Mexican, American, Fusion, Combination, Unknown)
        *   Happiness_Level (1-5): (Description: A subjective rating of how appealing or satisfying the meal appears. Possible Values: Integer scale from 1 (lowest) to 5 (highest))
        *   Meal_Course: (Description: The part of the meal represented by the dish. Possible Values: Starter, Main Course, Side Dish, Dessert, Snack, Drink, Unknown)
        *   Sugar (High/Medium/Low): (Description: A relative indication of the sugar content of the meal. Possible Values: High, Medium, Low, Unknown). If it is a curry then sugar is low.
        *   Salt (High/Medium/Low): (Description: A relative indication of the salt content of the meal. Possible Values: High, Medium, Low, Unknown). If it is a dessert then salt is low, if its curry then medium
        *   Healthy: (Description: A subjective assessment of the overall healthfulness of the meal. Possible Values: Yes, No, Unknown, healthy, unhealthy, balanced, carb-heavy, protein-rich)
        *   Processing_level: (Description: Subjective assessment of how processed the food is. Possible Values: Unprocessed, Minimally Processed, Processed, Highly Processed)
        *   Preparation_Method: (Description: How the food was prepared. Possible Values: Fried, Baked, Grilled, Steamed, Raw)
        *   Dominant_Color: (Description: The most prominent color in the dish. Possible Values: Red, Green, Yellow, Brown, White, etc.)
        *   Food_Diversity: (Description: A measure of how many different types of food are present in the meal. Possible Values: High, Medium, Low)

        Provide your response in a clear, concise format, listing each feature and its value in a dictionary format with key columns as [name:{img_nm}, size:{img_size}, date:{img_dt},Cuisine,Happiness_Level,Meal_Course,Sugar, Healthy, Processing_level,Preparation_Method,  Dominant_Color, Food_Diversity] 
        """

        response = model.generate_content([prompt, img])
        # return response.text
        response_text = response.text
        # Extract the dictionary string using regex
        match = re.search(r'\{(.+)\}', response_text, re.DOTALL)

        if match:
          dict_string = match.group(0)
          # Clean the dictionary string
          # 1. Replace keys with double quotes
          cleaned_dict_string = re.sub(r"'(\w+)':", r'"\1":', dict_string)
          # 2. Replace values with double quotes if are string
          cleaned_dict_string = re.sub(r":\s*'([^']*)'", r':"\1"', cleaned_dict_string)
          # 3. Replace None to null
          cleaned_dict_string = cleaned_dict_string.replace("None", "null")
          # 4. Convert date to string before loading
          cleaned_dict_string = re.sub(r'"date":\s*(\d+)', r'"date": "\1"', cleaned_dict_string)
          # 5. Remove comments
          cleaned_dict_string = re.sub(r'\s*#.*', '', cleaned_dict_string)
          try:
            data_dict = json.loads(cleaned_dict_string)
            data_dict["name"]=img_nm
            data_dict["size"]=img_size
            data_dict["date"]=img_dt
            return data_dict
          except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e} in:\n{cleaned_dict_string}")
            return None
        else:
            print(f"Error: Could not extract dictionary from: {response_text}")
            return None
    except Exception as e:
        print(f"An unexpected error ocurred: {e}")
        return None

#### LLM API calls on image folder

In [19]:
def img_llm_gen(path):
    for filename in os.listdir(path):
        img_data =[]
        if filename.endswith(('.png','.jpg','.jpeg')):
            filepath = os.path.join(path,filename )
            analysis_result = analyze_food_image(filepath)
            img_data.append(analysis_result)
    return img_data


def analyze_all_images_in_folder(folder_path, model):
    """ Analyzes all images in a folder and returns a list of dictionaries."""
    all_results = []
    if not os.path.exists(folder_path):
      print(f"Error: {folder_path} does not exist.")
      return None
    if not os.path.isdir(folder_path):
      print(f"Error: {folder_path} is not a directory.")
      return None
    image_files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f)) and f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    

    if not image_files:
        print(f"No image files found in {folder_path}")
        return None

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        print(f"Processing: {image_path}")
        result = analyze_food_image(image_path, model)
        if result:
            all_results.append(result)
        else:
            print(f"Skipping {image_path} due to error.")
    return all_results

def write_to_file(img_data, filenm):
    """ Writes a list of dictionaries to a file, one dictionary per line, in JSON format."""
    try:
        with open(filenm, "w", encoding="utf-8") as f:
            for data_dict in img_data:
                data_dict_to_dump = {k: 'null' if v is None else v for k, v in data_dict.items()}
                json.dump(data_dict, f)  # Write the dictionary as JSON
                f.write('\n')  # Add a newline after each dictionary
        print(f"Results saved to {filenm}")
    except Exception as e:
        print(f"Error saving to file: {e}")

def read_jsonl_to_dataframe(filepath):
    """ Reads a JSON Lines file into a Pandas DataFrame."""
    try:
        df = pd.read_json(filepath, lines=True, convert_dates=False)
        return df
    except Exception as e:
        print(f"Error reading jsonl to dataframe: {e}")
        return None


### Sample call to LLM

In [None]:
image_path = "Food/PXL_20250310_172614757.jpg" #"Food/PXL_20250321_101158506.jpg" #Food/PXL_20240508_100847853.jpg" #"Food/PXL_20230114_131406857.jpg"  # Replace with your image path
output_filename = "output_t.json"


analysis_result = analyze_food_image(image_path,model)
dum =[]
if analysis_result:
    dum.append(analysis_result)
    # print(analysis_result)
    write_to_file(dum, output_filename)
    df = read_jsonl_to_dataframe(output_filename)
df


Results saved to output_t.json


Unnamed: 0,name,size,date,Cuisine,Happiness_Level,Meal_Course,Sugar,Salt,Healthy,Processing_level,Preparation_Method,Dominant_Color,Food_Diversity
0,Food/PXL_20250310_172614757.jpg,2177968,20250310,Fusion,4,Main Course,Low,Medium,healthy,Minimally Processed,Steamed,Orange,Medium


In [None]:
img_folder='Food'
output_filename = "llm_output.json"

# Commented to avoid re-execution
# all_results =analyze_all_images_in_folder(img_folder, model)
# write_to_file(all_results, output_filename)

df = read_jsonl_to_dataframe(output_filename)
df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
df


# Unused code

#### Write file with LLM results

Avoid calling APIs again and again as Env variables are lost with environment restart

In [2]:
def write_to_file(img_data,filenm):
    try:
        with open(filenm, "w") as f:  # Open the file in write mode ("w")
            for line in img_data:
                f.write(line)  # Write each line to the file, 
                print("Results saved to output.txt")
    except Exception as e:
        print(f"Error saving to file: {e}")
        
def read_from_file(filenm):
    try:
        img_data =[]
        with open(filenm, "r") as f:
            # img_data = f.read
            ln = f.readlines()
            ln_n = [line.strip() for line in ln]
            ln_f = ''.join(ln_n)
            img_data.append(ln_f)
            print("Results read from output.txt")
            return img_data
    except FileNotFoundError:
        print(f"Error: File not found at {filenm}")
        return None
    
def extract_and_clean_dictionaries(img_data):
    """
    Extracts, cleans, and parses dictionaries from a list of strings,
    Returns:A list of dictionaries, or an empty list if no dictionaries are found.
    """
    all_dictionaries = []
    for input_string in img_data:
        # 1. Remove print statements
        input_string = re.sub(r"print\s*\([^\)]*\)", "", input_string)
        input_string = input_string.strip() # Removes whitespace


        # 2. Find the dictionary with a variable name (food_data = or food_image_features = )
        match = re.search(r'(?:[\w_]+\s*=\s*)?({.*?})', input_string, re.DOTALL)

        if match:
            dict_string = match.group(1)
            # Clean the dictionary string
            # 1. Remove comments
            cleaned_dict_string = re.sub(r'#.*', '', dict_string)
            # 2. Replace None to null
            cleaned_dict_string = cleaned_dict_string.replace("None", "null")
            # 3. Replace keys with double quotes
            cleaned_dict_string = re.sub(r"'(\w+)':", r'"\1":', cleaned_dict_string)
            # 4. Replace values with double quotes if are string
            cleaned_dict_string = re.sub(r":\s*'([^']*)'", r':"\1"', cleaned_dict_string)
            # 5. Remove spaces at the begining and end
            cleaned_dict_string = cleaned_dict_string.strip()

            
            try:
                data_dict = json.loads(cleaned_dict_string) # Parse JSON
                all_dictionaries.append(data_dict)
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON after cleaning: {e} in:\n{cleaned_dict_string}")
        else:
           print(f"No dictionary found in: {input_string}")

    return all_dictionaries


def create_dataframe_and_file(dictionaries, filename="output.csv"):
    if not dictionaries:
        print("No dictionaries to create a DataFrame.")
        return

    try:
        df = pd.DataFrame(dictionaries)
        df.to_csv(filename, index=False)
        print(f"DataFrame created and saved to {filename}")
    except Exception as e:
        print(f"Error creating DataFrame or saving to file: {e}")

filenm = 'output.txt'
extracted_dictionaries = extract_and_clean_dictionaries(img_data)

NameError: name 'img_data' is not defined

#### Failed data cleaning Experiments

In [None]:
txt_str = "".join(img_data)
food_item = txt_str.split("}")
food_item[0:2]
test_obj = food_item[0]
test_obj_v1 =test_obj.split("\n")
# test_obj_v1[0:5]
test_obj_v2 =[item for item in test_obj_v1 if ":" in item]
# test_obj_v2

def clean_data_str(str):
    # return re.findall(r'.*(\'.*\'.*\'.*\').*',str)
    return re.findall(r'.*(\'.*\'.*\'.*\').*',str)[0].split(":")
clean_data_str(test_obj_v2[0])

def clean_data_obj(obj):
    test_obj_v1 =obj.split("\n")
    food_list = []
    for item in test_obj_v1:
        if ":" in item:
            food_dict = {}
            for i in item:
                k,v = re.findall(r'.*(\'.*\'.*\'.*\').*',str)[0].split(":")
                food_dict[k] = v
        food_list.append(food_dict)
    return food_list

clean_data_str(test_obj_v2)

NameError: name 'img_data' is not defined

In [None]:
i=0
for item in img_data:
    text = item
    print(text)
    # text = text.replace("```json", "").replace("```", "")
    text = re.sub(r"^[^\{]*\{", "{", text)
    text = text.replace('\n', '').replace("```", "").replace(' ', '').strip()
    # text = json.load(text)
    print(text)

write_to_file(text, 'output_clean.txt')