In [7]:
import os
import glob
from PIL import Image
import google.generativeai as genai
import json

# Configure the Gemini API
genai.configure(api_key='AIzaSyDM2ks9JroT_5fG2WD4tfkHX6bzejhTKqo')

# Initialize the Gemini model
model = genai.GenerativeModel('gemini-1.5-flash')

# Folder path
folder_path = '/home/manasa/Desktop/Code/hackathon/pdf_images/June'

# Get all image files in the folder
image_files = glob.glob(os.path.join(folder_path, '*.[jJ][pP][gG]')) + \
              glob.glob(os.path.join(folder_path, '*.[pP][nN][gG]'))

# Prompt
prompt = """Analyze these grocery bill invoices. First, extract and provide the date of the invoice. Then, for each edible product listed (excluding non-food items like toothpaste), provide a comprehensive nutritional and environmental assessment based on the estimated quantity purchased. Use the following format:

{
    "invoice_date": "YYYY-MM-DD",
    "edible_products": [
        {
            "product_name": "",
            "price_paid": 0,
            "num_items": 0,
            "estimated_quantity": {"value": 0, "unit": ""},
            "serving_size": "",
            "energy": {
                "per_serving": {"value": 0, "unit": "kcal"}
            },
            "macronutrients": {
                "total_fat": {"per_serving": {"value": 0, "unit": "g"}},
                "carbohydrates": {"per_serving": {"value": 0, "unit": "g"}},
                "protein": {"per_serving": {"value": 0, "unit": "g"}}
            },
            "micronutrients": {
                "sodium": {"per_serving": {"value": 0, "unit": "mg"}},
                "key_vitamins_minerals": [
                    {"name": "", "value": 0, "unit": ""}
                ]
            },
            "fiber": {
                "total": {"per_serving": {"value": 0, "unit": "g"}},
                "soluble": {"per_serving": {"value": 0, "unit": "g"}},
                "insoluble": {"per_serving": {"value": 0, "unit": "g"}}
            },
            "allergens": [],
            "nutrient_density_score": {
                "value": 0,
                "category": ""
            },
            "glycemic_index": {
                "value": 0,
                "category": ""
            },
            "glycemic_load": 0,
            "protein_quality": {
                "score": 0,
                "method": "PDCAAS/DIAAS"
            },
            "phytonutrients": [
                {"name": "", "presence": "high/medium/low"}
            ],
            "micronutrient_density": {
                "per_100_calories": [
                    {"nutrient": "", "value": 0, "unit": ""}
                ],
                "per_100_grams": [
                    {"nutrient": "", "value": 0, "unit": ""}
                ]
            },
            "satiety_index": {
                "value": 0,
                "category": ""
            },
            "environmental_impact": {
                "water_usage": {"value": 0, "unit": "L/kg"},
                "carbon_footprint": {
                    "value": 0,
                    "unit": "kg CO2e/kg",
                    "category": ""
                },
                "land_use": {"value": 0, "unit": "m²/kg"}
            },
            "versatility_score": {
                "value": 0,
                "category": ""
            },
            "cost_nutrient_ratio": {
                "value": 0,
                "category": ""
            },
            "nutritional_summary": ""
        }
    ]
}

Estimate the quantity based on the price paid and typical market prices. Provide rough estimates for nutritional content and environmental impact based on this estimated quantity. If certain information is not applicable or available, you may omit those fields. Focus on providing a general nutritional and environmental overview rather than precise values.

For each metric:
1. Nutrient Density Score: Calculate based on vitamin, mineral, fiber, and protein content relative to calorie content. Categorize into four thresholds: "Very Low" (0-25), "Low" (26-50), "Medium" (51-75), "High" (76-100).
2. Glycemic Index: Assess these values, noting that foods like lentils and pulses typically have low values. Categorize as "Low" (0-55), "Medium" (56-69), "High" (70-100), "Very High" (>100).
3. Satiety Index: Provide an estimate of how filling the food is relative to its calorie content. Categorize as "Low" (0-25), "Medium" (26-50), "High" (51-75), "Very High" (76-100).
4. Carbon Footprint: Include metrics on carbon footprint for production. Categorize as "Low" (0-1 kg CO2e/kg), "Medium" (1-5 kg CO2e/kg), "High" (5-10 kg CO2e/kg), "Very High" (>10 kg CO2e/kg).
5. Versatility Score: Develop a score based on the food's adaptability to various dishes and cuisines. Categorize as "Low" (0-25), "Medium" (26-50), "High" (51-75), "Very High" (76-100).
6. Cost-Nutrient Ratio: Calculate the nutritional value provided per unit of cost. Categorize as "Poor" (0-25), "Fair" (26-50), "Good" (51-75), "Excellent" (76-100).

List only edible products from the invoices in this format, highlighting the nutritional benefits of foods like lentils, pulses, and other nutrient-dense options. Exclude non-food items like toothpaste from the analysis.

Additionally, provide a summary of the nutrient density distribution across all edible products in a format suitable for creating a pie chart with the four categories: Very Low, Low, Medium, and High.
"""

def process_images(image_files):
    results = []
    for i in range(0, len(image_files), 2):
        batch = image_files[i:i+2]
        images = [Image.open(image_file) for image_file in batch]
        
        response = model.generate_content([prompt] + images)
        
        results.append(response.text)
    
    return results

# Process images and get results
results = process_images(image_files)

# Function to extract JSON objects from text
def extract_json_objects(text):
    objects = []
    start = 0
    while True:
        try:
            obj = json.loads(text[start:])
            objects.append(obj)
            break
        except json.JSONDecodeError as e:
            if e.msg == 'Extra data':
                objects.append(json.loads(text[start:start+e.pos]))
                start += e.pos
            else:
                start += 1
        if start >= len(text):
            break
    return objects

# Extract JSON objects and update products and nutrient_density_distribution
products = []
nutrient_density_distribution = {
    "Very Low": 0,
    "Low": 0,
    "Medium": 0,
    "High": 0,
    "Unknown": 0  # Added for products without a category
}

for result in results:
    extracted_objects = extract_json_objects(result)
    for obj in extracted_objects:
        if isinstance(obj, dict) and "product_name" in obj:
            products.append(obj)
            if "nutrient_density_score" in obj and obj["nutrient_density_score"] is not None:
                category = obj["nutrient_density_score"].get("category", "Unknown")
                if category in nutrient_density_distribution:
                    nutrient_density_distribution[category] += 1
            else:
                nutrient_density_distribution["Unknown"] += 1

# Prepare the final data structure
final_data = {
    "products": products,
    "nutrient_density_distribution": nutrient_density_distribution,
    "raw_results": results  # Include the raw results from Gemini
}

# Save results to a JSON file
with open('grocery_analysis_results.json', 'w') as f:
    json.dump(final_data, f, indent=2)

print(f"Analysis complete. Results saved to grocery_analysis_results.json")
print(f"Total products analyzed: {len(products)}")
print("Nutrient Density Distribution:")
print(json.dumps(nutrient_density_distribution, indent=2))

Analysis complete. Results saved to grocery_analysis_results.json
Total products analyzed: 9
Nutrient Density Distribution:
{
  "Very Low": 2,
  "Low": 1,
  "Medium": 1,
  "High": 2,
  "Unknown": 0
}
