In [3]:
from PIL import Image
from google import genai
from dotenv import load_dotenv
import os
import json
import re

# Populate os.environ from a local .env file (if present) so that
# GEMINI_API_KEY, which generate_response reads, does not have to be hard-coded.
load_dotenv()

def generate_response(image_path, model="gemini-2.5-pro"):
    """Send an image to Gemini and return the text it extracts as raw model output.

    Args:
        image_path: Path of the image file to analyse.
        model: Name of the Gemini model to query. Defaults to the model the
            notebook originally hard-coded.

    Returns:
        The ``text`` attribute of the model response. The prompt asks for JSON,
        but the value is returned unparsed (it may be wrapped in a Markdown
        code fence).

    Raises:
        KeyError: If the ``GEMINI_API_KEY`` environment variable is not set.
    """
    client = genai.Client(api_key=os.environ['GEMINI_API_KEY'])

    prompt = '''
Extract all the text from the picture and return it in a JSON format, using English keys.
Example of desired output:

{
    "name": "...",
    "position": "...",
    "email": "...",
    "company": "...",
    "linkedIn": "...",
    "studyProgram": "...",
    "photo": "..."
}

Translate the field names to English. You don't need to translate the values, just the field names.
'''
    # Open the image in a context manager so the underlying file handle is
    # released after each call (this function runs once per file in a batch).
    # The request stays inside the block because PIL reads pixel data lazily,
    # so the file must still be open when the image is serialised.
    with Image.open(image_path) as image:
        response = client.models.generate_content(
            model=model,
            contents=[image, prompt],
        )
    return response.text

def convert_to_dict(json_file):
    """Parse the JSON payload contained in a model response string.

    The text is parsed as-is when it is already valid JSON. Otherwise the
    function looks for a Markdown code fence (with or without a language tag,
    LF or CRLF line endings) and parses its contents, and as a last resort
    parses the span from the first ``{`` to the last ``}``.

    Args:
        json_file: Response text expected to contain a JSON document.

    Returns:
        The decoded JSON value (a dict for the output the prompt asks for).

    Raises:
        TypeError: If ``json_file`` is not a string (e.g. an empty response).
        json.JSONDecodeError: If no parseable JSON document can be found.
    """
    if not isinstance(json_file, str):
        raise TypeError(
            f"expected the response text as str, got {type(json_file).__name__}"
        )

    text = json_file.strip()

    # Fast path: the model returned bare JSON.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Models usually wrap JSON in a fence, sometimes with prose around it.
    fenced = re.search(
        r'```(?:json)?\s*(.*?)\s*```', text, flags=re.DOTALL | re.IGNORECASE
    )
    candidate = fenced.group(1) if fenced else text

    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        # Last resort: take the outermost {...} span and ignore stray text.
        start = candidate.find('{')
        end = candidate.rfind('}')
        if start == -1 or end <= start:
            raise
        return json.loads(candidate[start:end + 1])

In [None]:
# Batch-extract structured data from every image in the folder.
folder_path = 'images/session'

# os.listdir returns entries in arbitrary order; sort so the processing order
# (and therefore the order of all_data) is the same on every run.
image_files = sorted(
    f for f in os.listdir(folder_path)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)

print(f"Total imágenes encontradas: {len(image_files)}")

# One parsed dict per successfully processed image.
all_data = []

for img_file in image_files:
    image_path = os.path.join(folder_path, img_file)
    print(f"\nProcesando {image_path}...")
    try:
        gemini_output = generate_response(image_path)
        data_dict = convert_to_dict(gemini_output)
        all_data.append(data_dict)
        print(f"✅ Procesado {img_file}: {data_dict}")
    except Exception as e:
        # Best effort: report the failure and continue with the next image so
        # one bad file or API error does not abort the whole batch.
        print(f"❌ Error procesando {img_file}: {e}")

print("\n📊 Datos finales extraídos:")
print(all_data)

Total imágenes encontradas: 5

Procesando images/actividades\investors_day.png...
✅ Procesado investors_day.png: {'event_name': 'Investors Day', 'date': 'Diciembre 2025', 'time': 'TBD', 'location': 'Sala Multiusos, La Harinera', 'description': 'TBD', 'attendance': 'Empty', 'format': 'Presencial'}

Procesando images/actividades\pitch_showcase.png...
✅ Procesado pitch_showcase.png: {'name': 'Pitch Showcase', 'date': 'Julio 2025', 'schedule': 'TBD', 'location': 'Sala Multiusos, La Harinera', 'description': 'Mentores, formadores y agentes del ecosistema que se dividirán en comités de feedback para hacer sesiones de pitches simultáneas que puedan dar una primera valoración del pitch de cada startup.', 'attendance': 'Privado', 'format': 'Empty'}

Procesando images/actividades\reverse_pitches.png...
✅ Procesado reverse_pitches.png: {'title': 'Reverse Pitches', 'date': 'Septiembre 2025', 'schedule': 'TBD', 'location': 'Sala Multiusos, La Harinera', 'description': 'En este evento, los participa