In [None]:
# /// script
# dependencies = [
#   "python-dotenv",
#   "google>=0.3.0",
#   "google-genai>=1.3.0",
#   "pillow",
#   "tqdm",
# ]
# ///

import os
from dotenv import load_dotenv
from modules.file_utils import FileUtil
from modules.ui_components import create_processing_interface
from clients.gemini_client import GeminiClient
from clients.openai_client import OpenAIClient
from clients.ollama_client import OllamaClient

# Load variables from a .env file (if one is found) into the process
# environment, so the os.getenv(...) lookups in the client cells below can
# pick up the API keys without hardcoding them in the notebook.
load_dotenv()

In [None]:
# Prompt (in Spanish) that is later registered on the active client through
# client.set_question(question). It asks for a brief Spanish summary of an
# image, at most 3-4 sentences per category, with one bullet of guidance each
# for text, maps, diagrams, logos and visual data.
# NOTE: this is a plain triple-quoted literal with no dedent applied, so the
# leading spaces on the continuation lines are part of the prompt text.
question = """Resume brevemente la imagen en español (máximo 3-4 oraciones por categoría):  
            - **Texto:** Menciona solo el título y 2-3 puntos clave si hay texto.
            - **Mapas:** Identifica la región principal y máximo 2-3 ubicaciones relevantes.
            - **Diagramas:** Resume el concepto central en 1-2 oraciones.
            - **Logos:** Identifica la entidad y sus características distintivas.
            - **Datos visuales:** Menciona solo los 2-3 valores o tendencias más importantes.
            Prioriza la información esencial sobre los detalles, manteniendo la descripción breve y directa."""

In [None]:
# Gemini backend.
# This cell binds the shared `client` name. The OpenAI and Ollama cells bind
# the same name, so whichever of these cells ran last decides the backend the
# final cell works with.
#
# Settings:
#   model              - Gemini model to use
#   max_tokens         - maximum number of tokens in the output
#   temperature        - controls the creativity of the generation
#   top_p, top_k       - sampling values for generation
#   response_mime_type - MIME type of the response
#   api_key            - taken from GEMINI_API_KEY; os.getenv yields None when
#                        the variable is unset (the key is described as
#                        optional and configurable later)
client = GeminiClient(
    model="gemini-2.0-flash",
    max_tokens=256,
    temperature=0.6,
    top_p=0.6,
    top_k=20,
    response_mime_type="text/plain",
    api_key=os.getenv("GEMINI_API_KEY"),
)

In [None]:
# OpenAI backend.
# This cell binds the shared `client` name. The Gemini and Ollama cells bind
# the same name, so whichever of these cells ran last decides the backend the
# final cell works with.
#
# Settings:
#   model       - OpenAI model to use
#   max_tokens  - maximum number of tokens in the output
#   temperature - controls the creativity of the generation
#   top_p       - top_p value for generation
#   api_key     - taken from OPENAI_API_KEY; os.getenv yields None when the
#                 variable is unset (the key is described as optional and
#                 configurable later)
client = OpenAIClient(
    model="gpt-4o",
    max_tokens=512,
    temperature=0.6,
    top_p=0.6,
    api_key=os.getenv("OPENAI_API_KEY"),
)

In [None]:
# Ollama backend.
# This cell binds the shared `client` name. The Gemini and OpenAI cells bind
# the same name, so whichever of these cells ran last decides the backend the
# final cell works with.
#
# Settings:
#   model        - Ollama model to use
#   max_tokens   - maximum number of tokens in the output
#   temperature  - controls the creativity of the generation
#   top_p, top_k - sampling values for generation
#   num_ctx      - context size for the model
#   api_key      - not used for Ollama; passed as None to keep the call shape
#                  aligned with the other clients
client = OllamaClient(
    model="gemma3:4b",
    max_tokens=512,
    temperature=0.5,
    top_p=0.5,
    top_k=20,
    num_ctx=8192,
    api_key=None,
)

In [None]:
# Default root folder given to FileUtil; per the original note, it can be
# changed afterwards from the interface.
DEFAULT_ROOT = r"../dof_markdown"

# Register the prompt on whichever client cell was executed most recently.
client.set_question(question)

# File helper bound to the default root folder and the active client.
file_util = FileUtil(
    root_directory=DEFAULT_ROOT,
    client=client,
)

# Create the processing interface (the original note says this also displays
# it) and keep the returned handle in `controls`.
controls = create_processing_interface(client, file_util)