In [1]:
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
import json
import re
import os
import sys
import uuid
from langchain_core.tools import tool
# from langchain_core.pydantic_v1 import BaseModel, Field

# Make the parent of the notebook's working directory importable so the
# `app.*` imports below can be resolved from inside the notebook folder.
notebook_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(notebook_dir, '..'))
# Guarded append: re-running this cell must not stack duplicate entries on sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from app.services.databricks_service import DatabricksService
from app.services.azure_search_service import AzureSearchService
from app.services.azure_storage_service import AzureStorageService
from app.services.cosmos_db_service import CosmosDBService # Importamos el servicio de Cosmos
import asyncio

# Instantiate each service once so the cells below reuse the same configuration.
databricks_service = DatabricksService()
azure_search_service = AzureSearchService()
cosmos_db_service = CosmosDBService() # Cosmos DB service instance
storage_service = AzureStorageService()

Servicio de Databricks inicializado.
Servicio de Azure AI Search inicializado.
Servicio de Cosmos DB inicializado.
Servicio de Azure Storage inicializado.


# Pruebas de Databricks Services

In [7]:
def sync_executor(query):
    """Execute ``query`` through the shared ``databricks_service`` and return its result.

    This is a plain (non-async) wrapper, which makes it suitable as the
    callable handed to ``asyncio.to_thread``.
    """
    query_result = databricks_service.execute_query(query)
    return query_result

# Top five (CIIU, STRCIIU) groups of ods_cliente ordered by descending row count.
query = """
SELECT  Count(*),
  CIIU,
  STRCIIU
FROM `ia-foundation`.pilotos.ods_cliente
GROUP BY CIIU, STRCIIU
ORDER BY Count(*) DESC
LIMIT 5
"""
# 1. Run the query to obtain the complete result.
# sync_executor is a regular (non-async) function, so asyncio.to_thread runs it
# in a worker thread and the notebook awaits the outcome.
full_result_data = await asyncio.to_thread(sync_executor, query)
# Display how many rows came back.
len(full_result_data["rows"])


--- Ejecutando consulta en Databricks: 
SELECT  Count(*),
  CIIU,
  STRCIIU
FROM `ia-foundation`.pilotos.ods_cliente
GROUP BY CIIU, STRCIIU
ORDER BY Count(*) DESC
LIMIT 5
... ---


5

In [None]:
import pandas as pd
# Build a DataFrame from the query result: "rows" supplies the data and
# "columns" supplies the column labels.
df = pd.DataFrame(full_result_data["rows"], columns=full_result_data["columns"])
# Bare trailing expression so the notebook renders the frame.
df

In [9]:
import pandas as pd
# 2. Convertir a DataFrame de Pandas
df = pd.DataFrame(full_result_data["rows"], columns=full_result_data["columns"])

# 3. Subir el CSV COMPLETO a Azure Blob Storage
blob_name = f"mamaomiaaa-meossafe.csv"
download_url = await storage_service.upload_query_results(df, blob_name)
await cosmos_db_service.save_query_result("mamoamiaaa", "meossafe", full_result_data)

Archivo CSV 'mamaomiaaa-meossafe.csv' subido exitosamente a Azure Storage.
Resultado para message_id 'meossafe' guardado en Cosmos DB.


In [None]:
# Measure how large the query rows are once serialized as JSON.
# The original cell read `results`, which no visible cell defines (NameError on
# a fresh kernel); `full_result_data` is the query result produced earlier in
# this notebook, so it is used instead.
# At most the first 30000 rows are serialized; default=str stringifies any
# value that json cannot encode natively.
json_str = json.dumps(full_result_data["rows"][0:30000], default=str)

# Size is measured on the UTF-8 encoded payload, not on the character count.
size_bytes = len(json_str.encode('utf-8'))

size_kb = size_bytes / 1024
size_mb = size_kb / 1024

print(f"Tamaño del JSON: {size_bytes} bytes / {size_kb:.2f} KB / {size_mb:.2f} MB")

In [27]:
session_id = "session_id_A"
message_id = "message_id_A"
await cosmos_db_service.save_query_result(session_id, message_id, results["rows"][0:30000])

Resultado para message_id 'message_id_A' guardado en Cosmos DB.


# Pruebas de Azure AI Search

In [None]:
import sys
import os
notebook_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(notebook_dir, '..'))
sys.path.append(parent_dir)

from app.services.azure_search_service import AzureSearchService
from utils.az_ai_search import AzureIASearch

azure_search_service = AzureSearchService()
azure_search_utils = AzureIASearch()


user_query = "¿Cuáles son los clientes que se encuentran asociados a la cooperativa coomeva?"
result = await azure_search_service.search_similar_queries(user_query, top_k=10)
# result2 = azure_search_utils.hybrid_search(user_query, index_name="index_sqlagent", top_k=3)

for a in result:
    print(a)

In [None]:
# Show only the first three entries of `result`.
result[:3]

# Pruebas de Cosmos DB

In [33]:
cosmos_service = CosmosDBService()
CONVERSATION_HISTORY_WINDOW = 20
session_id = "052f6517-bd92-472d-afa6-a073f51d952b"
conversation_history = await cosmos_service.get_conversation_history(
            session_id,
            limit=CONVERSATION_HISTORY_WINDOW
        )

Servicio de Cosmos DB inicializado.
Base de datos 'db_agentesql' encontrada.
Contenedor 'historialconversaciones' listo.
Historial recuperado para la sesión 052f6517-bd92-472d-afa6-a073f51d952b: 20 mensajes.


In [None]:
def _sanitize_history_for_api(history: list) -> list:
    """
    Elimina cualquier 'ToolMessage' huérfano del principio del historial
    para asegurar una secuencia de conversación válida para la API de OpenAI.
    """
    sanitized_history = list(history)
    # Mientras el historial no esté vacío y el primer mensaje sea un ToolMessage...
    while sanitized_history and isinstance(sanitized_history[0], ToolMessage):
        # Lo eliminamos, porque su AIMessage correspondiente fue cortado por la ventana.
        sanitized_history.pop(0)
    return sanitized_history
# Drop orphaned leading tool messages from the stored conversation.
sanitized_history = _sanitize_history_for_api(conversation_history)

# Agent input: a fresh list holding the sanitized history followed by the new user turn.
messages_for_agent = [
    *sanitized_history,
    HumanMessage(content='Call tool "foo" twice with no arguments'),
]

messages_for_agent

In [22]:
# Number of messages in the list prepared for the agent.
len(messages_for_agent)

20

In [20]:
# Everything positioned after the sanitized history, i.e. what was added for the current turn.
messages_for_agent[len(sanitized_history):]

[HumanMessage(content='Call tool "foo" twice with no arguments', additional_kwargs={}, response_metadata={})]

In [None]:
# Messages belonging to the current turn: everything after the sanitized history.
# Disabled extension (presumably meant to also append the agent's own new messages):
#   + agent_response.get("messages", [])[len(messages_for_agent):]
full_turn_messages = messages_for_agent[len(sanitized_history):]
full_turn_messages

[]