In [None]:
from frontier_challenge.tools.viz_tool.stages import (
    summarize_dataset,
    generate_visualization_proposals,
    generate_visualization_code,
)
import json
import logging
import os
import time
import pandas as pd

# Notebook-scoped logger used by the helper function defined further down.
# NOTE(review): no handler or level is configured in the visible cells, so
# INFO records may not be displayed unless logging is set up elsewhere — confirm.
logger = logging.getLogger(__name__)


In [None]:
from langchain_openai import ChatOpenAI
from frontier_challenge.settings import OPENAI_API_KEY

# Lightweight chat model used for dataset summarization: gpt-4o-mini at
# temperature 0, wrapped via with_retry() using its default settings.
llm_light = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
).with_retry()

In [None]:
def node_summarize_dataset(df, context) -> dict:
    """Summarize a dataset for visualization and log how long it took.

    Delegates to ``summarize_dataset`` using the module-level ``llm_light``
    model, logging the input size beforehand and the elapsed time afterwards.

    Args:
        df: Tabular data to summarize. It must support ``len(df)`` and expose
            a ``columns`` attribute (e.g. a pandas DataFrame).
        context: Context value forwarded unchanged to ``summarize_dataset``.

    Returns:
        Whatever ``summarize_dataset`` returns (annotated here as ``dict``).
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()

    logger.info(f"Summarizing dataset with {len(df)} rows and {len(df.columns)} columns")

    data_summary = summarize_dataset(
        df,
        context=context,
        llm=llm_light,
    )

    elapsed = time.perf_counter() - start_time
    logger.info(f"Dataset summarization completed in {elapsed:.2f}s")

    return data_summary

In [None]:
# Synthetic panel: 3 funds observed at 12 month-ends of 2024 (36 rows).
# Rows are ordered month by month, with the three funds cycling inside each
# month, so every per-fund column below is a 3-element pattern repeated 12x.
sample_data = pd.DataFrame({
    'cnpj': ['00.000.000/0001-01', '00.000.000/0001-02', '00.000.000/0001-03'] * 12,
    'fund_name': ['Fundo A', 'Fundo B', 'Fundo C'] * 12,
    # Each month-end is repeated once per fund so the dates line up with the
    # fund cycle and span a single year. An explicit MonthEnd offset is used
    # instead of the 'M' alias, which newer pandas versions deprecate.
    'date': pd.date_range('2024-01-01', periods=12, freq=pd.offsets.MonthEnd()).repeat(3),
    # 9-value patterns repeated 4x: each fund cycles through three values.
    'return_mtd': [0.5, 1.2, -0.3, 0.8, 1.5, -0.5, 0.9, 1.1, 0.2] * 4,
    'return_ytd': [5.2, 8.5, 3.1, 6.8, 9.2, 4.5, 7.3, 8.9, 5.7] * 4,
    'aum': [1000000, 5000000, 2500000] * 12,
    'sharpe_ratio': [1.2, 1.8, 0.9] * 12,
    'volatility': [8.5, 12.3, 6.2] * 12,
    'category': ['Renda Fixa', 'Multimercado', 'Ações'] * 12,
})

# Free-text context passed alongside the data to the summarization step.
context = "Analyze the performance of different investment funds over the past year."

In [None]:
# Run the summarization step on the sample data and pretty-print the result.
result = node_summarize_dataset(sample_data, context)
# ensure_ascii=False keeps accented text (e.g. 'Ações') readable instead of
# \u-escaped; default=str avoids a TypeError if the summary contains values
# that are not JSON-native (e.g. timestamps or numpy scalars).
print(json.dumps(result, indent=2, ensure_ascii=False, default=str))