In [None]:
import base64
import os
import pickle
import re

import pandas as pd
from AUTODCETS import datasets
from dotenv import load_dotenv
from IPython.display import Image, display
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage

import tools
from agent import Agent
from graph_test import Graph
from prompts import Prompts
from tools import Tools
from utils import remover_valores_aleatorios

In [19]:
# Load variables from a local .env file (if any) and read the API key that is
# later forwarded to the agents through the `config` dictionaries.
load_dotenv()
API_KEY = os.getenv("API_KEY")
if not API_KEY:
    # Fail fast with an actionable message instead of passing `None` downstream,
    # where it would surface as an obscure error during the first graph invoke.
    raise RuntimeError(
        "API_KEY is not set; define it in the environment or in a .env file."
    )

# Dataset identifier. The CSV file name is derived from it so the two cannot drift apart.
name = 'CLIMATIC_2'

# Keep the first 1000 rows, drop the 'Date' column, then pass the frame through
# remover_valores_aleatorios for column 'ETO' with 0.1.
# NOTE(review): presumably this blanks a 0.1 fraction of 'ETO' at random (the
# recorded stage-1 output reports 100 missing 'ETO' values); no seed is visible
# here, so confirm whether the result is reproducible across runs.
df = remover_valores_aleatorios(
    pd.read_csv(f'{name}.csv').head(1000).drop(columns=['Date']),
    'ETO',
    0.1,
)

In [20]:
# Stage 1 setup: agent flavour and LLM shared by the following cells.
agent_type = 'pandas'
model = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"  # alternative: "deepseek-ai/DeepSeek-V3.1"

# The working frame is published as a module-level attribute of `tools`.
# NOTE(review): presumably the tool functions read `tools.df`; confirm in tools.py.
tools.df = df

# Tools offered to the agent in this stage: AutoML plus the two imputation methods.
tools_ob = [
    Tools.automl,
    Tools.impute_values_with_linear_method,
    Tools.impute_values_with_mean_method,
]

# Build the agent and wrap it, together with its tools, in an executable graph.
chat = Agent(model).build(agent_type, df, tools_ob)
graph_ob = Graph(chat, tools_ob).build_graph()

# Per-invocation settings handed to the graph.
config = dict(
    configurable=dict(
        api_key=API_KEY,   # key read from the environment
        model=model,       # model identifier chosen above
        thread_id="1",     # conversation tracking id
    )
)


In [21]:
# Stage 1 - missing-value imputation.
# No extra user instruction is supplied; the 'Etapa 1' template is used as is.
user_msg = ''
prompt = Prompts.get_prompt('Etapa 1', user_msg=user_msg)

# Run the stage-1 graph with the prompt as the only human message.
estado = graph_ob.invoke(
    {"messages": [HumanMessage(content=prompt)]},
    config,
)





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To begin, I need to check if there are any missing values in the dataframe `df`. I will use the `python_repl_ast` tool to execute a command that checks for missing values.

Action: python_repl_ast
Action Input: `df.isnull().sum()`[0m[36;1m[1;3mRs        0
u2        0
Tmax      0
Tmin      0
RH        0
pr        0
ETO     100
dtype: int64[0m[32;1m[1;3mThe output indicates that there are missing values in the 'ETO' column, with a total of 100 missing values. Now, I need to decide on the appropriate imputation method. Since 'ETO' is likely a time series and given the context, it might be reasonable to use linear interpolation for imputation if the series is not approximately stationary. I will first check the number of missing values and then decide on the imputation method.

Action: impute_values_with_linear_method
Action Input: `ETO`[0m[38;5;200m[1;3m                                 Rs   u2  Tmax  Tmin    RH

In [22]:
# Rebuild the imputed frame from the last tool output recorded during stage 1.
try:
    df_imputado = pd.DataFrame(estado['all_tool_outputs'][-1])
except (KeyError, IndexError, TypeError, ValueError) as exc:
    # Best-effort fallback: continue with the original frame, but say so
    # explicitly instead of silently hiding the failure (the original frame
    # may still contain the missing values).
    print(f"Could not rebuild the imputed frame ({exc!r}); falling back to the original df.")
    df_imputado = df
else:
    # Drop the helper 'index' column when present. With errors='ignore' its
    # absence no longer discards an otherwise valid imputed frame.
    df_imputado = df_imputado.drop(columns=['index'], errors='ignore')
    print(df_imputado.head(3))

     Rs   u2  Tmax  Tmin    RH    pr   ETO
0  15.8  0.8  31.0  24.0  85.5   8.6  3.40
1  18.5  0.6  30.9  24.0  83.0  25.3  2.75
2   9.7  1.0  27.2  24.6  92.1   3.2  2.10


In [23]:
# Stage 2 - forecasting.
model = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"

# Only the AutoML tool is offered in this stage.
tools_ob2 = [Tools.automl]

# Publish the imputed frame on the `tools` module and build a fresh agent/graph pair on it.
tools.df = df_imputado
chat2 = Agent(model).build(agent_type, df_imputado, tools_ob2)
graph_ob = Graph(chat2, tools_ob2).build_graph()

# Per-invocation settings handed to the graph.
config = dict(
    configurable=dict(
        api_key=API_KEY,   # key read from the environment
        model=model,       # model identifier chosen above
        thread_id="1",     # conversation tracking id
    )
)

# User request: forecast the next 5 days of column ETO, without decomposition.
user_msg = 'Faça a previsão para os próximos 5 dias da coluna ETO. Favor não usar decomposição.'
prompt = Prompts.get_prompt('Etapa 2', user_msg=user_msg)

# Run the stage-2 graph with the prompt as the only human message.
estado2 = graph_ob.invoke(
    {"messages": [HumanMessage(content=prompt)]},
    config,
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To make a prediction for the next 5 days of the 'ETO' column, I will use the `automl` function. I need to decide on the `max_lags` parameter, which should be between 1 and 30. A common choice for `max_lags` can be around 7 or 15 for daily data, as it represents a week or roughly two weeks of past data. Since the task specifies not to use decomposition, I will set `decomposition` to `False`.

Action: automl
Action Input: {target: 'ETO', step_ahead: 5, max_lags: 15, decomposition: False}[0mStart time: 2025-08-27 08:52:53.213360
FEATURE SELECTION LAYER - CAUSAL
THE CAUSAL GRAPH CONTAINS THE FOLLOWING VARIABLES: ['Rs', 'u2', 'Tmax', 'Tmin', 'RH', 'pr', 'ETO']
MODEL SELECTION LAYER
HPO started...


Processing: 100%|██████████| 10/10 [10:49<00:00, 64.99s/it]


Run time: 0:12:40.913964
[33;1m[1;3m{'predicoes': {'real': {0: 5.8, 1: 5.5, 2: 5.8, 3: 5.8, 4: 5.6}, 'previsto': {0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}}, 'modelo': 'gASVehgAAAAAAACME0FVVE9EQ0VUUy5hdXRvZGNldHOUjAlBVVRPRENFVFOUk5QpgZR9lCiMDHBhcmFtc19NRU9IUJR9lCiMBG5wb3CUSxSMBG5nZW6USwqMCnNpemVfdHJhaW6US8iMCXNpemVfdGVzdJRLMnWMEWZlYXR1cmVfc2VsZWN0aW9ulIiMFGRpc3RyaWJ1dGl2ZV92ZXJzaW9ulImMCnNhdmVfbW9kZWyUiIwKcGF0aF9tb2RlbJSMBW1vZGVslIwNZGVjb21wb3NpdGlvbpSJjAZHX2xpc3SUfZQojAJSc5SMEXBhbmRhcy5jb3JlLmZyYW1llIwJRGF0YUZyYW1llJOUKYGUfZQojARfbWdylIwecGFuZGFzLmNvcmUuaW50ZXJuYWxzLm1hbmFnZXJzlIwMQmxvY2tNYW5hZ2VylJOUKIwWcGFuZGFzLl9saWJzLmludGVybmFsc5SMD191bnBpY2tsZV9ibG9ja5STlIwWbnVtcHkuX2NvcmUubXVsdGlhcnJheZSMDF9yZWNvbnN0cnVjdJSTlIwFbnVtcHmUjAduZGFycmF5lJOUSwCFlEMBYpSHlFKUKEsBSwFLD4aUaCOMBWR0eXBllJOUjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiiUMPAQEAAAAAAAAAAAAAAAAAlHSUYowIYnVpbHRpbnOUjAVzbGljZZSTlEsASwFLAYeUUpRLAoeUUpRoH2giaCVLAIWUaCeHlFKUKEsBSwFLD4aUaC+JQw8AAAAAAAAAAAAAAAAAAACUdJRiaDZLAUsCSwG

In [24]:
# Extract the tool name that follows the first "Action:" marker of each stage-2 log entry.
action_pattern = re.compile(r"Action:\s*([^\n]+)")

# Search every log entry exactly once; entries without an "Action:" marker are skipped.
action_matches = (action_pattern.search(log) for log in estado2['logs'])

# Strip surrounding whitespace (e.g. a trailing '\r' or spaces) so the names
# compare equal to plain tool names such as "automl" in later cells.
tools_list = [match.group(1).strip() for match in action_matches if match]

print(tools_list)

['automl']


In [25]:
# Positions of the 'automl' actions among the actions parsed from the stage-2 logs.
id_automl = [i for i, tool in enumerate(tools_list) if tool == "automl"]
if not id_automl:
    # Same exception type as the original failing lookup, but with an actionable message.
    raise IndexError(
        "No 'automl' action found in the stage-2 logs; cannot recover predictions or model."
    )

# Output of the first automl call.
# NOTE(review): this assumes estado2['all_tool_outputs'] is aligned position by
# position with tools_list; confirm against the graph implementation.
automl_output = estado2['all_tool_outputs'][id_automl[0]]

# Real vs. predicted values returned by the tool, as a DataFrame.
predicoes = pd.DataFrame(automl_output['predicoes'])

# Decode the Base64 payload back to bytes and unpickle the trained model.
# SECURITY: pickle.loads can execute arbitrary code; only use it on payloads
# produced by this notebook's own trusted tool, never on external data.
modelo = pickle.loads(base64.b64decode(automl_output['modelo']))

print(predicoes)
print(modelo)

   real  previsto
0   5.8       0.0
1   5.5       0.0
2   5.8       0.0
3   5.8       0.0
4   5.6       0.0
<AUTODCETS.autodcets.AUTODCETS object at 0x000001CF9F7D7B10>


In [26]:
# Stage 3 - explanation (real vs. predicted plot).
model = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"

# Only the plotting tool is offered in this stage.
tools_ob3 = [Tools.plot_real_vs_pred]

# Publish the predictions frame on the `tools` module and build a fresh agent/graph pair on it.
tools.df = predicoes
chat3 = Agent(model).build(agent_type, predicoes, tools_ob3)
graph_ob = Graph(chat3, tools_ob3).build_graph()

# Per-invocation settings handed to the graph.
config = dict(
    configurable=dict(
        api_key=API_KEY,   # key read from the environment
        model=model,       # model identifier chosen above
        thread_id="1",     # conversation tracking id
    )
)

# User request: produce a Base64 figure comparing real and predicted ETO values.
user_msg = '''Gere a figura na Base64 que contenha o gráfico comparando os valores reais e previstos da coluna ETO.'''

# No 'Etapa 3' prompt template is applied here: the raw user message is sent directly.
estado3 = graph_ob.invoke(
    {"messages": [HumanMessage(content=user_msg)]},
    config,
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To generate a plot comparing the real and predicted values, I first need to ensure that the dataframe `df` has the correct columns. The output of `print(df.head())` shows that the dataframe has columns named `real` and `previsto`. I will use these columns to generate the plot.

Action: python_repl_ast
Action Input: df.columns[0m[36;1m[1;3mIndex(['real', 'previsto'], dtype='object')[0m[32;1m[1;3mThe dataframe `df` indeed has the columns `real` and `previsto`, which correspond to the real and predicted values, respectively. Now, I can use the `plot_real_vs_pred` function to generate the plot.

Action: python_repl_ast
Action Input: plot_real_vs_pred()[0m[36;1m[1;3mNameError: name 'plot_real_vs_pred' is not defined[0m[32;1m[1;3mIt seems that the function `plot_real_vs_pred` is not defined in the current scope. However, I have been told that it is available as a tool. Let's try to use it directly.

Action: plo

In [27]:
# Stage 4 - explanation (variable-importance graph of the trained model).
model = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"

# Only the graph-drawing tool is offered in this stage.
tools_ob4 = [Tools.desenhar_grafo]

# Publish the unpickled model as a module-level attribute of `tools`.
# NOTE(review): presumably desenhar_grafo reads `tools.model`; confirm in tools.py.
tools.model = modelo

# NOTE(review): this agent is built on the original `df`, whereas the other
# post-imputation stages use `df_imputado`; confirm this is intentional.
chat4 = Agent(model).build(agent_type, df, tools_ob4)
graph_ob = Graph(chat4, tools_ob4).build_graph()

# Per-invocation settings handed to the graph.
config = dict(
    configurable=dict(
        api_key=API_KEY,   # key read from the environment
        model=model,       # model identifier chosen above
        thread_id="1",     # conversation tracking id
    )
)

# The model object is interpolated into the prompt, so the text contains its repr.
user_msg = f'''Use a ferramenta desenhar_grafo para gerar o grafo de importância das variáveis do modelo {modelo}.'''

# No prompt template is applied here: the raw user message is sent directly.
estado4 = graph_ob.invoke(
    {"messages": [HumanMessage(content=user_msg)]},
    config,
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To generate the graph of variable importance using the `desenhar_grafo` tool, I first need to ensure that the model provided is a trained AUTODCE-TS model. The question directly provides the model object, so I will use it to generate the graph.

Action: desenhar_grafo
Action Input: <AUTODCETS.autodcets.AUTODCETS object at 0x000001CF9F7D7B10>[0m[33;1m[1;3miVBORw0KGgoAAAANSUhEUgAAAlgAAAH0CAYAAADhUFPUAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAcolJREFUeJzt3Qd0VNXXBfAdSkIg9N6C9CK9g3QUUVCKVGmC0kSKgEhRioqCgljpIEq1gCBdUJAiIL0JKCBFem8JLeFb+/pN/pMQUmfmTdm/td5KMply85LMnDn33HP9Hjx48AAiIiIi4jBJHHdXIiIiIkIKsEREREQcTAGWiIiIiIMpwBIRERFxMAVYIiIiIg6mAEtERETEwRRgiYiIiDiYAiwRERERB1OAJSIiIuJgCrBEREREHEwBloiIiIiDKcASERERcTAFWCIiIiIOpgBLRERExMEUYImIiIg4WDJH36E41oMHD3D9VjhC7zxAYIAf0qRKAj8/P6uHJSIiIjFQgOWmboaEY+Xmm/hx7U2cvng/4vIcmZKhSa0gPF05CEEplYAUERFxR34PmC

In [None]:
# Evaluator step: ask the LLM whether the actions logged during stage 1 should
# be applied for the "Imputação de valores faltantes" stage.
# No tools are offered here.
# NOTE(review): this rebinds `tools_ob4`, which the stage-4 cell used for
# [Tools.desenhar_grafo]; a distinct name would avoid confusion on re-runs.
tools_ob4 = []

model = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"

# Fresh agent/graph pair built on the imputed frame.
chat5 = Agent(model).build(agent_type, df_imputado, tools_ob4)

graph_ob = Graph(chat5, tools_ob4).build_graph()

# Per-invocation settings handed to the graph.
config = {
    "configurable": {
        "api_key": API_KEY,  # Your actual key
        "model": model,  # Your model
        "thread_id": "1"  # Optional conversation tracking
    }
}

# The complete stage-1 log list is interpolated into the prompt as `actions`.
actions = estado['logs']
etapa = 'Imputação de valores faltantes'

# Prompt (in Portuguese) instructing the model to answer only "Sim" or "Não".
# NOTE(review): the recorded output of this cell shows the agent starting a
# Thought/Action chain instead of a plain Sim/Não answer; verify the result.
user_msg =f"""Você é um avaliador de ações em um processo de análise de series temporais. Seu trabalho é decidir se uma ação deve ser aplicada ou não em uma determinada etapa. 
            Responda apenas Sim ou Não. 
            A ação {actions} deve ser executada para a etapa {etapa}?"""

# No prompt template is applied here: the raw user message is sent directly.
# prompt = Prompts.get_prompt('Etapa 3', user_msg = user_msg)

estado5 = graph_ob.invoke({"messages": [HumanMessage(content=user_msg)]}, config)





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To determine if the action 'impute_values_with_linear_method' should be executed for the 'Imputação de valores faltantes' stage, I first need to understand the context and the data. The given dataframe `df` has a column 'ETO' with missing values. I will start by checking the number of missing values in the 'ETO' column.

Action: python_repl_ast
Action Input: `df['ETO'].isnull().sum()`
[0m[36;1m[1;3m100[0m[32;1m[1;3mThought: The output indicates that there are 100 missing values in the 'ETO' column. Given this information, I need to decide if linear interpolation is an appropriate method for imputing these missing values. Linear interpolation is a reasonable approach for time series data if the series is not approximately stationary or if there are not too many consecutive missing values.

To further assess the appropriateness of linear interpolation, I should check if the 'ETO' column is of a suitable data type

In [None]:
# Summarizer step: ask the LLM to write a pt-BR report of the stage-1 run from
# its logged steps and raw tool outputs.
# No tools are offered here.
# NOTE(review): `tools_ob4` is rebound again; a distinct name would be clearer.
tools_ob4 = []

model = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"

# Fresh agent/graph pair built on the imputed frame.
chat6 = Agent(model).build(agent_type, df_imputado, tools_ob4)

graph_ob = Graph(chat6, tools_ob4).build_graph()

# Per-invocation settings handed to the graph.
config = {
    "configurable": {
        "api_key": API_KEY,  # Your actual key
        "model": model,  # Your model
        "thread_id": "1"  # Optional conversation tracking
    }
}

# Stage-1 logs and tool outputs, interpolated verbatim into the prompt below.
# NOTE(review): the last stage-1 tool output is used elsewhere to rebuild a
# DataFrame, so `outputs` may be large once rendered as text; confirm it fits
# the model's context window.
steps = estado['logs']
outputs = estado['all_tool_outputs']

# Prompt (in Portuguese) defining the report format and rules. The trailing
# .strip() removes the leading and trailing whitespace of the literal.
user_msg =f"""
                    Você é o Agente Resumidor. Receberá dois blocos abaixo:

                    - steps: linhas alternadas no padrão 'THOUGHT: ...' e 'ACTION: tool(args)'
                    - outputs: respostas brutas das tools (texto/tabelas/descrições), na ordem em que ocorreram

                    OBJETIVO
                    Escreva um relatório em pt-BR exatamente no formato:

                    Steps:
                    1) Nome do Step em Português: <título curto em pt-BR>
                    Motivo: <copie/sintetize fielmente o THOUGHT correspondente, sem inventar>
                    Ferramenta Escolhida: <nome da tool indicada em ACTION ou 'nenhuma'>
                    Motivo da escolha da Ferramenta: <explique brevemente por que essa tool foi usada; se não houver, escreva 'não aplicável'>
                    Resultado do "Step": <resuma o output associado a este step; se não houver output, escreva '—'>

                    2) Nome do Step em Português: ...
                    Motivo: ...
                    Ferramenta Escolhida: ...
                    Motivo da escolha da Ferramenta: ...
                    Resultado do "Step": ...

                    REGRAS
                    - Mantenha a ordem cronológica dos steps.
                    - Não invente ferramentas, dados ou resultados.
                    - Associe outputs aos steps na mesma ordem (1º output para o 1º step que teve ACTION, e assim por diante).
                    - Seja conciso e técnico; evite floreios.
                    - Se não houver THOUGHT ou ACTION para algum step, preencha com '—' no campo faltante.
                    - Se não houver nada para resumir, responda apenas: "Sem dados para resumir."

                    DADOS
                    Intermediate Steps:
                    {steps}

                    Outputs:
                    {outputs}
                    """.strip()

# No prompt template is applied here: the raw user message is sent directly.
# prompt = Prompts.get_prompt('Etapa 3', user_msg = user_msg)

# NOTE(review): the result is stored in `estado5`, overwriting the evaluator
# result from the previous cell even though this agent is `chat6`; this looks
# like a copy-paste slip (likely meant `estado6`), so confirm before relying on it.
estado5 = graph_ob.invoke({"messages": [HumanMessage(content=user_msg)]}, config)