In [1]:
import os
from dotenv import load_dotenv
from langchain_core.tools import tool
from x_sem_ad import XSemAD, load_event_log_from_xes
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, ToolMessage, SystemMessage

# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast when the Google API key is missing or empty, before any model call is attempted.
if not os.environ.get("GOOGLE_API_KEY"):
    raise ValueError("A chave de API do Google não foi encontrada. Defina a variável de ambiente GOOGLE_API_KEY.")

In [2]:
# Locations of the model folder and of the XES event log to analyse.
path_to_model = "./data/model/"
path_to_log = "data/InternationalDeclarations.xes"

print("Inicializando o sistema de análise de anomalias...")

# Parse the event log first, then build the detector and attach the log to it.
# `model` is read as a module-level global by the tool defined later in the notebook.
log = load_event_log_from_xes(path_to_log)
model = XSemAD(path_to_model=path_to_model)
model.load_event_log(log)

Inicializando o sistema de análise de anomalias...




parsing log, completed traces ::   0%|          | 0/6449 [00:00<?, ?it/s]

Loading model from: ./data/model/ folder


In [3]:
@tool
def analyse_eventlog_anomalies(constraint_type: str, threshold: float = 0.75) -> dict:
	"""
	Performs anomaly analysis for a specific constraint type in a previously loaded process event log.

	The event log is NOT an argument of this tool: the analysis always runs on the log that was
	already loaded into the anomaly detection model.

	Args:
		constraint_type: The type of constraint to be checked. Based on the user's question, choose one of the following values:
			"Init" - Initiation(a) = The process starts with activity a.
			"End" - Termination(a) = The process ends with activity a.
			"Succession" - Succession(aj,ak)– Activity aj (ak) occurs if and only if it is followed (preceded) by activity ak (aj) in the process.
			"Alternate Succession" - Alternate Succession(aj,ak)– Activities aj and ak occur in the process if and only if the latter follows the former, and they alternate each other in the trace.
			"Choice" - Choice(aj,ak)– Activity aj or activity ak must be in the process.
			"Co-Existence" - Co-Existence(aj,ak)– Both activities aj and ak must be in the process.
			"Exclusive Choice" - Exclusive Choice(aj,ak)– Either activity aj or activity ak (but not both) must be in the process.
			"Response" - Response(aj,ak)– If activity aj occurs in the process, then activity ak occurs after aj.
			"Alternate Response" - Alternate Response(aj,ak)– Each time activity aj occurs in the process instance, activity ak occurs afterwards, before aj recurs.
			"Precedence" - Precedence(aj,ak)– Activity ak occurs in the process instance only if preceded by activity aj.
			"Alternate Precedence" - Alternate Precedence(aj,ak)– Each time activity ak occurs in the process instance, it is preceded by activity aj, and no other ak can recur in between.
		threshold: The threshold value for anomaly analysis. This parameter is used to define the severity level of constraint violations.

	Returns:
		A dictionary with the violated constraints of the requested type and the count of their violations,
		in the following format: {
			"constraint_type[activity_a, activity_b]": int(violation_count), ...
		}, example for constraint_type="Succession":
		{
			"Succession[Activity A, Activity B]": 5,
			"Succession[Activity C, Activity D]": 3
		}
	"""
	# The prints below are a visible trace of each tool invocation in the notebook output.
	print(f"\n>>> analyse_eventlog_anomalies(constraint_type='{constraint_type}', threshold={threshold})")
	# `model` is the module-level detector that already has the event log attached.
	result = model.run(constraint_type=constraint_type, threshold=threshold)
	print(f">>> {result}\n")
	return result

In [12]:
# Gemini model used by the assistant. Other candidate model names are kept here for quick switching:
# MODEL = "gemini-2.0-flash"
# MODEL = "gemini-2.5-flash"
MODEL = "gemini-2.5-pro"

# Chat model with temperature 0, plus a variant that has the anomaly-analysis tool bound to it.
llm = ChatGoogleGenerativeAI(model=MODEL, temperature=0)
tools = [analyse_eventlog_anomalies]
llm_with_tools = llm.bind_tools(tools)

# The prompt file contains accented (non-ASCII) text, so decode it explicitly as UTF-8
# instead of relying on the platform's default encoding.
with open("./data/qa_prompt.txt", "r", encoding="utf-8") as file:
	system_prompt = file.read()
print(system_prompt)

# Start a fresh conversation whose first message is the system prompt.
messages = [SystemMessage(content=system_prompt)]

You are an expert Process Mining assistant. Your primary goal is to help users analyze and understand event logs by using the tools at your disposal.

Follow these rules strictly:
1. Translate the constraints rules and the activities to the language of the user.
2. Answer in natural language, so that any user not familiar with the process mining can understand your answer without any technical knowledge.
3. If context is provided, use it to enhance your response and judge the correctness of the tool responses.
4. End with a brief summary or suggestion, if applicable.

Here are some examples of how you should respond based on the conversation history:

**EXAMPLE 1: ANOMALIES FOUND**

*Question:*
Quais são as anomalias para a regra de início e coexistência?

*Tool Responses:*
{'Start[register request]': 5}
{'Co-Existence[approve order, ship goods]': 12}

*Correct Final Answer:*
Olá! Analisei o log de eventos e encontrei as seguintes violações de regras:

* A regra de *Início* foi encontr

In [13]:
# Load the context file and add it to the conversation as an additional system message.
# Decode explicitly as UTF-8 so the result does not depend on the platform's default encoding.
# NOTE: this rebinds `system_prompt` to the context text, and every run of this cell appends
# another copy of the context to `messages`.
with open("./data/context.txt", "r", encoding="utf-8") as file:
	system_prompt = file.read()

messages.append(SystemMessage(content=system_prompt))

In [14]:
user_prompt = "Verifique se existe alguma anomalia de Succession para as atividades 'declaration approved by supervisor' e 'request payment'"
messages.append(HumanMessage(content=user_prompt))

# Build the name -> tool lookup once instead of once per tool call.
tools_by_name = {t.name: t for t in tools}
# Upper bound on consecutive tool-calling turns, so a model that keeps requesting tools cannot loop forever.
MAX_TOOL_ROUNDS = 5

ai_msg = llm_with_tools.invoke(messages)
messages.append(ai_msg)

print(f"🧑‍💻 Pergunta: {user_prompt}")

# Keep executing requested tools and feeding their results back until the model answers in plain
# text. A follow-up tool request is handled too, instead of being printed as an empty answer.
tool_rounds = 0
while ai_msg.tool_calls and tool_rounds < MAX_TOOL_ROUNDS:
	for tool_call in ai_msg.tool_calls:
		selected_tool = tools_by_name[tool_call["name"]]
		tool_output = selected_tool.invoke(tool_call["args"])
		# Each tool result is linked to the request that produced it through the tool call id.
		messages.append(ToolMessage(content=str(tool_output), tool_call_id=tool_call["id"]))
	# Every model reply is stored in the history so that `messages` holds the complete conversation.
	ai_msg = llm_with_tools.invoke(messages)
	messages.append(ai_msg)
	tool_rounds += 1

print(f"‍💻 Resposta: {ai_msg.content}")


🧑‍💻 Pergunta: Verifique se existe alguma anomalia de Succession para as atividades 'declaration approved by supervisor' e 'request payment'

>>> analyse_eventlog_anomalies(constraint_type='Succession', threshold=0.75)
Get prediction for constraint type:  Succession
>>> {'Succession[request payment, declaration approved by supervisor]': 6183, 'Succession[declaration rejected by supervisor, declaration approved by supervisor]': 122, 'Succession[send reminder, declaration approved by supervisor]': 406, 'Succession[declaration approved by supervisor, request payment]': 244}

‍💻 Resposta: Olá! Analisei o log de eventos para as anomalias de *Sucessão* e encontrei algumas violações, incluindo as que você solicitou.

A regra de *Sucessão* significa que uma atividade deve ocorrer se, e somente se, for imediatamente seguida ou precedida por outra atividade específica.

Para as atividades que você mencionou, encontrei os seguintes problemas:
*   **Declaração aprovada pelo supervisor** e **solicit