In [1]:
# Cell 1: Setup, Path, and Imports
import sys
import os
import logging
import time
import unittest
from unittest.mock import patch, MagicMock # For mocking LLM calls later if desired

# Configure logging for tests
logging.basicConfig(level=logging.INFO, format='%(asctime)s - VALIDATOR_TEST - %(levelname)s - %(message)s')

# Add the 'src' directory to the Python path to find the modules
module_path = os.path.abspath(os.path.join('..')) # Assumes notebook is in 'notebooks/' dir
if module_path not in sys.path:
    print(f"Adding {module_path} to sys.path")
    sys.path.append(module_path)
else:
    print(f"{module_path} already in sys.path")

print("Basic setup complete.")

Adding f:\interview\acordao\acordao_validator to sys.path
Basic setup complete.


In [2]:
# Cell 2: Imports and Constants

# Standard library imports already done

# --- Project Modules ---
try:
    from src.validator import (
        load_llm_model_and_tokenizer,
        _format_validation_prompt,
        validate_claim_with_llm,
        LLM_MODEL_ID # Import the constant used
    )
    print("Successfully imported from validator.")
except ImportError as e:
    print(f"ERROR importing from validator: {e}")
    # Define fallbacks to avoid NameErrors
    load_llm_model_and_tokenizer = None
    _format_validation_prompt = None
    validate_claim_with_llm = None
    LLM_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct" # Fallback

# --- Third-party ---
try:
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    print("Successfully imported torch and transformers.")
except ImportError as e:
    print(f"ERROR importing torch/transformers: {e}")
    torch = None
    AutoModelForCausalLM = None
    AutoTokenizer = None

# --- Reset Caches (Important for testing loading) ---
# Access the cached variables in the validator module if it exists
if 'src.validator' in sys.modules:
    print("Resetting validator module's cached model/tokenizer instances for testing.")
    sys.modules['src.validator']._llm_model_instance = None
    sys.modules['src.validator']._llm_tokenizer_instance = None

print("Imports and constant setup complete.")


  from .autonotebook import tqdm as notebook_tqdm


Successfully imported from validator.
Successfully imported torch and transformers.
Resetting validator module's cached model/tokenizer instances for testing.
Imports and constant setup complete.


In [3]:
# Cell 3: Test load_llm_model_and_tokenizer

print("\n" + "="*10 + " Running Test: test_llm_loading " + "="*10)

# --- Test Case 1: Successful Loading ---
print("\n--- Test Case 1: Successful Loading ---")
model_instance = None
tokenizer_instance = None
test_passed_1 = False
load_time = 0
if load_llm_model_and_tokenizer is None or torch is None or not torch.cuda.is_available():
     print("SKIPPING Test Case 1: Validator/Torch import failed or CUDA not available.")
else:
    try:
        start_time = time.time()
        model_instance, tokenizer_instance = load_llm_model_and_tokenizer() # Use default model ID
        load_time = time.time() - start_time

        assert model_instance is not None, "Model should be loaded"
        assert tokenizer_instance is not None, "Tokenizer should be loaded"
        # Check types (might be wrapped in specific classes by transformers)
        # assert isinstance(model_instance, AutoModelForCausalLM) # Type might vary slightly
        # assert isinstance(tokenizer_instance, AutoTokenizer) # Type might vary slightly
        print(f"Model type: {type(model_instance)}")
        print(f"Tokenizer type: {type(tokenizer_instance)}")
        print(f"Model loaded on device: {model_instance.device}")
        # Basic check if it's on GPU
        assert 'cuda' in str(model_instance.device), "Model should be on CUDA device"

        test_passed_1 = True
        print(f"-> Test Case 1 PASSED (Loaded in {load_time:.2f}s)")
    except Exception as e:
        print(f"-> Test Case 1 FAILED: {e}")


# --- Test Case 2: Caching ---
# Requires Test Case 1 to have passed and loaded the model
print("\n--- Test Case 2: Caching ---")
test_passed_2 = False
if not test_passed_1:
     print("SKIPPING Test Case 2: Requires successful load in Test Case 1.")
elif load_llm_model_and_tokenizer is None:
     print("SKIPPING Test Case 2: Validator function not imported.")
else:
    try:
        print("Calling load function again...")
        start_time = time.time()
        model_cached, tokenizer_cached = load_llm_model_and_tokenizer()
        cache_load_time = time.time() - start_time

        # Check if the SAME objects were returned (caching worked)
        assert model_cached is model_instance, "Cached model object should be the same instance"
        assert tokenizer_cached is tokenizer_instance, "Cached tokenizer object should be the same instance"
        print(f"Second call returned objects in {cache_load_time:.4f}s (should be much faster).")
        assert cache_load_time < 1.0, "Cache loading should be very fast (e.g., < 1 second)"

        test_passed_2 = True
        print("-> Test Case 2 PASSED (Caching appears functional)")
    except Exception as e:
        print(f"-> Test Case 2 FAILED: {e}")

# --- Test Case 3: Invalid Model ID ---
print("\n--- Test Case 3: Invalid Model ID ---")
test_passed_3 = False
if load_llm_model_and_tokenizer is None:
     print("SKIPPING Test Case 3: Validator function not imported.")
else:
    # Reset cache before testing failure
    if 'src.validator' in sys.modules:
        sys.modules['src.validator']._llm_model_instance = None
        sys.modules['src.validator']._llm_tokenizer_instance = None

    invalid_model_id = "invalid/model-does-not-exist-at-all-hopefully"
    print(f"Attempting to load invalid model: {invalid_model_id}")
    try:
        load_llm_model_and_tokenizer(model_id=invalid_model_id)
        print("-> Test Case 3 FAILED: Loading invalid model should have raised an error.")
    except RuntimeError as e:
        # We expect a RuntimeError wrapping the underlying Hugging Face error
        print(f"Caught expected RuntimeError: {e}")
        test_passed_3 = True
        print("-> Test Case 3 PASSED (Correctly raised error for invalid ID)")
    except Exception as e:
         print(f"-> Test Case 3 FAILED: Caught unexpected error type {type(e).__name__}: {e}")

# Final Result for this Cell
print("\n" + "="*10 + " End of Test: test_llm_loading " + "="*10)
if test_passed_1 and test_passed_2 and test_passed_3:
    print("Result: ALL test_llm_loading tests PASSED")
else:
    print("Result: SOME test_llm_loading tests FAILED")

# Reset cache again after tests
if 'src.validator' in sys.modules:
    print("Resetting validator module's cached model/tokenizer instances after tests.")
    sys.modules['src.validator']._llm_model_instance = None
    sys.modules['src.validator']._llm_tokenizer_instance = None

2025-04-30 16:32:14,811 - VALIDATOR_TEST - INFO - Loading LLM model and tokenizer for: mistralai/Mistral-7B-Instruct-v0.2
2025-04-30 16:32:14,814 - VALIDATOR_TEST - INFO - Loading model with 4-bit quantization...




--- Test Case 1: Successful Loading ---


2025-04-30 16:32:19,530 - VALIDATOR_TEST - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
Loading checkpoint shards: 100%|██████████| 3/3 [00:54<00:00, 18.33s/it]
2025-04-30 16:33:14,923 - VALIDATOR_TEST - INFO - Model loaded successfully.
2025-04-30 16:33:14,924 - VALIDATOR_TEST - INFO - Loading tokenizer...
2025-04-30 16:33:15,251 - VALIDATOR_TEST - INFO - Set tokenizer pad_token to eos_token and padding_side to left.
2025-04-30 16:33:15,252 - VALIDATOR_TEST - INFO - Tokenizer loaded successfully.
2025-04-30 16:33:15,255 - VALIDATOR_TEST - INFO - GPU Memory after load: Allocated=3.84 GB, Reserved=4.07 GB
2025-04-30 16:33:15,258 - VALIDATOR_TEST - INFO - Loading LLM model and tokenizer for: invalid/model-does-not-exist-at-all-hopefully
2025-04-30 16:33:15,262 - VALIDATOR_TEST - INFO - Loading model with 4-bit quantization...


Model type: <class 'transformers.models.mistral.modeling_mistral.MistralForCausalLM'>
Tokenizer type: <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>
Model loaded on device: cuda:0
-> Test Case 1 PASSED (Loaded in 60.45s)

--- Test Case 2: Caching ---
Calling load function again...
Second call returned objects in 0.0000s (should be much faster).
-> Test Case 2 PASSED (Caching appears functional)

--- Test Case 3: Invalid Model ID ---
Attempting to load invalid model: invalid/model-does-not-exist-at-all-hopefully


2025-04-30 16:33:15,449 - VALIDATOR_TEST - ERROR - Failed to load LLM model or tokenizer: invalid/model-does-not-exist-at-all-hopefully is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
Traceback (most recent call last):
  File "f:\interview\acordao\acordao_validator\acordao\Lib\site-packages\huggingface_hub\utils\_http.py", line 409, in hf_raise_for_status
    response.raise_for_status()
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^
  File "f:\interview\acordao\acordao_validator\acordao\Lib\site-packages\requests\models.py", line 1024, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 404 Client Error: Not Found for url: https://huggingface.co/invalid/model-does-not-exist-at-all-hopefully/resolve/main/config.json

The above ex

Caught expected RuntimeError: Failed to load LLM model or tokenizer: invalid/model-does-not-exist-at-all-hopefully is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
-> Test Case 3 PASSED (Correctly raised error for invalid ID)

Result: ALL test_llm_loading tests PASSED
Resetting validator module's cached model/tokenizer instances after tests.


In [6]:
# Cell 4: Test _format_validation_prompt

print("\n" + "="*10 + " Running Test: test_prompt_formatting " + "="*10)

test_passed = False
if _format_validation_prompt is None:
    print("SKIPPING test_prompt_formatting: Function not imported.")
else:
    try:
        sample_claim = "Alegação de teste."
        sample_chunks = ["Contexto número um.", "Contexto número dois com\nquebra de linha."]

        # This is the expected formatting of the context *within* the user message
        expected_context_str = "Trecho 1:\nContexto número um.\n\nTrecho 2:\nContexto número dois com\nquebra de linha."

        prompt = _format_validation_prompt(sample_claim, sample_chunks)

        # Check 1: Basic structure for Mistral Instruct
        assert prompt.startswith("<s>[INST]"), "Prompt should start with <s>[INST]"
        assert prompt.endswith("[/INST]"), "Prompt should end with [/INST]"
        # Check it doesn't contain the old markers accidentally
        assert "<|user|>" not in prompt, "Old Phi-3 markers should not be present"
        assert "<|assistant|>" not in prompt, "Old Phi-3 markers should not be present"

        # Check 2: Claim is present within the user message part
        # Need to check between the [INST] tags
        user_message_part = prompt[len("<s>[INST]"): -len(" [/INST]")].strip() # Extract content
        assert f"**Alegação a ser validada:**\n{sample_claim}" in user_message_part, "Claim not found in user message"

        # Check 3: Context is present and formatted within the user message part
        assert f"**Trechos do Documento Original:**\n{expected_context_str}" in user_message_part, "Formatted context not found in user message"

        # Check 4: Instructions are present within the user message part
        assert "Resultado: [Correta/Incorreta]" in user_message_part, "Output format instructions missing"
        assert "Justificativa: [Explique brevemente" in user_message_part, "Output format instructions missing"

        print("Prompt Preview:\n" + "-"*20 + f"\n{prompt}\n" + "-"*20)
        test_passed = True
        print("-> Test PASSED")

    except AssertionError as e:
         print(f"-> Test FAILED: Assertion Error: {e}")
    except Exception as e:
         print(f"-> Test FAILED: Unexpected Error: {e}")


print("\n" + "="*10 + " End of Test: test_prompt_formatting " + "="*10)
if test_passed:
    print("Result: test_prompt_formatting PASSED")
else:
    print("Result: test_prompt_formatting FAILED")


Prompt Preview:
--------------------
<s>[INST] Você é um assistente especialista em analisar documentos jurídicos do TCU. Sua tarefa é avaliar se uma alegação feita em um resumo é verdadeira ou falsa, baseando-se *estritamente* nos trechos fornecidos do documento original.

**Instruções:**
1. Analise a 'Alegação' abaixo.
2. Verifique se a alegação pode ser confirmada ou refutada usando *apenas* as informações contidas nos 'Trechos do Documento Original'. Não use conhecimento externo.
3. Responda no seguinte formato EXATO:
   Resultado: [Correta/Incorreta]
   Justificativa: [Explique brevemente o motivo com base nos trechos, ou 'N/A' se for Correta]

**Trechos do Documento Original:**
Trecho 1:
Contexto número um.

Trecho 2:
Contexto número dois com
quebra de linha.

**Alegação a ser validada:**
Alegação de teste. [/INST]
--------------------
-> Test PASSED

Result: test_prompt_formatting PASSED


In [7]:
# Cell 5: Test validate_claim_with_llm (Integration Style)
# NOTE: This cell runs the ACTUAL LLM. It requires the model to be loaded
# (which might happen here for the first time if Cell 3 was skipped or failed)
# and requires a functioning GPU with sufficient VRAM.
# It will be SLOW the first time it loads the model.

print("\n" + "="*10 + " Running Test: test_llm_validation (Integration) " + "="*10)

test_passed = False
# --- Dummy Data (Simulating retriever output) ---
# Use the same dummy data as the validator's main block for consistency
sample_claim_valid = "O BNDES é uma estatal dependente da União"
sample_context_valid = {
    'ids': [['doc_id_1', 'doc_id_2']],
    'documents': [[
        "Trecho 1: O Banco Nacional de Desenvolvimento Econômico e Social (BNDES) é uma empresa pública federal...",
        "Trecho 2: Conforme análise do Ministério da Fazenda, o BNDES se enquadra como empresa estatal dependente, sujeita ao teto remuneratório."
    ]],
    'metadatas': [[{'source': 'doc.pdf', 'page': 1}, {'source': 'doc.pdf', 'page': 5}]],
    'distances': [[0.1, 0.2]]
}

sample_claim_invalid = "O BNDES opera apenas com recursos próprios."
sample_context_invalid = { # Same context for simplicity
    'ids': [['doc_id_1', 'doc_id_2']],
    'documents': [[
        "Trecho 1: O banco utiliza recursos públicos, como os do FAT e FMM, embora parte dos recursos também provenha de captações próprias.",
        "Trecho 2: A estrutura de financiamento do BNDES inclui fontes do Tesouro Nacional e fundos governamentais."
    ]],
    'metadatas': [[{'source': 'doc.pdf', 'page': 10}, {'source': 'doc.pdf', 'page': 11}]],
    'distances': [[0.15, 0.25]]
}


if validate_claim_with_llm is None or torch is None or not torch.cuda.is_available():
    print("SKIPPING test_llm_validation: Validator/Torch import failed or CUDA not available.")
else:
    all_subtests_passed = True
    validation_result_valid = None
    validation_result_invalid = None
    try:
        # --- Subtest 1: Claim expected to be Correct ---
        print("\n--- Subtest 1: Validating Correct Claim ---")
        print(f"Claim: {sample_claim_valid}")
        validation_result_valid = validate_claim_with_llm(sample_claim_valid, sample_context_valid)

        assert validation_result_valid is not None, "Validation failed, returned None"
        assert isinstance(validation_result_valid, dict), "Result should be a dict"
        assert "Resultado" in validation_result_valid, "Result dict missing 'Resultado'"
        assert "Justificativa" in validation_result_valid, "Result dict missing 'Justificativa'"
        print(f"Parsed Result 1: {validation_result_valid}")
        # We expect the LLM (if working well) to classify this as Correta based on context
        assert validation_result_valid["Resultado"] == "Correta", f"Expected 'Correta', got '{validation_result_valid['Resultado']}'"
        print("-> Subtest 1 PASSED (Structurally)")

    except AssertionError as e:
        print(f"-> Subtest 1 FAILED: Assertion Error: {e}")
        all_subtests_passed = False
    except Exception as e:
        print(f"-> Subtest 1 FAILED: Unexpected Error: {e}")
        all_subtests_passed = False


    try:
        # --- Subtest 2: Claim expected to be Incorrect ---
        print("\n--- Subtest 2: Validating Incorrect Claim ---")
        print(f"Claim: {sample_claim_invalid}")
        validation_result_invalid = validate_claim_with_llm(sample_claim_invalid, sample_context_invalid)

        assert validation_result_invalid is not None, "Validation failed, returned None"
        assert isinstance(validation_result_invalid, dict), "Result should be a dict"
        assert "Resultado" in validation_result_invalid, "Result dict missing 'Resultado'"
        assert "Justificativa" in validation_result_invalid, "Result dict missing 'Justificativa'"
        print(f"Parsed Result 2: {validation_result_invalid}")
        # We expect the LLM to classify this as Incorreta
        assert validation_result_invalid["Resultado"] == "Incorreta", f"Expected 'Incorreta', got '{validation_result_invalid['Resultado']}'"
        # Justification should not be empty or N/A for incorrect claims
        assert validation_result_invalid["Justificativa"] != "N/A", "Justificativa should not be 'N/A' for incorrect claim"
        assert len(validation_result_invalid["Justificativa"]) > 5, "Justificativa seems too short"
        print("-> Subtest 2 PASSED (Structurally)")

    except AssertionError as e:
        print(f"-> Subtest 2 FAILED: Assertion Error: {e}")
        all_subtests_passed = False
    except Exception as e:
        print(f"-> Subtest 2 FAILED: Unexpected Error: {e}")
        all_subtests_passed = False

    # Set overall test pass status
    test_passed = all_subtests_passed


print("\n" + "="*10 + " End of Test: test_llm_validation (Integration) " + "="*10)
if test_passed:
    print("Result: test_llm_validation PASSED")
else:
    print("Result: test_llm_validation FAILED")


2025-04-30 16:34:59,053 - VALIDATOR_TEST - INFO - Loading LLM model and tokenizer for: mistralai/Mistral-7B-Instruct-v0.2
2025-04-30 16:34:59,055 - VALIDATOR_TEST - INFO - Loading model with 4-bit quantization...




--- Subtest 1: Validating Correct Claim ---
Claim: O BNDES é uma estatal dependente da União


2025-04-30 16:34:59,426 - VALIDATOR_TEST - INFO - Based on the current allocation process, no modules could be assigned to the following devices due to insufficient memory:
  - 0: 327680000.0 bytes required
These minimum requirements are specific to this allocation attempt and may vary. Consider increasing the available memory for these devices to at least the specified minimum, or adjusting the model config.
2025-04-30 16:34:59,426 - VALIDATOR_TEST - ERROR - Failed to load LLM model or tokenizer: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 
Traceback (most recent call last):
  File "f:\inte

-> Subtest 1 FAILED: Assertion Error: Validation failed, returned None

--- Subtest 2: Validating Incorrect Claim ---
Claim: O BNDES opera apenas com recursos próprios.


2025-04-30 16:34:59,876 - VALIDATOR_TEST - INFO - Based on the current allocation process, no modules could be assigned to the following devices due to insufficient memory:
  - 0: 327680000.0 bytes required
These minimum requirements are specific to this allocation attempt and may vary. Consider increasing the available memory for these devices to at least the specified minimum, or adjusting the model config.
2025-04-30 16:34:59,877 - VALIDATOR_TEST - ERROR - Failed to load LLM model or tokenizer: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 
Traceback (most recent call last):
  File "f:\inte

-> Subtest 2 FAILED: Assertion Error: Validation failed, returned None

Result: test_llm_validation FAILED


In [8]:
# Cell 6: Test validate_claim_with_llm Parsing Logic (Mocked)
# This tests the parsing part of the validator without running the actual LLM.

print("\n" + "="*10 + " Running Test: test_llm_parsing (Mocked) " + "="*10)

# Dummy claim/context needed for function call structure
dummy_claim = "Test claim for parsing."
dummy_context = { 'documents': [['Dummy context document.']] }

# --- Mocking Setup ---
# We need to mock the behavior *within* validate_claim_with_llm
# Specifically: load_llm_model_and_tokenizer and model.generate

# Mock model and tokenizer objects
mock_model = MagicMock()
mock_tokenizer = MagicMock()
mock_tokenizer.decode = MagicMock() # Mock the decode method
mock_tokenizer.eos_token_id = 123 # Example ID
mock_tokenizer.pad_token_id = 123

# Mock the input tokenization result (only shape is usually needed for slicing)
mock_inputs = {'input_ids': torch.tensor([[1, 2, 3]])} # Dummy tensor
mock_tokenizer.return_value = mock_inputs # Mock the tokenizer call

# Mock the output tensor from model.generate
# Needs to have the input shape + generated shape
# Shape: (batch_size, sequence_length)
# Let's simulate generating 10 tokens after the 3 input tokens
mock_output_ids = torch.tensor([[1, 2, 3, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])
mock_model.generate.return_value = mock_output_ids

test_passed = False
if validate_claim_with_llm is None:
     print("SKIPPING test_llm_parsing: Validator function not imported.")
else:
    all_mock_tests_passed = True
    # --- Subtest 1: Mock Correct Response ---
    print("\n--- Subtest 1: Mocking Correct Response ---")
    # Define what tokenizer.decode should return for the generated part
    mock_response_correct = "Resultado: Correta\nJustificativa: N/A"
    mock_tokenizer.decode.return_value = mock_response_correct
    try:
        with patch('src.validator.load_llm_model_and_tokenizer', return_value=(mock_model, mock_tokenizer)):
             # The patch replaces the function temporarily within this block
             result_correct = validate_claim_with_llm(dummy_claim, dummy_context)

        assert result_correct == {"Resultado": "Correta", "Justificativa": "N/A"}, f"Parsing failed for Correct case, got {result_correct}"
        print(f"Parsed Result: {result_correct}")
        print("-> Subtest 1 PASSED")
    except Exception as e:
         print(f"-> Subtest 1 FAILED: {e}")
         all_mock_tests_passed = False


    # --- Subtest 2: Mock Incorrect Response ---
    print("\n--- Subtest 2: Mocking Incorrect Response ---")
    mock_response_incorrect = "Resultado: Incorreta\nJustificativa: O trecho 1 contradiz a alegação."
    mock_tokenizer.decode.return_value = mock_response_incorrect
    try:
        with patch('src.validator.load_llm_model_and_tokenizer', return_value=(mock_model, mock_tokenizer)):
             result_incorrect = validate_claim_with_llm(dummy_claim, dummy_context)

        expected_incorrect = {"Resultado": "Incorreta", "Justificativa": "O trecho 1 contradiz a alegação."}
        assert result_incorrect == expected_incorrect, f"Parsing failed for Incorrect case, got {result_incorrect}"
        print(f"Parsed Result: {result_incorrect}")
        print("-> Subtest 2 PASSED")
    except Exception as e:
         print(f"-> Subtest 2 FAILED: {e}")
         all_mock_tests_passed = False


    # --- Subtest 3: Mock Malformed Response ---
    print("\n--- Subtest 3: Mocking Malformed Response ---")
    mock_response_malformed = "Uh oh, I forgot the format.\nResultado : maybe correct\nJustif: idk"
    mock_tokenizer.decode.return_value = mock_response_malformed
    try:
        with patch('src.validator.load_llm_model_and_tokenizer', return_value=(mock_model, mock_tokenizer)):
             result_malformed = validate_claim_with_llm(dummy_claim, dummy_context)

        # Expect the default error state
        expected_malformed = {"Resultado": "Erro", "Justificativa": "Falha ao parsear resposta do LLM."}
        assert result_malformed == expected_malformed, f"Parsing failed for Malformed case, got {result_malformed}"
        print(f"Parsed Result: {result_malformed}")
        print("-> Subtest 3 PASSED")
    except Exception as e:
         print(f"-> Subtest 3 FAILED: {e}")
         all_mock_tests_passed = False

    test_passed = all_mock_tests_passed


print("\n" + "="*10 + " End of Test: test_llm_parsing (Mocked) " + "="*10)
if test_passed:
    print("Result: test_llm_parsing PASSED")
else:
    print("Result: test_llm_parsing FAILED")

2025-04-30 16:35:36,289 - VALIDATOR_TEST - ERROR - An unexpected error occurred during LLM validation: 'dict' object has no attribute 'to'
Traceback (most recent call last):
  File "f:\interview\acordao\acordao_validator\src\validator.py", line 181, in validate_claim_with_llm
    inputs = tokenizer(prompt, return_tensors="pt", padding=False, truncation=False).to(model.device)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'dict' object has no attribute 'to'
2025-04-30 16:35:36,361 - VALIDATOR_TEST - ERROR - An unexpected error occurred during LLM validation: 'dict' object has no attribute 'to'
Traceback (most recent call last):
  File "f:\interview\acordao\acordao_validator\src\validator.py", line 181, in validate_claim_with_llm
    inputs = tokenizer(prompt, return_tensors="pt", padding=False, truncation=False).to(model.device)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError



--- Subtest 1: Mocking Correct Response ---
-> Subtest 1 FAILED: Parsing failed for Correct case, got None

--- Subtest 2: Mocking Incorrect Response ---
-> Subtest 2 FAILED: Parsing failed for Incorrect case, got None

--- Subtest 3: Mocking Malformed Response ---
-> Subtest 3 FAILED: Parsing failed for Malformed case, got None

Result: test_llm_parsing FAILED
