In [None]:
from torch.utils.jit.log_extract import run_test

from src.backend.modules.ai_assistant.llm_interactor.llm_interactor_test import LLMInteractorTest
from src.backend.modules.ai_assistant.task_executor import TaskExecutor
from src.backend.modules.evaluation.load_test_data.load_test_data import load_test_data
from src.backend.modules.evaluation.run_tests.run_tests import InteractionTestEvaluator
from src.backend.modules.llm.llm_communicator import LLMCommunicator
from src.backend.modules.llm.lm_studio_llm import LMStudioLLM
from src.backend.modules.srs.testsrs.testsrs import TestCard, TestFlashcardManager

In [None]:
# Load the evaluation test data from the JSON fixture file.
# NOTE(review): the path is relative, so it presumably resolves against the kernel's
# working directory — confirm the notebook is started from the expected folder.
test_data_path = "../tests/data/tests.json"
test_data = load_test_data(test_data_path)

In [None]:
# Flashcard manager that comes with the loaded test data.
fcm = test_data.test_flashcard_manager

# Show the names of all decks it contains.
[it.name for it in fcm.get_all_decks()]

In [None]:
# Look up one deck of the test data by name; as the cell's last expression it is displayed.
fcm.get_deck_by_name("Latin Literature")

In [None]:
# Print a single card (index 3) of the "Latin Literature" deck to see its text form.
print(fcm.get_deck_by_name("Latin Literature").cards[3])

In [None]:
# Alternative model configuration (Llama 3.1 8B), kept for quick switching:
# fuzzy_search_llm = LMStudioLLM("meta-llama-3.1-8b-instruct", 0.0, 10)
# task_llm = LMStudioLLM("meta-llama-3.1-8b-instruct", 0.8, 1024)

# NOTE(review): the positional arguments look like (model name, temperature, max tokens)
# — confirm against LMStudioLLM's signature.
fuzzy_search_llm = LMStudioLLM("qwen3-8b", 0.0, 15, add_no_think=True)
task_llm = LMStudioLLM("qwen3-8b", 0.8, 1024)

# Create two TaskExecutors: one on a blank, mutable flashcard manager and one on the test data.
# The blank manager gets its own name instead of reusing `fcm`: `fcm` is bound to the
# test-data manager in an earlier cell, and rebinding it here would make the deck-inspection
# cells above hit an empty manager when they are re-run after this cell.
blank_fcm = TestFlashcardManager()
llmi = LLMInteractorTest(blank_fcm, fuzzy_search_llm)
llmc = LLMCommunicator(task_llm)
te = TaskExecutor(llmi, llmc, default_max_errors=0, verbose=True)

# Executor that works on the flashcard manager shipped with the test data.
test_llmi = LLMInteractorTest(test_data.test_flashcard_manager, fuzzy_search_llm)
test_te = TaskExecutor(test_llmi, llmc, default_max_errors=0, verbose=True)

In [None]:
# Print the system prompt produced by the executor's private helper, for manual inspection.
print(te._get_system_prompt())

In [None]:
# Run a single natural-language prompt through `te`, the executor wired to the blank
# flashcard manager.
te.execute_prompts(
    [
        "Go make new deck name Geography and add a new card (flag: Turquoise) with question What is the capital of France? and answer Paris. The state should be 'New'."
    ],
)

## Do the tests!

In [None]:
from src.backend.modules.helpers.matching import match_by_equals, match_by_key

# Try match_by_equals on a list of ints and a list of numeric strings, comparing the
# elements through a custom equality predicate.
print(match_by_equals([1, 3, 5], ["5", "7", "1", "9"], lambda left, right: left == int(right)))

# The same two lists through match_by_key, passing an `equals` predicate and a
# `right_key` conversion.
print(
    match_by_key(
        [1, 3, 5],
        ["5", "7", "1", "9"],
        equals=(lambda a, b: str(a) == b),
        right_key=lambda value: int(value),
    )
)

In [None]:
from src.backend.modules.srs.testsrs.testsrs import Flag, CardState


def foo():  # a function, so the two cards stay out of the global scope (otherwise: a flood of warnings, even with `del`)
    """Fuzzy-match two cards with the same question but differently worded answers.

    Both cards are added to the same deck of a throwaway TestFlashcardManager. Only the
    first ("expected") card is created with the fuzzymatch flags enabled. The result of
    the evaluator's fuzzy comparison is printed.
    """
    scratch_manager = TestFlashcardManager()
    test_deck = scratch_manager.add_deck("Test")

    expected_card = scratch_manager.add_full_card(
        test_deck,
        question="What is an integer?",
        answer="A whole number.",
        flag=Flag.NONE,
        card_state=CardState.NEW,
        fuzzymatch_answer=True,
        fuzzymatch_question=True,
    )
    actual_card = scratch_manager.add_full_card(
        test_deck,
        question="What is an integer?",
        answer="A number without any decimals.",
        # Original author's note: "lol prompt injection".
        flag=Flag.NONE,
        card_state=CardState.NEW,
    )

    evaluator = InteractionTestEvaluator(fuzzy_search_llm)
    match_result = evaluator._fuzzy_match_test_cards(expected_card, actual_card)
    print(match_result)


foo()

### Execute interaction tests


In [None]:
# Display a single interaction test (index 5) from the loaded test data.
test_data.interaction[5]

In [None]:
from typing import Iterable


def execute_interaction_tests(
    indices: Iterable[int] | None = None,
    verbose: bool = True,
    print_progress: bool = True,
    log_file_path: str | None = None,
):
    """Run all interaction tests from ``test_data``, or only a selected subset.

    Relies on the notebook globals ``test_data``, ``fuzzy_search_llm`` and ``task_llm``.
    Every call starts from a freshly created ``TestFlashcardManager``.

    Args:
        indices: Positions within ``test_data.interaction`` to run. ``None`` runs every
            test. Duplicates collapse, the tests keep the order of
            ``test_data.interaction``, and positions that do not exist are ignored
            without any error.
        verbose: Forwarded as ``verbose_task_execution``.
        print_progress: Forwarded unchanged.
        log_file_path: Forwarded unchanged.

    Returns:
        Whatever ``InteractionTestEvaluator.execute_interaction_tests`` returns.
    """
    if indices is None:
        selected_tests = test_data.interaction
    else:
        wanted_positions = set(indices)
        selected_tests = [
            test for position, test in enumerate(test_data.interaction) if position in wanted_positions
        ]

    evaluator = InteractionTestEvaluator(fuzzy_search_llm)
    fresh_interactor = LLMInteractorTest(TestFlashcardManager(), fuzzy_search_llm)

    # The message/error limits are fixed for every run started from this notebook.
    return evaluator.execute_interaction_tests(
        tests=selected_tests,
        llm_interactor=fresh_interactor,
        task_llm=task_llm,
        default_max_messages=10,
        default_max_errors=5,
        max_stream_messages_per_chunk=10,
        max_stream_errors_per_chunk=2,
        verbose_task_execution=verbose,
        print_progress=print_progress,
        log_file_path=log_file_path,
    )

In [None]:
# Run only the interaction test at index 5 and display what comes back.
res = execute_interaction_tests([5])

res

In [None]:
# Pretty-print the first (and, for a single selected test, only) result of the run above.
res[0].pretty_print()

In [None]:
# assert len(test_data.interaction) > 100

In [None]:
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo

# Timestamp of this run in Berlin local time, with the UTC offset appended.
NOW = datetime.now(ZoneInfo("Europe/Berlin")).strftime("%Y-%m-%d %H:%M:%S %z")

# Build the report path from the timestamp. Colons are replaced because they are not
# allowed in Windows file names. The target directory is created up front so the run
# cannot fail merely because "reports/" does not exist yet.
REPORT_PATH = Path("reports") / f"test report {NOW.replace(':', '-')}.json"
REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)

# Full run over every interaction test, with per-task verbosity switched off.
RES = execute_interaction_tests(indices=None, verbose=False, log_file_path=str(REPORT_PATH))
RES

In [None]:
# One (passed, crashed) pair per result of the full run.
[(r.passed, r.crashed) for r in RES]

### Random other tests and snippets

In [None]:
# A hand-written LLM reply containing an <execute> block, used to try out the executor's
# private response parser.
possible_answer = """

<execute>
* list_decks()
</execute>"""

te._parse_llm_response(possible_answer)

In [None]:
# Case-insensitive substring search for "change" in both questions and answers, run
# against the test-data interactor.
# NOTE(review): deck_id_str="*" presumably selects all decks — confirm.
substring_search_res = test_llmi.search_for_substring(
    deck_id_str="*",
    search_substring="change",
    search_in_question=True,
    search_in_answer=True,
    case_sensitive=False,
)

# The check below pins the number of matching cards to exactly five.
assert len(substring_search_res.cards) == 5

# Print every hit, each followed by a separator line.
for matched_card in substring_search_res.cards:
    print(matched_card, "\n===============================\n", sep="\n")