In [76]:
from scripts import setup_environment

# Run the project's environment bootstrap in the first cell, before any other
# notebook code executes.
# NOTE(review): what this configures is not visible from the notebook —
# presumably logging and paths; confirm in the `scripts` package.
setup_environment()

In [None]:
import textwrap

from langchain.prompts.chat import (
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

# System message templates (priming).
# Maps a template name to the text of the system message that primes the model.
# get_system_prompt_template() looks entries up by key, so every key below is a
# valid `template_name`; get_few_shot_chat_template() selects "default" unless
# told otherwise.
system_templates = {
    "default": "You are a helpful assistant.",
    "step_by_step": "You are a meticulous problem-solver.",
    "creative": "You excel at lateral thinking. Treat this as a riddle.",
    "elimination": "Eliminate wrong options internally.",
    "metaphor": "Interpret keywords metaphorically.",
    "confidence": "Score options internally.",
    "perspective_shift": "Analyze through multiple perspectives silently.",
    "common_sense": "Combine logic and creativity.",
    "assumption_challenge": "Challenge hidden assumptions internally.",
    "pattern_matching": "Find patterns silently.",
    "intuitive": "Critique your intuition internally.",
}


def get_system_prompt_template(template_name: str):
    """Return the system-message prompt template registered under a name.

    The text is looked up in the module-level ``system_templates`` mapping,
    passed through ``textwrap.dedent`` and wrapped in a
    ``SystemMessagePromptTemplate``; ``template_name`` is forwarded to the
    factory as ``id``.

    Args:
        template_name: Key of the desired entry in ``system_templates``.

    Returns:
        The object produced by ``SystemMessagePromptTemplate.from_template``.

    Raises:
        KeyError: If ``template_name`` is not a key of ``system_templates``.
    """
    dedented_text = textwrap.dedent(system_templates[template_name])
    return SystemMessagePromptTemplate.from_template(dedented_text, id=template_name)


def get_humand_prompt_template():
    """Return the human-message prompt template that poses one brain teaser.

    The template text contains two placeholders, ``{question}`` and
    ``{choices}``, and ends with an ``Answer:`` line for the model to complete.

    Returns:
        The object produced by ``HumanMessagePromptTemplate.from_template``.
    """
    # The literal is indented to line up with the function body; dedent strips
    # that common margin before the text is handed to the template factory.
    raw_template = """
    Please pick the best choice for the brain teaser. Each brain teaser has only one possible solution including the choice none of above, answer should only provide the choice:

    Question: {question}
    Choice:
    {choices}
    Answer:
    """
    return HumanMessagePromptTemplate.from_template(textwrap.dedent(raw_template))

In [78]:
from scripts.dataset import BrainteaserDataset

# Build the dataset wrapper from the relative "data" location. Later cells read
# its `.sp` and `.wp` attributes as the riddle collections to evaluate.
# NOTE(review): presumably the sentence-puzzle / word-puzzle splits — confirm
# in scripts.dataset.
dataset = BrainteaserDataset("data")

In [79]:
import string

from scripts.dataset import RiddleQuestion


def args_generator(riddle_question: RiddleQuestion):
    """Turn a riddle into the variables consumed by the prompt templates.

    Args:
        riddle_question: Object exposing ``question``, ``choice_list`` and
            ``label`` (used as an index into the uppercase alphabet).

    Returns:
        A dict with three string entries:
        ``"question"`` – the riddle text as stored on the object;
        ``"choices"`` – one ``(X) choice`` line per option, lettered A, B, C, ...
        in list order and joined with newlines;
        ``"answer"`` – the uppercase letter at position ``label``.
    """
    letters = string.ascii_uppercase
    question_text = riddle_question.question

    lettered_choices = []
    for position, choice_text in enumerate(riddle_question.choice_list):
        lettered_choices.append(f"({letters[position]}) {choice_text}")

    return {
        "question": question_text,
        "choices": "\n".join(lettered_choices),
        "answer": letters[riddle_question.label],
    }

In [80]:
# We now create the few-shot examples following the best practices from https://python.langchain.com/docs/how_to/few_shot_examples_chat/
# Thus we do not embed the examples in the initial user prompt; instead we supply them as a message history in which the user asks and the assistant (AI) answers

from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate


def get_few_shot_chat_template(
    dataset: list[RiddleQuestion],
    number_of_shots: int = 4,
    system_template: str = "default",
):
    """Build a few-shot chat prompt and split off the riddles left to solve.

    The first ``number_of_shots`` riddles become worked examples, each rendered
    as a human message followed by an AI message holding the answer letter.
    The final prompt is: system message, the example exchanges, then a human
    message with the placeholders for the riddle to be solved.

    Args:
        dataset: Sequence of riddles; sliced, never mutated.
        number_of_shots: How many leading riddles to use as examples.
            ``0`` yields a prompt without examples.
        system_template: Key into ``system_templates`` selecting the system
            message.

    Returns:
        A ``(chat_prompt_template, riddles_to_solve)`` tuple, where
        ``riddles_to_solve`` is ``dataset`` without the example riddles.

    Raises:
        ValueError: If ``number_of_shots`` is negative.
        KeyError: If ``system_template`` is not a key of ``system_templates``.
    """
    # A negative count would slice from the end of the dataset: almost every
    # riddle would become an example and only the last few would be evaluated.
    # Reject it instead of silently producing that split.
    if number_of_shots < 0:
        raise ValueError(
            f"number_of_shots must be non-negative, got {number_of_shots}"
        )

    # NOTE(review): if neighbouring dataset entries are variants of the same
    # riddle, cutting at an arbitrary offset could leave near-duplicates of the
    # examples in the evaluation split — confirm against the dataset layout.
    riddles_as_examples = dataset[:number_of_shots]
    riddles_to_solve = dataset[number_of_shots:]

    # One example = the human question followed by the expected AI answer.
    example_prompt = ChatPromptTemplate.from_messages(
        [
            get_humand_prompt_template(),
            ("ai", "{answer}"),
        ]
    )
    few_shot_prompt = FewShotChatMessagePromptTemplate(
        example_prompt=example_prompt,
        examples=[args_generator(example) for example in riddles_as_examples],
    )

    chat_prompt_template = ChatPromptTemplate.from_messages(
        [
            get_system_prompt_template(system_template),
            few_shot_prompt,
            # Trailing human message: filled in per riddle at execution time.
            get_humand_prompt_template(),
        ]
    )
    return (chat_prompt_template, riddles_to_solve)

In [81]:
# from langchain_chroma import Chroma
# from langchain_ollama import OllamaEmbeddings
# from langchain_core.example_selectors import SemanticSimilarityExampleSelector

# riddles = dataset.sp
# examples_full = [args_generator(riddle) for riddle in riddles]
# example_selector = SemanticSimilarityExampleSelector.from_examples(
#     examples_full,
#     OllamaEmbeddings(
#         model="bge-m3"
#     ),  # bge-m3 excels at handling context-rich queries thanks to its higher embedding dimensionality; as a fallback we could also use a model such as nomic-embed-text for short semantic queries
#     Chroma,
#     k=4,
# )


In [None]:
from scripts.lmm import OllamaModel
from scripts.executor import Executor

# Executor over every Ollama model under evaluation; models are constructed in
# the order listed, grouped by family.
# NOTE(review): the saved output of this cell reports 18 models while 15 are
# listed here, and a later run log mentions deepseek-r1:1.5b — the list appears
# to have been edited after the last execution; re-run the cell to refresh it.
executor = Executor(
    models=[
        OllamaModel(model_tag)
        for model_tag in (
            # Llama3.1
            "llama3.1:8b",
            # Llama3.2
            "llama3.2:1b",
            "llama3.2:3b",
            # Phi3.5
            "phi3.5:3.8b",
            # Phi4
            "phi4:14b",
            # Qwen2.5
            "qwen2.5:0.5b",
            "qwen2.5:1.5b",
            "qwen2.5:3b",
            "qwen2.5:7b",
            "qwen2.5:14b",
            "qwen2.5:32b",
            # Gemma2
            "gemma2:2b",
            "gemma2:9b",
            "gemma2:27b",
            # Mistral Nemo
            "mistral-nemo:12b",
        )
    ]
)

2025-02-15 11:07:06,030 - INFO - Initialized executor with 18 models.


In [83]:
# Number of leading riddles used as in-context examples; they are removed from
# the evaluation split. Also reused by the WP cell further down.
n_shots = 4
# Build the prompt from the first `n_shots` SP riddles and keep the rest for evaluation.
chat_prompt_template, riddles_for_eval = get_few_shot_chat_template(dataset.sp, n_shots)

# Evaluate the executor's models on the held-out SP riddles. Per the log output,
# per-model checkpoints go under results/checkpoints/<result_file_name>/.
# Top-level `await` relies on the notebook's running event loop.
# NOTE(review): the saved output of this cell ends in CancelledError, so the run
# shown did not finish — re-run before relying on `sp_results`.
sp_results = await executor.aexecute(
    riddles_for_eval,
    chat_prompt_template,
    args_generator,  # maps each riddle to the template variables
    dump_to_pickle=True,
    create_checkpoints=True,
    result_file_name=f"sp_results_few_shot_n_{n_shots}",
)

2025-02-15 11:07:06,047 - INFO - Starting asynchronous execution
2025-02-15 11:07:06,047 - INFO - Split dataset of 623 items into 125 batches of size 5
2025-02-15 11:07:06,048 - INFO - Processing llama3.1:8b
2025-02-15 11:07:06,048 - INFO - Pulling Ollama model: llama3.1:8b


llama3.1:8b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:07:44,743 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/llama3.1:8b_sp_results_few_shot_n_4.pkl
2025-02-15 11:07:45,286 - INFO - Cleaning up llama3.1:8b
2025-02-15 11:07:45,286 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:07:45,286 - INFO - Processing llama3.2:1b
2025-02-15 11:07:45,287 - INFO - Pulling Ollama model: llama3.2:1b


llama3.2:1b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:08:10,417 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/llama3.2:1b_sp_results_few_shot_n_4.pkl
2025-02-15 11:08:10,857 - INFO - Cleaning up llama3.2:1b
2025-02-15 11:08:10,858 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:08:10,858 - INFO - Processing llama3.2:3b
2025-02-15 11:08:10,858 - INFO - Pulling Ollama model: llama3.2:3b


llama3.2:3b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:08:53,357 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/llama3.2:3b_sp_results_few_shot_n_4.pkl
2025-02-15 11:08:53,688 - INFO - Cleaning up llama3.2:3b
2025-02-15 11:08:53,689 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:08:53,689 - INFO - Processing phi3.5:3.8b
2025-02-15 11:08:53,689 - INFO - Pulling Ollama model: phi3.5:3.8b


phi3.5:3.8b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:15:14,994 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/phi3.5:3.8b_sp_results_few_shot_n_4.pkl
2025-02-15 11:15:15,323 - INFO - Cleaning up phi3.5:3.8b
2025-02-15 11:15:15,323 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:15:15,324 - INFO - Processing phi4:14b
2025-02-15 11:15:15,324 - INFO - Pulling Ollama model: phi4:14b


phi4:14b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:17:29,425 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/phi4:14b_sp_results_few_shot_n_4.pkl
2025-02-15 11:17:29,738 - INFO - Cleaning up phi4:14b
2025-02-15 11:17:29,738 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:17:29,739 - INFO - Processing qwen2.5:0.5b
2025-02-15 11:17:29,739 - INFO - Pulling Ollama model: qwen2.5:0.5b


qwen2.5:0.5b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:19:10,844 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/qwen2.5:0.5b_sp_results_few_shot_n_4.pkl
2025-02-15 11:19:11,142 - INFO - Cleaning up qwen2.5:0.5b
2025-02-15 11:19:11,143 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:19:11,143 - INFO - Processing qwen2.5:1.5b
2025-02-15 11:19:11,144 - INFO - Pulling Ollama model: qwen2.5:1.5b


qwen2.5:1.5b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:20:51,475 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/qwen2.5:1.5b_sp_results_few_shot_n_4.pkl
2025-02-15 11:20:52,007 - INFO - Cleaning up qwen2.5:1.5b
2025-02-15 11:20:52,007 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:20:52,008 - INFO - Processing qwen2.5:3b
2025-02-15 11:20:52,008 - INFO - Pulling Ollama model: qwen2.5:3b


qwen2.5:3b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:22:37,350 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/qwen2.5:3b_sp_results_few_shot_n_4.pkl
2025-02-15 11:22:37,947 - INFO - Cleaning up qwen2.5:3b
2025-02-15 11:22:37,947 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:22:37,947 - INFO - Processing qwen2.5:7b
2025-02-15 11:22:37,947 - INFO - Pulling Ollama model: qwen2.5:7b


qwen2.5:7b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:24:39,800 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/qwen2.5:7b_sp_results_few_shot_n_4.pkl
2025-02-15 11:24:40,149 - INFO - Cleaning up qwen2.5:7b
2025-02-15 11:24:40,150 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:24:40,150 - INFO - Processing qwen2.5:14b
2025-02-15 11:24:40,150 - INFO - Pulling Ollama model: qwen2.5:14b


qwen2.5:14b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:27:09,605 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/qwen2.5:14b_sp_results_few_shot_n_4.pkl
2025-02-15 11:27:10,177 - INFO - Cleaning up qwen2.5:14b
2025-02-15 11:27:10,178 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:27:10,178 - INFO - Processing qwen2.5:32b
2025-02-15 11:27:10,178 - INFO - Pulling Ollama model: qwen2.5:32b


qwen2.5:32b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:30:38,513 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/qwen2.5:32b_sp_results_few_shot_n_4.pkl
2025-02-15 11:30:39,105 - INFO - Cleaning up qwen2.5:32b
2025-02-15 11:30:39,105 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:30:39,105 - INFO - Processing gemma2:2b
2025-02-15 11:30:39,105 - INFO - Pulling Ollama model: gemma2:2b


gemma2:2b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:31:34,618 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/gemma2:2b_sp_results_few_shot_n_4.pkl
2025-02-15 11:31:34,918 - INFO - Cleaning up gemma2:2b
2025-02-15 11:31:34,918 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:31:34,919 - INFO - Processing gemma2:9b
2025-02-15 11:31:34,919 - INFO - Pulling Ollama model: gemma2:9b


gemma2:9b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:33:54,524 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/gemma2:9b_sp_results_few_shot_n_4.pkl
2025-02-15 11:33:55,159 - INFO - Cleaning up gemma2:9b
2025-02-15 11:33:55,160 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:33:55,160 - INFO - Processing gemma2:27b
2025-02-15 11:33:55,160 - INFO - Pulling Ollama model: gemma2:27b


gemma2:27b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:36:47,414 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/gemma2:27b_sp_results_few_shot_n_4.pkl
2025-02-15 11:36:47,727 - INFO - Cleaning up gemma2:27b
2025-02-15 11:36:47,727 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:36:47,728 - INFO - Processing mistral-nemo:12b
2025-02-15 11:36:47,728 - INFO - Pulling Ollama model: mistral-nemo:12b


mistral-nemo:12b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:38:21,713 - INFO - Creating checkpoint: results/checkpoints/sp_results_few_shot_n_4/mistral-nemo:12b_sp_results_few_shot_n_4.pkl
2025-02-15 11:38:22,340 - INFO - Cleaning up mistral-nemo:12b
2025-02-15 11:38:22,341 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-15 11:38:22,341 - INFO - Processing deepseek-r1:1.5b
2025-02-15 11:38:22,341 - INFO - Pulling Ollama model: deepseek-r1:1.5b


deepseek-r1:1.5b:   0%|          | 0/623 [00:00<?, ?it/s]

2025-02-15 11:42:45,902 - INFO - Cleaning up deepseek-r1:1.5b
2025-02-15 11:42:45,903 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!


CancelledError: 

In [None]:
# Same procedure as the SP run, applied to the WP riddles. Reuses `n_shots` from
# the SP cell (run that cell first) and rebinds `chat_prompt_template` /
# `riddles_for_eval`, so the SP versions are no longer available afterwards.
chat_prompt_template, riddles_for_eval = get_few_shot_chat_template(dataset.wp, n_shots)

# NOTE(review): the saved output below is dated 2025-02-13 and writes to
# "wp_results_naive", which does not match the result_file_name configured here —
# it appears to be stale output from an earlier run; re-run to refresh.
wp_results = await executor.aexecute(
    riddles_for_eval,
    chat_prompt_template,
    args_generator,  # maps each riddle to the template variables
    dump_to_pickle=True,
    create_checkpoints=True,
    result_file_name=f"wp_results_few_shot_n_{n_shots}",
)

2025-02-13 18:16:37,538 - INFO - Starting execution
2025-02-13 18:16:37,538 - INFO - Processing llama3.1:8b
2025-02-13 18:16:37,539 - INFO - Pulling Ollama model: llama3.1:8b


llama3.1:8b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 18:18:15,731 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/llama3.1:8b_wp_results_naive.pkl
2025-02-13 18:18:15,922 - INFO - Cleaning up llama3.1:8b
2025-02-13 18:18:15,923 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 18:18:15,923 - INFO - Processing llama3.2:1b
2025-02-13 18:18:15,923 - INFO - Pulling Ollama model: llama3.2:1b


llama3.2:1b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 18:19:55,016 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/llama3.2:1b_wp_results_naive.pkl
2025-02-13 18:19:55,127 - INFO - Cleaning up llama3.2:1b
2025-02-13 18:19:55,128 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 18:19:55,128 - INFO - Processing llama3.2:3b
2025-02-13 18:19:55,128 - INFO - Pulling Ollama model: llama3.2:3b


llama3.2:3b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 18:21:48,487 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/llama3.2:3b_wp_results_naive.pkl
2025-02-13 18:21:48,666 - INFO - Cleaning up llama3.2:3b
2025-02-13 18:21:48,666 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 18:21:48,667 - INFO - Processing phi3.5:3.8b
2025-02-13 18:21:48,667 - INFO - Pulling Ollama model: phi3.5:3.8b


phi3.5:3.8b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 18:36:22,407 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/phi3.5:3.8b_wp_results_naive.pkl
2025-02-13 18:36:22,516 - INFO - Cleaning up phi3.5:3.8b
2025-02-13 18:36:22,516 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 18:36:22,517 - INFO - Processing phi4:14b
2025-02-13 18:36:22,517 - INFO - Pulling Ollama model: phi4:14b


phi4:14b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 18:51:51,261 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/phi4:14b_wp_results_naive.pkl
2025-02-13 18:51:51,364 - INFO - Cleaning up phi4:14b
2025-02-13 18:51:51,365 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 18:51:51,365 - INFO - Processing qwen2.5:0.5b
2025-02-13 18:51:51,365 - INFO - Pulling Ollama model: qwen2.5:0.5b


qwen2.5:0.5b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 18:54:12,117 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/qwen2.5:0.5b_wp_results_naive.pkl
2025-02-13 18:54:12,227 - INFO - Cleaning up qwen2.5:0.5b
2025-02-13 18:54:12,228 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 18:54:12,228 - INFO - Processing qwen2.5:1.5b
2025-02-13 18:54:12,228 - INFO - Pulling Ollama model: qwen2.5:1.5b


qwen2.5:1.5b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 18:56:07,012 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/qwen2.5:1.5b_wp_results_naive.pkl
2025-02-13 18:56:07,211 - INFO - Cleaning up qwen2.5:1.5b
2025-02-13 18:56:07,211 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 18:56:07,212 - INFO - Processing qwen2.5:3b
2025-02-13 18:56:07,212 - INFO - Pulling Ollama model: qwen2.5:3b


qwen2.5:3b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 18:58:28,646 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/qwen2.5:3b_wp_results_naive.pkl
2025-02-13 18:58:28,757 - INFO - Cleaning up qwen2.5:3b
2025-02-13 18:58:28,758 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 18:58:28,759 - INFO - Processing qwen2.5:7b
2025-02-13 18:58:28,759 - INFO - Pulling Ollama model: qwen2.5:7b


qwen2.5:7b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 19:02:40,346 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/qwen2.5:7b_wp_results_naive.pkl
2025-02-13 19:02:40,451 - INFO - Cleaning up qwen2.5:7b
2025-02-13 19:02:40,452 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 19:02:40,452 - INFO - Processing qwen2.5:14b
2025-02-13 19:02:40,452 - INFO - Pulling Ollama model: qwen2.5:14b


qwen2.5:14b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 19:10:21,899 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/qwen2.5:14b_wp_results_naive.pkl
2025-02-13 19:10:22,006 - INFO - Cleaning up qwen2.5:14b
2025-02-13 19:10:22,006 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 19:10:22,007 - INFO - Processing qwen2.5:32b
2025-02-13 19:10:22,007 - INFO - Pulling Ollama model: qwen2.5:32b
2025-02-13 19:11:58,642 - ERROR - Error pulling Ollama model: write /root/.ollama/models/blobs/sha256-eabc98a9bcbfce7fd70f3e07de599f8fda98120fefed5881934161ede8bd1a41-partial: no space left on device (status code: 500)
2025-02-13 19:11:58,643 - INFO - Deleting all ollama models to free up space


qwen2.5:32b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 19:28:58,750 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/qwen2.5:32b_wp_results_naive.pkl
2025-02-13 19:28:58,859 - INFO - Cleaning up qwen2.5:32b
2025-02-13 19:28:58,859 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 19:28:58,860 - INFO - Processing gemma2:2b
2025-02-13 19:28:58,860 - INFO - Pulling Ollama model: gemma2:2b


gemma2:2b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 19:31:16,516 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/gemma2:2b_wp_results_naive.pkl
2025-02-13 19:31:16,739 - INFO - Cleaning up gemma2:2b
2025-02-13 19:31:16,739 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 19:31:16,739 - INFO - Processing gemma2:9b
2025-02-13 19:31:16,740 - INFO - Pulling Ollama model: gemma2:9b


gemma2:9b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 19:35:37,128 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/gemma2:9b_wp_results_naive.pkl
2025-02-13 19:35:37,231 - INFO - Cleaning up gemma2:9b
2025-02-13 19:35:37,231 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 19:35:37,231 - INFO - Processing gemma2:27b
2025-02-13 19:35:37,231 - INFO - Pulling Ollama model: gemma2:27b


gemma2:27b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 19:43:17,563 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/gemma2:27b_wp_results_naive.pkl
2025-02-13 19:43:17,664 - INFO - Cleaning up gemma2:27b
2025-02-13 19:43:17,665 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 19:43:17,665 - INFO - Processing mistral-nemo:12b
2025-02-13 19:43:17,665 - INFO - Pulling Ollama model: mistral-nemo:12b


mistral-nemo:12b:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-13 19:50:35,816 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive/mistral-nemo:12b_wp_results_naive.pkl
2025-02-13 19:50:36,042 - INFO - Cleaning up mistral-nemo:12b
2025-02-13 19:50:36,042 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-13 19:50:36,043 - INFO - Execution complete
2025-02-13 19:50:36,043 - INFO - Dumping results to results/wp_results_naive.pkl
