In [1]:
from scripts import setup_environment

# NOTE(review): project-local bootstrap imported from `scripts`; what it configures
# is not visible in this notebook — confirm before relying on its side effects.
setup_environment()

In [2]:
import textwrap

from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

# System-message ("priming") texts, keyed by the strategy name that
# get_system_prompt() looks up. Insertion order matches the original listing.
system_templates = dict(
    default="You are a helpful assistant.",
    step_by_step="You are a meticulous problem-solver.",
    creative="You excel at lateral thinking. Treat this as a riddle.",
    elimination="Eliminate wrong options internally.",
    metaphor="Interpret keywords metaphorically.",
    confidence="Score options internally.",
    perspective_shift="Analyze through multiple perspectives silently.",
    common_sense="Combine logic and creativity.",
    assumption_challenge="Challenge hidden assumptions internally.",
    pattern_matching="Find patterns silently.",
    intuitive="Critique your intuition internally.",
)


def get_system_prompt(template_name: str):
    """Build the system-message prompt template registered under ``template_name``.

    Args:
        template_name: Key into the module-level ``system_templates`` mapping.

    Returns:
        The object produced by ``SystemMessagePromptTemplate.from_template`` for
        the dedented template text, with ``id=template_name`` forwarded to it.

    Raises:
        KeyError: If ``template_name`` is not a key of ``system_templates``.
    """
    dedented_text = textwrap.dedent(system_templates[template_name])
    return SystemMessagePromptTemplate.from_template(dedented_text, id=template_name)


def get_user_prompt():
    """Build the human-message template that poses a single brain teaser.

    The template text exposes two placeholders, ``{question}`` and ``{choices}``.

    Returns:
        The object produced by ``HumanMessagePromptTemplate.from_template``.
    """
    # The literal is indented for readability only; dedent() strips the common
    # leading whitespace, so the text handed to the template starts at column 0.
    user_text = textwrap.dedent(
        """
        Please pick the best choice for the brain teaser. Each brain teaser has only one possible solution including the choice none of above, answer should only provide the choice:

        Question: {question}
        Choice:
        {choices}
        Answer:
        """
    )
    return HumanMessagePromptTemplate.from_template(user_text)


def create_prompt_template(
    system_prompt_template_name: str = "default",
):
    """Assemble the two-message chat prompt (system message, then user message).

    Args:
        system_prompt_template_name: Name passed to ``get_system_prompt`` to
            select the system message; defaults to ``"default"``.

    Returns:
        The object produced by ``ChatPromptTemplate.from_messages`` for the
        system template followed by the user template.
    """
    messages = [
        get_system_prompt(system_prompt_template_name),
        get_user_prompt(),
    ]
    return ChatPromptTemplate.from_messages(messages)

In [3]:
from scripts.dataset import BrainteaserDataset

# Project dataset wrapper constructed from "data" (presumably a directory path —
# confirm). Its `sp` and `wp` attributes are what the execution cells iterate over.
dataset = BrainteaserDataset("data")

In [4]:
import string

from scripts.dataset import RiddleQuestion


def args_generator(riddle_question: RiddleQuestion):
    """Turn one riddle into the keyword arguments expected by the chat prompt.

    Args:
        riddle_question: Object providing ``question`` and ``choice_list``.

    Returns:
        A dict with ``"question"`` (the question as-is) and ``"choices"``: one
        line per choice, labelled ``(A)``, ``(B)``, ... in list order.

    Raises:
        IndexError: If there are more choices than uppercase ASCII letters.
    """
    question_text = riddle_question.question

    labels = string.ascii_uppercase
    option_lines = []
    for position, option in enumerate(riddle_question.choice_list):
        option_lines.append(f"({labels[position]}) {option}")

    return {
        "question": question_text,
        "choices": "\n".join(option_lines),
    }


# Chat prompt built with the "default" system message; this is the prompt object
# handed to the executor runs in the cells below.
chat_prompt_template = create_prompt_template("default")

In [9]:
from scripts.lmm import vLLMModel
from scripts.executor import Executor

# Executor holding a single vLLMModel entry.
# NOTE(review): the recorded run that follows failed with a 404
# ("The model ... does not exist"), so this model name presumably has to match
# the model the serving endpoint was started with — confirm before re-running.
executor = Executor(
    models=[
        vLLMModel("deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"),
    ]
)

2025-02-14 11:02:53,859 - INFO - Initialized executor with 1 models.


In [10]:
# Asynchronous run over `dataset.sp` with the shared prompt and argument builder.
# Pickle dumping and checkpointing are both enabled, under the result name
# "sp_results_naive_async".
# NOTE(review): batch_size=10 is set only here; the recorded `wp` run, which
# passes no batch_size, logged batches of size 5 — confirm the difference is intended.
sp_results = await executor.aexecute(
    dataset.sp,
    chat_prompt_template,
    args_generator,
    dump_to_pickle=True,
    create_checkpoints=True,
    result_file_name="sp_results_naive_async",
    batch_size=10,
)

2025-02-14 11:02:57,277 - INFO - Starting asynchronous execution
2025-02-14 11:02:57,278 - INFO - Split dataset of 627 items into 63 batches of size 10
2025-02-14 11:02:57,278 - INFO - Processing deepseek-ai/DeepSeek-R1-Distill-Qwen-14B


deepseek-ai/DeepSeek-R1-Distill-Qwen-14B:   0%|          | 0/627 [00:00<?, ?it/s]

2025-02-14 11:02:57,631 - INFO - Cleaning up deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
2025-02-14 11:02:57,631 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!


NotFoundError: Error code: 404 - {'object': 'error', 'message': 'The model `deepseek-ai/DeepSeek-R1-Distill-Qwen-14B` does not exist.', 'type': 'NotFoundError', 'param': None, 'code': 404}

In [8]:
# Asynchronous run over `dataset.wp` with the shared prompt and argument builder.
# Pickle dumping and checkpointing are both enabled, under the result name
# "wp_results_naive_async".
# NOTE(review): the recorded output of this cell reports
# deepseek-ai/DeepSeek-R1-Distill-Llama-8B, not the model configured in the
# executor cell, and its execution count precedes that cell's — the output
# appears to come from an earlier executor definition; re-run for consistency.
wp_results = await executor.aexecute(
    dataset.wp,
    chat_prompt_template,
    args_generator,
    dump_to_pickle=True,
    create_checkpoints=True,
    result_file_name="wp_results_naive_async",
)

2025-02-14 10:54:33,083 - INFO - Starting asynchronous execution
2025-02-14 10:54:33,083 - INFO - Split dataset of 492 items into 99 batches of size 5
2025-02-14 10:54:33,083 - INFO - Processing deepseek-ai/DeepSeek-R1-Distill-Llama-8B


deepseek-ai/DeepSeek-R1-Distill-Llama-8B:   0%|          | 0/492 [00:00<?, ?it/s]

2025-02-14 11:02:26,935 - INFO - Creating checkpoint: results/checkpoints/wp_results_naive_async/deepseek-ai/DeepSeek-R1-Distill-Llama-8B_wp_results_naive_async.pkl
2025-02-14 11:02:27,003 - INFO - Cleaning up deepseek-ai/DeepSeek-R1-Distill-Llama-8B
2025-02-14 11:02:27,003 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!
2025-02-14 11:02:27,004 - INFO - Asynchronous execution complete
2025-02-14 11:02:27,004 - INFO - Dumping results to results/wp_results_naive_async.pkl


In [None]:
from scripts.lmm import OllamaLlm
from scripts.executor import Executor

# Executor over the DeepSeek-R1 variants, each wrapped in OllamaLlm.
executor_deepseek = Executor(
    models=[
        # Deepseek R1
        OllamaLlm("deepseek-r1:8b"),
        OllamaLlm("deepseek-r1:14b"),
        OllamaLlm("deepseek-r1:32b"),
        OllamaLlm("deepseek-r1:1.5b"),
    ]
)

# Run over `dataset.sp`. The result is bound to `sp_results`: it was previously
# bound to `wp_results` and then immediately overwritten by the `wp` run below,
# which left the sp results unreachable in memory.
# Note that this rebinds `sp_results` from any earlier run in the same kernel.
sp_results = executor_deepseek.execute(
    dataset.sp,
    chat_prompt_template,
    args_generator,
    dump_to_pickle=True,
    create_checkpoints=True,
    result_file_name="sp_results_naive_deepseek",
)

# Run over `dataset.wp`; as before, this rebinds `wp_results`.
wp_results = executor_deepseek.execute(
    dataset.wp,
    chat_prompt_template,
    args_generator,
    dump_to_pickle=True,
    create_checkpoints=True,
    result_file_name="wp_results_naive_deepseek",
)

2025-02-14 07:22:14,310 - INFO - Initialized executor with 4 models.
2025-02-14 07:22:14,324 - INFO - Starting execution
2025-02-14 07:22:14,325 - INFO - Processing deepseek-r1:8b
2025-02-14 07:22:14,325 - INFO - Pulling Ollama model: deepseek-r1:8b
2025-02-14 07:22:42,308 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/pull "HTTP/1.1 200 OK"


deepseek-r1:8b:   0%|          | 0/627 [00:00<?, ?it/s]

2025-02-14 07:22:55,583 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-02-14 07:28:02,795 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-02-14 07:28:08,877 - INFO - Cleaning up deepseek-r1:8b
2025-02-14 07:28:08,878 - INFO - Ollama models will be deleted on demand and therefore this step is skipped!


KeyboardInterrupt: 