In [2]:
from scripts.dataset import BrainteaserDataset

# Directory argument handed to BrainteaserDataset; the path is relative to the
# notebook's working directory.
data_dir = "data"
dataset = BrainteaserDataset(data_dir)

In [3]:
import string
import textwrap

from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

from scripts.dataset import RiddleQuestion

# System message placed ahead of the riddle prompt in every conversation.
system_prompt_template = SystemMessagePromptTemplate.from_template(
    "You are a helpful assistant."
)

# Human message with {question} and {choices} placeholders. The literal is
# indented for readability and dedented before it is turned into a template.
prompt = textwrap.dedent(
    """
    Please pick the best choice for the brain teaser. Each brain teaser has only one possible solution including the choice none of above, answer should only provide the choice:

    Question: {question}
    Choice:
    {choices}
    Answer:
    """
)
user_prompt_template = HumanMessagePromptTemplate.from_template(prompt)

# Full chat prompt: system message first, then the human message.
chat_prompt_template = ChatPromptTemplate.from_messages(
    [system_prompt_template, user_prompt_template]
)


def args_generator(riddle_question: RiddleQuestion):
    """Build the prompt-template arguments for one riddle.

    Args:
        riddle_question: Object exposing ``question`` and ``choice_list``.

    Returns:
        A dict with two keys: ``"question"`` holds the riddle's question
        unchanged, and ``"choices"`` holds one line per entry of
        ``choice_list``, each prefixed with an uppercase letter in
        parentheses (``(A)``, ``(B)``, ...).

    Raises:
        IndexError: If ``choice_list`` has more entries than
            ``string.ascii_uppercase`` has letters.
    """
    question_text = riddle_question.question

    labelled_choices = []
    for position, option in enumerate(riddle_question.choice_list):
        letter = string.ascii_uppercase[position]
        labelled_choices.append(f"({letter}) {option}")

    return {
        "question": question_text,
        "choices": "\n".join(labelled_choices),
    }

In [6]:
import ollama

# Tag of the Ollama model to download.
model = "llama3.2:3b-instruct-fp16"

# Pull the model through ollama; the response is the cell's displayed output.
pull_status = ollama.pull(model)
pull_status

ProgressResponse(status='success', completed=None, total=None, digest=None)

In [None]:
from ollama import pull
from tqdm.notebook import tqdm

from scripts.lmm import OllamaLlm

# Ollama model tags to evaluate, each wrapped in the project's OllamaLlm helper.
model_names = [
    "llama3.1",
    "llama3.2",
    "phi3.5",
    "phi4",
    "deepseek-r1",
    "qwen2.5",
    "gemma2",
]
models = [OllamaLlm(model_name) for model_name in model_names]

# Only the first 20 items of dataset.sp_train are used for this run.
sample_riddles = dataset.sp_train[:20]

# `results` collects one (model name, outputs) pair per model;
# `total_results` pairs the prompt template with those per-model results.
total_results = []
results = []
for model in models:
    # Pull the model through ollama before generating with it.
    pull(model.name)
    model_results = []
    for riddle in tqdm(sample_riddles, desc=model.name):
        prompt_args = args_generator(riddle)
        result = model.generate(chat_prompt_template, prompt_args)
        model_results.append(result)
    results.append((model.name, model_results))
total_results.append((chat_prompt_template, results))