In [1]:
# NOTE(review): the recorded output of this cell is "command not found: module",
# so neither environment module was actually loaded in that run.
# Each `!` line runs as a separate shell command; presumably it cannot alter the
# environment of the already-running kernel anyway -- TODO confirm, and consider
# loading these modules before launching Jupyter instead.
!module load model-huggingface
!module load scicomp-llm-env

zsh:1: command not found: module
zsh:1: command not found: module


In [2]:
import os
import sys
import argparse
import json
import pandas as pd
import transformers

sys.path.append(
    os.path.abspath(os.path.join(os.getcwd(), ".."))
)  # Add parent directory to path

from utils.prompts import (
    JUDGE_SYSTEM_PROMPT,
    JUDGE_TEMPLATE,
    GENERATE_EXERCISES_SYSTEM_PROMPT,
    GENERATE_EXERCISES_TEMPLATE_EXPLICIT,
    GENERATE_EXERCISES_TEMPLATE_FEWSHOT,
    GENERATE_EXERCISES_TEMPLATE_IMPLICIT,
    GENERATE_EXERCISES_TEMPLATE_ZEROSHOT,
)

In [3]:
# Default dataset path and model identifier used by the cells below.
DEFAULT_DATA = r"../../data/complete_dataset.csv"
DEFAULT_MODEL = "Qwen/Qwen2.5-14B-Instruct"

# Maps each task type to its system prompt. The judge task has its own prompt;
# all four exercise-generation variants share a single one.
system_prompts = {
    "judge": JUDGE_SYSTEM_PROMPT,
    **dict.fromkeys(
        ("zeroshot", "fewshot", "explicit", "implicit"),
        GENERATE_EXERCISES_SYSTEM_PROMPT,
    ),
}

In [4]:
def make_prompt(row, task_type):
    _, topic, theme, concept, problem_description, example_solution, *_ = row

    match task_type:
        case "judge":
            return (
                JUDGE_TEMPLATE.replace("$THEME$", theme)
                .replace("$TOPIC$", topic)
                .replace("$CONCEPT$", concept)
                .replace("$TEXT$", problem_description)
                .replace("$CODE$", example_solution)
            )
        case "zeroshot":
            return GENERATE_EXERCISES_TEMPLATE_ZEROSHOT
        case "fewshot":
            return GENERATE_EXERCISES_TEMPLATE_FEWSHOT
        case "explicit":
            return GENERATE_EXERCISES_TEMPLATE_EXPLICIT
        case "implicit":
            return GENERATE_EXERCISES_TEMPLATE_IMPLICIT
        case _:
            raise ValueError(f"Task type '{_}' not recognised as valid task type!")


def _parse_json_object(text):
    """Parse a JSON object from model output, tolerating surrounding text."""
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Models often wrap JSON in code fences or prose; retry on the
        # outermost {...} span before giving up.
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(text[start : end + 1])

    if not isinstance(parsed, dict):
        raise ValueError(
            f"Expected a JSON object from the model, got {type(parsed).__name__}"
        )
    return parsed


def run_model(pipe, data, task_type, max_new_tokens=500):
    """Run the model on one row and merge its JSON answer into that row.

    Args:
        pipe: Callable invoked with a list of chat messages plus the keyword
            arguments ``return_full_text`` and ``max_new_tokens``; its result
            is read as ``response[0]["generated_text"]``.
        data: Mapping-like row providing ``data["prompt"]``. It is updated in
            place with one entry per key of the model's JSON object.
        task_type: Key into ``system_prompts`` selecting the system prompt.
        max_new_tokens: Generation budget forwarded to ``pipe`` (default 500).

    Returns:
        The same ``data`` object, with the parsed response fields added.

    Raises:
        ValueError: If ``task_type`` has no system prompt, or the response is
            valid JSON but not an object.
        json.JSONDecodeError: If no JSON object can be parsed from the response
            (a subclass of ValueError).
    """
    system_prompt = system_prompts.get(task_type)

    if system_prompt is None:
        raise ValueError(f"Task type '{task_type}' not recognised as valid task type!")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": data["prompt"]},
    ]
    response = pipe(
        messages,
        return_full_text=False,
        max_new_tokens=max_new_tokens,
    )

    result_dict = _parse_json_object(response[0]["generated_text"])

    for key, value in result_dict.items():
        data[key] = value

    return data

In [5]:
# Task type for this run; must be one of the keys of `system_prompts`
# ("judge", "zeroshot", "fewshot", "explicit", "implicit").
task = "judge"

In [6]:
# Keyword arguments forwarded verbatim to transformers.pipeline.
params = dict(
    model=DEFAULT_MODEL,
    device_map=0,  # Force GPU
    max_new_tokens=500,
    temperature=0.3,
)

# Build the text-generation pipeline from the settings above.
pipeline = transformers.pipeline("text-generation", **params)

2026-02-20 13:17:48.061828: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/3.89G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/3.98G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/4.00G [00:00<?, ?B/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/4.00G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/1.70G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/4.00G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/4.00G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/4.00G [00:00<?, ?B/s]

Cancellation requested; stopping current tasks.

KeyboardInterrupt



In [None]:
# Load the evaluation dataset; the file is parsed as semicolon-separated.
eval_df = pd.read_csv(DEFAULT_DATA, sep=";")

In [None]:
# Build one prompt per row for the selected task and store it in a "prompt" column.
# Note: this adds the column to eval_df in place.
eval_df["prompt"] = eval_df.apply(lambda row: make_prompt(row, task), axis=1)

In [None]:
#result = eval_df.apply(lambda row: run_model(pipeline, row, task), axis=1)