In [1]:
import json

from peft import PeftConfig, PeftModel
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# NOTE(review): no function visible in this notebook reads MAX_TOKENS;
# generate_code supplies its own max_new_tokens value. Confirm whether this
# constant is still needed or was meant to be wired into generation.
MAX_TOKENS = 512

In [3]:
def load_data(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the parsed content.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The Python object produced by decoding the file's JSON document.
    """
    with open(path, "r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

In [None]:
def load_model(model_path):
    """Load a causal language model and its tokenizer from ``model_path``.

    The path is first treated as a PEFT adapter: its ``PeftConfig`` is read,
    the base model named by ``base_model_name_or_path`` is loaded, and the
    adapter is applied on top of it. If any step of that fails, the error is
    printed and ``model_path`` is loaded directly as a plain causal LM.

    Args:
        model_path: Path or identifier passed to the ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        Any exception raised by the fallback
        ``AutoModelForCausalLM.from_pretrained`` call or by
        ``AutoTokenizer.from_pretrained``; only failures of the PEFT attempt
        are caught.
    """
    try:
        peft_config = PeftConfig.from_pretrained(model_path)
        base_model = AutoModelForCausalLM.from_pretrained(
            peft_config.base_model_name_or_path, device_map="auto"
        )
        model = PeftModel.from_pretrained(base_model, model_path)
    except Exception as e:
        # Deliberate best-effort: a path without adapter files is expected to
        # fail above, so report the reason and fall back to a direct load.
        print(f"Error loading PEFT model: {e}")
        model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Only borrow EOS as the padding token when the tokenizer has none;
    # a pad token that is already configured must not be overwritten.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer

In [5]:
def format_prompt(question, chunk):
    """Build the three-line generation prompt for one example.

    Args:
        question: Text placed on the ``Question:`` line.
        chunk: Text placed on the ``Context:`` line.

    Returns:
        The context line, the question line and the fixed instruction line,
        joined by newlines.
    """
    prompt_lines = (
        f"Context: {chunk}",
        f"Question: {question}",
        "Generate Python code to solve:",
    )
    return "\n".join(prompt_lines)


In [6]:
def generate_code(pipe, question, chunk, max_new_tokens=200):
    """Generate a program for one question/context pair.

    Args:
        pipe: Callable invoked as ``pipe(prompt, **kwargs)`` that returns a
            sequence whose first element is a mapping with a
            ``"generated_text"`` entry.
        question: Question text inserted into the prompt.
        chunk: Context text inserted into the prompt.
        max_new_tokens: Upper bound on newly generated tokens forwarded to
            ``pipe``. Defaults to 200, the value previously hard-coded here.

    Returns:
        The first generated text with leading and trailing whitespace removed.
    """
    prompt = format_prompt(question, chunk)
    # do_sample=False requests non-sampled decoding; return_full_text=False
    # asks the pipeline to omit the prompt from the returned text.
    output = pipe(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=False,
        return_full_text=False,
    )
    return output[0]["generated_text"].strip()

In [8]:
def main(model_path, dataset_path, output_path):
    """Generate a program for every dataset record and save the results as JSON.

    Args:
        model_path: Passed to ``load_model`` to obtain the model and tokenizer.
        dataset_path: JSON file read by ``load_data``; iterated as a sequence
            of records with ``question_id``, ``question`` and ``golden_chunk``
            keys, and optional ``golden_program_generated`` and ``answer``.
        output_path: Destination JSON file. Missing parent directories are
            created.

    Raises:
        KeyError: If a record lacks ``question``, ``golden_chunk`` or
            ``question_id``.
    """
    # Local import: the notebook's import cell does not bring ``os`` into scope.
    import os

    data = load_data(dataset_path)

    # Create the output directory before the expensive model load and
    # generation loop, so a missing directory cannot discard finished work.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    model, tokenizer = load_model(model_path)
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")

    results = []
    for item in tqdm(data, desc="Generating code"):
        question = item["question"]
        chunk = item["golden_chunk"]
        # Read the required id before generating so a malformed record fails
        # before the slow generation call rather than after it.
        question_id = item["question_id"]
        target = item.get("golden_program_generated", "")
        answer = item.get("answer", "")

        generated = generate_code(pipe, question, chunk)

        results.append({
            "question_id": question_id,
            "question": question,
            "golden_chunk": chunk,
            "answer": answer,
            "golden_program_generated": target,
            "generated_program": generated
        })

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=4)
    print(f"Saved {len(results)} generations to {output_path}")

In [None]:
# Command-line entry point, kept disabled while the code runs as a notebook.
# NOTE: re-enabling it also requires `import argparse`, which is not among the
# imports in the first cell.
# if __name__ == "__main__":
#     parser = argparse.ArgumentParser()
#     parser.add_argument("--model_path", type=str, required=True, help="Path to model (base or LoRA fine-tuned)")
#     parser.add_argument("--dataset_path", type=str, required=True, help="Path to golden dataset")
#     parser.add_argument("--output_path", type=str, required=True, help="Path to save generations")
#     args = parser.parse_args()
#     main(args.model_path, args.dataset_path, args.output_path)

In [12]:
# Generate programs with the model stored under ../models for the dev golden
# dataset and write the generations under ../results.
main(
    model_path="../models/falcon-rw-1b-finetuned",
    dataset_path="../data/dev_golden.json",
    output_path="../results/falcon-rw-1b-finetuned-dev-golden.json",
)

The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization are unavailable.
Device set to use cpu
Generating code:   0%|          | 0/1 [00:00<?, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1299 > 1024). Running this sequence through the model will result in indexing errors
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating code: 100%|██████████| 1/1 [00:52<00:00, 52.14s/it]

Saved 1 generations to ../results/falcon-rw-1b-finetuned-dev-golden.json



