In [2]:
import json
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
import nltk
from smooth_bleu import bleu_fromstr

# Ensure NLTK punkt tokenizer is downloaded
nltk.download('punkt')

# Load the model and tokenizer
model_id = "igli12-B/cSharpReviewer"
tokenizer = LlamaTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
model.eval()

prompt_style = """Below is an instruction that describes a task,
paired with an input that provides further context. Write a response
that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:
"""

test_data = []
with open("/home/vm-admin/CodeReviewer-Model/CodeLlama/dataset/test_alpaca.jsonl", "r") as f:
    for line in f:
        test_data.append(json.loads(line))

references = []
predictions = []

for example in test_data:
    instruction_text = example["instruction"]
    diff_hunk = example["input"]
    reference_comment = example["output"]

    # Format the prompt
    inference_prompt = prompt_style.format(
        instruction=instruction_text,
        input=diff_hunk,
    )

    inputs = tokenizer(inference_prompt, return_tensors="pt").to(model.device)

    # Generate output from the model
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    if "### Response:" in generated_text:
        generated_text = generated_text.split("### Response:")[-1].strip()

    references.append([reference_comment])
    predictions.append(generated_text)

references_flat = [ref[0] for ref in references]

bleu = bleu_fromstr(predictions, references_flat, rmstop=False)

print(f"BLEU score: {bleu:.2f}")

[nltk_data] Downloading package punkt to /home/vm-admin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for o

BLEU score: 6.58


In [2]:
import json
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
import nltk
from smooth_bleu import bleu_fromstr
from tqdm import tqdm

nltk.download('punkt')

model_id = "codellama/CodeLlama-7b-hf"
tokenizer = LlamaTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
model.eval()

prompt_style = """Below is an instruction that describes a task,
paired with an input that provides further context. Write a response
that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:
"""

test_data = []
with open("/home/vm-admin/CodeReviewer-Model/CodeLlama/dataset/test_alpaca.jsonl", "r") as f:
    for line in f:
        test_data.append(json.loads(line))

references = []
predictions = []

for example in tqdm(test_data, desc="Processing test examples"):
    instruction_text = example["instruction"]
    diff_hunk = example["input"]
    reference_comment = example["output"]

    # Format the prompt
    inference_prompt = prompt_style.format(
        instruction=instruction_text,
        input=diff_hunk,
    )

    inputs = tokenizer(inference_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=128,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    if "### Response:" in generated_text:
        generated_text = generated_text.split("### Response:")[-1].strip()

    references.append([reference_comment])
    predictions.append(generated_text)

references_flat = [ref[0] for ref in references]

bleu = bleu_fromstr(predictions, references_flat, rmstop=False)

print(f"BLEU score: {bleu:.2f}")


[nltk_data] Downloading package punkt to /home/vm-admin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'CodeLlamaTokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                          | 0/1685 [00:00<?, ?it/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                | 1/1685 [00:04<2:09:06,  4.60s/it]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                | 2/1685 [00:09<2:10:08,  4.64s/it]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                | 3/1685 [00:14<2:15:11,  4.82s/it]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                | 4/1685 [00:19<2:15:35,  4.84s/it]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                | 5/1685 [00:23<2:13:36,  4.77s/it]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                | 6/1685 [00:28<2:14:15,  4.80s/it]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.                | 7/1685 [00:33<2:17:58,  4.93s/it]
Setting `pad_token_id` to `eos_token_id`:2 for o

BLEU score: 1.94
