In [None]:
import json
from typing import Optional
from dataclasses import dataclass, field
from pathlib import Path

import torch
import transformers
from peft import PeftModel
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    GenerationConfig, 
    HfArgumentParser, 
    BitsAndBytesConfig,
)
from tqdm import tqdm

In [2]:
# Device selection.
# GPU_INDEX pins the notebook to one CUDA device (e.g. 1 for the second GPU).
# Leave it as None to keep PyTorch's default device. This replaces the block
# that previously had to be commented in and out by hand.
GPU_INDEX: Optional[int] = None

if GPU_INDEX is not None:
    if torch.cuda.is_available():
        # Make the requested GPU the current device, so that the plain "cuda"
        # string assigned below resolves to it.
        torch.cuda.set_device(GPU_INDEX)
        print(f"Using GPU: {torch.cuda.current_device()}")
    else:
        print("CUDA is not available.")

# Target for tensors and the model: "cuda" when a GPU is usable, else "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"

Using GPU: 1


In [3]:
# Local checkpoint directory of the base model; the tokenizer and the weights
# are both read from this one location.
BASE_MODEL_PATH = "/home/tp/tomas/issta/CodeLlama-7B"

# bitsandbytes settings: load the weights in 8-bit.
int8_config = BitsAndBytesConfig(load_in_8bit=True, llm_int8_threshold=6.0)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    quantization_config=int8_config,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

cuda


In [6]:
# Attach the LoRA adapter stored in '../repairllama-lora' to the base model.
# The isinstance guard keeps the cell idempotent: re-running it would otherwise
# wrap an already-wrapped PeftModel a second time.
if not isinstance(model, PeftModel):
    model = PeftModel.from_pretrained(
        model,
        '../repairllama-lora',
        torch_dtype=torch.float16,
    )

# Use the unknown token for padding and give the model config the matching id.
# `pad_token_id` is the field the config defines; assigning `config.pad_token`
# only creates an extra attribute on the config object.
tokenizer.pad_token = tokenizer.unk_token
model.config.pad_token_id = tokenizer.pad_token_id
model.to(device)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32016, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear8bitLt(
                (base_layer): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear8bitLt(in_features=4096, out_features=4096, bi

In [None]:
# This cell repeats the adapter-loading step. The isinstance guard makes it a
# no-op when `model` is already a PeftModel, so running the notebook top to
# bottom does not wrap the model with the adapter twice.
if not isinstance(model, PeftModel):
    model = PeftModel.from_pretrained(
        model,
        '../repairllama-lora',
        torch_dtype=torch.float16,
    )

# Use the unknown token for padding and give the model config the matching id.
# `pad_token_id` is the field the config defines; assigning `config.pad_token`
# only creates an extra attribute on the config object.
tokenizer.pad_token = tokenizer.unk_token
model.config.pad_token_id = tokenizer.pad_token_id
model.to(device)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32016, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear8bitLt(
                (base_layer): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear8bitLt(in_features=4096, out_features=4096, bi

In [20]:
# Bug 05
#
# Repair prompt: a Java `calculate` method in which the line under repair is
# kept as a `//` comment below the `// buggy code` marker and replaced in the
# code by the `<FILL_ME>` placeholder.
# NOTE(review): the lines tagged `//buggy` are the `*` and `/` branches, whereas
# the placeholder replaces the `+` condition — confirm this is the intended
# repair location.
# The literal is handed to the tokenizer as-is, so its whitespace (including the
# leading newline) is part of the prompt.

buggy_code5 = """
public int calculate(String op, int op1, int op2) {

// buggy code
//                if (op.equals("+")) {
                <FILL_ME>
                        return op1 + op2;
                } else if (op.equals("-")) {
                        return op1 - op2;
                } else if (op.equals("*")) {
                        return op1 / op2;//buggy
                } else if (op.equals("/")) {
                        return op1 * op2;//buggy
                } else if (op.equals("%")) {
                        return op1 % op2;
                }
                throw new UnsupportedOperationException(op);
        }
"""

In [21]:
# Encode the prompt, requesting PyTorch tensors from the tokenizer.
inputs = tokenizer(buggy_code5, return_tensors="pt")

# Keep the size of dimension 1 of the encoded prompt (used later to slice the
# prompt off the generated sequences) and move the ids to the target device.
prompt_ids = inputs["input_ids"]
inputs_len = prompt_ids.shape[1]
inputs_ids = prompt_ids.to(device)

In [25]:
# Beam-search settings: 10 beams with early stopping enabled.
generation_config = GenerationConfig(early_stopping=True, num_beams=10)

# Remaining arguments for generate(): up to 256 new tokens per candidate and
# 10 returned sequences, with pad/eos ids taken from the tokenizer.
generate_kwargs = {
    "max_new_tokens": 256,
    "num_return_sequences": 10,
    "pad_token_id": tokenizer.pad_token_id,
    "eos_token_id": tokenizer.eos_token_id,
    "generation_config": generation_config,
}

outputs = model.generate(inputs_ids, **generate_kwargs)

In [26]:
# Drop the first `inputs_len` positions of every returned sequence so that only
# the part after the prompt is decoded.
output_ids = outputs[:, inputs_len:]

# Decode each candidate to text.
output_patch = tokenizer.batch_decode(
    output_ids,
    clean_up_tokenization_spaces=False,
    skip_special_tokens=True,
)

In [28]:
# Show every candidate patch, each followed by a separator line.
for patch in output_patch:
    print(patch, '-----------------', sep='\n')

</s>
-----------------
       if (op.equals("+")) {
</s>
-----------------
               if (op.equals("+")) {
</s>
-----------------
		if (op.equals("+")) {
</s>
-----------------
               if (op.equals("+") || op.equals("-")) {
</s>
-----------------
       try {
                if (op.equals("+")) {
</s>
-----------------
               if (op.equals("+") || op.equals("plus")) {
</s>
-----------------
               if (op.equals("+") || op.equals("add")) {
</s>
-----------------
       if (op.equals("+") || op.equals("-")) {
</s>
-----------------
	if (op.equals("+")) {
</s>
-----------------
