In [None]:
# Switch the kernel's working directory to the axolotl checkout: the editable
# install in the next cell and the relative paths used later (config3.yaml,
# ./qlora-out-last) are all resolved from here.
# NOTE(review): assumes an `axolotl` directory already exists in the starting
# directory; re-running this cell without restarting the kernel will look for
# axolotl/axolotl.
%cd axolotl

In [None]:
!pip install -qqq -e '.[flash-attn,deepspeed]' --progress-bar off

In [None]:
import yaml

# NOTE(review): this value is not read anywhere in this cell; the upload cell
# assigns its own `new_model` before using it.
new_model = "codellama/codellama-c#-Reviewer"

# Axolotl QLoRA fine-tuning configuration, kept inline and written to disk below.
# Keys with no value parse as None.
# Each key appears exactly once: YAML does not allow duplicate mapping keys, and
# PyYAML would silently keep only the last occurrence.
yaml_string = """
base_model: codellama/CodeLlama-7b-hf
base_model_config: codellama/CodeLlama-7b-hf
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
is_llama_derived_model: true
hub_model_id: EvolCodeLlama-7b

load_in_8bit: false
load_in_4bit: true
strict: false

datasets:
    - path: /home/vm-admin/CodeReviewer-Model/CodeLlama/dataset/train-and-valid_merged-alpaca.jsonl
      type: alpaca
dataset_prepared_path:
val_set_size: 0.081
output_dir: ./qlora-out-last

adapter: qlora
lora_model_dir:

sequence_len: 2048
sample_packing: true

lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:

wandb_project: axolotl-test-last
wandb_entity:
wandb_watch:
wandb_run_id:
wandb_log_model:

gradient_accumulation_steps: 1
micro_batch_size: 10
num_epochs: 3
optimizer: paged_adamw_32bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint: ./qlora-out-last/checkpoint-384
local_rank:
logging_steps: 1911
xformers_attention:
flash_attention: true

warmup_steps: 100
eval_steps: 1911
save_strategy: epoch
save_steps:
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
    bos_token: "<s>"
    eos_token: "</s>"
    unk_token: "<unk>"

"""

# Parse first so that malformed YAML fails here rather than inside the trainer.
yaml_dict = yaml.safe_load(yaml_string)

yaml_file = 'config3.yaml'

# yaml.dump re-serialises the parsed mapping; the training and merge cells read
# this file by name.
with open(yaml_file, 'w', encoding='utf-8') as file:
    yaml.dump(yaml_dict, file)

In [None]:
# Run axolotl's training entry point through `accelerate` with the config file
# written by the previous cell.
# NOTE(review): the config sets resume_from_checkpoint to
# ./qlora-out-last/checkpoint-384, so this presumably requires that checkpoint
# to exist already — confirm before a from-scratch run.
!accelerate launch -m axolotl.cli.train config3.yaml

In [None]:
# Run axolotl's merge_lora entry point with the same config, pointing it at the
# adapter in the training output directory.
# NOTE(review): the upload cell reads from qlora-out-last/merged, so the merged
# model is presumably written there — confirm.
!python3 -m axolotl.cli.merge_lora config3.yaml --lora_model_dir="./qlora-out-last"

In [None]:
import os

from huggingface_hub import HfApi

# Target repository on the Hugging Face Hub.
new_model = "B/cSharpReviewer"

# Do not paste the access token into the notebook: read it from the environment.
# When HF_TOKEN is unset (or empty) this is None, which lets huggingface_hub
# fall back to its locally stored login credentials instead of sending an
# empty token.
HF_TOKEN = os.environ.get("HF_TOKEN") or None

api = HfApi()

# exist_ok=True makes re-running this cell safe once the repo has been created.
api.create_repo(
    repo_id=new_model,
    repo_type="model",
    exist_ok=True,
    private=True,
    token=HF_TOKEN
)

# Push the merged model directory to the repository.
api.upload_folder(
    repo_id=new_model,
    folder_path="qlora-out-last/merged",
    token=HF_TOKEN
)


In [None]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

# Same checkpoint id as `base_model` in the training config, i.e. the model
# before fine-tuning.
model_id = "codellama/CodeLlama-7b-hf"

tokenizer = LlamaTokenizer.from_pretrained(model_id)

# Weights are loaded in bfloat16 and placed via device_map="auto".
model = LlamaForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

# Switch to evaluation mode before generating.
model.eval()


In [None]:
# Prompt template with {instruction} and {input} placeholders, filled in with
# str.format below.
prompt_style = """Below is an instruction that describes a task,
paired with an input that provides further context. Write a response
that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:
"""

instruction_text = "You are a powerful code reviewer model for the c# programming language. Your job is to suggest 1 review comment in natural language. You are given a context regarding a diff hunk or code change in programming language. You must output appropriate, contextual review comment for that code change."

# Hand-picked C# diff hunk used as a smoke-test input.
diff_hunk = """Diff Hunk:
namespace XXX.PlugIns.YYYY.Web
{
    totalSumItem.TotalRunSetupDays,
    totalSumItem.TotalWorkdays,

    // if the number of tasks exceeds the defined LIMIT, don't retrieve them as they can't be displayed
-   totalSumItem.Tasks.Count() <= TASK_DETAILS_OBJECTS_COUNT_LIMIT 
-       ? totalSumItem.Tasks.Select(WebConverter.ToWebRwoId).ToList() 
-       : new List<WebRwoId>(),
-   totalSumItem.Tasks.Count(),
+   (totalSumItem.Tasks != null && totalSumItem.NumberOfTasks <= TASK_DETAILS_OBJECTS_COUNT_LIMIT)
+       ? totalSumItem.Tasks.Select(WebConverter.ToWebRwoId).ToList() 
+       : new List<WebRwoId>(),
+   totalSumItem.NumberOfTasks,

    Components.TaskWork.Properties.CommonResources.Total);
}
"""

inference_prompt = prompt_style.format(instruction=instruction_text, input=diff_hunk)

# Tokenise the prompt and move the tensors to the device the model lives on.
inputs = tokenizer(inference_prompt, return_tensors="pt").to(model.device)

# Nucleus sampling settings; do_sample=True means the output differs between runs.
sampling_options = {
    "max_new_tokens": 256,
    "temperature": 0.7,
    "top_p": 0.9,
    "do_sample": True,
    "eos_token_id": tokenizer.eos_token_id,
}

with torch.no_grad():
    output_ids = model.generate(**inputs, **sampling_options)

# The full returned sequence is decoded, so the printed text starts with the prompt.
generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(" Model Output ", generated_text, sep="\n")


In [None]:
import json
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
import sacrebleu

# Merged checkpoint uploaded earlier in the notebook; this cell scores it with
# corpus-level BLEU on the held-out test split.
model_id = "B/cSharpReviewer"
tokenizer = LlamaTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
model.eval()

# Prompt template with {instruction} and {input} placeholders.
prompt_style = """Below is an instruction that describes a task,
paired with an input that provides further context. Write a response
that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:
"""

# One JSON object per line with "instruction", "input" and "output" keys;
# blank lines (e.g. a trailing newline at end of file) are skipped.
with open("/home/vm-admin/CodeReviewer-Model/CodeLlama/dataset/test_alpaca.jsonl", "r", encoding="utf-8") as f:
    test_data = [json.loads(line) for line in f if line.strip()]

# Parallel lists: references[i] is the gold comment for predictions[i].
references = []
predictions = []

# NOTE: do_sample=True makes generation stochastic, so the score varies
# between runs.
for example in test_data:
    instruction_text = example["instruction"]
    diff_hunk = example["input"]
    reference_comment = example["output"]

    inference_prompt = prompt_style.format(
        instruction=instruction_text,
        input=diff_hunk,
    )

    inputs = tokenizer(inference_prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id
        )

    # generate() returns the prompt tokens followed by the continuation; score
    # only the newly generated tokens, otherwise every hypothesis would carry
    # the whole prompt and the n-gram precision would be meaningless.
    new_token_ids = output_ids[0][prompt_length:]
    generated_text = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

    references.append(reference_comment)
    predictions.append(generated_text)

# sacrebleu takes a list of reference *streams*, each aligned one-to-one with
# the hypotheses; with a single reference per example that is one stream.
bleu = sacrebleu.corpus_bleu(predictions, [references])
print(f"BLEU score: {bleu.score:.2f}")


In [None]:
import json
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
import sacrebleu

# Same checkpoint id as `base_model` in the training config; this cell scores
# the model before fine-tuning with corpus-level BLEU on the test split.
model_id = "codellama/CodeLlama-7b-hf"
tokenizer = LlamaTokenizer.from_pretrained(model_id)
# NOTE(review): this cell loads in float16 while the cell that evaluates
# B/cSharpReviewer uses bfloat16 — confirm the dtype difference is intended
# for the comparison.
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)
model.eval()

# Prompt template with {instruction} and {input} placeholders.
prompt_style = """Below is an instruction that describes a task,
paired with an input that provides further context. Write a response
that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:
"""

# One JSON object per line with "instruction", "input" and "output" keys;
# blank lines (e.g. a trailing newline at end of file) are skipped.
with open("/home/vm-admin/CodeReviewer-Model/CodeLlama/dataset/test_alpaca.jsonl", "r", encoding="utf-8") as f:
    test_data = [json.loads(line) for line in f if line.strip()]

# Parallel lists: references[i] is the gold comment for predictions[i].
references = []
predictions = []

# NOTE: do_sample=True makes generation stochastic, so the score varies
# between runs.
for example in test_data:
    instruction_text = example["instruction"]
    diff_hunk = example["input"]
    reference_comment = example["output"]

    inference_prompt = prompt_style.format(
        instruction=instruction_text,
        input=diff_hunk,
    )

    inputs = tokenizer(inference_prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id
        )

    # generate() returns the prompt tokens followed by the continuation; score
    # only the newly generated tokens, otherwise every hypothesis would carry
    # the whole prompt and the n-gram precision would be meaningless.
    new_token_ids = output_ids[0][prompt_length:]
    generated_text = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

    references.append(reference_comment)
    predictions.append(generated_text)

# sacrebleu takes a list of reference *streams*, each aligned one-to-one with
# the hypotheses; with a single reference per example that is one stream.
bleu = sacrebleu.corpus_bleu(predictions, [references])
print(f"BLEU score: {bleu.score:.2f}")
