In [None]:
import os

from huggingface_hub import login

# Authenticate against the Hugging Face Hub without storing a secret in the
# notebook. The token is read from the HF_TOKEN environment variable; when it
# is unset or empty, ``login(token=None)`` falls back to the interactive prompt.
login(token=os.environ.get("HF_TOKEN") or None)

In [None]:
from peft import PeftModel
from unsloth import FastLanguageModel

import os

# Defined here so this cell runs on a fresh kernel; the original relied on
# values that are only assigned by cells further down the notebook.
max_seq_length = 2048
dtype = None

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="YY/DeepSeek-R1-reviewer-Csharp",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_16bit=True,
    # Hub token comes from the environment instead of a literal in the notebook;
    # None lets the libraries use the cached login.
    token=os.environ.get("HF_TOKEN") or None,
)

# Wrap the loaded model with the PEFT adapter saved at this training checkpoint.
CHECKPOINT_PATH = "/home/vm-admin/CodeReviewer-Model/outputs/checkpoint-3822"
model = PeftModel.from_pretrained(model, CHECKPOINT_PATH)

model = model.to("cuda")

# Switch Unsloth to its inference mode before any generate() call.
FastLanguageModel.for_inference(model)

In [None]:
import wandb

import os

# Weights & Biases API key, taken from the environment so it is never stored
# in the notebook. When unset or empty, ``key=None`` leaves credential lookup
# to wandb itself.
wb_token = os.environ.get("WANDB_API_KEY") or None

wandb.login(key=wb_token)

# Start the run that the training cells of this notebook log to.
run = wandb.init(
    project='Fine-tune-DeepSeek-R1-Distill-Llama-8B lastRun-ever',
    job_type="training",
    anonymous="allow"
)

In [None]:
from unsloth import FastLanguageModel

import os

# Shared loading settings; later cells in this notebook reuse these names.
max_seq_length = 2048
dtype = None
load_in_4bit = True


model, tokenizer = FastLanguageModel.from_pretrained(
    # Alternative (base model): "unsloth/DeepSeek-R1-Distill-Llama-8B"
    model_name = "B/DeepSeek-R1-reviewer-Csharp",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # Hub token comes from the environment instead of a literal in the notebook;
    # None lets the libraries use the cached login.
    token = os.environ.get("HF_TOKEN") or None,
)

In [None]:
# Inference mode is left disabled in this cell, as in the original:
#FastLanguageModel.for_inference(model)

# Alpaca-style prompt with three named slots: {instruction}, {input}, {output}.
# NOTE(review): when {output} is filled with "" the rendered prompt ends with
# an empty line after "### Response:" — confirm this matches the training text.
prompt_style = (
    "Below is an instruction that describes a task,\n"
    "paired with an input that provides further context. Write a response\n"
    "that appropriately completes the request.\n"
    "\n"
    "### Instruction:\n"
    "{instruction}\n"
    "\n"
    "### Input:\n"
    "{input}\n"
    "\n"
    "### Response:\n"
    "{output}\n"
)

# Task description for the reviewer model (plain string, no placeholders).
instruction_text = "You are a powerful code reviewer model for the c# programming language. Your job is to suggest 1 review comment in natural language. You are given a context regarding a diff hunk or code change in programming language. You must output appropriate, contextual review comment for that code change."
# Sample C# diff hunk. This is a regular (non-raw) triple-quoted string, so the
# "\n" sequences below become real newlines at runtime. The literal also
# contains an opening and a closing double quote around the hunk text.
diff_hunk = """Diff Hunk: "@@ -985,8 +984,9 @@ namespace Lovion.PlugIns.XXX.Web\n                                                                 totalSumItem.TotalRunSetupDays,\n                                                                 totalSumItem.TotalWorkdays,\n                                                                 // if the number of tasks exceeds the defined LIMIT, don't retrieve them as they can't be displayed\n-                                                                totalSumItem.Tasks.Count() <= TASK_DETAILS_OBJECTS_COUNT_LIMIT ? totalSumItem.Tasks.Select(WebConverter.ToWebRwoId).ToList() : new List<WebRwoId>(),\n-                                                                totalSumItem.Tasks.Count(),\n+                                                                (totalSumItem.Tasks != null && totalSumItem.NumberOfTasks <= TASK_DETAILS_OBJECTS_COUNT_LIMIT)\n+                                                                    ? totalSumItem.Tasks.Select(WebConverter.ToWebRwoId).ToList() : new List<WebRwoId>(),\n+                                                                totalSumItem.NumberOfTasks,\n                                                                 Components.TaskWork.Properties.CommonResources.Total);\n                 }\n \n"
"""

# Render the prompt; the response slot stays empty so the model fills it in.
inference_prompt = prompt_style.format(instruction=instruction_text, input=diff_hunk, output="")

# Tokenize as a batch of one and move the tensors to the GPU.
inputs = tokenizer([inference_prompt], return_tensors="pt").to("cuda")

generation_kwargs = {
    "input_ids": inputs.input_ids,
    "attention_mask": inputs.attention_mask,
    "max_new_tokens": 2048,
    "use_cache": True,
}
outputs = model.generate(**generation_kwargs)

# Only one sequence was submitted, so take the first decoded string.
decoded_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

# Text after the last "### Response:" marker (the whole text if it is absent).
response_part = decoded_text.rpartition("### Response:")[2].strip()

print("Raw Generation:\n", decoded_text)
#print("\nFinal Extracted Response:\n", response_part)


In [None]:
import json
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
import sacrebleu
from unsloth import FastLanguageModel

import os

max_seq_length = 2048
dtype = None
# NOTE(review): this flag is assigned but never passed to from_pretrained below.
# It is kept so the name stays defined; confirm whether 16-bit loading was intended.
load_in_16bit = True
# Defined locally so the cell does not depend on a value leaked from an
# earlier cell (which set it to True).
load_in_4bit = True


# Baseline: the base model without any fine-tuned adapter.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # Hub token comes from the environment instead of a literal in the notebook;
    # None lets the libraries use the cached login.
    token = os.environ.get("HF_TOKEN") or None,
)
model.eval()
FastLanguageModel.for_inference(model)
# Evaluation prompt: same layout as the inference template but with only two
# slots, {instruction} and {input}; the text stops right after "### Response:".
prompt_style = (
    "Below is an instruction that describes a task,\n"
    "paired with an input that provides further context. Write a response\n"
    "that appropriately completes the request.\n"
    "\n"
    "### Instruction:\n"
    "{instruction}\n"
    "\n"
    "### Input:\n"
    "{input}\n"
    "\n"
    "### Response:\n"
)

# Test split: one JSON object per line, each with "instruction", "input" and
# "output" keys.
test_data = []
with open("/home/vm-admin/CodeReviewer-Model/CodeLlama/dataset/test_alpaca.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file).
        if line.strip():
            test_data.append(json.loads(line))

# Generation below samples (do_sample=True); seed it so the score is repeatable.
torch.manual_seed(2233)

references = []   # one reference comment per test example
predictions = []  # generated comment for the same example, in the same order

for example in test_data:
    instruction_text = example["instruction"]
    diff_hunk = example["input"]
    reference_comment = example["output"]

    inference_prompt = prompt_style.format(
        instruction=instruction_text,
        input=diff_hunk,
    )

    inputs = tokenizer(inference_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id
        )

    # The returned sequence starts with the prompt tokens. Decode only what was
    # generated after them, otherwise the prompt text is scored as part of the
    # prediction.
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = tokenizer.decode(
        output_ids[0][prompt_length:], skip_special_tokens=True
    ).strip()

    references.append(reference_comment)
    predictions.append(generated_text)

# sacrebleu takes a list of reference *streams*, each aligned one-to-one with
# the predictions. There is a single reference per example, hence one stream.
bleu = sacrebleu.corpus_bleu(predictions, [references])
print(f"BLEU score: {bleu.score:.2f}")


In [None]:
from datasets import load_dataset
# End-of-sequence token string of the tokenizer currently in scope; the
# formatting function below appends it to every training text.
EOS_TOKEN = tokenizer.eos_token

# Training template with three positional slots: instruction, input, response.
# NOTE(review): the third line ends with a space before its newline, which the
# inference templates elsewhere in this notebook do not have. It is written
# explicitly here so editors cannot strip it silently — confirm whether the
# mismatch is intended before changing it, since that alters the training text.
train_prompt_style = (
    "Below is an instruction that describes a task,\n"
    "paired with an input that provides further context. Write a response\n"
    "that appropriately completes the request. \n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Input:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}\n"
)

def formatting_prompts_func(examples):
    """Build the ``text`` column for a batch of records.

    Args:
        examples: Mapping with parallel ``"instruction"``, ``"input"`` and
            ``"output"`` sequences (the batched form passed by ``Dataset.map``).

    Returns:
        ``{"text": [...]}`` with one entry per record: the record rendered
        through ``train_prompt_style`` and followed by ``EOS_TOKEN``.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    return {
        "text": [
            train_prompt_style.format(instruction, context, answer) + EOS_TOKEN
            for instruction, context, answer in rows
        ]
    }
    
# JSONL file for each split.
# NOTE(review): the train file name uses a hyphen ("train-alpaca") while the
# other two use an underscore — confirm that matches the files on disk.
data_files = {
    "train": "/home/vm-admin/CodeReviewer-Model/CodeLlama/dataset/train-alpaca.jsonl",
    "valid": "/home/vm-admin/CodeReviewer-Model/CodeLlama/dataset/valid_alpaca.jsonl",
    "test":  "/home/vm-admin/CodeReviewer-Model/CodeLlama/dataset/test_alpaca.jsonl",
}
dataset = load_dataset("json", data_files=data_files)

# Quick look at what was loaded.
print(type(dataset["train"]))
print(dataset["train"][0])

# Add the rendered "text" column to every split, in the original order.
for split_name in ("train", "valid", "test"):
    dataset[split_name] = dataset[split_name].map(formatting_prompts_func, batched=True)



In [None]:
# Module names handed to get_peft_model as LoRA targets.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# All adapter settings in one place; values are unchanged.
lora_settings = dict(
    r=32,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=2233,
    use_rslora=False,
    loftq_config=None,
)

# Rebinds `model` to the adapter-wrapped model returned by Unsloth.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Exactly one of fp16 / bf16 is enabled, depending on what Unsloth reports.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir="outputs",
    seed=2233,
    # batch size / schedule
    per_device_train_batch_size=10,
    gradient_accumulation_steps=1,
    num_train_epochs=3,
    # optimizer
    learning_rate=2e-4,
    optim="paged_adamw_32bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    # precision
    fp16=not use_bf16,
    bf16=use_bf16,
    # checkpointing, evaluation and logging cadence
    save_strategy="epoch",
    eval_strategy="epoch",
    logging_steps=1500,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["valid"],
    dataset_text_field="text",
    max_seq_length=2048,
    dataset_num_proc=16,  # the VM has 24 CPU cores available
    args=training_args,
)

In [None]:
# Continue training from a previously saved checkpoint directory (path is
# relative to the current working directory).
resume_checkpoint = "outputs/checkpoint-3822"
trainer_stats = trainer.train(resume_from_checkpoint=resume_checkpoint)

In [None]:
from peft import PeftModel
from unsloth import FastLanguageModel

import os

# Reload the base model in 4-bit; `max_seq_length` and `dtype` come from the
# configuration cells earlier in the notebook.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=True,
    # Hub token comes from the environment instead of a literal in the notebook;
    # None lets the libraries use the cached login.
    token=os.environ.get("HF_TOKEN") or None,
)

# Wrap the base model with the PEFT adapter saved at this training checkpoint.
CHECKPOINT_PATH = "/home/vm-admin/CodeReviewer-Model/outputs/checkpoint-5733"
model = PeftModel.from_pretrained(model, CHECKPOINT_PATH)

model = model.to("cuda")

# Switch Unsloth to its inference mode before any generate() call.
FastLanguageModel.for_inference(model)

In [None]:
# Alpaca-style prompt with three named slots: {instruction}, {input}, {output}.
# NOTE(review): when {output} is filled with "" the rendered prompt ends with
# an empty line after "### Response:" — confirm this matches the training text.
prompt_style = (
    "Below is an instruction that describes a task,\n"
    "paired with an input that provides further context. Write a response\n"
    "that appropriately completes the request.\n"
    "\n"
    "### Instruction:\n"
    "{instruction}\n"
    "\n"
    "### Input:\n"
    "{input}\n"
    "\n"
    "### Response:\n"
    "{output}\n"
)

# Task description for the reviewer model (plain string, no placeholders).
instruction_text = "You are a powerful code reviewer model for the c# programming language. Your job is to suggest 1 review comment in natural language. You are given a context regarding a diff hunk or code change in programming language. You must output appropriate, contextual review comment for that code change."
# Sample C# diff hunk. This is a regular (non-raw) triple-quoted string, so the
# "\r\n" / "\n" sequences become real line breaks at runtime.
# NOTE(review): the value ends with the two characters `",` just before the
# closing triple quote — possibly left over from copying a JSON record; confirm.
diff_hunk = """Diff Hunk:@@ -148,10 +143,9 @@ namespace XX.GuiComponents.RwoGrid\n             var newRwoTypes = Enumerable.Empty<IRwoType>();\r\n             if (rwos != null)\r\n             {\r\n-                newRwoTypes = rwos.Select(r => r.RwoType)\r\n-                                  .Distinct();\r\n+                newRwoTypes = rwos.Select(r => r.RwoType);\r\n \r\n-                dataGrid._CurrentRwoTypes = new HashSet<IRwoType>(newRwoTypes);\r\n+                dataGrid._CurrentRwoTypes = newRwoTypes.ToHashSet();\r\n             }\r\n             return newRwoTypes;\r\n         }\r\n","""

# Render the prompt with an empty response slot for the model to complete.
prompt_fields = {"instruction": instruction_text, "input": diff_hunk, "output": ""}
inference_prompt = prompt_style.format(**prompt_fields)

# Batch of one prompt, tensors placed on the GPU.
inputs = tokenizer([inference_prompt], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids, attention_mask=inputs.attention_mask,
    max_new_tokens=1200, use_cache=True,
)

# A single sequence was generated; decode it without special tokens.
decoded_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

# Text after the last "### Response:" marker (the whole text if it is absent).
response_part = decoded_text.rpartition("### Response:")[2].strip()

print("Raw Generation:\n", decoded_text)
print("\nFinal Extracted Response:\n", response_part)


In [None]:
new_model_online = "B/DeepSeek-R1-reviewer-Csharp"

# The adapter goes to its own repo. Pushing it to the same repo as the merged
# weights leaves both `adapter_config.json` and `config.json` there, and
# Unsloth's loader refuses such a repo ("separate the LoRA and base models to
# 2 repos"). This notebook loads `new_model_online` with
# FastLanguageModel.from_pretrained, so that repo must hold only the merged model.
adapter_repo = f"{new_model_online}-lora"
model.push_to_hub(adapter_repo)
tokenizer.push_to_hub(adapter_repo)

# Merged 4-bit weights plus the tokenizer, under the main repo name.
model.push_to_hub_merged(new_model_online, tokenizer, save_method = "merged_4bit_forced")

In [None]:
new_model_online = "B/DeepSeek-R1-reviewer-Csharp-16bit"

# The adapter goes to its own repo. Pushing it to the same repo as the merged
# weights leaves both `adapter_config.json` and `config.json` there, and
# Unsloth's loader refuses such a repo ("separate the LoRA and base models to
# 2 repos").
adapter_repo = f"{new_model_online}-lora"
model.push_to_hub(adapter_repo)
tokenizer.push_to_hub(adapter_repo)

# Merged 16-bit weights plus the tokenizer, under the main repo name.
model.push_to_hub_merged(new_model_online, tokenizer, save_method = "merged_16bit")