In [None]:
import os

import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig

from data_processing import get_dataset, tokenize_dataset

In [None]:
model_name = "Qwen/Qwen2.5-Coder-1.5B-Instruct"

# Load the base checkpoint with 8-bit weights (bitsandbytes) and let
# accelerate decide device placement.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
)
# PEFT's documented recipe for training on top of a quantized model: run this
# preparation step BEFORE attaching adapters. Per the PEFT docs it freezes the
# base weights, upcasts the remaining half-precision parameters for numerical
# stability and (by default) turns on gradient checkpointing.
model = prepare_model_for_kbit_training(model)

tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): padding with the EOS token makes padding and end-of-sequence
# indistinguishable to anything that masks on pad_token_id; confirm that
# `tokenize_dataset` does not mask the real EOS out of the labels.
tokenizer.pad_token = tokenizer.eos_token
# Left padding: the evaluation loop in this notebook strips the prompt from
# generated sequences by slicing off the first `len(input_ids)` positions,
# which is only correct when every prompt is right-aligned.
tokenizer.padding_side = "left"

# Third return value of get_dataset() is unused in this notebook
# (presumably the test split; verify against data_processing).
train, validation, _ = get_dataset()
train_dataset = tokenize_dataset(train, tokenizer)
validation_dataset = tokenize_dataset(validation, tokenizer)

# LoRA adapter: rank 16, scaling alpha 32, 10% dropout, no bias training.
# target_modules is not given, so PEFT's built-in default for this
# architecture is used.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [None]:
class FinetuneTrainer(Trainer):
    """Trainer whose evaluation compares generated text with decoded labels.

    Instead of computing a loss, ``evaluate`` runs ``model.generate`` on every
    evaluation batch, decodes the newly generated tokens and the reference
    labels to lower-cased strings, and scores them as a classification problem
    (accuracy plus weighted precision / recall / F1).

    NOTE(review): the whole ``input_ids`` tensor is used as the generation
    prompt. This assumes the evaluation examples contain only the prompt (not
    the answer) in ``input_ids`` -- confirm against ``tokenize_dataset``.
    NOTE(review): decoding uses the model's default generation settings; if
    those enable sampling, metrics will vary between runs.
    """

    # Upper bound on tokens generated per example during evaluation.
    # Subclasses / instances may override it; the default matches the
    # previously hard-coded value.
    eval_max_new_tokens = 1024

    def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"):
        """Generate predictions for ``eval_dataset`` and compute text-match metrics.

        Args:
            eval_dataset: Dataset to evaluate; falls back to ``self.eval_dataset``.
            ignore_keys: Accepted for signature compatibility with
                ``Trainer.evaluate``; not used.
            metric_key_prefix: Prefix for the metric names. Metrics are
                reported as ``f"{metric_key_prefix}_{name}"``.

        Returns:
            dict mapping ``<prefix>_accuracy``, ``<prefix>_precision``,
            ``<prefix>_recall`` and ``<prefix>_f1`` to floats.
        """
        eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
        # `processing_class` is what this notebook passes to the constructor;
        # the older `tokenizer` attribute is deprecated in favour of it.
        tokenizer = self.processing_class
        device = self.model.device

        preds, labels = [], []
        self.model.eval()
        for batch in self.get_eval_dataloader(eval_dataset):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch.get("attention_mask")
            if attention_mask is not None:
                attention_mask = attention_mask.to(device)
            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    max_new_tokens=self.eval_max_new_tokens,
                )

            # generate() returns prompt + continuation; keep the continuation only.
            for out_ids, in_ids in zip(outputs, input_ids):
                new_ids = out_ids[len(in_ids):]
                preds.append(tokenizer.decode(new_ids, skip_special_tokens=True).strip().lower())

            # -100 marks positions excluded from the labels; drop them before decoding.
            for lb in batch["labels"]:
                lb = lb.cpu().numpy()
                lb = lb[lb != -100]
                labels.append(tokenizer.decode(lb, skip_special_tokens=True).strip().lower())

        scores = {
            "accuracy": accuracy_score(labels, preds),
            "precision": precision_score(labels, preds, average="weighted", zero_division=0),
            "recall": recall_score(labels, preds, average="weighted", zero_division=0),
            "f1": f1_score(labels, preds, average="weighted", zero_division=0),
        }
        # Keys must be "<prefix>_<name>": Trainer resolves `metric_for_best_model`
        # as "eval_<name>" in the dict returned here (un-prefixed keys raise a
        # KeyError when the best checkpoint is tracked), and the reporting
        # integrations group metrics by that same prefix.
        metrics = {f"{metric_key_prefix}_{name}": float(value) for name, value in scores.items()}

        self.log(metrics)
        # Mirror the base implementation: notify callbacks that an evaluation
        # finished so they receive the metrics and the evaluate flag is reset.
        self.control = self.callback_handler.on_evaluate(self.args, self.state, self.control, metrics)
        return metrics

In [None]:
# Mixed precision is only enabled when a CUDA device is present.
use_fp16 = torch.cuda.is_available()
# Checkpointing and evaluation share one cadence so that every saved
# checkpoint has an evaluation result to be ranked by.
checkpoint_interval = 100

args = TrainingArguments(
    # --- where checkpoints go ---
    output_dir="qwen25_coder_1_5b_instruct",
    # --- optimisation ---
    learning_rate=2e-4,
    num_train_epochs=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    fp16=use_fp16,
    # --- evaluation / checkpoint schedule ---
    eval_strategy="steps",
    eval_steps=checkpoint_interval,
    save_strategy="steps",
    save_steps=checkpoint_interval,
    # --- best-checkpoint selection (higher accuracy wins) ---
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    # --- experiment tracking ---
    report_to="wandb",
)

# Weights & Biases entity (team / user) that runs are logged under.
os.environ["WANDB_ENTITY"]="VulnRL"

# Wire model, data and tokenizer into the generation-based trainer.
trainer = FinetuneTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    processing_class=tokenizer,
)

In [None]:
# Run fine-tuning with the schedule configured in `args`.
trainer.train()

# Write the (PEFT-wrapped) model first, then the tokenizer, into the same
# directory so the result can be reloaded from a single path.
export_dir = "qwen25_coder_1_5b_instruct"
for artifact in (model, tokenizer):
    artifact.save_pretrained(export_dir)