In [None]:
!pip install unsloth peft huggingface_hub datasets



In [None]:
!pip install evaluate rouge_score bert_score

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13


In [None]:
# Hugging Face Hub repository id of the LoRA adapter; it is passed to
# PeftModel.from_pretrained in the model-loading cell.
repo_id = "AdamDS/qwen3-security-dpo-4b"

In [None]:
    # Runtime sanity check: print the TensorFlow version and the GPU it reports, if any.
    import tensorflow as tf

    print("TensorFlow version:", tf.__version__)

    # A falsy (empty) device name is reported as "No GPU found."
    gpu_device_name = tf.test.gpu_device_name()
    if not gpu_device_name:
        print("No GPU found.")
    else:
        print("GPU device name:", gpu_device_name)

TensorFlow version: 2.18.0
GPU device name: /device:GPU:0


In [None]:
from unsloth import FastLanguageModel
from peft import PeftModel
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
# Smoke test: load the base model, attach the LoRA adapter from `repo_id`,
# and switch the result into Unsloth's inference mode.
print("🧪 Testing model loading from Hugging Face Hub...")

# Same imports as the earlier import cell; the order (unsloth first) is kept as-is.
from unsloth import FastLanguageModel
from peft import PeftModel
import torch

print("Loading base model...")
# Keyword arguments for the base checkpoint, collected in one place.
base_model_kwargs = dict(
    model_name="unsloth/Qwen3-4B-unsloth-bnb-4bit",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_kwargs)

# Wrap the base model with the adapter weights stored under `repo_id`.
print(f"Loading LoRA adapters from {repo_id}...")
model = PeftModel.from_pretrained(model, repo_id)

print("Enabling fast inference...")
FastLanguageModel.for_inference(model)

print("✅ Model loaded successfully from Hugging Face Hub!")

🧪 Testing model loading from Hugging Face Hub...
Loading base model...
==((====))==  Unsloth 2025.5.9: Fast Qwen3 patching. Transformers: 4.52.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/3.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/237 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/707 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/4.67k [00:00<?, ?B/s]

Loading LoRA adapters from AdamDS/qwen3-security-dpo-4b...


adapter_config.json:   0%|          | 0.00/804 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/264M [00:00<?, ?B/s]

Enabling fast inference...
✅ Model loaded successfully from Hugging Face Hub!


In [None]:
# Smoke-test the loaded model on a snippet that builds SQL by string interpolation.
test_prompt = "def get_user_data(user_id):\n    query = f'SELECT * FROM users WHERE id = {user_id}'\n    return execute_query(query)"

print("\n🔍 Testing inference with vulnerable code...")

# Wrap the snippet in the instruction template; the model continues after "Analysis:".
full_prompt = f"Analyze this code for security issues:\n\n{test_prompt}\n\nAnalysis:"

# Tokenize, then move every input tensor to the device that holds the model weights.
device = next(model.parameters()).device
inputs = tokenizer([full_prompt], return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}

print(f"Model device: {device}")
print(f"Input device: {inputs['input_ids'].device}")

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.3,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True
    )

# Full decoded sequence (prompt + continuation), kept for inspection.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("\n📝 Model response:")

# Decode only the tokens generated after the prompt. Splitting the decoded text
# on "Analysis:" would cut the answer short whenever the model repeats that
# marker, and the prompt always contains the marker, so a text-based fallback
# could never trigger.
prompt_length = inputs["input_ids"].shape[1]
analysis = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

# Show at most the first 300 characters.
print(analysis[:300] + "..." if len(analysis) > 300 else analysis)


🔍 Testing inference with vulnerable code...
Model device: cuda:0
Input device: cuda:0

📝 Model response:
The code has a security issue. The user_id is directly interpolated into the SQL query without any sanitization or parameterization, which makes it vulnerable to SQL injection attacks. To fix this, the user_id should be passed as a parameter to the execute_query function, which should handle the par...


In [None]:
from datasets import load_dataset

dataset_name = "CyberNative/Code_Vulnerability_Security_DPO"

# Download the dataset from the Hub.
print(f"Loading dataset: {dataset_name}")
dataset = load_dataset(dataset_name)
print("Dataset loaded.")

# Use 'prompt' as the column name when the dataset ships it as 'question'.
train_columns = dataset['train'].column_names
if 'question' in train_columns:
    dataset = dataset.rename_column("question", "prompt")
    print("Renamed 'question' column to 'prompt'.")

# Carve out a 10% test split (fixed seed) when the dataset has no 'test' split.
has_test_split = 'test' in dataset
if not has_test_split:
    print("Splitting dataset into train and test (90/10)...")
    dataset = dataset['train'].train_test_split(test_size=0.1, seed=42)
    print("Dataset split into train and test sets.")

# Report the number of rows per split.
print("Dataset sizes:")
for split_name in dataset:
    print(f"- {split_name}: {len(dataset[split_name])}")

# Print the first training example for a quick look at the fields.
print("\nSample datapoint:")
first_example = dataset['train'][0]
print(first_example)

Loading dataset: CyberNative/Code_Vulnerability_Security_DPO


README.md:   0%|          | 0.00/3.70k [00:00<?, ?B/s]

secure_programming_dpo.json:   0%|          | 0.00/6.87M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4656 [00:00<?, ? examples/s]

Dataset loaded.
Renamed 'question' column to 'prompt'.
Splitting dataset into train and test (90/10)...
Dataset split into train and test sets.
Dataset sizes:
- train: 4190
- test: 466

Sample datapoint:
{'lang': 'c++', 'vulnerability': 'The code may suffer from a buffer overflow vulnerability, which could allow unauthorized alteration of memory data.', 'system': '', 'prompt': 'Write a c++ code that includes two functions. The first function should take a character pointer as an argument and copy the content of the character array pointed by the pointer into a local character array named \'buffer\' of size 10. The second function should be the main function where you declare a character array \'large_input\' of size 20 and initialize it with the string "This is a large input". Then call the first function passing \'large_input\' as an argument.', 'chosen': '```c++\n#include <stdio.h>\n#include <string.h>\n\nvoid func(const char* input) {\n    char buffer[10];\n    strncpy(buffer, input

In [None]:
# Evaluate the fine-tuned model on the held-out test split

In [None]:
# Display the column names and dtypes of the training split.
dataset['train'].features

{'lang': Value(dtype='string', id=None),
 'vulnerability': Value(dtype='string', id=None),
 'system': Value(dtype='string', id=None),
 'prompt': Value(dtype='string', id=None),
 'chosen': Value(dtype='string', id=None),
 'rejected': Value(dtype='string', id=None)}

In [None]:
from evaluate import load

# Text-overlap (ROUGE) and embedding-similarity (BERTScore) metrics.
rouge = load("rouge")
bertscore = load("bertscore")

predictions = []
references = []

# Resolve the device here so this cell does not rely on a variable left behind
# by the smoke-test cell.
device = next(model.parameters()).device

# Sampling is used below; seed the RNG so repeated runs give comparable metrics.
torch.manual_seed(42)

# NOTE(review): the sample datapoint shows `prompt` is a code-writing instruction
# and `chosen` is code, while the template below asks for a security analysis.
# Confirm this prompt/reference pairing is the intended evaluation setup.
for example in dataset["test"]:
    prompt = f"Analyze this code for security issues:\n\n{example['prompt']}\n\nAnalysis:"
    inputs = tokenizer([prompt], return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            # State the sampling mode explicitly (matches the smoke-test cell);
            # otherwise whether `temperature` applies depends on the checkpoint's
            # generation config.
            do_sample=True,
            temperature=0.3,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True
        )

    # Keep only the tokens generated after the prompt. Splitting the decoded text
    # on "Analysis:" would drop part of the answer whenever the model repeats
    # that marker.
    prompt_length = inputs["input_ids"].shape[1]
    pred = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    predictions.append(pred.strip())
    references.append(example['chosen'].strip())  # adjust if 'answer' or other field

# Compute metrics
rouge_score = rouge.compute(predictions=predictions, references=references)
bertscore_result = bertscore.compute(predictions=predictions, references=references, lang="en")

# BERTScore returns one value per example; report the mean of each component.
bertscore_mean = {
    key: sum(bertscore_result[key]) / len(bertscore_result[key])
    for key in ("precision", "recall", "f1")
}

print("ROUGE:", rouge_score)
print("BERTScore:", bertscore_mean)


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ROUGE: {'rouge1': np.float64(0.1443366977122422), 'rouge2': np.float64(0.03786350005273755), 'rougeL': np.float64(0.09837393583909906), 'rougeLsum': np.float64(0.14032801600168177)}
BERTScore: {'precision': 0.8009456153079676, 'recall': 0.770795872666805, 'f1': 0.784712089361551}


[Link to Google Colab Notebook](https://colab.research.google.com/drive/1rAANNnoswfXn3NDbi0nCiv2TaMuVXbr3?usp=sharing)