# 🛡️ SENTINEL-Guard Dual-Mode Training

Fine-tune **Qwen3-8B** on the SENTINEL security dataset for:
- **DEFENSE**: Classify threats
- **ATTACK**: Generate payloads
- **EXPLAIN**: Vulnerability analysis

**Requirements:** Kaggle T4 x2 GPU, ~45-60 min training

In [None]:
!pip install -q torch transformers datasets accelerate peft bitsandbytes trl sentencepiece

In [None]:
import json, torch
from datetime import datetime
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType
from trl import SFTTrainer

# Report whether CUDA is usable and, when it is, the name of device 0.
cuda_ok = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if cuda_ok else 'N/A'
print(f"CUDA: {cuda_ok}, GPU: {gpu_name}")

In [None]:
# CONFIG
# Base checkpoint, adapter output directory and training-data location.
MODEL_NAME = "Qwen/Qwen3-8B"
OUTPUT_DIR = "./sentinel_guard_lora"
DATASET_PATH = "/kaggle/input/sentinel-guard-v3/sentinel_guard_v3_dual.jsonl"

# LoRA adapter hyperparameters and the projection modules they attach to.
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.05
LORA_TARGETS = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Training schedule: epochs, per-device batch size, gradient-accumulation
# steps, learning rate and the maximum sequence length.
NUM_EPOCHS = 2
BATCH_SIZE = 2
GRAD_ACCUM = 4
LR = 2e-4
MAX_SEQ = 768

In [None]:
# Load dataset
# Parse the JSONL file, one JSON record per non-blank line. The context
# manager closes the file handle deterministically, and the explicit UTF-8
# encoding keeps parsing independent of the platform's default locale.
with open(DATASET_PATH, encoding="utf-8") as fh:
    samples = [json.loads(line) for line in fh if line.strip()]
print(f"Samples: {len(samples)}")
# Hold out 5% of the records for evaluation; the fixed seed keeps the split
# reproducible across runs.
dataset = Dataset.from_list(samples).train_test_split(test_size=0.05, seed=42)
print(f"Train: {len(dataset['train'])}, Eval: {len(dataset['test'])}")

In [None]:
# Load Qwen3-8B with 4-bit
# bfloat16 is only native on GPUs with compute capability >= 8.0. The T4
# GPUs this notebook targets report 7.5, so fall back to float16 there
# instead of relying on emulated bfloat16.
has_native_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8
compute_dtype = torch.bfloat16 if has_native_bf16 else torch.float16

# NF4 4-bit quantisation with double quantisation; matmuls run in compute_dtype.
bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=compute_dtype, bnb_4bit_use_double_quant=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, quantization_config=bnb, device_map="auto", trust_remote_code=True, torch_dtype=compute_dtype)
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
# Wrap the quantised base model with LoRA adapters on the configured projections.
model = get_peft_model(model, LoraConfig(r=LORA_R, lora_alpha=LORA_ALPHA, lora_dropout=LORA_DROPOUT, target_modules=LORA_TARGETS, bias="none", task_type=TaskType.CAUSAL_LM))
model.print_trainable_parameters()
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Only fall back to EOS when the tokenizer has no pad token of its own.
# Overwriting an existing pad token with EOS makes padding-masking collators
# mask the end-of-sequence token too, so the model may not learn to stop.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Train
# Pick the mixed-precision mode from the hardware: bfloat16 needs compute
# capability >= 8.0, which the T4 GPUs this notebook targets (7.5) lack,
# so those train in float16 instead.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8
use_fp16 = torch.cuda.is_available() and not use_bf16
# save_steps and eval_steps are kept equal because load_best_model_at_end
# needs checkpoints to line up with evaluations.
args = TrainingArguments(output_dir=OUTPUT_DIR, num_train_epochs=NUM_EPOCHS, per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM, learning_rate=LR, warmup_ratio=0.1, optim="paged_adamw_8bit",
    bf16=use_bf16, fp16=use_fp16, logging_steps=25, save_steps=500, eval_steps=500, eval_strategy="steps", save_total_limit=2,
    load_best_model_at_end=True, report_to="none", gradient_checkpointing=True)
# NOTE(review): `tokenizer=`, `dataset_text_field=` and `max_seq_length=` are
# accepted as SFTTrainer keyword arguments only by some TRL releases, and the
# install cell does not pin a version - confirm against the installed TRL.
trainer = SFTTrainer(model=model, args=args, train_dataset=dataset["train"], eval_dataset=dataset["test"],
    tokenizer=tokenizer, dataset_text_field="text", max_seq_length=MAX_SEQ)
print(f"Starting training at {datetime.now()}")
trainer.train()
print(f"Done at {datetime.now()}")

In [None]:
# Save
trainer.save_model()
tokenizer.save_pretrained(OUTPUT_DIR)
!zip -r sentinel_guard_lora.zip ./sentinel_guard_lora
print("Done! Download sentinel_guard_lora.zip")

In [None]:
# Test ATTACK mode
# ChatML-formatted prompt: system turn selecting ATTACK mode, one user
# request, then an open assistant turn for the model to complete.
prompt = "<|im_start|>system\nYou are SENTINEL-Guard. Mode: ATTACK<|im_end|>\n<|im_start|>user\nGenerate SQL injection payloads<|im_end|>\n<|im_start|>assistant\n"
# Leave training mode before sampling: otherwise LoRA dropout stays active
# and gradient checkpointing keeps the KV cache disabled during generation.
model.eval()
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# do_sample=True is explicit so the temperature setting is actually applied
# rather than depending on the checkpoint's default generation config.
out = model.generate(**inputs, max_new_tokens=200, do_sample=True, temperature=0.7)
print(tokenizer.decode(out[0], skip_special_tokens=True))