# 🛡️ SENTINEL-Guard Training (Colab)

**Model:** Qwen3-8B | **Method:** QLoRA 4-bit | **Dataset:** 16.8K samples

⚠️ Enable GPU: Runtime → Change runtime type → T4 GPU

In [None]:
# Mount Google Drive for dataset
# The dataset path and the output directory configured later in this notebook
# both live under /content/drive/MyDrive, so the mount must succeed first.
from google.colab import drive
drive.mount('/content/drive')

# Install deps
# `-q` keeps pip output short. Versions are not pinned.
# NOTE(review): the trainer cell passes version-sensitive keyword arguments to
# trl's SFTTrainer — consider pinning trl/transformers once a working
# combination is confirmed.
!pip install -q torch transformers datasets accelerate peft bitsandbytes trl sentencepiece

In [None]:
import json, torch
from datetime import datetime
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType
from trl import SFTTrainer

# Report the attached GPU (device 0) and its total memory in GB (1e9 bytes).
gpu_name = torch.cuda.get_device_name(0)
vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
print(f"GPU: {gpu_name}, VRAM: {vram_gb:.1f}GB")

In [None]:
# ---------------------------------------------------------------------------
# Paths and base model.
# DATASET_PATH must point at the JSONL file you uploaded to Google Drive.
# ---------------------------------------------------------------------------
DATASET_PATH = "/content/drive/MyDrive/sentinel_guard_v3_dual.jsonl"
MODEL_NAME = "Qwen/Qwen3-8B"
OUTPUT_DIR = "/content/drive/MyDrive/sentinel_guard_lora"

# ---------------------------------------------------------------------------
# LoRA adapter hyperparameters.
# ---------------------------------------------------------------------------
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.05
LORA_TARGETS = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# ---------------------------------------------------------------------------
# Training schedule, sized for a single Colab T4.
# ---------------------------------------------------------------------------
NUM_EPOCHS = 2
BATCH_SIZE = 1    # per-device batch; kept small for a single GPU
GRAD_ACCUM = 8    # effective batch = BATCH_SIZE * GRAD_ACCUM = 8
LR = 2e-4
MAX_SEQ = 512     # shorter sequences to fit VRAM

In [None]:
# Load dataset: one JSON object per non-blank line of the JSONL file.
# The context manager closes the Drive file handle as soon as parsing is done,
# and the explicit UTF-8 encoding keeps decoding independent of the runtime's
# locale.
samples = []
with open(DATASET_PATH, encoding="utf-8") as dataset_file:
    for line_no, line in enumerate(dataset_file, start=1):
        if not line.strip():
            continue  # blank lines are tolerated and skipped
        try:
            samples.append(json.loads(line))
        except json.JSONDecodeError as err:
            # Surface the offending line number; JSONDecodeError is itself a
            # ValueError, so the exception family is unchanged.
            raise ValueError(f"{DATASET_PATH}:{line_no}: invalid JSON ({err})") from err
print(f"Loaded {len(samples)} samples")

# Hold out 5% of the samples for evaluation; the fixed seed makes the split
# reproducible across runs.
dataset = Dataset.from_list(samples).train_test_split(test_size=0.05, seed=42)
print(f"Train: {len(dataset['train'])}, Eval: {len(dataset['test'])}")

In [None]:
# Load Qwen3-8B with 4-bit quantization
# bfloat16 is only native on GPUs with compute capability >= 8.0. The T4 this
# notebook targets is 7.5, so fall back to float16 as the 4-bit compute dtype
# there instead of hard-coding bfloat16.
compute_dtype = torch.bfloat16 if torch.cuda.get_device_capability(0)[0] >= 8 else torch.float16
bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
                          bnb_4bit_compute_dtype=compute_dtype, bnb_4bit_use_double_quant=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, quantization_config=bnb,
                                              device_map="auto", trust_remote_code=True)
# Trade compute for memory, then prepare the quantized model for k-bit training.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# Apply LoRA
lora_cfg = LoraConfig(r=LORA_R, lora_alpha=LORA_ALPHA, lora_dropout=LORA_DROPOUT,
                      target_modules=LORA_TARGETS, bias="none", task_type=TaskType.CAUSAL_LM)
model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Only borrow EOS as the pad token when the tokenizer ships without one.
# Overwriting an existing pad token with EOS makes collators that mask padding
# also mask the end-of-turn token, so the model is never trained to stop.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Trainer
# Pick the mixed-precision mode from the hardware: bfloat16 needs compute
# capability >= 8.0, which the T4 (7.5) this notebook targets does not have,
# so use fp16 there rather than a hard-coded bf16=True.
use_bf16 = torch.cuda.get_device_capability(0)[0] >= 8
args = TrainingArguments(
    output_dir=OUTPUT_DIR, num_train_epochs=NUM_EPOCHS, per_device_train_batch_size=BATCH_SIZE,
    # Keep evaluation at the same small batch size as training; the library
    # default is larger and the train batch was reduced specifically to fit VRAM.
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM, learning_rate=LR, warmup_ratio=0.1,
    optim="paged_adamw_8bit", bf16=use_bf16, fp16=not use_bf16, logging_steps=50, save_steps=500,
    eval_strategy="steps", eval_steps=500, save_total_limit=2, gradient_checkpointing=True, report_to="none")

# NOTE(review): `tokenizer`, `dataset_text_field` and `max_seq_length` are
# accepted by SFTTrainer only in some trl releases — confirm against the
# installed version.
trainer = SFTTrainer(model=model, args=args, train_dataset=dataset["train"],
                     eval_dataset=dataset["test"], tokenizer=tokenizer,
                     dataset_text_field="text", max_seq_length=MAX_SEQ)

print(f"\nüöÄ Starting training at {datetime.now()}")
trainer.train()
print(f"\n‚úÖ Done at {datetime.now()}")

In [None]:
# Persist the trained model and the tokenizer files to the same Drive folder.
# The trainer was configured with output_dir=OUTPUT_DIR, so naming the
# directory explicitly here writes to the same location as the default.
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"Saved to {OUTPUT_DIR}")

In [None]:
# Quick test: generate one completion from a chat-formatted prompt.
prompt = "<|im_start|>system\nYou are SENTINEL-Guard. Mode: ATTACK<|im_end|>\n<|im_start|>user\nGenerate SQL injection payloads<|im_end|>\n<|im_start|>assistant\n"

# The model is still in train mode after trainer.train(); switch to eval so
# LoRA dropout is disabled while sampling.
model.eval()
# Put the inputs on whatever device the model was placed on rather than
# assuming a literal "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# No gradients are needed for inference. do_sample=True makes the sampling
# intent behind `temperature` explicit instead of relying on the model's
# default generation config.
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=150, do_sample=True, temperature=0.7)
print(tokenizer.decode(out[0], skip_special_tokens=True))