# LLM Ticket Triage - Google Colab

**Repo:** [https://github.com/Kangarroar/LLM-Ticket-Triage](https://github.com/Kangarroar/LLM-Ticket-Triage)

## 1. Setup Environment & Repository

In [None]:
#@title Clone Repository & Install Dependencies
import os

# Clone repo
if not os.path.exists("LLM-Ticket-Triage"):
    !git clone https://github.com/Kangarroar/LLM-Ticket-Triage
else:
    %cd LLM-Ticket-Triage
    !git pull
    %cd ..

# Enter repo directory
%cd LLM-Ticket-Triage

# Install dependencies from requirements.txt
!pip install -r requirements.txt

# Verify GPU
import torch
print(f"\nGPU Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"Device: {torch.cuda.get_device_name(0)}")

## 2. Generate Dataset (Optional)
Run this if you need to create synthetic data.

In [None]:
#@title Run Synthetic Data Generator
# Form toggle: untick to skip synthetic data generation.
Generate_Data = True #@param {type:"boolean"}

# Runs the repository's generator script as a subprocess. The script path is
# relative, so the working directory must be the repo root (set by the setup
# cell).
if Generate_Data:
    !python training/synthetic_data_generation.py

## 3. Training Configuration
Configure your training parameters using the form fields (dropdowns, sliders, and number inputs) below.

In [None]:
#@title Configuration Parameters
# Colab form cell: each `#@param` annotation turns the assignment on its line
# into a form widget. The values are collected into the `config` dictionary
# later in this cell.
import yaml

# Model Config
model_name = "Qwen/Qwen2.5-1.5B" #@param ["Qwen/Qwen2.5-1.5B", "Qwen/Qwen2.5-0.5B", "TinyLlama/TinyLlama-1.1B-Chat-v1.0"]
max_length = 512 #@param {type:"slider", min:128, max:512, step:64}

# LoRA Config
lora_r = 16 #@param [8, 16, 32, 64]
lora_alpha = 32 #@param [16, 32, 64]
lora_dropout = 0.05 #@param {type:"number"}

# Training Hyperparameters
num_epochs = 3 #@param {type:"slider", min:1, max:10, step:1}
batch_size = 4 #@param [1, 2, 4, 8, 16]
eval_batch_size = 4 #@param [1, 2, 4, 8]
gradient_accumulation_steps = 4 #@param [1, 2, 4, 8, 16]
learning_rate = 2e-4 #@param {type:"number"}
warmup_steps = 100 #@param {type:"number"}
weight_decay = 0.01 #@param {type:"number"}

# Logging & Evaluation
# NOTE(review): the config below sets "load_best_model_at_end": True. If
# train_lora.py forwards these values to Hugging Face TrainingArguments,
# save_steps must be a round multiple of eval_steps - confirm.
logging_steps = 10 #@param {type:"number"}
eval_steps = 100 #@param {type:"number"}
save_steps = 100 #@param {type:"number"}

# Precision
# NOTE(review): the quantization "compute_dtype" in the config below is fixed
# at "float16" regardless of this choice - confirm that is intended for bf16.
precision = "fp16" #@param ["fp32", "fp16", "bf16"]

# Early Stopping
early_stopping_patience = 3 #@param {type:"integer"}
early_stopping_threshold = 0.001 #@param {type:"number"}


# Construct Dictionary
# Each section is built on its own and then combined under the top-level keys
# that are written to the YAML file.

# Base model identifier and the maximum sequence length.
model_section = {"name": model_name, "max_length": max_length}

# LoRA adapter settings; the target modules, bias mode and task type are fixed.
lora_section = {
    "r": lora_r,
    "lora_alpha": lora_alpha,
    "target_modules": ["q_proj", "v_proj"],
    "lora_dropout": lora_dropout,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}

# 4-bit quantization settings (fixed values, not exposed in the form).
quantization_section = {
    "load_in_4bit": True,
    "compute_dtype": "float16",
    "quant_type": "nf4",
    "use_double_quant": True,
}

training_section = {
    "output_dir": "./models/adapters/qwen_1.5b_it_tickets",
    # Keep this True: setting it to False easily leads to out-of-memory errors.
    "prediction_loss_only": True,
    "num_train_epochs": num_epochs,
    "per_device_train_batch_size": batch_size,
    "per_device_eval_batch_size": eval_batch_size,
    "gradient_accumulation_steps": gradient_accumulation_steps,
    "learning_rate": learning_rate,
    "lr_scheduler_type": "cosine",
    "warmup_steps": warmup_steps,
    "weight_decay": weight_decay,

    # Logging, evaluation and checkpoint cadence.
    "logging_steps": logging_steps,
    "logging_first_step": True,
    "log_level": "info",
    "eval_strategy": "steps",
    "eval_steps": eval_steps,
    "eval_accumulation_steps": 1,
    "save_strategy": "steps",
    "save_steps": save_steps,
    "save_total_limit": 3,

    # Numerics and optimizer.
    "precision": precision,
    "optim": "adamw_torch",
    "gradient_checkpointing": True,
    "max_grad_norm": 1.0,

    # Reproducibility and best-checkpoint selection.
    "seed": 42,
    "load_best_model_at_end": True,
    "metric_for_best_model": "eval_loss",
    "greater_is_better": False,
    "remove_unused_columns": False,
    "disable_tqdm": False,

    # Metrics are reported to TensorBoard under ./logs/tensorboard.
    "report_to": "tensorboard",
    "logging_dir": "./logs/tensorboard",
}

early_stopping_section = {
    "patience": early_stopping_patience,
    "threshold": early_stopping_threshold,
}

config = {
    "model": model_section,
    "lora": lora_section,
    "quantization": quantization_section,
    "training": training_section,
    "early_stopping": early_stopping_section,
}

# Save to yaml
# safe_dump only emits standard YAML tags, so the file is always readable by
# yaml.safe_load. sort_keys=False keeps the sections and keys in the order
# they were defined instead of alphabetising them. The encoding is pinned so
# the output does not depend on the platform's default locale.
with open("configs/training_config.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(config, f, sort_keys=False)

print("Configuration saved to configs/training_config.yaml")

## 4. Train

In [None]:
#@title Launch TensorBoard
# Load the TensorBoard notebook extension and point it at ./logs/tensorboard,
# the same path the configuration cell stores as "logging_dir".
%load_ext tensorboard
%tensorboard --logdir ./logs/tensorboard

In [None]:
#@title Start Training
# Runs the repository's LoRA training script as a subprocess from the repo root.
# NOTE(review): presumably the script reads configs/training_config.yaml
# written by the configuration cell - confirm in training/train_lora.py.
!python training/train_lora.py

## 5. Inference Test

In [None]:
import torch
import json
from peft import PeftModel
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)
# Base checkpoint and the LoRA adapter directory loaded on top of it.
model_name = "Qwen/Qwen2.5-1.5B"
adapter_path = "./models/adapters/qwen_1.5b_it_tickets_final"  #@param {type:"string"}

# Allowed values for each enumerated field of the triage JSON.
CATEGORIES = [
    "Hardware",
    "Software",
    "Access",
    "Network",
    "Security",
    "Workplace",
]
SUBCATEGORIES = [
    "Login Issues",
    "Permissions",
    "Malware",
    "Physical Security",
    "Policy Violation",
    "Other",
]
PRIORITIES = ["Low", "Medium", "High", "Critical"]
ASSIGNMENT_GROUPS = [
    "End User Applications",
    "Network Operations",
    "Desktop Support",
    "Email / Messaging",
    "Security / Access",
    "Hardware Support",
    "Facilities",
]

# JSON-encode each option list once, in the order the template consumes them.
_enum_json = tuple(
    json.dumps(options)
    for options in (CATEGORIES, SUBCATEGORIES, PRIORITIES, ASSIGNMENT_GROUPS)
)

# Instruction text with the four option lists substituted into the schema.
instruction = """You MUST output valid JSON matching this schema:
{
  "summary": string,
  "category": one of %s,
  "subcategory": one of %s,
  "priority": one of %s,
  "assignment_group": one of %s,
  "request_type": "Incident" | "Service Request"
}

No extra keys. No explanations.""" % _enum_json

# Ticket text to classify.
user_input = "fire on server place"  #@param {type:"string"}

# Generation settings.
max_new_tokens = 196  #@param {type:"integer"}
use_cache = True
use_sampling = False  #@param {type:"boolean"}

print(f"Loading tokenizer: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Some tokenizers ship without a pad token; reuse EOS in that case.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization and float32 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float32,
)

# Load the quantized base model with every module mapped to device index 0.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map={"": 0},
    trust_remote_code=True,
)

# Attach the trained LoRA adapter and switch to evaluation mode.
model = PeftModel.from_pretrained(base_model, adapter_path)
model.eval()

# Build the prompt from the instruction text and the ticket to classify.
prompt = f"""### Instruction:
{instruction}

### Input:
{user_input}

### Output:
"""

# Tokenize and move the tensors to the device the model lives on.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Settings shared by greedy and sampled decoding. `use_cache` is forwarded so
# the variable defined with the other generation settings takes effect.
generation_kwargs = {
    "max_new_tokens": max_new_tokens,
    "do_sample": use_sampling,
    "use_cache": use_cache,
    "eos_token_id": tokenizer.eos_token_id,
    "pad_token_id": tokenizer.eos_token_id,
}
# Temperature only applies when sampling. Passing temperature=0.0 together
# with do_sample=False makes transformers warn about an ignored/invalid flag,
# so it is set only when sampling is enabled.
if use_sampling:
    generation_kwargs["temperature"] = 0.7

# Inference only: no gradients needed.
with torch.no_grad():
    outputs = model.generate(**inputs, **generation_kwargs)

# Decodes the full returned sequence for the single prompt.
result = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("\n===== MODEL OUTPUT =====\n")
print(result)


## 6. Save Model

In [None]:
#@title Option A: Download Adapter (Zip)
import shutil
from google.colab import files

# Zip the contents of the final adapter directory into adapter_model.zip in
# the current working directory, then pass that archive to Colab's download
# helper.
adapter_dir = "./models/adapters/qwen_1.5b_it_tickets_final"
shutil.make_archive(base_name="adapter_model", format="zip", root_dir=adapter_dir)
files.download("adapter_model.zip")

In [None]:
#@title Option B: Save to Google Drive
from google.colab import drive
import shutil
import os

# Mount Google Drive so the destination path below is reachable.
drive.mount("/content/drive")

drive_path = "/content/drive/MyDrive/LLM_Adapters/qwen_1.5b_it_tickets_final" #@param {type:"string"}

# Local directory holding the trained adapter.
adapter_dir = "./models/adapters/qwen_1.5b_it_tickets_final"

if not os.path.exists(adapter_dir):
    print("Adapter not found")
else:
    # Make sure the destination exists, then copy each top-level entry:
    # sub-directories are merged recursively, files are copied with metadata.
    os.makedirs(drive_path, exist_ok=True)
    for entry in os.scandir(adapter_dir):
        target = os.path.join(drive_path, entry.name)
        if entry.is_dir():
            shutil.copytree(entry.path, target, dirs_exist_ok=True)
        else:
            shutil.copy2(entry.path, target)

    print(f"Adapter saved to {drive_path}")