In [None]:
try:
    import rapidfireai
    print("✅ rapidfireai already installed")
except ImportError:
    %pip install rapidfireai  # Takes 1 min
    %pip install bert_score
    !rapidfireai init # Takes 1 min

In [None]:
import subprocess
from time import sleep
import socket

# Probe the three local RapidFire service ports. Each probe socket is opened
# in a `with` block so it is closed even when connect() fails; previously a
# refused connection skipped every close() call and leaked the sockets.
try:
  for port in (8851, 8852, 8853):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
      probe.connect(("127.0.0.1", port))
  print("RapidFire Services are running")
except OSError:
  # At least one port could not be reached: start the services in the
  # background and give them a fixed 30 seconds to come up.
  print("RapidFire Services are not running, launching now...")
  subprocess.Popen(["rapidfireai", "start"])
  sleep(30)

In [None]:
import os

# Load TensorBoard extension
# (this registers the %tensorboard line magic used by later cells)
%load_ext tensorboard

# TensorBoard log directory will be auto-created in experiment path

In [None]:
from rapidfireai import Experiment
from rapidfireai.automl import List, RFGridSearch, RFModelConfig, RFLoraConfig, RFSFTConfig

# NB: If you get "AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'" from Colab, just rerun this cell

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer

# Base model checkpoint; reused as `model_name` in the model configs below.
model_id = "Qwen/Qwen2.5-1.5B-Instruct"
# Tokenizer for the base model. Its pad/eos token ids are read later when the
# generation_config dictionaries are built.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Cybersecurity Q&A dataset; only its "train" and "validation" splits are used.
dataset = load_dataset("mariiazhiv/cybersecurity_qa")

train_dataset = dataset["train"]
eval_dataset = dataset["validation"]

In [None]:
class QwenChatFormatter:
    """
    Callable that turns one dataset row into chat-template formatted text.

    The instance stores only the model id; the tokenizer is loaded lazily on
    first use, so a freshly constructed formatter carries no tokenizer object.
    The original author's intent is that this keeps the formatter usable from
    worker processes (multiprocessing).
    """
    def __init__(self, model_id):
        """Remember which model's tokenizer to load; nothing is loaded yet."""
        self.model_id = model_id
        self._tokenizer = None # Load lazily

    @property
    def tokenizer(self):
        """Return the tokenizer, loading and caching it on first access."""
        if self._tokenizer is None:
            from transformers import AutoTokenizer
            # Load locally inside the worker process
            self._tokenizer = AutoTokenizer.from_pretrained(
                self.model_id,
                trust_remote_code=True
            )
            # Ensure pad token exists
            if self._tokenizer.pad_token is None:
                self._tokenizer.pad_token = self._tokenizer.eos_token
        return self._tokenizer

    def __call__(self, example):
        """
        Format a single example.

        Args:
            example: mapping with an 'output' key (required) and optional
                'instruction' / 'input' keys; missing or None values for the
                optional keys are treated as empty strings.

        Returns:
            dict with "text" (the full chat-template rendering of the user and
            assistant turns), "user" (the user turn content) and "assistant"
            (the reference answer).
        """
        # Guard both optional fields against None. Without the guard a null
        # instruction would be rendered as the literal string "None".
        user_input = example.get('input', "") or ""
        instruction = example.get('instruction', "") or ""

        full_content = f"{instruction}\n{user_input}".strip()

        # Create the message structure
        messages = [
            {"role": "user", "content": full_content},
            {"role": "assistant", "content": example['output']}
        ]

        # add_generation_prompt=False: the assistant turn is already present.
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        )

        return {
            "text": text,
            "user": full_content,
            "assistant": example['output']
        }

# Instantiate the formatter once
qwen_formatter = QwenChatFormatter("Qwen/Qwen2.5-1.5B-Instruct")

In [None]:
def compute_metrics(eval_preds):
    """
    Score predictions against references with ROUGE-L.

    Args:
        eval_preds: a two-item iterable unpacked as (predictions, labels).

    Returns:
        {"rougeL": <score rounded to 4 decimals>} on success, or an empty dict
        if loading or computing the metric raises (the error is printed).
    """
    predictions, labels = eval_preds

    try:
        import evaluate

        scores = evaluate.load("rouge").compute(
            predictions=predictions,
            references=labels,
            use_stemmer=True,
            rouge_types=["rougeL"],
        )
        rouge_l = round(scores["rougeL"], 4)
        return {"rougeL": rouge_l}
    except Exception as e:
        # Best effort: a metric failure is reported and yields no metrics.
        print(f"Metrics computation failed: {e}")
        return {}

## Initialize Experiment

In [None]:
# Create experiment with unique name
# `my_experiment` is also reused below to build the TensorBoard log path and
# the checkpoint paths in the final evaluation cell.
# NOTE(review): the name is hard-coded; confirm how Experiment behaves when an
# experiment with the same name already exists.
my_experiment = "harshit_sft_demo"
experiment = Experiment(experiment_name=my_experiment)

## Get TensorBoard Log Directory

The TensorBoard logs are stored in the experiment directory. Let's get the path:

In [None]:
# Get experiment path
# NOTE(review): RfDb is imported from what looks like an internal module path
# (rapidfireai.fit.db.rf_db); confirm it is stable across rapidfireai versions.
from rapidfireai.fit.db.rf_db import RfDb

db = RfDb()
# Root path under which this experiment's artifacts are stored.
experiment_path = db.get_experiments_path(my_experiment)
# Directory passed to the %tensorboard cells further down.
tensorboard_log_dir = f"{experiment_path}/tensorboard_logs/{my_experiment}"

print(f"TensorBoard logs will be saved to: {tensorboard_log_dir}")

## Define Model Configurations

This tutorial fine-tunes Qwen2.5-1.5B-Instruct with LoRA adapters, a setup chosen to fit within Colab's memory constraints:

In [None]:
# Expected runs (2 LoRA shapes x 2 training strategies):
#   1: Fewer adaptors, aggressive learning
#   2: More adaptors, aggressive learning
#   3: Fewer adaptors, stable learning
#   4: More adaptors, stable learning

# Config A: Lightweight - Targets only Query/Value projections
_lora_light = RFLoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

# Config B: Heavy - Targets all linear layers (more parameters to learn)
_lora_heavy = RFLoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

peft_configs_qwen = List([_lora_light, _lora_heavy])


def _make_qwen_config(training_args):
    """Wrap one RFSFTConfig in the RFModelConfig settings both strategies share.

    Every call builds fresh model_kwargs / generation_config dictionaries, so
    the resulting configs do not share mutable state with each other.
    """
    return RFModelConfig(
        model_name=model_id,
        peft_config=peft_configs_qwen,
        training_args=training_args,
        model_type="causal_lm",
        model_kwargs={
            "device_map": "auto",
            "torch_dtype": "auto",
            "trust_remote_code": True,
            "use_cache": False,     # Must be False for Gradient Checkpointing
        },
        formatting_func=qwen_formatter,
        compute_metrics=compute_metrics,
        generation_config={
            "max_new_tokens": 256,
            "temperature": 0.6,     # Lower temp for factual QA
            "top_p": 0.9,
            "repetition_penalty": 1.1,
            "pad_token_id": tokenizer.pad_token_id,
            "eos_token_id": tokenizer.eos_token_id,
        },
    )


# Strategy 1: Aggressive (Higher LR, Linear Decay)
_aggressive_args = RFSFTConfig(
    learning_rate=2e-4,     # Standard Qwen LoRA rate
    lr_scheduler_type="linear",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    max_steps=100,           # Short run for the competition demo
    logging_steps=1,
    eval_strategy="steps",
    eval_steps=10,
    fp16=True,              # Required for T4
    gradient_checkpointing=True, # Saves VRAM
    report_to="none",
    num_train_epochs=10,
)
_aggressive_config = _make_qwen_config(_aggressive_args)

# Strategy 2: Stable (Lower LR, Cosine Schedule, Warmup)
_stable_args = RFSFTConfig(
    learning_rate=5e-5,     # Conservative rate
    lr_scheduler_type="cosine",
    warmup_steps=10,        # Gentle start
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    max_steps=100,
    logging_steps=1,
    eval_strategy="steps",
    eval_steps=10,
    fp16=True,
    gradient_checkpointing=True,
    report_to="none",
    num_train_epochs=10,
)
_stable_config = _make_qwen_config(_stable_args)

config_set_qwen = List([_aggressive_config, _stable_config])

In [None]:
def create_model(model_config):
    """
    Build the causal LM and its tokenizer for one run.

    Args:
        model_config: mapping providing "model_name" and "model_kwargs"; the
            kwargs are forwarded to AutoModelForCausalLM.from_pretrained.

    Returns:
        (model, tokenizer) tuple. The tokenizer always pads on the left; when
        it has no pad token, the EOS token is used for padding and the model
        config's pad_token_id is set to match.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer

    name, load_kwargs = model_config["model_name"], model_config["model_kwargs"]

    lm = AutoModelForCausalLM.from_pretrained(name, **load_kwargs)

    # Mirror the model's trust_remote_code setting when loading the tokenizer.
    tok = AutoTokenizer.from_pretrained(
        name,
        trust_remote_code=load_kwargs.get("trust_remote_code", False),
    )

    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
        lm.config.pad_token_id = tok.eos_token_id

    tok.padding_side = "left"

    return (lm, tok)

In [None]:
# Wrap the config set in a grid search for the SFT trainer.
# NOTE(review): presumably this expands the List-valued knobs into one run per
# combination (2 strategies x 2 LoRA configs = 4 runs); confirm against the
# RFGridSearch documentation.
config_group = RFGridSearch(
    configs=config_set_qwen,
    trainer_type="SFT"
)

## Start TensorBoard

**IMPORTANT: Make sure to start TensorBoard BEFORE invoking run_fit() below so that you can watch metrics appear in real-time!**

In [None]:
# Open TensorBoard inline on the log directory computed in the experiment-path cell
%tensorboard --logdir {tensorboard_log_dir}

## Run Training + Validation

Now we get to the main function for running multi-config training and evals. The metrics will appear in TensorBoard above in real-time.

In [None]:
# Launch training and evaluation for every config in the grid.
# NOTE(review): num_chunks presumably controls how many chunks the training
# data is split into, and seed fixes the random state; confirm both against
# the Experiment.run_fit documentation.
experiment.run_fit(
    config_group,     # grid of model/LoRA/training configs
    create_model,     # factory returning (model, tokenizer) for a config
    train_dataset,
    eval_dataset,
    num_chunks=2,
    seed=67
)

## Launch Interactive Run Controller

RapidFire AI provides an Interactive Controller that lets you manage executing runs dynamically in real-time from the notebook:

- ⏹️ **Stop**: Gracefully stop a running config
- ▶️ **Resume**: Resume a stopped run
- 🗑️ **Delete**: Remove a run from this experiment
- 📋 **Clone**: Create a new run by editing the config dictionary of a parent run to try new knob values; optional warm start of parameters
- 🔄 **Refresh**: Update run status and metrics

The Controller uses ipywidgets and is compatible with both Colab (ipywidgets 7.x) and Jupyter (ipywidgets 8.x).

In [None]:
# Create Interactive Controller
# `sleep` comes from the `from time import sleep` in the services cell near the
# top, so that cell must have been run first.
# NOTE(review): the 15 s pause presumably gives the runs time to register
# before the controller is shown; confirm it is still needed.
sleep(15)
from rapidfireai.fit.utils.interactive_controller import InteractiveController

# Port 8851 is one of the local service ports probed in the services cell.
controller = InteractiveController(dispatcher_url="http://127.0.0.1:8851")
controller.display()

## End Experiment

In [None]:
from IPython.display import display, HTML

# google.colab only exists inside Colab. Importing it unconditionally made this
# cell crash elsewhere before experiment.end() could run, so fall back to a
# plain prompt when it is unavailable.
try:
    from google.colab import output
except ImportError:
    output = None

if output is not None:
    display(HTML('''
<button id="continue-btn" style="padding: 10px 20px; font-size: 16px;">Click to End Experiment</button>
'''))

    # eval_js blocks until the Promise resolves
    output.eval_js('''
new Promise((resolve) => {
    document.getElementById("continue-btn").onclick = () => {
        document.getElementById("continue-btn").disabled = true;
        document.getElementById("continue-btn").innerText = "Continuing...";
        resolve("clicked");
    };
})
''')
else:
    # Outside Colab: block on keyboard input instead of the HTML button.
    input("Press Enter to end the experiment...")

# Actually end the experiment after the user has confirmed
experiment.end()
print("Done!")

## View TensorBoard Plots and Logs

After your experiment has ended, you can still view the full logs in TensorBoard:

In [None]:
# View final logs: re-open TensorBoard on the same log directory used during training
%tensorboard --logdir {tensorboard_log_dir}

## View RapidFire AI Log Files

You can track the work being done by the system via the log files that RapidFire AI writes to the logs/experiments/ folder.

In [None]:
# Show the tail of the experiment-level log file
from IPython.display import display, Pretty

log_file = experiment.get_log_file_path()
display(Pretty(f"📄 Experiment Log File: {log_file}"))

if not log_file.exists():
    display(Pretty(f"❌ Log file not found: {log_file}"))
else:
    # Banner: rule, title, rule.
    rule = "=" * 80
    for header in (rule, f"Last 30 lines of {log_file.name}:", rule):
        display(Pretty(header))
    # Read everything, keep only the final 30 lines, then render them.
    with open(log_file, 'r', encoding='utf-8') as f:
        tail = f.readlines()[-30:]
    for line in tail:
        display(Pretty(line.rstrip()))

In [None]:
# Show the tail of the training log file
# (`display` and `Pretty` are imported in the previous log cell)
log_file = experiment.get_log_file_path("training")
display(Pretty(f"📄 Training Log File: {log_file}"))

if not log_file.exists():
    display(Pretty(f"❌ Log file not found: {log_file}"))
else:
    # Banner: rule, title, rule.
    rule = "=" * 80
    for header in (rule, f"Last 30 lines of {log_file.name}:", rule):
        display(Pretty(header))
    # Read everything, keep only the final 30 lines, then render them.
    with open(log_file, 'r', encoding='utf-8') as f:
        tail = f.readlines()[-30:]
    for line in tail:
        display(Pretty(line.rstrip()))

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import evaluate
from tqdm import tqdm
import pandas as pd
import os
import gc

# ---------------------------------------------------------
# SETUP
# ---------------------------------------------------------
# Root of this experiment's run folders; `experiment_path` and `my_experiment`
# come from earlier cells.
base_exp_path = f"{experiment_path}/{my_experiment}"
runs_to_eval = ["Baseline", "1", "2", "3", "4"]
final_results = []

BATCH_SIZE = 16

# Load Tokenizer once
model_id = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# CRITICAL for batch generation: the decode step below slices off the first
# `input_len` tokens, which is only correct when padding is on the left.
tokenizer.padding_side = "left"

eval_subset = eval_dataset
references = [ex['output'] for ex in eval_subset]

# Pre-format all prompts to save time inside the loop.
# The prompt is rendered with the tokenizer's own chat template, the same
# mechanism QwenChatFormatter uses for the training text, instead of a
# hand-written ChatML string. A hand-written string can silently diverge from
# the template (for example if the template injects a default system turn),
# which would evaluate the adapters on a prompt format they were not trained on.
formatted_prompts = []
for example in eval_subset:
    user_input = example.get('input', "") or ""
    instruction = example.get('instruction', "") or ""
    full_content = f"{instruction}\n{user_input}".strip()
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": full_content}],
        tokenize=False,
        add_generation_prompt=True,  # end with the assistant header so the model answers
    )
    formatted_prompts.append(prompt)

# ---------------------------------------------------------
# EVALUATION LOOP (With BERTScore)
# ---------------------------------------------------------
import numpy as np  # imported once here instead of on every loop iteration

# Load metrics *once* before the loop to save time
rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")

for run_id in runs_to_eval:
    print("\n" + "="*50)
    print(f"PROCESSING RUN: {run_id}")
    print("="*50)

    # 1. Clean Memory
    # Drop the previous run's models, collect garbage first so their tensors
    # are actually released, and only then return cached blocks to the GPU.
    if 'model' in globals(): del model
    if 'base_model' in globals(): del base_model
    gc.collect()
    torch.cuda.empty_cache()

    # 2. Resolve the adapter path BEFORE loading any weights, so a missing
    #    adapter is skipped without paying for a base-model load.
    adapter_path = None  # None means "evaluate the plain base model"
    if run_id != "Baseline":
        # Construct path (Adjust based on your folder structure)
        adapter_path = f"{base_exp_path}/runs/{run_id}/checkpoints/final_checkpoint"

        # Fallback check: use the highest-numbered "checkpoint-<N>" directory.
        if not os.path.exists(adapter_path):
            chk_dir = f"{base_exp_path}/runs/{run_id}/checkpoints"
            if os.path.exists(chk_dir):
                # Keep only names with a numeric suffix; other names starting
                # with "checkpoint" would make int() raise.
                subdirs = [
                    d for d in os.listdir(chk_dir)
                    if d.startswith("checkpoint") and d.split('-')[-1].isdecimal()
                ]
                if subdirs:
                    latest = sorted(subdirs, key=lambda x: int(x.split('-')[-1]))[-1]
                    adapter_path = f"{chk_dir}/{latest}"

        if not os.path.exists(adapter_path):
            print(f"⚠️  Adapter not found at {adapter_path}. Skipping.")
            continue

    # 3. Load Base Model
    base_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )

    # 4. Apply Adapter
    if adapter_path is None:
        model = base_model
    else:
        print(f"   Loading Adapter from: {adapter_path}")
        model = PeftModel.from_pretrained(base_model, adapter_path)

    model.eval()

    # 5. Batched Generation
    print(f"   Generating answers (Batch Size: {BATCH_SIZE})...")
    predictions = []

    for i in tqdm(range(0, len(formatted_prompts), BATCH_SIZE)):
        batch_prompts = formatted_prompts[i : i + BATCH_SIZE]
        inputs = tokenizer(batch_prompts, return_tensors="pt", padding=True, truncation=True, max_length=1024).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs, max_new_tokens=128, temperature=0.6, do_sample=True, pad_token_id=tokenizer.pad_token_id
            )

        # With left padding every prompt occupies the first `input_len`
        # positions, so everything after that is newly generated text.
        input_len = inputs.input_ids.shape[1]
        batch_responses = tokenizer.batch_decode(outputs[:, input_len:], skip_special_tokens=True)
        predictions.extend(batch_responses)

    # 6. Metrics Calculation
    print("Calculating Metrics (ROUGE + BERTScore)...")

    # ROUGE
    r_scores = rouge.compute(predictions=predictions, references=references, rouge_types=["rougeL"])

    # BERTScore (Runs on CPU to avoid OOM)
    b_scores = bertscore.compute(
        predictions=predictions,
        references=references,
        lang="en",
        model_type="distilbert-base-uncased",
        device="cpu"  # Keep this CPU!
    )
    avg_bert = np.mean(b_scores['f1'])

    stats = {
        "Run": run_id,
        "ROUGE-L": round(r_scores['rougeL'], 4),
        "BERTScore": round(avg_bert, 4),
        "Sample Answer": predictions[0][:150] + "..."
    }
    final_results.append(stats)
    print(f"  ROUGE: {stats['ROUGE-L']} | BERT: {stats['BERTScore']}")

# ---------------------------------------------------------
# FINAL SUMMARY TABLE
# ---------------------------------------------------------
# One row per evaluated run, built from the dictionaries collected in
# `final_results`; runs that were skipped do not appear.
print("\n" + "="*60)
print("🏆 FINAL COMPARISON REPORT")
print("="*60)
df = pd.DataFrame(final_results)
cols = ["Run", "ROUGE-L", "BERTScore", "Sample Answer"]
# NOTE: DataFrame.to_markdown depends on the optional `tabulate` package.
print(df[cols].to_markdown(index=False))