In [None]:
%%writefile requirements_colab.txt
torch>=2.2.0
transformers>=4.38.0
datasets>=2.17.0
peft>=0.18.1
bitsandbytes>=0.43.0
accelerate>=0.30.0
scipy>=1.11.0

In [None]:
!pip install git+https://github.com/huggingface/trl

In [2]:
import os
# Make sure the folder that will receive the generated scripts exists.
# exist_ok=True keeps this cell safe to re-run.
os.makedirs(name='src', exist_ok=True)

In [None]:
%%writefile src/train_colab.py
import os
import sys
import argparse
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer

def log(msg):
    """Print ``msg`` to stdout, prefixed with the NanoSentri training tag."""
    tag = "[NanoSentri-Train]: "
    print(f"{tag}{msg}")

def format_phi3_prompt(sample):
    """Render one dataset record as a single Phi-3 style chat string.

    Args:
        sample: Mapping with required keys ``'instruction'`` and ``'output'``
            and an optional ``'input'`` key holding extra context.

    Returns:
        A dict ``{"text": ...}`` containing the user turn followed by the
        assistant turn. When ``'input'`` is present and truthy it is appended
        to the instruction under a "Technical Context:" heading.

    Raises:
        KeyError: If ``'instruction'`` or ``'output'`` is missing.
    """
    instruction, response = sample['instruction'], sample['output']
    context = sample.get('input', '')

    # Only add the context section when there is something to show.
    user_content = (
        f"{instruction}\n\nTechnical Context:\n{context}" if context else instruction
    )

    return {"text": f"<|user|>\n{user_content} <|end|>\n<|assistant|>\n{response} <|end|>"}

def main():
    """Run the QLoRA fine-tuning pipeline end to end.

    Steps: parse CLI arguments, load the base model quantized to 4-bit NF4,
    build the LoRA configuration, format and tokenize the JSONL dataset,
    train with ``SFTTrainer``, then save the adapter and tokenizer to
    ``--output_dir``.

    CLI arguments:
        --data_path: Path to the JSONL dataset (required).
        --output_dir: Where to save adapters.
        --base_model: HF model ID of the base model.
        --max_seq_length: Length every example is truncated/padded to
            (default 512).
        --max_steps: Number of training steps (default 100).

    Raises:
        ValueError: If the dataset loaded from ``--data_path`` has no rows.
    """
    parser = argparse.ArgumentParser(description="Phi-3 QLoRA Trainer")
    parser.add_argument("--data_path", type=str, required=True, help="Path to JSONL dataset")
    parser.add_argument("--output_dir", type=str, default="./phi3-vaisala-adapter", help="Where to save adapters")
    parser.add_argument("--base_model", type=str, default="microsoft/Phi-3-mini-4k-instruct", help="HF Model ID")
    parser.add_argument("--max_seq_length", type=int, default=512, help="Token length each example is truncated/padded to")
    parser.add_argument("--max_steps", type=int, default=100, help="Total number of training steps")
    args = parser.parse_args()

    has_cuda = torch.cuda.is_available()
    log(f"Initializing Training Pipeline on {torch.cuda.get_device_name(0) if has_cuda else 'CPU'}")

    # bf16 is not available on every CUDA GPU, and requesting it on one that
    # lacks support can abort the run. Fall back to fp16 only in that case;
    # everywhere else keep the bf16 configuration.
    needs_fp16 = has_cuda and not torch.cuda.is_bf16_supported()
    use_bf16 = not needs_fp16
    compute_dtype = torch.float16 if needs_fp16 else torch.bfloat16
    log(f"Mixed precision: {'fp16' if needs_fp16 else 'bf16'}")

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=True,
    )

    log(f"Loading base model: {args.base_model}...")
    model = AutoModelForCausalLM.from_pretrained(
        args.base_model,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
        attn_implementation="eager"
    )

    model.gradient_checkpointing_enable()
    model = prepare_model_for_kbit_training(model)

    tokenizer = AutoTokenizer.from_pretrained(args.base_model, trust_remote_code=True)
    # Reuse the unknown token for padding and pad on the right.
    tokenizer.pad_token = tokenizer.unk_token
    tokenizer.padding_side = "right"

    peft_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        # Names of the modules that receive LoRA adapters.
        target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"]
    )

    log("Loading and formatting dataset...")
    dataset = load_dataset("json", data_files=args.data_path, split="train")
    if len(dataset) == 0:
        # Fail with a clear message instead of an IndexError on dataset[0] below.
        raise ValueError(f"Dataset at {args.data_path} contains no rows")
    dataset = dataset.map(format_phi3_prompt)

    log(f"Sample formatted entry:\n{dataset[0]['text']}")

    # Tokenize the dataset with truncation
    log("Tokenizing dataset...")

    def tokenize_function(examples):
        """Tokenize a batch of formatted texts to a fixed length."""
        return tokenizer(
            examples["text"],
            truncation=True,
            padding="max_length",
            max_length=args.max_seq_length,
            return_tensors=None
        )

    # Drop the original columns so only the tokenizer outputs remain.
    tokenized_dataset = dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=dataset.column_names
    )

    training_args = TrainingArguments(
        output_dir=args.output_dir,
        # 4 sequences per device x 4 accumulation steps per optimizer update.
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        logging_steps=10,
        max_steps=args.max_steps,
        save_steps=50,
        fp16=needs_fp16,
        bf16=use_bf16,
        optim="paged_adamw_8bit",
        report_to="none",
        gradient_checkpointing=True,
    )

    # Data collator for language modeling
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,  # Not masked language modeling
    )

    # Minimal SFTTrainer setup: the dataset is already tokenized above.
    # NOTE(review): this call was written against TRL 0.7.10, while the
    # notebook installs TRL from the GitHub main branch - confirm these
    # keyword arguments against the installed version.
    trainer = SFTTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        peft_config=peft_config,
        data_collator=data_collator,
    )

    log("Starting training...")
    trainer.train()

    log(f"Training complete. Saving adapters to {args.output_dir}")
    trainer.model.save_pretrained(args.output_dir)
    tokenizer.save_pretrained(args.output_dir)

# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

In [None]:
!pip install -r requirements_colab.txt
!pip install flash_attn --no-build-isolation # Optional speedup. NOTE(review): src/train_colab.py loads the model with attn_implementation set to eager, so this package looks unused - confirm before paying the build time

In [None]:
!python src/train_colab.py --data_path "vaisala_synthetic_train.jsonl" --output_dir "phi3-vaisala-adapter"

In [None]:
import shutil
from google.colab import files

# Pack the adapter folder into phi3-vaisala-adapter.zip in the working directory
shutil.make_archive(
    base_name='phi3-vaisala-adapter',
    format='zip',
    root_dir='phi3-vaisala-adapter',
)

# Hand the archive to the browser as a file download
files.download('phi3-vaisala-adapter.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Merge -> Export -> Quantize pipeline

In [None]:
%%writefile requirements_export.txt
# Core Stack
transformers>=4.41.2
peft>=0.11.0
accelerate>=0.30.0

# Optimum with ONNX Runtime GPU support
optimum[onnxruntime-gpu]>=1.20.0

# Utilities
protobuf>=3.20.3,<6.0.0
scipy

In [None]:
# Install everything from requirements
!pip install --upgrade --upgrade-strategy eager -r requirements_export.txt

In [None]:
%%writefile src/merge.py
import argparse
import torch
import shutil
import gc
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# 1. Setup
# Merge a trained LoRA adapter into its base model and write the standalone
# merged model (plus the base model's tokenizer) to disk.
parser = argparse.ArgumentParser(description="Merge a LoRA adapter into its base model")
parser.add_argument("--adapter_dir", type=str, required=True, help="Directory containing the trained adapter")
# The base model must be the one the adapter was trained on; the default
# matches the default --base_model of src/train_colab.py.
parser.add_argument("--base_model", type=str, default="microsoft/Phi-3-mini-4k-instruct", help="HF Model ID the adapter was trained on")
parser.add_argument("--merged_path", type=str, default="./merged_model_tmp", help="Where to save the merged model")
args = parser.parse_args()

base_model_id = args.base_model
merged_path = args.merged_path

print(f">>> [Phase 1] Loading Base Model: {base_model_id}...")
# Load in FP16 to keep RAM usage lower
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

print(f">>> [Phase 1] Loading Adapter from {args.adapter_dir}...")
model = PeftModel.from_pretrained(base_model, args.adapter_dir)

print(">>> [Phase 1] Merging weights...")
# Fold the adapter weights into the base model and drop the PEFT wrapper.
model = model.merge_and_unload()

print(f">>> [Phase 1] Saving merged model to disk: {merged_path}...")
model.save_pretrained(merged_path)
# The tokenizer is taken from the base model, not from the adapter directory.
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
tokenizer.save_pretrained(merged_path)

print(">>> SUCCESS. Now RESTART your runtime to clear RAM.")

In [None]:
!python src/merge.py --adapter_dir "phi3-vaisala-adapter"

In [None]:
import shutil
import os
from google.colab import files

print(">>> Zipping the merged model (approx 4-5 GB)...")
print("    This may take 2-3 minutes to zip and prepare for download.")

# Pack merged_model_tmp into phi3_merged_model.zip in the working directory
shutil.make_archive(
    base_name="phi3_merged_model",
    format='zip',
    root_dir="merged_model_tmp",
)

print(">>> Download starting...")
print("    ⚠️ IMPORTANT: Do not close this tab until the download finishes.")
# Hand the archive to the browser as a file download
files.download("phi3_merged_model.zip")