In [None]:
!pip install -q -U bitsandbytes transformers peft accelerate datasets trl

In [None]:
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTConfig, SFTTrainer

# Assuming you uploaded 'workflow_dataset.jsonl' to your Colab workspace
dataset = load_dataset("json", data_files="workflow_dataset.jsonl", split="train")

# Fail fast, before the 7B model is downloaded and loaded: the trainer below
# reads each training example from the "text" column.
if len(dataset) == 0:
    raise ValueError("workflow_dataset.jsonl contains no training examples.")
if "text" not in dataset.column_names:
    raise ValueError(
        "Expected a 'text' field in workflow_dataset.jsonl, "
        f"found columns: {dataset.column_names}"
    )

print(f"Loaded {len(dataset)} training examples.")

In [None]:
# We'll use Mistral-7B as it is excellent at JSON/coding tasks
model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Configure 4-bit (NF4, double-quantized) loading to fit in Colab's 16GB VRAM.
# The compute dtype is float16 so that it agrees with the `fp16=True`
# mixed-precision setting used for training. The 16GB Colab GPU is typically a
# T4, which has no native bfloat16 support.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load Tokenizer
# `add_eos_token` is deliberately left at its default. This tokenizer object is
# used to encode inference prompts and is saved/published alongside the model;
# appending </s> to every encoded prompt would signal that the sequence has
# already ended and degrade generation.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Mistral ships without a pad token; reuse EOS so batches can be padded.
tokenizer.pad_token = tokenizer.eos_token

# Load Model (quantized on the fly, placed automatically on the available device)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)
# Trade compute for memory, then apply PEFT's k-bit training preparation.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
# LoRA adapter setup: only the attention projections receive trainable
# low-rank matrices; the quantized base weights stay frozen.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=16,               # rank of the low-rank update
    lora_alpha=32,      # scaling numerator (alpha / r = 2)
    lora_dropout=0.05,
    bias="none",        # bias terms are not trained
)

# Wrap the prepared base model and report how many parameters will be trained.
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()

In [None]:
# Set up training arguments.
# SFTConfig extends TrainingArguments with the SFT-specific options (text
# column, maximum sequence length). Current TRL releases expect them here and
# no longer accept them as SFTTrainer keyword arguments.
training_args = SFTConfig(
    output_dir="./workflow-schema-lora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch size: 2 * 4 = 8 per device
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_strategy="epoch",
    logging_steps=10,
    num_train_epochs=3,  # Only takes effect when max_steps is removed
    max_steps=200,       # Overrides num_train_epochs; remove to train on the full dataset
    fp16=True,
    dataset_text_field="text",  # column of `dataset` holding the training text
    max_length=512,             # Truncate long examples to keep memory usage low
)

# Initialize Trainer
trainer = SFTTrainer(
    model=peft_model,
    train_dataset=dataset,
    args=training_args,
)

# Start Fine-tuning!
trainer.train()

In [None]:
# Save the LoRA adapters (and the tokenizer next to them, so the directory is
# self-contained for later loading/merging)
new_model_name = "workflow-mistral-finetuned"
trainer.model.save_pretrained(new_model_name)
tokenizer.save_pretrained(new_model_name)

print(f"Model successfully saved to {new_model_name}")

# Quick Test
prompt = "<s>[INST] Convert this instruction into a workflow schema: Pull the weekly AWS billing data and send an alert if it exceeds budget. [/INST]"

# The prompt already spells out the <s> (BOS) token, so automatic special
# tokens are disabled: otherwise BOS would be duplicated and, if the tokenizer
# is configured to append EOS, the prompt would end in </s> and generation
# would start from an already-terminated sequence.
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(peft_model.device)

# The trainer leaves the model in training mode; switch to eval so LoRA dropout
# is disabled while generating.
peft_model.eval()
outputs = peft_model.generate(
    **inputs,
    max_new_tokens=200,
    pad_token_id=tokenizer.eos_token_id,  # explicit pad id avoids the generate() warning
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
from huggingface_hub import notebook_login
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# 1. Log in to Hugging Face (this will prompt you for your HF write token)
notebook_login()

# 2. Define your repository name (change 'your-username' to your actual HF username)
hf_repo_name = "your-username/workflow-orchestrator-mistral"
# Fail before the expensive load/merge rather than at upload time.
if hf_repo_name.startswith("your-username/"):
    raise ValueError(
        "Set hf_repo_name to '<your HF username>/<repo name>' before merging and pushing."
    )

# Directory the adapters and tokenizer were written to with save_pretrained().
# The Trainer's output_dir ("./workflow-schema-lora") only holds checkpoint-*
# subfolders, so it cannot be loaded directly as an adapter.
adapter_dir = "workflow-mistral-finetuned"

print("Loading PEFT model and merging with base model...")
# 3. Load the model and merge it (requires reloading in 16-bit to merge properly)
# NOTE(review): this loads a second, full fp16 copy of the base model; if the
# training objects are still alive in this kernel, memory may be tight.
model = AutoPeftModelForCausalLM.from_pretrained(
    adapter_dir,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
# Fold the LoRA weights into the base weights and drop the PEFT wrappers.
merged_model = model.merge_and_unload()

# 4. Load tokenizer (saved next to the adapters)
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)

print(f"Pushing merged model to Hugging Face Hub: {hf_repo_name}...")
# 5. Push to Hub
merged_model.push_to_hub(hf_repo_name)
tokenizer.push_to_hub(hf_repo_name)

print("Upload complete! Your model is now live on Hugging Face.")