In [None]:
!pip install -q transformers datasets accelerate peft huggingface_hub

import os
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

In [None]:
# Fetch the train split and, in the same expression, rename its columns to the
# "instruction"/"output" names that format_sample reads.
ds = load_dataset("rsilveira79/soprano_dpo_pairs", split="train").rename_columns(
    {"question": "instruction", "chosen": "output"}
)


def format_sample(example):
    """Flatten one dataset row into a single prompt/response training string.

    Args:
        example: Mapping that provides "instruction" and "output" entries.

    Returns:
        A dict with the single key "text" whose value is
        "Instruction: <instruction>\\nResponse: <output>".
    """
    instruction = example["instruction"]
    response = example["output"]
    return {"text": "Instruction: {}\nResponse: {}".format(instruction, response)}

# Apply format_sample to every row; its returned "text" entry becomes a column.
ds = ds.map(format_sample)


# Base checkpoint used for both the tokenizer and (in a later cell) the model.
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse EOS as the padding token so that padding="max_length" in tokenize()
# has a token to pad with (presumably this tokenizer defines no pad token of
# its own -- TODO confirm).
tokenizer.pad_token = tokenizer.eos_token

def tokenize(example):
    """Tokenize the "text" field to fixed-length inputs with loss-masked labels.

    Every sequence is truncated/padded to 256 tokens. Labels are a copy of
    ``input_ids`` in which each padding position (``attention_mask == 0``) is
    replaced by -100, so padding does not contribute to the language-modeling
    loss. The mask is taken from ``attention_mask`` rather than from the pad
    token id because the pad token is set to EOS in this notebook; comparing
    ids would also hide genuine EOS tokens.

    Args:
        example: Mapping with a "text" entry that is either a single string or
            a list of strings (the latter when called via ``map(batched=True)``).

    Returns:
        The tokenizer output with an added "labels" entry. "labels" is a new
        list (or list of lists for a batch); it does not alias "input_ids".
    """
    IGNORE_INDEX = -100  # label value skipped by the cross-entropy loss

    def mask_padding(ids, mask):
        # Keep real tokens, blank out the padded positions.
        return [tok_id if keep else IGNORE_INDEX for tok_id, keep in zip(ids, mask)]

    tok = tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=256
    )

    input_ids = tok["input_ids"]
    attention_mask = tok["attention_mask"]

    if len(input_ids) > 0 and isinstance(input_ids[0], (list, tuple)):
        # Batched call: one row of ids / mask per input string.
        tok["labels"] = [
            mask_padding(ids, mask) for ids, mask in zip(input_ids, attention_mask)
        ]
    else:
        # Single string (or an empty batch).
        tok["labels"] = mask_padding(input_ids, attention_mask)
    return tok

# batched=True: tokenize() receives batches of rows, so example["text"] is a list.
tokenized_ds = ds.map(tokenize, batched=True)
# Return only the model inputs, as torch tensors, when rows are accessed.
tokenized_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/368 [00:00<?, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/639k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [None]:
# Load the base checkpoint in half precision.
# `dtype` replaces `torch_dtype`, which current transformers reports as
# deprecated ("`torch_dtype` is deprecated! Use `dtype` instead!").
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    dtype=torch.float16,
    device_map="auto"
)

# Disable the generation KV cache on the model config before training.
model.config.use_cache = False

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
# LoRA setup: rank-8 adapters on the attention query/value projections only.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model with the adapters, then report how many parameters train.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 2,621,440 || all params: 2,782,305,280 || trainable%: 0.0942


In [None]:
training_args = TrainingArguments(
    # Where checkpoints are written (at most two are kept).
    output_dir="./soprano_mistral_lora_fp16",
    save_total_limit=2,
    # Schedule: 1 sample per step x 8 accumulation steps per optimizer update.
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    # Mixed precision: fp16 on, bf16 off.
    fp16=True,
    bf16=False,
    # Log the loss every 10 steps; no external experiment tracker.
    logging_steps=10,
    report_to="none",
)

In [None]:
# Environment switch for Weights & Biases logging. training_args already sets
# report_to="none", so this is a second guard against W&B being used.
os.environ["WANDB_DISABLED"] = "true"
# Train-only setup: no eval dataset and no custom data collator are passed.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds
)

The model is already on multiple devices. Skipping the move to device specified in `args`.


In [None]:
# Run fine-tuning; the returned TrainOutput is displayed as the cell result.
trainer.train()


Step,Training Loss
10,3.4557
20,2.6312
30,2.2885
40,2.2038
50,2.1677
60,2.1648
70,2.0307
80,2.0947
90,2.0122
100,2.0113


TrainOutput(global_step=189, training_loss=2.160257904617875, metrics={'train_runtime': 390.7849, 'train_samples_per_second': 3.838, 'train_steps_per_second': 0.484, 'total_flos': 6108441477120000.0, 'train_loss': 2.160257904617875, 'epoch': 3.0})

In [None]:
# Save the PEFT-wrapped model to ./loraAdapter; the reload cells below read
# the adapter back from this path (adapter_path).
model.save_pretrained("./loraAdapter")

In [None]:
# output_dir="./soprano_mistral_lora_fp16"
from peft import PeftModel

# Same base checkpoint as used for training, plus the adapter directory written
# by model.save_pretrained("./loraAdapter").
base_model_name = "microsoft/phi-2"
adapter_path = "./loraAdapter"

# Recreate the tokenizer with the same pad-token choice (EOS) as during training.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.pad_token = tokenizer.eos_token


In [None]:
# Load base model + LoRA adapter
# `dtype` replaces `torch_dtype`, which current transformers reports as
# deprecated ("`torch_dtype` is deprecated! Use `dtype` instead!").
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    dtype=torch.float16,
    device_map="auto"
)

model = PeftModel.from_pretrained(model, adapter_path)
# Switch to inference mode. The trailing semicolon keeps the notebook from
# printing the full module tree that eval() returns.
model.eval();

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): PhiForCausalLM(
      (model): PhiModel(
        (embed_tokens): Embedding(51200, 2560)
        (layers): ModuleList(
          (0-31): 32 x PhiDecoderLayer(
            (self_attn): PhiAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2560, out_features=2560, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2560, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2560, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_features=2560

In [None]:
def generate_response(instruction, max_new_tokens=120):
    """Sample a completion for `instruction` from the fine-tuned model.

    The prompt is "Instruction: ...", a length hint, then "Response:". Only
    the tokens generated after the prompt are decoded, so the result never
    contains prompt text. Decoding the whole sequence and splitting on the
    word "Response" left a stray ":" at the start and split in the wrong
    place when the instruction itself contained "Response".

    Args:
        instruction: User instruction/question inserted into the prompt.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated completion with surrounding whitespace stripped.
        Sampling is enabled, so repeated calls can return different text.
    """
    prompt = (
        f"Instruction: {instruction}\n"
        f"Write 1–3 complete sentences.\n"
        f"Response:\n"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # generate() returns prompt + continuation; remember where the prompt ends.
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=1.1,     # high randomness preserved
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id
        )

    completion_ids = output[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

In [None]:
# Smoke test of the fine-tuned model. generate_response samples
# (do_sample=True), so the text differs from run to run.
print(generate_response("How far away is the sun?"))

:
The sun's about 93 million miles from where we're standing right now. And remember, that ain't the distance we gotta cover to hop on a flight, that's its actual walkin' route. This ain't some magic bullet to fly from New Jersey to Hawaii for me.

More about the sun? Yeah, we gotta dig deeper.

We got a guy by the name of Ernest Ball, and he came up with the ball value for that sun, around 4.567 x 10^30 kg (that's a lot of zeros if you ask me).


In [None]:
# Archive the Trainer output_dir. This is the checkpoint folder (adapter plus
# optimizer/scheduler state), not the ./loraAdapter directory saved earlier.
!zip -r soprano_adapter.zip soprano_mistral_lora_fp16

  adding: soprano_mistral_lora_fp16/ (stored 0%)
  adding: soprano_mistral_lora_fp16/checkpoint-189/ (stored 0%)
  adding: soprano_mistral_lora_fp16/checkpoint-189/README.md (deflated 65%)
  adding: soprano_mistral_lora_fp16/checkpoint-189/scheduler.pt (deflated 62%)
  adding: soprano_mistral_lora_fp16/checkpoint-189/adapter_model.safetensors (deflated 8%)
  adding: soprano_mistral_lora_fp16/checkpoint-189/rng_state.pth (deflated 26%)
  adding: soprano_mistral_lora_fp16/checkpoint-189/optimizer.pt (deflated 8%)
  adding: soprano_mistral_lora_fp16/checkpoint-189/trainer_state.json (deflated 74%)
  adding: soprano_mistral_lora_fp16/checkpoint-189/adapter_config.json (deflated 56%)
  adding: soprano_mistral_lora_fp16/checkpoint-189/training_args.bin (deflated 54%)
  adding: soprano_mistral_lora_fp16/checkpoint-189/scaler.pt (deflated 64%)


In [None]:
from google.colab import files
# Colab-only: start a browser download of the archive created by the zip cell.
files.download('soprano_adapter.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Check which folders are present in the Colab working directory.
!ls /content


sample_data  soprano_mistral_lora_fp16


In [None]:
from pathlib import Path

import torch  # needed for torch.float16 below; makes this cell runnable on its own
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE = "microsoft/phi-2"
ADAPTER = "./soprano_mistral_lora_fp16"  # your uploaded adapter folder


def _resolve_adapter_dir(path):
    """Return the directory that actually contains the LoRA adapter files.

    The Trainer output folder keeps adapters inside ``checkpoint-<step>/``
    subfolders, while a folder written by ``save_pretrained`` has
    ``adapter_config.json`` at its top level. Both layouts are accepted.

    Args:
        path: Adapter location as given by the user.

    Returns:
        `path` unchanged when it already contains ``adapter_config.json`` or
        when no checkpoint subfolder with an adapter exists (PEFT then handles
        or reports it as before); otherwise the ``checkpoint-*`` subfolder
        with the highest step number, as a string.
    """
    root = Path(path)
    if (root / "adapter_config.json").is_file():
        return path

    def step_of(checkpoint):
        # "checkpoint-189" -> 189; non-numeric suffixes sort last.
        suffix = checkpoint.name.rsplit("-", 1)[-1]
        return int(suffix) if suffix.isdigit() else -1

    candidates = [
        c for c in root.glob("checkpoint-*")
        if (c / "adapter_config.json").is_file()
    ]
    return str(max(candidates, key=step_of)) if candidates else path


# Load base Phi-2
tokenizer = AutoTokenizer.from_pretrained(BASE)
# `dtype` replaces the deprecated `torch_dtype` argument.
model = AutoModelForCausalLM.from_pretrained(BASE, dtype=torch.float16)

# Load adapter on top of base model
model = PeftModel.from_pretrained(model, _resolve_adapter_dir(ADAPTER))

# Merge LoRA → single consolidated model
merged = model.merge_and_unload()

# Save merged model
SAVE = "phi2_soprano_merged"
merged.save_pretrained(SAVE, safe_serialization=True)
tokenizer.save_pretrained(SAVE)

print("Merged model saved to:", SAVE)

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Merged model saved to: phi2_soprano_merged


In [None]:
# Archive the merged model folder written by the merge cell (SAVE).
!zip -r phi2_soprano_merged.zip phi2_soprano_merged

  adding: phi2_soprano_merged/ (stored 0%)
  adding: phi2_soprano_merged/config.json (deflated 48%)
  adding: phi2_soprano_merged/special_tokens_map.json (deflated 75%)
  adding: phi2_soprano_merged/vocab.json (deflated 59%)
  adding: phi2_soprano_merged/tokenizer.json (deflated 82%)
  adding: phi2_soprano_merged/tokenizer_config.json (deflated 94%)
  adding: phi2_soprano_merged/model-00002-of-00002.safetensors (deflated 8%)
  adding: phi2_soprano_merged/model.safetensors.index.json (deflated 96%)
  adding: phi2_soprano_merged/added_tokens.json (deflated 84%)
  adding: phi2_soprano_merged/generation_config.json (deflated 24%)
  adding: phi2_soprano_merged/model-00001-of-00002.safetensors (deflated 8%)
  adding: phi2_soprano_merged/merges.txt (deflated 53%)


In [None]:
# List the merged model folder to confirm weights and tokenizer files exist.
!ls -R /content/phi2_soprano_merged

/content/phi2_soprano_merged:
added_tokens.json	model-00001-of-00002.safetensors  tokenizer_config.json
config.json		model-00002-of-00002.safetensors  tokenizer.json
generation_config.json	model.safetensors.index.json	  vocab.json
merges.txt		special_tokens_map.json


In [None]:
from google.colab import drive
# Colab-only: mount Google Drive at /content/drive.
drive.mount('/content/drive')