In [None]:
!pip install transformers==4.40.2
!pip install peft==0.11.1
!pip install accelerate==0.30.1
!pip install bitsandbytes




In [None]:
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)


# --- Configuration ---------------------------------------------------------
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
DATA_PATH = "/content/humor_3cat.jsonl"
OUTPUT_DIR = "/content/humor_phi3_lora"

# Columns that format_example() reads from every record.
REQUIRED_COLUMNS = ("instruction", "output")


# Load the JSONL file as a single "train" split.
dataset = load_dataset("json", data_files=DATA_PATH, split="train")

# Fail fast with a clear message instead of a KeyError in the middle of
# dataset.map() when the file does not have the expected schema.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in dataset.column_names]
if missing_columns:
    raise ValueError(
        f"{DATA_PATH} is missing required column(s): {missing_columns}"
    )


tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Reuse EOS as the padding token so batches can be padded to a common length.
tokenizer.pad_token = tokenizer.eos_token


def format_example(example):
    """Attach a chat-formatted training string to a dataset record.

    Reads ``example["instruction"]`` and ``example["output"]``, renders them
    as a user turn followed by an assistant turn, and stores the combined
    string under ``example["text"]``.

    The record is updated in place and the same object is returned, which is
    the shape ``dataset.map`` expects from its mapping function.
    """
    user_turn = f"<|user|>\n{example['instruction']}\n"
    assistant_turn = f"<|assistant|>\n{example['output']}"
    example["text"] = user_turn + assistant_turn
    return example

# Add the chat-formatted "text" column that collate_fn tokenizes.
dataset = dataset.map(format_example)


# Load the base model quantized to 4-bit. The quantization settings go through
# `quantization_config`; passing `load_in_4bit=True` directly to
# from_pretrained() is deprecated and emits a warning.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)


# LoRA adapters on the attention projections. Phi-3 fuses the query/key/value
# projections into a single `qkv_proj` module, so the separate
# "q_proj"/"k_proj"/"v_proj" names match nothing in this model and would leave
# only `o_proj` adapted.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["qkv_proj", "o_proj"],
    lora_dropout=0.1,
    task_type="CAUSAL_LM",
)


model = get_peft_model(model, lora_config)


# Effective batch size per device is 2 * 16 = 32 sequences per optimizer step.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=1.5,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=16,
    learning_rate=1e-4,
    warmup_ratio=0.05,
    lr_scheduler_type="cosine",
    logging_steps=20,
    save_steps=500,
    save_total_limit=2,
    bf16=True,
    optim="paged_adamw_32bit",
    report_to="none",
    # Keep the raw "text" column: collate_fn tokenizes it at batch time.
    remove_unused_columns=False,
)


def collate_fn(batch):
    """Tokenize a batch of formatted examples for causal-LM training.

    Args:
        batch: list of dataset records, each carrying a "text" field.

    Returns:
        The tokenizer output as PyTorch tensors, padded to the longest
        sequence in the batch and truncated to 512 tokens, with an added
        "labels" tensor. Labels are a copy of ``input_ids`` in which padding
        positions are set to -100 so they do not contribute to the loss.
    """
    texts = [b["text"] for b in batch]
    tokens = tokenizer(
        texts,
        truncation=True,
        padding=True,
        max_length=512,
        return_tensors="pt"
    )
    labels = tokens["input_ids"].clone()
    # -100 is the index ignored by the cross-entropy loss. Mask through the
    # attention mask rather than by comparing against the pad token id: the
    # pad token is set to EOS, so an id comparison would also hide genuine
    # EOS tokens.
    labels[tokens["attention_mask"] == 0] = -100
    tokens["labels"] = labels
    return tokens


# Hand the LoRA-wrapped model, the training arguments, the formatted dataset
# and the batch-time tokenizing collator to the Trainer.
trainer = Trainer(
    model=model, args=training_args,
    train_dataset=dataset, data_collator=collate_fn,
)

# Run the fine-tuning loop; loss is logged every `logging_steps` steps.
trainer.train()

# Persist the trained model to OUTPUT_DIR; the merge cell loads it from there.
model.save_pretrained(OUTPUT_DIR)
print("Finetuning complete!")


Generating train split: 0 examples [00:00, ? examples/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/18000 [00:00<?, ? examples/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Step,Training Loss
20,4.8377
40,3.467
60,2.1867
80,1.9107
100,1.8128
120,1.8274
140,1.8147
160,1.8063
180,1.8025
200,1.7592




Finetuning complete!


In [None]:
import os

# PeftModel is used below but was never imported by the training cell, so on
# a fresh kernel this cell raised NameError without this import.
from peft import PeftModel

BASE_MODEL = "microsoft/Phi-3-mini-4k-instruct"
LORA_PATH = "/content/humor_phi3_lora"
OUT_DIR    = "/content/phi3_humor_merged"

os.makedirs(OUT_DIR, exist_ok=True)

# Reload the base model in fp16 so the adapter is merged into
# full-precision weights.
print("Loading base model...")
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="cuda",
    trust_remote_code=True
)

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(model, LORA_PATH)

# Fold the adapter weights into the base weights and drop the PEFT wrapper,
# then move the result to CPU before writing it out.
print("Merging LoRA...")
model = model.merge_and_unload()
model = model.to("cpu")

# Save model and tokenizer side by side so OUT_DIR is loadable on its own.
print("Saving merged model...")
model.save_pretrained(OUT_DIR, safe_serialization=True)
tokenizer.save_pretrained(OUT_DIR)

print("MERGE COMPLETE!")




Loading base model...




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading tokenizer...


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading LoRA adapter...
Merging LoRA...
Saving merged model...
MERGE COMPLETE!


In [None]:
# Load the merged model and tokenizer used by chat() for prompting.
import os

MODEL_DIR = "/content/drive/MyDrive/phi3_humor_merged"

# NOTE(review): the merge cell writes to /content/phi3_humor_merged, and no
# visible cell mounts Google Drive or copies the merged model to this path.
# Fail fast with an actionable message instead of letting from_pretrained()
# treat a missing directory as a Hub repository id.
if not os.path.isdir(MODEL_DIR):
    raise FileNotFoundError(
        f"Merged model directory not found: {MODEL_DIR}. "
        "Mount Google Drive and copy the merged model there, "
        "or point MODEL_DIR at the merge output directory."
    )

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

def chat(prompt, **generate_overrides):
    """Generate a reply to ``prompt`` with the loaded model and print it.

    The prompt is wrapped in the same ``<|user|>`` / ``<|assistant|>``
    template used to build the training text, so the model continues from
    the start of the assistant turn.

    Args:
        prompt: the user message.
        **generate_overrides: optional keyword arguments forwarded to
            ``model.generate``; they override the sampling defaults below
            (for example ``max_new_tokens=150`` or ``temperature=0.7``).

    Returns:
        None. The decoded reply is printed.
    """
    full_prompt = f"<|user|>\n{prompt}\n<|assistant|>\n"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    # Remember the prompt length so only newly generated tokens are decoded.
    input_len = inputs["input_ids"].shape[1]

    # Default sampling settings; any of them can be overridden per call.
    generation_kwargs = {
        "max_new_tokens": 70,
        "do_sample": True,
        "temperature": 0.9,
        "top_p": 0.9,
        "repetition_penalty": 1.2,
        "no_repeat_ngram_size": 4,
    }
    generation_kwargs.update(generate_overrides)

    outputs = model.generate(**inputs, **generation_kwargs)

    # Drop the echoed prompt tokens and decode only the continuation.
    new_tokens = outputs[0][input_len:]
    text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    print(text)





Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

