In [None]:
from unsloth import FastLanguageModel, is_bfloat16_supported
import torch
from datasets import load_dataset

from trl import SFTTrainer
from transformers import TrainingArguments

import json
import os

In [2]:
# Hub id of the 4-bit base checkpoint to fine-tune.
# (commented-out alternative spelling: "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
base_model = "unsloth/meta-llama-3.1-8b-instruct-bnb-4bit"

# Sequence length handed to both the model loader and the trainer.
max_seq_length = 2048

# Filesystem-friendly run name: the hub id with "/" replaced, plus a task suffix.
tuned_model = base_model.replace("/", "_") + "-qlora-social-media"

# Every artifact of this run lives under one top-level directory.
tuned_model_top_dir = "model_" + tuned_model
(
    tuned_model_save_dir,
    tuned_model_gguf_dir,
    tuned_model_checkpoints_dir,
    tuned_model_logs_dir,
) = (
    f"{tuned_model_top_dir}/{subdir}"
    for subdir in ("model_save", "model_gguf", "checkpoints", "logs")
)

# Only the top-level directory is created here; sub-directories are not.
os.makedirs(tuned_model_top_dir, exist_ok=True)

In [None]:
# Loader options for the base checkpoint: 4-bit weights, no 8-bit, no full fine-tuning.
# NOTE(review): dtype=None presumably lets the library pick the dtype — confirm.
# (`use_exact_model_name=True` is a further option that was left disabled.)
load_options = dict(
    model_name=base_model,
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
    load_in_8bit=False,
    full_finetuning=False,
)

model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

In [None]:
# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Wrap the loaded base model with LoRA adapters (rank 16, alpha 16, rank-stabilized
# scaling enabled, no LoftQ). `model` is rebound to the wrapped model, so this cell
# expects the freshly loaded base model as its input.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=lora_target_modules,
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    use_rslora=True,
    bias="none",
    loftq_config=None,
    use_gradient_checkpointing="unsloth",
    random_state=42,
)

In [None]:
# Index of the sample row that is echoed before and after formatting.
n_example = 4

# Read the conversation CSV as the "train" split and shuffle it with a fixed seed.
dataset = load_dataset(
    "csv",
    name="csv-for-llama3.1",
    split="train",
    data_files="conversations.csv",
).shuffle(seed=42)

print(dataset.column_names)
sample_row = dataset[n_example]
print(json.dumps(sample_row, indent=2))
print()

# Alpaca-style template: first slot is the comment being replied to, second slot the reply.
alpaca_prompt = """You are a social media user. Respond to the comment you are shown.

### Instruction:
{}

### Response:
{}"""

# Special-token strings taken from the loaded tokenizer.
BOS_TOKEN = tokenizer.bos_token
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    """Render a batch of rows into Alpaca-style training strings.

    Args:
        examples: Batched mapping of column name -> list of values, as passed by
            `dataset.map(..., batched=True)`. Reads the "parent_text" and
            "comment_body" columns.

    Returns:
        dict with a single "text" key holding one formatted prompt per input row.
    """
    # EOS must be appended so the model learns where a reply ends; otherwise
    # generation will go on forever.
    # BOS is deliberately NOT prepended: the trainer tokenizes the "text" column
    # with special tokens enabled, and the Llama 3.1 tokenizer is expected to
    # insert BOS itself, so adding it here as well would yield a doubled BOS.
    texts = [
        alpaca_prompt.format(parent, reply) + EOS_TOKEN
        for parent, reply in zip(examples["parent_text"], examples["comment_body"])
    ]
    return {"text": texts}


# Build the "text" column in batches, then drop the raw CSV fields.
raw_columns = ["timestamp", "comment_id", "parent_text", "comment_body"]
dataset = dataset.map(formatting_prompts_func, batched=True).remove_columns(raw_columns)

# Echo the same sample row again, now in its formatted form.
formatted_row = dataset[n_example]
print(json.dumps(formatted_row, indent=2))

In [None]:
# Batch geometry: each optimizer step consumes batch_size * accum_steps rows.
batch_size = 2
accum_steps = 4
samples_per_step = batch_size * accum_steps

# Whole optimizer steps in one pass over the data (remainder rows are not counted),
# with one tenth of them used as warmup.
dataset_length = len(dataset)
training_steps = dataset_length // samples_per_step
warmup_steps = training_steps // 10

print(
    f"dataset length: {dataset_length}",
    f"training steps: {training_steps}",
    f"warmup steps:   {warmup_steps}",
    sep="\n",
)

In [None]:
# Probe bf16 support once; fp16 is the fallback when bf16 is unavailable.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir=tuned_model_checkpoints_dir,
    seed=42,
    # Adjust batch size and accumulation steps to the size of the model.
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=accum_steps,
    # Run length — full training run: one epoch, checkpoint every 500 steps.
    num_train_epochs=1,
    warmup_steps=warmup_steps,
    save_strategy="steps",
    save_steps=500,
    # For a short training run, replace the run-length settings above with:
    #   max_steps=50, warmup_steps=5, save_steps=10
    logging_strategy="steps",
    logging_steps=1,
    # Optimizer and schedule.
    optim="adamw_8bit",
    learning_rate=1e-4,
    lr_scheduler_type="linear",
    weight_decay=0.01,
    # Mixed precision: exactly one of bf16 / fp16 is enabled.
    bf16=use_bf16,
    fp16=not use_bf16,
)

# Supervised fine-tuning over the pre-formatted "text" column, without packing.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

In [None]:
def _has_resumable_checkpoint(checkpoints_dir):
    """Return True if `checkpoints_dir` holds at least one `checkpoint-<step>` directory.

    A missing directory counts as "no checkpoint" instead of raising, so a first
    run does not depend on the directory having been created already. Only
    sub-directories named `checkpoint-` followed by digits are accepted, so that a
    stray file or oddly named entry does not trigger a resume that then fails.
    """
    if not os.path.isdir(checkpoints_dir):
        return False
    prefix = "checkpoint-"
    return any(
        entry.startswith(prefix)
        and entry[len(prefix):].isdigit()
        and os.path.isdir(os.path.join(checkpoints_dir, entry))
        for entry in os.listdir(checkpoints_dir)
    )


# Resume when the checkpoints directory already contains a checkpoint-<step>
# directory; otherwise start a fresh run.
if _has_resumable_checkpoint(tuned_model_checkpoints_dir):
    print("Resuming from checkpoint")
    trainer_stats = trainer.train(resume_from_checkpoint=True)
else:
    print("Starting from scratch")
    trainer_stats = trainer.train()

In [None]:
# Write the trained model and its tokenizer side by side into the save directory.
# NOTE(review): since `model` was wrapped by get_peft_model, this presumably stores
# only the LoRA adapter weights rather than a merged model — confirm.
model.save_pretrained(tuned_model_save_dir)
tokenizer.save_pretrained(tuned_model_save_dir)

Generating an Ollama-compatible GGUF also requires the creation of a Modelfile.

To have the Modelfile generated automatically, `tokenizer.apply_chat_template()` must have been run earlier in the notebook. This notebook does not do that, so you must create the Modelfile manually.

Look at the Gemma 3 notebook for hints.

In [10]:
# Export the model together with its tokenizer to the GGUF directory, using the
# "q4_0" quantization method. No Modelfile is written by this notebook.
model.save_pretrained_gguf(tuned_model_gguf_dir, tokenizer, quantization_method="q4_0")

In [None]:
# Pretty-print the metrics carried by the object that trainer.train() returned.
print(json.dumps(trainer_stats.metrics, indent=2))