In [None]:
# Mount Google Drive at /content/drive; OUTPUT_DIR (defined in a later cell) points
# at a folder under this mount, so checkpoints are written to Drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

from google.colab import userdata

# Weights & Biases API key, read from the Colab secret named "WANDB_TOKEN".
wandb_token = userdata.get("WANDB_TOKEN")
if not wandb_token:
    raise ValueError('Colab secret "WANDB_TOKEN" is empty or missing.')

# Hand the key to wandb through its environment variable instead of
# interpolating the secret into a shell command line (`!wandb login {token}`),
# where it is exposed as a process argument and subject to shell parsing.
# wandb reads WANDB_API_KEY when a run is initialised in this kernel.
os.environ["WANDB_API_KEY"] = wandb_token

In [None]:
# Upgrade datasets, huggingface_hub and fsspec to their latest versions (quiet output).
# NOTE(review): versions are not pinned here, so results may differ between runs.
!pip install -U datasets huggingface_hub fsspec --quiet

In [None]:
%%capture
# Install unsloth. The cell magic above has to remain the first line of the cell;
# it captures the installer output instead of displaying it.
import os
# Colab is detected by looking for any environment variable whose name contains "COLAB_".
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # The --no-deps installs below pin xformers and trl to specific versions.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [None]:
# ⬇️  Run this cell by itself in Colab
# `userdata` is imported here as well so the cell really can run on its own;
# previously it only worked if the W&B cell had been executed first.
from google.colab import userdata
from huggingface_hub import login

# Hugging Face token, read from the Colab secret named "HF_TOKEN".
# It is reused later when pushing the trained model to the Hub.
hf_token = userdata.get("HF_TOKEN")

# 1️⃣  Authenticate (generate a write-token at https://huggingface.co/settings/tokens)
login(hf_token)

In [None]:
# Run identifiers. SIZE selects the Qwen3 checkpoint name and the Hub repo ids;
# VERSION is used in the output directory name.
VERSION = 6
SIZE = "14B"

In [None]:
from unsloth import FastLanguageModel
import torch

# Reference list of checkpoints on the unsloth hub. It is informational only:
# the from_pretrained call below builds its model name from SIZE and never reads this list.
fourbit_models = [
    "unsloth/Qwen3-1.7B-unsloth-bnb-4bit", # Qwen3 1.7B, pre-quantized 4-bit
    "unsloth/Qwen3-4B-unsloth-bnb-4bit",
    "unsloth/Qwen3-8B-unsloth-bnb-4bit",
    "unsloth/Qwen3-14B-unsloth-bnb-4bit",
    "unsloth/Qwen3-32B-unsloth-bnb-4bit",

    # 4bit dynamic quants for superior accuracy and low memory use
    "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
    "unsloth/Phi-4",
    "unsloth/Llama-3.1-8B",
    "unsloth/Llama-3.2-3B",
    "unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit" # [NEW] We support TTS models!
] # More models at https://huggingface.co/unsloth

# Load the base model and tokenizer; 4-bit, 8-bit and full fine-tuning are all
# switched off in this call.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = f"unsloth/Qwen3-{SIZE}",
    max_seq_length = 5500,   # Context length - can be longer, but uses more memory
    load_in_4bit = False,     # Disabled here; 4bit uses much less memory
    load_in_8bit = False,    # A bit more accurate, uses 2x memory
    full_finetuning = False, # We have full finetuning now!
    # token = "hf_...",      # use one if using gated models
)

In [None]:
# LoRA rank; lora_alpha below is derived from it (2 * rank).
lora_r = 16

# Replace `model` with its PEFT-wrapped version, attaching LoRA adapters to the
# listed target modules.
model = FastLanguageModel.get_peft_model(
    model,
    r = lora_r,           # Choose any number > 0! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = lora_r * 2,  # Best to choose alpha = rank or rank*2
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,   # We support rank stabilized LoRA
    loftq_config = None,  # And LoftQ
)

In [None]:
# Display the tokenizer's end-of-sequence token (inspection only; nothing is assigned).
tokenizer.eos_token

In [None]:
from datasets import load_dataset, concatenate_datasets

In [None]:
from jinja2 import Template

# Jinja2 template for the system message. It is rendered with three variables:
#   role      - inserted verbatim as the first line
#   metadata  - inserted under the "# Metadata" heading
#   functions - an iterable; each item is written on its own line inside <tools></tools>
# The trailing "# Important" section is fixed text and takes no variables.
SYSTEM_PROMPT = Template("""\
{{ role }}

# Metadata

{{ metadata }}

# Tools

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{% for item in functions %}
{{- item }}
{% endfor %}</tools>

# Important

- Reply in 1-4 sentences
- Sound natural
- Respect your persona
- Show genuine curiosity
- Ask follow-up questions""")

In [None]:
# Hugging Face Hub dataset repo id, used for the commit-hash lookup and for loading both splits.
DATASET = "nuriyev/cpdc-agent"

In [None]:
from huggingface_hub import HfApi

# Look up the dataset repo's metadata and record its `sha` for provenance.
# NOTE(review): the hash is only printed; the load_dataset calls in the next cell
# pass no revision, so the loaded data is not pinned to this commit.
api = HfApi()
dataset_info = api.dataset_info(DATASET)
dataset_commit_hash = dataset_info.sha

print(f"Dataset commit hash: {dataset_commit_hash}")

In [None]:
import json
from datasets import Dataset

# Load the "train" and "test" splits of DATASET (in that order), authenticating
# with the stored Hub token; the "test" split serves as the evaluation set.
train_dataset, eval_dataset = (
    load_dataset(DATASET, split=split_name, token=True)
    for split_name in ("train", "test")
)

def preprocess(example):
    """Prepend a rendered system message to one dataset row's conversation.

    The row's "persona", "knowledge", "state" and "functions" columns are
    JSON strings and are decoded here; "worldview" and "role" are used as-is.

    Args:
        example: one dataset row with the keys "worldview", "persona",
            "knowledge", "state", "functions", "role" and "messages".

    Returns:
        dict: {"messages": [system_message, *original messages]}.
    """
    # Key order matters: it fixes the order of fields in the serialized metadata.
    metadata = {
        "worldview": example["worldview"],
        "persona": json.loads(example["persona"]),
        "knowledge": json.loads(example["knowledge"]),
        "state": json.loads(example["state"]),
    }

    # Only the name and description of each function reach the prompt.
    tool_specs = []
    for fn in json.loads(example["functions"]):
        spec = {"name": fn["name"], "description": fn["description"]}
        tool_specs.append(json.dumps(spec, ensure_ascii=False))

    system_message = {
        "role": "system",
        "content": SYSTEM_PROMPT.render(
            role=example["role"],
            metadata=json.dumps(metadata, ensure_ascii=False),
            functions=tool_specs,
        ),
    }
    return {"messages": [system_message, *example["messages"]]}

# Add the system message to every conversation, then keep only the "messages" column.
train_dataset = train_dataset.map(preprocess).select_columns(["messages"])
eval_dataset = eval_dataset.map(preprocess).select_columns(["messages"])

def convert_set_to_input_labels(dataset):
    """Expand conversations into per-turn (prompt, completion) samples.

    Works in three stages:
      1. Walk each conversation in (user, assistant) pairs starting at index 1
         and emit one sample per pair: the prompt is the history up to and
         including the user message, the completion is the assistant message.
      2. In prompts longer than two messages, drop earlier messages whose
         content contains "<tool_call>" or "<tool_response>". The first message
         and the last two messages of the prompt are always kept.
      3. Discard samples whose completion contains "<tool_call>".

    Args:
        dataset: iterable of rows, each with a "messages" list of dicts that
            carry a "content" string.

    Returns:
        Dataset: rows with "prompt" (list of messages) and "completion"
        (single-element list holding the assistant message).
    """
    tool_markers = ("<tool_call>", "<tool_response>")

    # Stage 1: one sample per (user, assistant) pair.
    turn_samples = []
    for conversation in dataset:
        turns = conversation["messages"]
        history = [turns[0]]
        try:
            for user_idx in range(1, len(turns), 2):
                # Looking up the assistant turn raises IndexError when the
                # conversation ends on an unanswered message.
                user_turn, assistant_turn = turns[user_idx], turns[user_idx + 1]
                history.append(user_turn)
                turn_samples.append(
                    {"prompt": list(history), "completion": [assistant_turn]}
                )
                history.append(assistant_turn)
        except IndexError as err:
            # Samples already emitted for this conversation are kept; the
            # error and the offending conversation are printed for inspection.
            print(err)
            print(conversation)

    # Stage 2: strip tool traffic from the older part of each prompt.
    pruned_samples = []
    for sample in turn_samples:
        prompt = sample["prompt"]
        if len(prompt) <= 2:
            pruned_samples.append(sample)
            continue

        older_turns = [
            turn
            for turn in prompt[1:-2]
            if not any(marker in turn["content"] for marker in tool_markers)
        ]
        pruned_samples.append(
            {
                "prompt": [prompt[0], *older_turns, *prompt[-2:]],
                "completion": sample["completion"],
            }
        )

    # Stage 3: the model is not trained to produce tool calls as completions.
    final_samples = [
        sample
        for sample in pruned_samples
        if "<tool_call>" not in sample["completion"][-1]["content"]
    ]

    return Dataset.from_list(final_samples)

# Turn both splits into per-turn prompt/completion samples (train first, then eval).
train_dataset, eval_dataset = (
    convert_set_to_input_labels(train_dataset),
    convert_set_to_input_labels(eval_dataset),
)

In [None]:
# Number of per-turn training samples after conversion.
len(train_dataset)

In [None]:
# Print the content of the first prompt message (the rendered system prompt) of one sample.
# NOTE(review): index 401 is hard-coded and assumes the training set has at least 402 samples.
print(train_dataset[401]['prompt'][0]['content'])

In [None]:
# Show one training sample, skipping the first prompt message (the system prompt).
index = 102
dict(
    prompt=train_dataset[index]['prompt'][1:],
    completion=train_dataset[index]['completion'],
)

In [None]:
# Show the next training sample, skipping the first prompt message (the system prompt).
index = 103
dict(
    prompt=train_dataset[index]['prompt'][1:],
    completion=train_dataset[index]['completion'],
)

In [None]:
# Show a third training sample, skipping the first prompt message (the system prompt).
index = 104
dict(
    prompt=train_dataset[index]['prompt'][1:],
    completion=train_dataset[index]['completion'],
)

In [None]:
# Sanity check on both splits: no completion may contain a message whose
# role is anything other than "assistant".
for split in (train_dataset, eval_dataset):
    for sample in split:
        assert not any(message["role"] != "assistant" for message in sample["completion"])

In [None]:
def preprocess(example):
    """Flatten one prompt/completion sample into two plain strings.

    The prompt messages are rendered with the tokenizer's chat template
    (generation prompt appended, thinking disabled); the completion becomes
    the content of its last message.

    Note: this definition reuses the name of the earlier `preprocess` that
    builds the system message, and replaces it once this cell has run.

    NOTE(review): the completion is the raw message content; neither this
    function nor formatting_func appends an end-of-turn/EOS token. Confirm
    the trainer adds one, otherwise the model may not learn where to stop.

    Args:
        example: row with "prompt" (list of chat messages) and "completion"
            (list of messages; only the last one is used).

    Returns:
        dict: {"prompt": str, "completion": str}.
    """
    rendered_prompt = tokenizer.apply_chat_template(
        example['prompt'],
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )
    final_message = example["completion"][-1]
    return {"prompt": rendered_prompt, "completion": final_message["content"]}

# Render every sample to plain strings. preprocess returns the keys "prompt" and
# "completion", i.e. the same names as the existing message-list columns.
# NOTE(review): this cell is presumably not safe to re-run on its own output, since
# preprocess would then receive strings where it expects message lists - confirm.
train_dataset = train_dataset.map(preprocess)
eval_dataset  = eval_dataset.map(preprocess)

In [None]:
def formatting_func(examples):
    """Join each prompt with its completion into one training string.

    Args:
        examples: mapping with "prompt" and "completion" entries. Either a
            batch (both entries are equally long lists of strings) or a single
            row (both entries are plain strings).

    Returns:
        list[str]: one concatenated string per row; a single row yields a
        one-element list.
    """
    prompts, completions = examples['prompt'], examples['completion']

    if isinstance(prompts, str):
        # Single row: indexing the strings would pair up individual characters.
        return [prompts + completions]

    # Pair the two columns directly. `len(examples)` is the number of columns
    # in the batch mapping, not the number of rows, so it must not drive the loop.
    return [prompt + completion for prompt, completion in zip(prompts, completions)]

In [None]:
# Trainer output directory on the mounted Drive, named after the model SIZE and run VERSION.
OUTPUT_DIR = f"/content/drive/My Drive/cpdc/models/qwen-{SIZE}-finetuned_persona_v{VERSION}"

In [None]:
# Early stopping is currently disabled: its import, the callback list and the
# related SFTConfig options are all commented out below.
# from transformers import EarlyStoppingCallback
from trl import SFTTrainer, SFTConfig

# callbacks = [EarlyStoppingCallback(
#     early_stopping_patience = 3,   # 3 eval‑checks with no improvement
#     early_stopping_threshold = 0.0 # need strictly better eval_loss
# )]

# Build the supervised fine-tuning trainer. Training text comes from
# formatting_func, which concatenates each sample's prompt and completion.
# NOTE(review): no max_seq_length is passed to SFTConfig; confirm the effective
# truncation length matches the 5500 used when loading the model.
# NOTE(review): checkpoints are saved every 20 steps into OUTPUT_DIR on Drive and
# no save_total_limit is set - check available Drive space.
trainer = SFTTrainer(
    model = model,
    formatting_func = formatting_func,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    # callbacks=callbacks,
    args = SFTConfig(
        num_train_epochs   = 2,   # runs both epochs in full: early stopping is commented out
        output_dir=OUTPUT_DIR,
        # load_best_model_at_end = True,
        # metric_for_best_model = "eval_loss",
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 4, # Use GA to mimic batch size!
        # warmup_ratio=0.15,
        warmup_steps=10,
        learning_rate = 1e-4,
        eval_strategy="steps",
        save_strategy="steps",
        logging_steps=30,
        save_steps=20,
        eval_steps=20,
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        report_to = "wandb", # Use this for WandB etc
    ),
)

In [None]:
# @title Show current memory stats
# Baseline GPU figures taken before training. start_gpu_memory and max_memory
# are read again by the stats cell that runs after training.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024**3, 3)  # bytes -> GB
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)  # bytes -> GB
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

In [None]:
# Run training and keep the result; the stats cell below reads
# trainer_stats.metrics['train_runtime'].
trainer_stats = trainer.train()
# Alternative call kept for reference: continue from a saved checkpoint.
# trainer_stats = trainer.train(resume_from_checkpoint = True)

In [None]:
# @title Show final memory and time stats
# Peak reserved GPU memory after training, compared with the baseline
# (start_gpu_memory) and the device total (max_memory) recorded before training.
used_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)  # bytes -> GB
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

train_seconds = trainer_stats.metrics['train_runtime']
print(f"{train_seconds} seconds used for training.")
print(f"{round(train_seconds/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

In [None]:
# Remove the dataset names from the namespace; later cells only use the model and tokenizer.
del train_dataset, eval_dataset

In [None]:
# Upload the LoRA adapter to the Hub repo named after SIZE.
# NOTE(review): tokenizer is passed as the second positional argument together with
# save_method="lora"; confirm this matches the push_to_hub signature of the
# installed unsloth/peft versions.
REPO_ID = f"nuriyev/qwen3-{SIZE}-cpdc-persona-lora"
model.push_to_hub(REPO_ID, tokenizer, save_method = "lora", token = hf_token, commit_message="fine-tune, new sys prompt (2 epochs)")

In [None]:
# Upload the merged 16-bit model to its own Hub repo.
# REPO_ID is reassigned here, so the commit-lookup cell that follows reports on
# whichever of the two push cells ran last.
REPO_ID = f"nuriyev/qwen3-{SIZE}-cpdc-persona"
model.push_to_hub_merged(REPO_ID, tokenizer, save_method = "merged_16bit", token = hf_token, commit_message="mini fine-tune")

In [None]:
from huggingface_hub import HfApi

api = HfApi()

# Get the latest commit hash on the 'main' branch of the repo pushed last
# (REPO_ID holds whichever repo id was assigned most recently). The first
# entry of the returned list is taken as the newest commit.
commit_info = api.list_repo_commits(repo_id=REPO_ID, revision="main")
commit_hash = commit_info[0].commit_id

print(f"✅  Uploaded to https://huggingface.co/{REPO_ID}")
print(f"📌  Commit hash: {commit_hash}")
# Build an actual URL; the previous version printed the whole commit list object here.
print(f"🔗  Full commit URL: https://huggingface.co/{REPO_ID}/commit/{commit_hash}")