In [None]:
from unsloth import FastLanguageModel
import torch

In [None]:
# Attach LoRA adapters to the already-loaded model through Unsloth's PEFT helper.
model = FastLanguageModel.get_peft_model(
    model,
    # LoRA rank: any value > 0 works; suggested values are 8, 16, 32, 64, 128.
    r=16,
    # Names of the projection modules that receive adapters.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  # any value is supported, but 0 is the optimized setting
    bias="none",  # any value is supported, but "none" is the optimized setting
    # "unsloth" checkpointing is advertised as using 30% less VRAM and fitting
    # 2x larger batch sizes; pass True or "unsloth" for very long context.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=True,  # rank-stabilized LoRA is supported
    loftq_config=None,  # LoftQ is supported as well; not used here
)

In [None]:

from unsloth.chat_templates import get_chat_template
import json
# Apply the "llama-3.1" chat template to the tokenizer.
# NOTE: for some reason this mapping does not work when the chat template is
# llama; formatting_prompts_func below performs the same key/role renaming by hand.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="llama-3.1",
    mapping={
        "role": "from",
        "content": "value",
        "user": "speaker0",
        "assistant": "speaker1",
    },
)

def formatting_prompts_func(examples):
    """Render a batch of JSON-encoded conversations as chat-template text.

    Args:
        examples: Batch mapping whose "conversations" entry is an iterable of
            JSON strings. Each string decodes to a list of turn dicts that
            carry a "from" key (speaker label) and a "value" key (message).

    Returns:
        A dict with a single "text" key holding one rendered string per
        conversation, in input order.

    Raises:
        KeyError: If a turn lacks the "from" or "value" key.
    """
    # Phase 1: decode every conversation before touching any of them.
    parsed = list(map(json.loads, examples["conversations"]))

    # Phase 2: rename "from"/"value" to "role"/"content" in place. Only
    # "speaker0" becomes "user"; every other label is treated as "assistant".
    for turns in parsed:
        for turn in turns:
            speaker = turn["from"]
            message = turn["value"]
            del turn["from"], turn["value"]
            turn["role"] = "user" if speaker == "speaker0" else "assistant"
            turn["content"] = message

    # Phase 3: render each conversation with the module-level tokenizer.
    rendered = [
        tokenizer.apply_chat_template(
            turns,
            tokenize=False,
            add_generation_prompt=False,
        )
        for turns in parsed
    ]
    return {"text": rendered}

from datasets import load_dataset
# Load the "train" split, then run formatting_prompts_func over it in batches;
# the function returns a "text" entry for each batch.
dataset = load_dataset(dataset_name, split="train")
dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
)

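# Debugging aid (disabled): print every raw conversation that fails to render
# with the chat template.
# for convo in dataset['conversations']:
#     try:
#         texts = tokenizer.apply_chat_template(json.loads(convo), tokenize = False, add_generation_prompt = False)
#     except:
#         print(convo)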
    

In [None]:
from unsloth.chat_templates import train_on_responses_only
# Wrap the trainer with Unsloth's train_on_responses_only, passing the header
# strings that open a user turn and an assistant turn respectively.
trainer = train_on_responses_only(
    trainer,
    instruction_part="<|start_header_id|>user<|end_header_id|>\n\n",
    response_part="<|start_header_id|>assistant<|end_header_id|>\n\n",
)

In [None]:
# Decode the input_ids of training example 5 back into text for inspection.
tokenizer.decode(
    trainer.train_dataset[5]["input_ids"]
)

In [None]:
#@title Show current memory stats
# Report device 0's name and total memory, plus the maximum memory reserved so
# far; byte counts are converted to GB (1024 ** 3 bytes) and rounded to 3 places.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024 ** 3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

In [None]:
# Run training; whatever trainer.train() returns is kept in trainer_stats.
trainer_stats = trainer.train()