In [1]:
# Expose only the CUDA device with index 1 to this kernel. This cell runs before
# torch / unsloth are imported further down.
%env CUDA_VISIBLE_DEVICES=1
# Turn off parallelism inside the `tokenizers` library.
# NOTE(review): presumably to avoid fork-related warnings when dataset mapping
# uses worker processes (dataset_num_proc) — confirm.
%env TOKENIZERS_PARALLELISM=false

env: CUDA_VISIBLE_DEVICES=1
env: TOKENIZERS_PARALLELISM=false


In [2]:
# Domain fine-tuned in this run. It selects which entry of the prepared
# `dataset` dict is trained on and the models/<dataset_type>/ output
# sub-directory; the prepared dict has the keys 'cars' and 'sleep'.
dataset_type = 'sleep'

In [3]:
# Project root: data files are read from {BASE_PATH}/data and checkpoints are
# written under {BASE_PATH}/models.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
BASE_PATH = "/home/stepan/cars-sleep-chatbot"
# Alternative model source: a locally saved fine-tuned checkpoint for the
# selected domain (kept commented out; the hub model below is used instead).
# MODEL_ID = f"{BASE_PATH}/models/{dataset_type}/llama-3_2-1b-it"
# Model identifier handed to FastLanguageModel.from_pretrained.
MODEL_ID = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
# NOTE(review): only used below to derive MAX_SEQ_LENGTH in the visible cells;
# presumably the generation budget reserved for inference — confirm.
MAX_NEW_TOKENS = 8192
# Sequence length passed to the model loader and the trainer:
# 32768 minus MAX_NEW_TOKENS (= 24576).
MAX_SEQ_LENGTH = 32768 - MAX_NEW_TOKENS

In [4]:
import json

import torch  # type: ignore
import numpy as np  # type: ignore

from datasets import DatasetDict, Dataset  # type: ignore

from unsloth import FastLanguageModel  # type: ignore

from tqdm.auto import tqdm  # type: ignore

from trl import SFTTrainer  # type: ignore
from transformers import TrainingArguments  # type: ignore
from unsloth import is_bfloat16_supported  # type: ignore
from datasets import Dataset, DatasetDict  # type: ignore

  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [5]:
# Prompt templates per domain.
#   'system' - system message describing the assistant's expertise for the domain
#   'basic'  - user-turn template; expects the placeholder {question}
#   'rag'    - user-turn template with retrieved context; expects {resources} and {question}
#
# The 'system' texts were previously attached to the wrong keys ('cars' carried the
# sleep-science prompt and 'sleep' the automobile-history prompt), so every domain was
# trained under the other domain's system message. Each key now holds its own prompt.
# Models fine-tuned with the swapped prompts should be retrained, and any inference
# code must use the same mapping as training.
PROMPTS = {
    'cars': {
        'system': "You are an expert in the history of automobiles with in-depth knowledge of the development of automobiles from the late 19th century to the present day. Your task is to generate insightful and varied answers on automobile history. The answers should be diverse in complexity, suitable for learners and experts alike.",
        'basic': "Human: Generate me an answer to the given question: {question}\n\nAssistant:",
        'rag': "Use resources provided to answer the following question.\nResources: {resources}\n\nHuman: Generate me an answer to the given question: {question}\n\nAssistant:",
    },
    'sleep': {
        'system': "You are an expert in sleep science with in-depth knowledge of sleep physiology, circadian rhythms, sleep disorders, and the impact of sleep on health and cognitive performance. Your task is to generate insightful and varied answers on sleep-related topics. The answers should be diverse in complexity, suitable for learners and experts alike.",
        'basic': "Human: Generate me an answer to the given question: {question}\n\nAssistant:",
        'rag': "Use resources provided to answer the following question.\nResources: {resources}\n\nHuman: Generate me an answer to the given question: {question}\n\nAssistant:",
    }
}

In [6]:
def get_model_tokenizer(dtype=None, load_in_4bit=True, add_lora=False):
    """Load the model named by ``MODEL_ID`` together with its tokenizer.

    Args:
        dtype: Forwarded unchanged to ``FastLanguageModel.from_pretrained``.
        load_in_4bit: Forwarded unchanged to ``FastLanguageModel.from_pretrained``.
        add_lora: When true, wrap the loaded model with LoRA adapters via
            ``FastLanguageModel.get_peft_model`` before returning it.

    Returns:
        A ``(model, tokenizer)`` tuple. ``model`` is the PEFT-wrapped model when
        ``add_lora`` is true, otherwise the model exactly as loaded.
    """
    loaded_model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=MODEL_ID,
        max_seq_length=MAX_SEQ_LENGTH,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
        attn_implementation="flash_attention_2",
        device_map="auto",
    )

    # Nothing more to do when the caller wants the plain model.
    if not add_lora:
        return loaded_model, tokenizer

    # Attention projections first, then the MLP projections.
    attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
    mlp_projections = ["gate_proj", "up_proj", "down_proj"]

    lora_model = FastLanguageModel.get_peft_model(
        loaded_model,
        r=16,
        target_modules=attention_projections + mlp_projections,
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=False,
        loftq_config=None,
    )
    return lora_model, tokenizer

In [7]:
def train(f):
    """Decorator: put the model into Unsloth training mode before calling ``f``.

    The decorated function must accept ``model`` and ``tokenizer`` as its first
    two positional parameters; any further positional or keyword arguments are
    passed through untouched, and the return value of ``f`` is returned as is.

    ``functools.wraps`` keeps the wrapped function's ``__name__``, ``__doc__``
    and ``__wrapped__`` so that help(), tracebacks and introspection still
    refer to the original function rather than to ``wrapper``.
    """
    import functools  # local import keeps this cell self-contained

    @functools.wraps(f)
    def wrapper(model, tokenizer, *args, **kwargs):
        # Switch the model to training mode right before every call.
        FastLanguageModel.for_training(model)
        return f(model, tokenizer, *args, **kwargs)

    return wrapper

In [8]:
# Load the base model (4-bit is the loader's default here) and attach LoRA
# adapters so it can be fine-tuned.
model, tokenizer = get_model_tokenizer(add_lora=True)

==((====))==  Unsloth 2024.9.post3: Fast Llama patching. Transformers = 4.45.1.
   \\   /|    GPU: NVIDIA RTX A5000. Max memory: 23.679 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 8.6. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post1. FA2 = True]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unsloth 2024.9.post3 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


In [9]:
def load_data(file_path):
    """Read a JSON file and return the parsed content.

    The file is decoded as UTF-8 explicitly instead of relying on the platform's
    locale encoding, so non-ASCII characters in questions/answers load the same
    way on every machine.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)


def to_dataset(data):
    """Turn an iterable of question/answer records into a column-oriented ``Dataset``.

    Args:
        data: Iterable of mappings, each providing a ``"question"`` and an
            ``"answer"`` entry.

    Returns:
        A ``Dataset`` with the two columns ``"question"`` and ``"answer"``,
        rows in the same order as ``data``.

    Raises:
        KeyError: If a record lacks ``"question"`` or ``"answer"``.
    """
    # Single pass over the records, reading question before answer.
    pairs = [(record["question"], record["answer"]) for record in data]

    columns = {
        "question": [question for question, _ in pairs],
        "answer": [answer for _, answer in pairs],
    }
    return Dataset.from_dict(columns)


def prepare_dataset(tokenizer, base_path=None, final_training=False):
    """Build chat-formatted train/(val)/test splits for the 'cars' and 'sleep' domains.

    For each domain the training and test JSON files under ``{base_path}/data``
    are loaded, converted to ``Dataset`` objects and extended with two columns:
    ``"messages"`` (system/user/assistant turns built from ``PROMPTS``) and
    ``"texts"`` (those turns rendered with the tokenizer's chat template).

    Args:
        tokenizer: Tokenizer whose ``apply_chat_template`` renders the chats.
        base_path: Directory that contains the ``data`` folder.
        final_training: When true, the whole training file is used as ``"train"``.
            Otherwise 30% of it (seed 42) is split off as ``"val"``.

    Returns:
        ``{"cars": DatasetDict, "sleep": DatasetDict}``; each ``DatasetDict`` has
        ``"train"`` and ``"test"``, plus ``"val"`` when ``final_training`` is false.
    """
    domains = ("cars", "sleep")

    # Raw records: both training files first, then both held-out test files.
    raw_train = {
        "cars": load_data(f"{base_path}/data/cars_qa.json"),
        "sleep": load_data(f"{base_path}/data/sleep_qa.json"),
    }
    raw_test = {
        "cars": load_data(f"{base_path}/data/test_qa_car.json"),
        "sleep": load_data(f"{base_path}/data/test_qa_sleep.json"),
    }

    plain_train = {domain: to_dataset(raw_train[domain]) for domain in domains}
    plain_test = {domain: to_dataset(raw_test[domain]) for domain in domains}

    def build_messages(question, answer, dataset_type='cars'):
        # One system / user / assistant exchange for a single Q&A pair.
        domain_prompts = PROMPTS[dataset_type]
        system_turn = {"role": "system", "content": domain_prompts['system']}
        user_turn = {
            "role": "user",
            "content": domain_prompts['basic'].format(question=question),
        }
        assistant_turn = {"role": "assistant", "content": answer}
        return [system_turn, user_turn, assistant_turn]

    def render_batch(examples, dataset_type):
        # Build every chat of the batch first, then render them to plain text.
        chats = []
        for question, answer in zip(examples["question"], examples["answer"]):
            chats.append(build_messages(question, answer, dataset_type))

        texts = []
        for chat in chats:
            rendered = tokenizer.apply_chat_template(
                chat, tokenize=False, add_generation_prompt=False
            )
            texts.append(rendered)

        return {"texts": texts, "messages": chats}

    def add_chat_columns(plain, domain):
        return plain.map(lambda batch: render_batch(batch, domain), batched=True)

    # Same mapping order as before: cars/sleep train, then cars/sleep test.
    chat_train = {domain: add_chat_columns(plain_train[domain], domain) for domain in domains}
    chat_test = {domain: add_chat_columns(plain_test[domain], domain) for domain in domains}

    prepared = {}
    for domain in domains:
        splits = {}
        if final_training:
            splits["train"] = chat_train[domain]
        else:
            # train_test_split yields the pair (train, test); its "test" part
            # serves as the validation split here.
            train_part, val_part = chat_train[domain].train_test_split(
                test_size=0.3, seed=42
            ).values()
            splits["train"] = train_part
            splits["val"] = val_part
        splits["test"] = chat_test[domain]
        prepared[domain] = DatasetDict(splits)

    return prepared

In [10]:
# final_training=True: the full training files are used as "train" (no "val"
# split is created); the separate test files become "test".
dataset = prepare_dataset(tokenizer, base_path=BASE_PATH, final_training=True)
# Display the resulting splits for both domains.
dataset

Map: 100%|██████████| 118/118 [00:00<00:00, 13472.19 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 14889.49 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 7357.93 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 7490.32 examples/s]


{'cars': DatasetDict({
     train: Dataset({
         features: ['question', 'answer', 'texts', 'messages'],
         num_rows: 118
     })
     test: Dataset({
         features: ['question', 'answer', 'texts', 'messages'],
         num_rows: 26
     })
 }),
 'sleep': DatasetDict({
     train: Dataset({
         features: ['question', 'answer', 'texts', 'messages'],
         num_rows: 92
     })
     test: Dataset({
         features: ['question', 'answer', 'texts', 'messages'],
         num_rows: 27
     })
 })}

In [11]:
@train
def training(model, tokenizer, dataset, max_seq_length, dataset_type):
    """Fine-tune ``model`` on ``dataset["train"]`` with ``SFTTrainer``.

    Args:
        model: Model to train (the ``@train`` decorator switches it to training
            mode before this body runs).
        tokenizer: Tokenizer handed to the trainer.
        dataset: Mapping of splits. ``"train"`` is required; when a ``"val"``
            split is present it is used for periodic evaluation.
        max_seq_length: Maximum sequence length handed to the trainer.
        dataset_type: Domain name; only used to build the output directory
            ``{BASE_PATH}/models/{dataset_type}/llama-3_2-1b-it``.

    Returns:
        ``(trainer, stats)`` where ``stats`` is the value returned by
        ``trainer.train()``.

    Notes:
        All evaluation options are passed to the ``TrainingArguments``
        constructor rather than assigned afterwards, so they go through the
        arguments' own validation. The former ``save_best_model`` attribute is
        not a ``TrainingArguments`` field and had no effect; the equivalent
        option ``load_best_model_at_end`` is used instead. That option requires
        the save interval to be a multiple of the evaluation interval, so
        ``save_steps`` follows ``eval_steps`` when a validation split exists.
        Without a ``"val"`` split the configuration is unchanged.
    """
    trainer_kwargs = {
        "model": model,
        "tokenizer": tokenizer,
        "train_dataset": dataset["train"],
        "dataset_text_field": "texts",
        "max_seq_length": max_seq_length,
        "dataset_num_proc": 2,
        "packing": False,
    }

    # Prefer bf16 when the hardware supports it, otherwise fall back to fp16.
    use_bf16 = is_bfloat16_supported()

    args_kwargs = {
        "per_device_train_batch_size": 8,
        "gradient_accumulation_steps": 1,
        # NOTE(review): logging_steps exceeds max_steps, so no intermediate
        # training loss is logged during a 25-step run — confirm this is intended.
        "logging_steps": 100,
        "warmup_steps": 5,
        "max_steps": 25,
        "learning_rate": 2e-5,
        "fp16": not use_bf16,
        "bf16": use_bf16,
        "optim": "adamw_8bit",
        "weight_decay": 0.01,
        "lr_scheduler_type": "linear",
        "seed": 3407,
        "output_dir": f"{BASE_PATH}/models/{dataset_type}/llama-3_2-1b-it",
        "save_strategy": "steps",
        "save_steps": 250,
        "save_total_limit": 2,
    }

    if "val" in dataset:
        eval_steps = 100
        trainer_kwargs["eval_dataset"] = dataset["val"]
        args_kwargs.update(
            per_device_eval_batch_size=1,
            eval_strategy="steps",
            eval_steps=eval_steps,
            metric_for_best_model="eval_loss",
            load_best_model_at_end=True,
            # Must be a multiple of eval_steps for load_best_model_at_end.
            save_steps=eval_steps,
        )

    training_args = TrainingArguments(**args_kwargs)

    trainer = SFTTrainer(args=training_args, **trainer_kwargs)
    stats = trainer.train()
    return trainer, stats

In [12]:
# Fine-tune on the splits of the domain selected by `dataset_type`.
trainer, stats = training(model, tokenizer, dataset[dataset_type], max_seq_length=MAX_SEQ_LENGTH, dataset_type=dataset_type)
# Display the value returned by trainer.train().
stats

Map (num_proc=2): 100%|██████████| 92/92 [00:00<00:00, 97.26 examples/s] 
max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 92 | Num Epochs = 19
O^O/ \_/ \    Batch size per device = 1 | Gradient Accumulation steps = 8
\        /    Total batch size = 8 | Total steps = 200
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
100,1.0458
200,0.5638


TrainOutput(global_step=200, training_loss=0.8048007583618164, metrics={'train_runtime': 180.4266, 'train_samples_per_second': 8.868, 'train_steps_per_second': 1.108, 'total_flos': 5352882249707520.0, 'train_loss': 0.8048007583618164, 'epoch': 17.391304347826086})

In [13]:
# Save the trained model into the same directory that training() uses as the
# trainer's output_dir.
trainer.save_model(f"{BASE_PATH}/models/{dataset_type}/llama-3_2-1b-it")

In [13]:
# Archive the model directories of both domains. Only the domain selected by
# `dataset_type` is trained in this run, so the other directory must already
# exist from an earlier run.
# Because absolute paths are passed to zip, the archive entries contain the
# full directory path (e.g. home/<user>/...).
! zip -r {BASE_PATH}/models/cars.zip {BASE_PATH}/models/cars
! zip -r {BASE_PATH}/models/sleep.zip {BASE_PATH}/models/sleep

  adding: home/stepan/kaggle-arc-agi/models/cars/ (stored 0%)
  adding: home/stepan/kaggle-arc-agi/models/cars/llama-3_2-1b-it/ (stored 0%)
  adding: home/stepan/kaggle-arc-agi/models/cars/llama-3_2-1b-it/checkpoint-200/ (stored 0%)
  adding: home/stepan/kaggle-arc-agi/models/cars/llama-3_2-1b-it/checkpoint-200/optimizer.pt (deflated 11%)
  adding: home/stepan/kaggle-arc-agi/models/cars/llama-3_2-1b-it/checkpoint-200/training_args.bin (deflated 51%)
  adding: home/stepan/kaggle-arc-agi/models/cars/llama-3_2-1b-it/checkpoint-200/adapter_config.json (deflated 54%)
  adding: home/stepan/kaggle-arc-agi/models/cars/llama-3_2-1b-it/checkpoint-200/special_tokens_map.json (deflated 71%)
  adding: home/stepan/kaggle-arc-agi/models/cars/llama-3_2-1b-it/checkpoint-200/rng_state.pth (deflated 25%)
  adding: home/stepan/kaggle-arc-agi/models/cars/llama-3_2-1b-it/checkpoint-200/trainer_state.json (deflated 57%)
  adding: home/stepan/kaggle-arc-agi/models/cars/llama-3_2-1b-it/checkpoint-200/tokenizer