In [1]:
# Model loading
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
# Local checkpoint directory, given relative to the notebook.
model_path = "../TinyLlama-1.1B-checkpoint"

# The tokenizer does not depend on the quantization settings, so load it first.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 4-bit NF4 quantization with double quantization; bfloat16 is the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Load the causal LM with the quantization config above; device placement is
# left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    quantization_config=bnb_config,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Gradient checkpointing and kbit training
from peft import prepare_model_for_kbit_training

# Enable gradient checkpointing on the loaded model before handing it to PEFT.
model.gradient_checkpointing_enable()
# Rebind `model` to whatever PEFT's k-bit preparation helper returns; later
# cells operate on this object.
# NOTE(review): presumably this readies the 4-bit quantized weights for
# training -- confirm against the PEFT documentation.
model = prepare_model_for_kbit_training(model)

In [3]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Walks ``model.named_parameters()`` once, summing ``numel()`` over every
    parameter and, separately, over those with ``requires_grad`` set, then
    prints both totals and the trainable share as a percentage.

    Args:
        model: any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs, where ``param`` has ``numel()`` and
            ``requires_grad``.

    Returns:
        None. The summary is written to stdout.
    """
    # NOTE(review): totals are whatever numel() reports; for packed 4-bit
    # weights this presumably undercounts the logical parameter count -- confirm.
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # A model without parameters would otherwise raise ZeroDivisionError here.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [4]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: rank-8 update matrices, scaling alpha of 32, 5% dropout
# on the LoRA layers, and no bias terms trained.
config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    # "lm" is not one of PEFT's task types; the model loaded above is an
    # AutoModelForCausalLM, so the matching task type is "CAUSAL_LM".
    task_type="CAUSAL_LM",
)

# Wrap the model with the LoRA adapters and report how many parameters will be
# trained. Note this rebinds `model`, so re-running the cell wraps it again.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 1126400 || all params: 616736768 || trainable%: 0.18263869748722358


In [5]:
# Make the training dataset from ./data/Q2_iCliniq_data.json. 
from datasets import load_dataset
# Read the JSON records into a dataset dict (the "train" split is what the
# trainer cell indexes later).
training_data = load_dataset("json", data_files="./data/Q2_iCliniq_data.json")


def _format_and_tokenize(sample):
    """Render one record as a Human/Assistant prompt and tokenize it."""
    prompt = f"### Human: {sample['input']} ### Assistant: {sample['answer_icliniq']}"
    return tokenizer(prompt)


# Add the tokenizer outputs to every record, one sample at a time.
training_data = training_data.map(_format_and_tokenize)

In [6]:
import transformers

# Settings for a short run: 100 optimizer steps, batch size 1 per device with
# 4 gradient-accumulation steps, fp16 mixed precision, paged 8-bit AdamW.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    max_steps=100,
    warmup_steps=2,
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_8bit",
    fp16=True,
    logging_steps=1,
)

# Language-modeling collator with masked-LM disabled (mlm=False).
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

# Only the "input_ids" column of the train split is handed to the trainer.
trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=training_data["train"]["input_ids"],
    data_collator=causal_lm_collator,
)

model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

comet_ml is installed but `COMET_API_KEY` is not set.
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss
1,3.14
2,3.2563
3,3.2322
4,3.2073
5,2.995
6,3.0157
7,3.1729
8,3.2004
9,2.8125
10,2.864


TrainOutput(global_step=100, training_loss=2.601226518154144, metrics={'train_runtime': 96.4881, 'train_samples_per_second': 4.146, 'train_steps_per_second': 1.036, 'total_flos': 645214594744320.0, 'train_loss': 2.601226518154144, 'epoch': 0.05})

In [8]:
# Persist the model, then the tokenizer, then the config, all into the same
# output directory.
for artifact in (model, tokenizer, config):
    artifact.save_pretrained("./outputs")