In [1]:
# Import required libraries
import torch
import gc
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import os
from peft import LoraConfig
from datasets import load_dataset
from trl import SFTTrainer
import transformers

In [2]:
#Free GPU memory
gc.collect()
torch.cuda.empty_cache()

In [3]:
#Set the model
model_id = "google/gemma-1.1-2b-it"

#bits and bytes config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, #Load the model in 4 bits
    bnb_4bit_quant_type="nf4", #Quantization type 4 bits
    bnb_4bit_compute_dtype=torch.bfloat16 #Data type for calculation
)

# Load the pre-trained tokenizer associated with the model specified by `model_id`
# `padding_side="right"` indicates that padding should be applied to the right side of the sequences
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="right")

# Load the pre-trained causal language model associated with `model_id`
# `quantization_config=bnb_config` specifies the quantization configuration to load the model in 4 bits
# `device_map={"":0}` assigns the model to the first available CUDA device, typically the first GPU (index 0)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [4]:
# Set an environment variable to enable Weights and Biases (wandb) logging
# Weights and Biases (wandb) is a tool for tracking machine learning experiments
# Setting "WANDB_DISABLED" to "false" enables wandb logging
os.environ["WANDB_DISABLED"] = "false"

In [5]:
# LoRA (Low-Rank Adaptation) Configuration for Fine-Tuning a Pretrained Model
lora_config = LoraConfig(
    r=64, # Rank of the low-rank decomposition. Determines the capacity of the LoRA model.
    lora_alpha=32, # Scales the values of the adapted parameters. A higher value implies a greater contribution of adaptation.
    lora_dropout=0.05, # Dropout probability for the adapted layers. Helps prevent overfitting.
    bias="none", # Bias configuration. "none" indicates no additional bias is added in the adapted layers.
    target_modules = ["q_proj", "o_proj", "k_proj", "v_proj",
                      "gate_proj", "up_proj", "down_proj"], # List of module names where LoRA adaptation is applied.
    task_type = "CAUSAL_LM", # Type of task for which the model is configured. "CAUSAL_LM" refers to a causal language model.
)

In [38]:
# Load the dataset
data = load_dataset("SerchiBoi/DTT-Info")
data

DatasetDict({
    train: Dataset({
        features: ['Question', 'Answer'],
        num_rows: 496
    })
})

In [39]:
# Define a function to set a format of the training data
# This has a specific format to works with the previusa dataset
def formatting_func(example):
    text = f"Question: {example['Question'][0]}\Answer: {example['Answer'][0]}"
    return [text]

In [44]:
# Create an instance of SFTTrainer
trainer = SFTTrainer(
    model=model, # Model to be used for training
    train_dataset=data["train"], # Training dataset
    peft_config=lora_config,  # Specific configuration of the generation algorithm
    formatting_func=formatting_func, # Function to format dataset instances
    max_seq_length=1300, # Maximum allowed input sequence length
    args=transformers.TrainingArguments( # Training arguments
        per_device_train_batch_size=1, # Training batch size per device
        gradient_accumulation_steps=4, # Number of gradient accumulation steps
        warmup_steps=2, # Number of warmup steps for the optimizer
        max_steps=1, # Maximum number of training steps
        learning_rate=2e-4, # Learning rate for the optimizer
        fp16=True, # Enable 16-bit precision for training
        logging_steps=1, # Steps interval for logging
        output_dir="outputs-dtt-v1", # Output directory to save training results
        optim="paged_adamw_8bit", # Name of the optimizer used for training
        report_to="tensorboard", # Destination to report training execution
    )
)

In [45]:
# Start the training
trainer.train()

Step,Training Loss
1,1.3015


TrainOutput(global_step=1, training_loss=1.3015427589416504, metrics={'train_runtime': 0.6366, 'train_samples_per_second': 6.283, 'train_steps_per_second': 1.571, 'total_flos': 2472397209600.0, 'train_loss': 1.3015427589416504, 'epoch': 1.0})

In [48]:
# Load the extension of TensorBoard
%load_ext tensorboard 
#Start TensorBoard using the info of the directory specified in logdir
%tensorboard --logdir=outputs-dtt-v1 

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 48300), started 0:00:07 ago. (Use '!kill 48300' to kill it.)