<a href="https://colab.research.google.com/github/MarcosVeniciu/HotelQA-RAG/blob/main/Fine_tuning_code/Fine_Tuning_Gemma_2_9B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
# Colab environment setup. %%capture suppresses the installer output of this cell.
# Unsloth is installed from the current HEAD of its GitHub repository (no version pin).
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Companion packages are installed with --no-deps, so pip does not resolve their
# own dependencies; xformers is capped below 0.0.27 and trl below 0.9.0.
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

In [None]:
from transformers.trainer_utils import get_last_checkpoint
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
import torch

In [None]:
# Mount Google Drive at /content/drive. The dataset parquet files and the
# checkpoint folder used by later cells are located under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Loading options. max_seq_length is reused later when the trainer is built.
max_seq_length = 2048  # any value works; Unsloth handles RoPE scaling internally
dtype = None           # None = auto-detect (float16 on Tesla T4/V100, bfloat16 on Ampere+)
load_in_4bit = True    # 4-bit quantization to reduce memory usage; may be set to False

# Load the base model and its tokenizer. Judging by its name, the checkpoint is
# a bitsandbytes 4-bit build of the instruction-tuned Gemma 2 9B.
model_load_options = dict(
    model_name="unsloth/gemma-2-9b-it-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**model_load_options)

In [None]:
# LoRA adapter configuration, applied to the projection modules listed below.
# NOTE(review): this cell rebinds `model`; re-running it without first re-running
# the model-loading cell passes the already-adapted model back in — confirm that
# is acceptable before re-executing.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
lora_options = dict(
    r=16,                                  # LoRA rank; any number > 0 (suggested 8, 16, 32, 64, 128)
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,                        # any value is supported, but 0 is the optimized path
    bias="none",                           # any value is supported, but "none" is the optimized path
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,                      # rank-stabilized LoRA is available but disabled
    loftq_config=None,                     # LoftQ is available but not configured
)
model = FastLanguageModel.get_peft_model(model, **lora_options)

In [6]:
# Both templates share everything after their opening sentence: the section
# headers and the {instruction}/{input}/{response}/{eos_token} placeholders.
_PROMPT_SHARED_TAIL = (
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction: # Instruction on how to perform the task\n"
    "{instruction}\n\n"
    "### Input: # Context and question\n"
    "{input}\n\n"
    "### Response: # Model should generate the response here \n"
    "{response}{eos_token}"
)

# Alpaca-style templates keyed by whether the example comes with a context.
# Each one is filled through str.format with bos_token, instruction, input,
# response and eos_token.
ALPACA_PROMPT_DICT = {
    "prompt_context": (
        "{bos_token}Below is an instruction that describes a task, "
        "paired with an input that provides further context. "
        + _PROMPT_SHARED_TAIL
    ),
    "prompt_no_context": (
        "{bos_token}Below is an instruction that describes a task. "
        + _PROMPT_SHARED_TAIL
    ),
}

# Special-token strings read from the loaded tokenizer; formatPrompt substitutes
# them into the {bos_token} / {eos_token} placeholders of the prompt templates.
# NOTE(review): the templates embed BOS explicitly — if the trainer's tokenization
# step also adds special tokens, each sequence may start with a duplicated BOS;
# confirm against the tokenized output.
BOS_TOKEN = tokenizer.bos_token
EOS_TOKEN = tokenizer.eos_token
def formatPrompt(example):
  """Render one dataset row as a single Alpaca-style training string.

  Args:
    example: mapping with the keys 'Context', 'Question' and 'Answer'.

  Returns:
    A dict with one key, "text", holding the filled-in prompt. Rows whose
    'Context' equals 'no_context' use the 'prompt_no_context' template and its
    dedicated instruction; every other row uses 'prompt_context'.
  """
  if example['Context'] == 'no_context':
    template_key = 'prompt_no_context'
    task_instruction = "If the context is 'no context', respond with a generic answer indicating that don't know the hotel."
  else:
    template_key = 'prompt_context'
    task_instruction = "When prompted to 'respond', simply generate the answer to the question asked in 'respond' and make sure your answer is detailed, informative and engaging, using the information in context to enhance your answer."

  # The input section has the same layout in both branches.
  fields = dict(
    bos_token=BOS_TOKEN,
    instruction=task_instruction,
    input=f"context: {example['Context']}\nrespond: {example['Question']}",
    response=example['Answer'],
    eos_token=EOS_TOKEN,
  )
  return {"text": ALPACA_PROMPT_DICT[template_key].format(**fields)}


# Parquet splits stored on the mounted Drive.
data_files = {
    'train': '/content/drive/MyDrive/Treinamento/Dataset_V2_train_16k.parquet',
    'test': '/content/drive/MyDrive/Treinamento/Dataset_V2_test_16k.parquet',
}
raw_columns = ['Context', 'Question', 'Answer']

# Load both splits and apply formatPrompt to every row: each row gains the
# "text" field it returns, and the three raw columns are dropped.
dataset = load_dataset('parquet', data_files=data_files).map(
    formatPrompt,
    remove_columns=raw_columns,
)

In [7]:
# Folder on Drive where checkpoints are written and where the resume logic in
# the training cell looks for the latest checkpoint.
# NOTE(review): the folder is named "TinyLlama" although this notebook loads a
# Gemma 2 9B model — confirm it holds no checkpoints from another model, since
# any checkpoint found here would be used for resuming.
project_dir = "/content/drive/MyDrive/Treinamento/TinyLlama"
trainer = SFTTrainer(
  model = model,
  tokenizer = tokenizer,
  # The shuffle is seeded so the row order is identical in every session. With
  # an unseeded shuffle, a run resumed from a checkpoint after a restart would
  # iterate a differently ordered dataset, so the batches skipped on resume
  # would not be the ones already trained on.
  train_dataset = dataset['train'].shuffle(seed = 3407),
  dataset_text_field = "text", # column produced by formatPrompt
  max_seq_length = max_seq_length,
  dataset_num_proc = 2,
  packing = False, # packing (concatenating short sequences) is disabled
  args = TrainingArguments(
    num_train_epochs = 1,
    # 16 sequences per device x 16 accumulation steps = 256 sequences per
    # optimizer step on each device.
    per_device_train_batch_size = 16,
    gradient_accumulation_steps = 16,
    # Save a checkpoint every 6 steps and keep at most 2 checkpoints on disk.
    save_total_limit=2,
    save_strategy="steps",
    save_steps=6,
    warmup_ratio = 0.1,
    learning_rate = 2e-5,
    # Use bf16 when the hardware supports it, otherwise fall back to fp16.
    fp16 = not is_bfloat16_supported(),
    bf16 = is_bfloat16_supported(),
    logging_steps = 1,
    optim = "adamw_8bit",
    weight_decay = 0.1,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir=project_dir,
  ),
)

In [None]:
# If the project folder already holds a saved checkpoint, training continues
# from the most recent one; otherwise a new run is started.
last_checkpoint = get_last_checkpoint(project_dir)
if last_checkpoint is not None: # resume from the last saved checkpoint
  print(f"Continuando treinamento a partir de: {last_checkpoint}\n")
  trainer_stats = trainer.train(resume_from_checkpoint=last_checkpoint)
else: # start a new training run
  print("Começando um novo treinamento:")
  trainer_stats = trainer.train()

**REFERÊNCIAS**

[1] [Colab original com Gemma 2 9B](https://colab.research.google.com/drive/1vIrqH5uYDQwsJ4-OO3DErvuv4pBgVwk4?usp=sharing#scrollTo=QmUBVEnvCDJv)