# 1 Imports

In [None]:
! pip install -q datasets==2.21.0 requests torch peft bitsandbytes transformers==4.43.1 trl accelerate sentencepiece wandb tiktoken matplotlib

In [None]:
# imports

import os
import re
import math
from tqdm import tqdm
from huggingface_hub import login
from dotenv import load_dotenv
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, set_seed
from peft import LoraConfig, PeftModel
from datetime import datetime
from datasets import load_dataset, Dataset, DatasetDict
import matplotlib.pyplot as plt
import wandb
from trl import SFTTrainer, SFTConfig

# 2 Connect to OpenAI & HuggingFace

In [None]:
# Load environment variables from a file called .env

load_dotenv()
# OPENAI_API_KEY is the conventional variable name for the OpenAI key; the previous
# spelling OPEN_API_KEY is still honoured as a fallback so existing .env files keep working.
api_key = os.getenv('OPENAI_API_KEY') or os.getenv('OPEN_API_KEY')

In [None]:
# Read the Hugging Face token from .env and log this session in to the Hub

load_dotenv()
hf_token = os.getenv('HF_TOKEN')
login(token=hf_token, add_to_git_credential=True)

# 3 Model Selection

In [None]:
# Candidate base models (Hugging Face hub ids)
qwen_2_5 = "Qwen/Qwen2.5-7B"

# Constants
# `base_model` must point at an id defined above. It previously referenced an
# undefined name, which raised NameError on a fresh kernel. To try another model,
# add its hub id next to `qwen_2_5` and repoint `base_model`.
base_model = qwen_2_5
project_name = "pricer"  # Weights & Biases project name used by the W&B cells
hf_user = 'filipespacheco'
dataset_name = f'{hf_user}/pricer-data'
max_sequence_length = 182

# Run name for saving the model in HuggingFace
# The prefix is derived from the selected model id so the label cannot drift
# away from the model that is actually being trained.
run_name = f"{datetime.now():%Y-%m-%d_%H-%M-%S}"
model_slug = base_model.split("/")[-1].lower()
project_run_name = f"{model_slug}-{run_name}"
hub_model_name = f"{hf_user}/{project_run_name}"

# Hyperparameters for QLoRA
lora_r = 32
lora_alpha = 64
target_modules = ["q_proj", "v_proj", "k_proj", "o_proj"]
lora_dropout = 0.1
quant_4_bit = True

# Hyperparameters for training
epochs = 3
batch_size = 16
gradient_accumulation_steps = 1  # 1 = no accumulation; consumed by the trainer config
learning_rate = 1e-4
lr_scheduler_type = "cosine"
warmup_ratio = 0.03
optimizer = "paged_adamw_32bit"

# Admin config
steps = 50          # logging interval, in optimizer steps
save_steps = 5000   # checkpoint interval, in optimizer steps
log_to_wandb = True

%matplotlib inline

## 3.1 Log to Weights and Biases

In [None]:
# Log in to Weights and Biases
# The key is passed explicitly; when it is None wandb falls back to its own lookup.
wandb_api_key = os.getenv('WANDB_API_KEY')
wandb.login(key=wandb_api_key)

# Configure Weights and Biases to record
os.environ["WANDB_PROJECT"] = project_name
# 'end' uploads the final model as an artifact; it is the non-deprecated spelling of 'true'.
os.environ["WANDB_LOG_MODEL"] = 'end' if log_to_wandb else 'false'
# Must be one of 'gradients', 'parameters', 'all' or 'false'. The singular 'gradient'
# is not recognised by the transformers W&B callback, which then skips watching.
os.environ["WANDB_WATCH"] = 'gradients'

In [None]:
# Open a W&B run only when tracking is switched on
if log_to_wandb:
    wandb.init(name=run_name, project=project_name)

## 3.2 Load Dataset

In [None]:
# Fetch the dataset from the Hub and pull out its two splits
dataset = load_dataset(dataset_name)
train, test = dataset['train'], dataset['test']

## 3.3 Load Tokenizer and Model

In [None]:
# Pick the bitsandbytes quantization scheme for loading the base model.
if quant_4_bit:
    # 4-bit NF4 with double quantization; matmuls are computed in bfloat16
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

else:
    # 8-bit loading. BitsAndBytesConfig has no `bnb_8bit_compute_dtype` option
    # (compute dtype is a 4-bit-only setting), so the unknown kwarg that was
    # previously passed here was ignored with a warning and has been removed.
    quant_config = BitsAndBytesConfig(
        load_in_8bit=True,
    )

In [None]:
# Tokenizer: pad on the right and reuse the EOS token as the padding token
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Model: `base_model` holds the hub id (a string) on entry and is rebound to the
# loaded, quantized model here, so this cell cannot be re-run without first
# re-running the constants cell.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=quant_config,
)

# Generation should pad with the same token id the tokenizer pads with
base_model.generation_config.pad_token_id = tokenizer.eos_token_id

print(f"Memory footprint of model: {base_model.get_memory_footprint() / 1024**3:.1f} GB")

## 3.4 Data Collator

In [None]:
from trl import DataCollatorForCompletionOnlyLM

# Marker text passed to the collator as its response template
response_template = "Price is $"
# The tokenizer must be passed by keyword: the collator's second positional
# parameter is `instruction_template`, so a positional tokenizer would be bound to
# that slot and the underlying language-modeling collator would receive no tokenizer.
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

## 3.5 Setup Trainer

In [None]:
# LoRA adapter settings: rank, scaling factor, dropout and the modules that receive adapters

lora_parameters = LoraConfig(
    task_type="CAUSAL_LM",
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules=target_modules,
    bias="none",
)

# Next, specify the general configuration parameters for training

train_parameters = SFTConfig(
    # Where checkpoints go, locally and on the Hub
    output_dir=project_run_name,
    save_strategy="steps",
    save_steps=save_steps,
    save_total_limit=10,
    push_to_hub=True,
    hub_strategy="every_save",
    hub_model_id=hub_model_name,
    hub_private_repo=True,

    # Schedule and batch sizes
    num_train_epochs=epochs,
    max_steps=-1,  # -1 lets num_train_epochs decide the length of the run
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=gradient_accumulation_steps,
    eval_strategy="no",
    group_by_length=True,

    # Optimisation
    optim=optimizer,
    learning_rate=learning_rate,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    weight_decay=0.001,
    max_grad_norm=0.3,
    fp16=False,
    bf16=True,

    # Logging
    logging_steps=steps,
    # Use the string "none" to disable reporting. Passing None makes transformers
    # fall back to every installed integration, which would still log to wandb.
    report_to="wandb" if log_to_wandb else "none",
    run_name=run_name,

    # Dataset handling
    max_seq_length=max_sequence_length,
    dataset_text_field="text",
)

# And now, the Supervised Fine Tuning Trainer will carry out the fine-tuning
# given these 2 sets of configuration parameters.
# Recent trl versions may print a warning about labels here; it can be ignored
# as long as the training loss is coming down.
# The tokenizer configured earlier is passed explicitly. Without it the trainer
# loads a fresh tokenizer from the model id, which would not carry the pad-token
# and padding-side settings applied in this notebook.

fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=train,
    peft_config=lora_parameters,
    tokenizer=tokenizer,
    args=train_parameters,
    data_collator=collator,
)

In [None]:
# Run the fine-tuning loop
fine_tuning.train()

# Upload the fine-tuned model to a private Hugging Face repo
trained_model = fine_tuning.model
trained_model.push_to_hub(project_run_name, private=True)
print(f"Saved to the hub: {project_run_name}")