# LLM Fine-Tuning with Hugging Face, PEFT, and LoRA
This notebook demonstrates how to fine-tune a language model using Hugging Face Transformers, PEFT, and LoRA on a large dataset.

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive.
# The training output directory used later in this notebook lives under
# /content/drive/MyDrive, so this cell has to run before training starts.
drive.mount('/content/drive')

### Install required libraries

In [None]:
!pip install -U transformers datasets peft accelerate bitsandbytes

### Load dataset

In [None]:
from datasets import load_dataset

# Fetch the training split, then keep only its first 5,000 rows
# (indices 0..4999) as the fine-tuning set.
dataset_full = load_dataset("DaviLago/AmazonTitles-1.3MM", split="train")
dataset = dataset_full.select(range(5_000))
print(f'Loaded {len(dataset)} records.')

### Hugging Face login

In [None]:
from huggingface_hub import login

# Login to Hugging Face
# Later cells download the base model from the Hub and push the fine-tuned
# model back to it.
# NOTE(review): new_session=False presumably reuses an already-saved token
# instead of prompting again — confirm against the huggingface_hub docs.
login(new_session=False)

### Load model and tokenizer

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
)

# Base checkpoint to fine-tune
base_model = "meta-llama/Llama-2-7b-hf"

# Tokenizer (slow implementation requested via use_fast=False).
# The EOS token is reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Quantized base model; device_map="auto" delegates device placement to the loader
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quantization_config,
    device_map="auto",
)

# Batch collator for causal language modeling (mlm=False: no masked-LM objective)
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

### LoRA configuration

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType

# The base model was loaded with 4-bit quantization, so run PEFT's k-bit
# preparation step before attaching the adapters.
model = prepare_model_for_kbit_training(model)

# Prepare PEFT/LoRA config: rank-32 adapters (alpha 64) on the attention and
# MLP projection layers plus the output head.
lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type=TaskType.CAUSAL_LM
)

# Integrate LoRA with the model
model = get_peft_model(model, lora_config)

# NOTE: an `accelerator.prepare_model(model)` call used to follow here, but no
# `accelerator` object is created anywhere in this notebook, so it failed with
# NameError on a fresh kernel. It is not needed: the model is handed to
# `Trainer` below, which sets up Accelerate itself.

# Sanity check: report how many parameters the adapters make trainable.
model.print_trainable_parameters()

### Tokenized dataset

In [None]:
# Prompt construction + tokenization for a single record
def preprocess(example):
    """Turn one dataset record into a tokenized causal-LM training example.

    Builds a prompt from the record's 'title' and 'content' fields, tokenizes
    it with the notebook-level `tokenizer` (truncated and padded to exactly
    256 tokens), and adds a 'labels' entry that is a copy of 'input_ids'.

    Args:
        example: mapping with 'title' and 'content' keys.

    Returns:
        The tokenizer output with an extra 'labels' entry.
    """
    prompt = f"Title \"{example['title']}\". Content, description and details about the title: \"{example['content']}\"."
    encoded = tokenizer(
        prompt,
        truncation=True,
        padding='max_length',
        max_length=256,
    )
    encoded['labels'] = encoded['input_ids'].copy()
    return encoded

# Apply the preprocessing record by record (no batching)
tokenized_dataset = dataset.map(preprocess, batched=False)

### Training arguments

In [None]:
import torch
from transformers import TrainingArguments, Trainer

# Define output directory (on the mounted Google Drive so checkpoints persist)
output_dir = "/content/drive/MyDrive/amazon-titles-llama-finetuned"

# Choose the mixed-precision mode to match the hardware. The quantized model
# computes in bfloat16, so prefer bf16 training where the GPU supports it
# natively (compute capability >= 8.0, i.e. Ampere or newer). Older GPUs keep
# the previous fp16 behaviour.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

# Training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    num_train_epochs=50,
    learning_rate=2e-4,
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=50,
    output_dir=output_dir,
    save_total_limit=2,  # keep only the two most recent checkpoints
    save_steps=100,
    report_to='none'
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator
)

### Training

In [None]:
# Start training
# Runs the fine-tuning loop on the Trainer configured above. With save_steps=100
# and save_total_limit=2, checkpoints are written under output_dir as training
# progresses.
trainer.train()

### Save model and tokenizer

In [None]:
# Persist the fine-tuned model, then its tokenizer, side by side in output_dir
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)

### Push model and tokenizer to Hugging Face

In [None]:
# Target repository on the Hugging Face Hub
hf_model_name = "DaviLago/amazon-titles-llama-finetuned"

# Upload the model first, then the tokenizer, to that same repository
for artifact in (model, tokenizer):
    artifact.push_to_hub(hf_model_name)