In [1]:
# Install necessary libraries and modules
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig
import torch
from datasets import load_dataset
 
# Load the dataset
dataset = load_dataset('json', data_files={
    'train': '/home/rox/datasets/singlish/singlish_training_dataset2.jsonl',
    'validation': '/home/rox/datasets/singlish/singlish_validation_dataset2.jsonl'
})

# Define the model name
model_name = "/home/rox/llama-singlish/"
 
# Check if CUDA is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Clear CUDA cache
if (device == "cuda") :
    torch.cuda.empty_cache()
 
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("/home/rox/llama-singlish/")
 
# Set the padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
 
# Tokenize the dataset
def tokenize_function(examples):
    # Use "longest" to dynamically pad to the longest sequence in each batch
    return tokenizer(examples["text"], padding="longest", truncation=True)
 
# Apply tokenization
tokenized_datasets = dataset.map(tokenize_function, batched=True)
 
# Configure quantization
quant_config = BitsAndBytesConfig(
    load_in_8bit=True  # Use 8-bit quantization
)
 
# Configure the PEFT model
peft_config = LoraConfig(
    r=8,  # Rank of the low-rank adaptation
    lora_alpha=32,  # Scaling factor for the low-rank adaptation
    target_modules=["q_proj", "v_proj"],  # Make sure these are valid for LLaMA 3.1
    lora_dropout=0.1,  # Dropout rate for LoRA
    bias="none"
)

# Load the PEFT model
peft_model = get_peft_model(model=model_name, peft_config=peft_config, quantization_config=quant_config).to(device)
 
# Trainer configuration
trainer = Trainer(
    model=peft_model,
    args=TrainingArguments(
        output_dir="./output",
        num_train_epochs=10,
        per_device_train_batch_size=2,  # Reduce batch size
        per_device_eval_batch_size=2,  # Reduce batch size
        gradient_accumulation_steps=2,  # Accumulate gradients over 2 steps
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir="./logs",
        remove_unused_columns=False,
        fp16=True,  # Use mixed precision training
        dataloader_num_workers=2,  # Number of subprocesses to use for data loading
        torch_compile=True,  # Enable TorchScript compilation
    ),
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
)
 
# Start training
trainer.train()

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

TypeError: get_peft_model() got an unexpected keyword argument 'quantization_config'