In [None]:

# import necessary libraries
import os
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from transformers import DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, LoftQConfig
import wandb

In [None]:
# 1) Load the tokenizer and the base model
#
# NOTE(review): no `quantization_config` is passed to `from_pretrained` below,
# so the base model is NOT quantized at load time. The PEFT LoftQ guide advises
# loading the base model unquantized when `init_lora_weights="loftq"` is used
# (LoftQ performs its own quantization while initialising the adapters), so the
# load is left as-is — confirm this matches the intended workflow.

MODEL_NAME = "Desired Model Name"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

# The data collator pads every batch, which fails when the tokenizer defines no
# pad token (common for causal-LM tokenizers). Fall back to the EOS token in
# that case; tokenizers that already have a pad token are left untouched.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 bitsandbytes settings (fp16 compute, no double quantization).
# Currently unused by the load below; kept for workflows that reload the
# model quantized.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)

# torch_dtype="auto" / device_map="auto" defer dtype and device placement to
# the library; low_cpu_mem_usage=True is requested for the load.
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype="auto",
    device_map="auto",
    low_cpu_mem_usage=True,
)

In [None]:
# 2) LoRA adapters with LoftQ initialisation, targeting standard transformer linear layers

# LoftQ is configured for 4 bits.
loftq_config = LoftQConfig(loftq_bits=4)

# Names of the sub-modules that receive LoRA adapters.
# Adjust this list to match the architecture of the chosen model.
target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "up_proj",
    "down_proj",
]

# Adapter hyper-parameters: rank 64, alpha 124, 5% dropout, no bias training.
# init_lora_weights="loftq" selects LoftQ initialisation driven by loftq_config.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=target_modules,
    r=64,
    lora_alpha=124,
    lora_dropout=0.05,
    bias="none",
    init_lora_weights="loftq",
    loftq_config=loftq_config,
)
print("Calculating...")

In [None]:
# 3) Wrap the base model with the adapters described by `lora_config`,
#    then report how many parameters are trainable.

model = get_peft_model(model=base_model, peft_config=lora_config)
model.print_trainable_parameters()

trainable params: 48,365,568 || all params: 1,394,837,504 || trainable%: 3.4675


In [None]:
# 4) Dataset + collator

# Load the JSON training file; the result is indexed by split name ("train").
dataset = load_dataset(
    "json",
    data_files={"train": "Your training data file path"},
)


def tokenize_fn(examples):
    """Tokenize the "text" field of a batch.

    Sequences are truncated to at most 512 tokens and are not padded here;
    padding is left to the data collator.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True, max_length=512, padding=False)


# Tokenize in batches and drop every original column of the train split.
tokenized_ds = dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

# mlm=False: collate for causal language modelling rather than masked LM.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [None]:
# 5) Use Weights & Biases for experiment tracking

# Never paste a real API key into the notebook: it ends up in version control
# and in shared copies. The key is read from the WANDB_API_KEY environment
# variable instead. When the variable is unset, `api_key` is None and
# wandb.login() falls back to its own credential lookup (stored login or an
# interactive prompt).
api_key = os.environ.get("WANDB_API_KEY")
wandb.login(key=api_key)

# Start a run in the given project; the Trainer reports to it via report_to="wandb".
wandb.init(
    project="Your Project name",
)

In [None]:
# 6) Training arguments
training_args = TrainingArguments(
    # --- output / checkpointing ---
    output_dir="Your desired Output Directory",
    save_strategy="steps",
    save_steps=100,
    save_total_limit=3,
    # --- batching: 25 samples x 12 accumulation steps = 300 samples per
    #     optimizer step on each device ---
    per_device_train_batch_size=25,
    gradient_accumulation_steps=12,
    num_train_epochs=2,
    # --- optimisation ---
    optim="paged_adamw_8bit",       # other optimizers can be used as well
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.07,
    weight_decay=0.01,
    max_grad_norm=1.0,
    # --- precision / memory ---
    fp16=True,                      # switch to bf16 if the GPU supports it
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # --- logging ---
    logging_steps=20,
    report_to="wandb",
    # The dataset columns were already pruned during tokenization.
    remove_unused_columns=False,
    # ddp_find_unused_parameters=False,  # left disabled for single-GPU runs
)

In [None]:
# 7) Trainer
# Bundle the adapter-wrapped model, the tokenized train split, the collator
# and the training arguments, then run training.
# NOTE(review): recent transformers releases deprecate the `tokenizer=`
# argument in favour of `processing_class=` — confirm against the installed version.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=tokenized_ds["train"],
)
trainer.train()

In [None]:
# 8) Merge adapters and save
# The adapters are LoRA adapters initialised with LoftQ
# (init_lora_weights="loftq"); DoRA is not enabled anywhere in this notebook,
# so the status message names LoftQ.
print("Merging LoftQ LoRA weights...")

# Fold the trained adapter weights into the base model and drop the PEFT wrappers.
merged_model = trainer.model.merge_and_unload()

# Write the merged model and the tokenizer to the same directory so the
# result can be loaded from a single path.
save_path = "Your desired/save/path"
merged_model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
print(f" Model and tokenizer saved to {save_path}")