In [1]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training
)
from datasets import Dataset
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import torch




In [2]:
from huggingface_hub import login
# Login to Hugging Face Hub
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Toggle LoRA vs QLoRA
use_qlora = True  # set False to use standard LoRA (full precision)

if use_qlora:
    # QLoRA: 4-bit quantization via Transformers' BitsAndBytesConfig
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=quant_config,
        trust_remote_code=True,
    )
    model = prepare_model_for_kbit_training(model)
else:
    # Standard LoRA: full-precision adapter
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype=torch.float16,
        trust_remote_code=True,
    )


In [6]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # adjust based on model internals
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)



In [8]:
embed_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma(
    collection_name="database",
    embedding_function=embed_model,
    persist_directory="database"
)
texts = vectorstore.get().get("documents", [])  # list[str]
print(f"Number of documents: {len(texts)}")
hf_dataset = Dataset.from_dict({"text": texts})

Number of documents: 1731


In [11]:
def tokenize_fn(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )

# Tokenize & batch
tokenized_ds = hf_dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=["text"]
)

Map:   0%|          | 0/1731 [00:00<?, ? examples/s]

In [12]:
training_args = TrainingArguments(
    output_dir="./phase5_lora_llama3.2-1b",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    warmup_steps=100,
    num_train_epochs=3,                   
    learning_rate=2e-4,
    fp16=True,
    logging_steps=20,
    save_steps=200,
    optim="paged_adamw_8bit",
    remove_unused_columns=False,
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

In [13]:
trainer = Trainer(
    model=model,
    train_dataset=tokenized_ds,
    args=training_args,
    data_collator=data_collator,
)

trainer.train()


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mkrishshandilya18[0m ([33mkrishshandilya18-pes-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
20,2.8024
40,2.7036
60,2.5849
80,2.4967
100,2.4453
120,2.3607
140,2.3611
160,2.3384


TrainOutput(global_step=162, training_loss=2.510403824441227, metrics={'train_runtime': 1574.5316, 'train_samples_per_second': 3.298, 'train_steps_per_second': 0.103, 'total_flos': 1.533761789362176e+16, 'train_loss': 2.510403824441227, 'epoch': 2.9607390300230945})

In [14]:
model.save_pretrained("./phase5_lora_llama3.2-1b")
tokenizer.save_pretrained("./phase5_lora_llama3.2-1b")

('./phase5_lora_llama3.2-1b\\tokenizer_config.json',
 './phase5_lora_llama3.2-1b\\special_tokens_map.json',
 './phase5_lora_llama3.2-1b\\tokenizer.json')