In [1]:
!pip uninstall -y transformers
!pip install git+https://github.com/huggingface/transformers
!pip install peft bitsandbytes accelerate datasets --upgrade


Found existing installation: transformers 4.52.4
Uninstalling transformers-4.52.4:
  Successfully uninstalled transformers-4.52.4
Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-_ahow7p0
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-_ahow7p0
  Resolved https://github.com/huggingface/transformers to commit 6dfd561d9cd722dfc09f702355518c6d09b9b4e3
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers==4.55.0.dev0)
  Downloading huggingface_hub-0.34.3-py3-none-any.whl.metadata (14 kB)
Downloading huggingface_hub-0.34.3-py3-none-any.whl (558 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m558.8/558.8 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00

In [2]:
from datasets import load_dataset

print("Downloading Wikitext and creating small train.txt...")
dataset_wiki = load_dataset("wikitext", "wikitext-103-raw-v1", split="train")

with open("train.txt", "w", encoding="utf-8") as f:
    count = 0
    for item in dataset_wiki:
        if count >= 500:         # <<-- Reduced to 500 lines
            break
        text = item['text'].strip()
        if text:
            f.write(text + "\n")
            count += 1

print(f"train.txt created with {count} lines.")


Downloading Wikitext and creating small train.txt...


README.md: 0.00B [00:00, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/733k [00:00<?, ?B/s]

train-00000-of-00002.parquet:   0%|          | 0.00/157M [00:00<?, ?B/s]

train-00001-of-00002.parquet:   0%|          | 0.00/157M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/657k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/4358 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/1801350 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3760 [00:00<?, ? examples/s]

train.txt created with 500 lines.


In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

model_id = "madhan9063/qwen2.5-14b-4bit"  # Change this!

# BitsAndBytes quant config (use float16 for max compatibility)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# LoRA config
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True,
    use_fast=False,
)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
model.resize_token_embeddings(len(tokenizer))
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)


2025-08-03 08:05:20.080652: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754208320.334702      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754208320.417740      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/605 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]



model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]

In [4]:
from datasets import load_dataset

dataset = load_dataset("text", data_files={"train": "train.txt"})
num_samples = 500   # All lines, since file is already small
small_dataset = dataset["train"].shuffle(seed=42).select(range(num_samples))

def tokenize_function(examples):
    tokenized = tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=128,   # Reduce sequence length for speed & lower memory
    )
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_small_dataset = small_dataset.map(tokenize_function, batched=True)
train_dataset = tokenized_small_dataset

print(f"Training on dataset size: {len(train_dataset)}")


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Training on dataset size: 500


In [10]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./qlora-qwen-finetuned",
    per_device_train_batch_size=1,        # Keep small for VRAM safety
    gradient_accumulation_steps=4,        # Lower for small dataset
    learning_rate=2e-4,
    num_train_epochs=1,                   # One epoch for small dataset
    logging_steps=5,
    save_steps=1000000,                   # Very large to disable intermediate saves
    fp16=True,
    optim="paged_adamw_32bit",
    report_to="none",
    save_total_limit=1,                   # Keep at most one checkpoint if any saved
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

print("Starting fine-tuning on small set...")
trainer.train()

# You can save the model manually AFTER training:
# model.save_pretrained("./qlora-qwen-finetuned")
# tokenizer.save_pretrained("./qlora-qwen-finetuned")


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Starting fine-tuning on small set...


  return fn(*args, **kwargs)


Step,Training Loss
5,0.876
10,0.8765
15,0.8015
20,0.937
25,0.7059
30,1.1041
35,0.6847
40,0.7369
45,0.8318
50,0.8017




TrainOutput(global_step=125, training_loss=0.9013138465881347, metrics={'train_runtime': 1846.715, 'train_samples_per_second': 0.271, 'train_steps_per_second': 0.068, 'total_flos': 5376770310144000.0, 'train_loss': 0.9013138465881347, 'epoch': 1.0})

In [11]:
model.save_pretrained("./qlora-qwen-finetuned")
tokenizer.save_pretrained("./qlora-qwen-finetuned")




('./qlora-qwen-finetuned/tokenizer_config.json',
 './qlora-qwen-finetuned/special_tokens_map.json',
 './qlora-qwen-finetuned/chat_template.jinja',
 './qlora-qwen-finetuned/vocab.json',
 './qlora-qwen-finetuned/merges.txt',
 './qlora-qwen-finetuned/added_tokens.json')

In [12]:
import os

saved_path = "./qlora-qwen-finetuned"
print("Files in saved directory:")
print(os.listdir(saved_path))


Files in saved directory:
['adapter_model.safetensors', 'checkpoint-125', 'added_tokens.json', 'special_tokens_map.json', 'tokenizer_config.json', 'adapter_config.json', 'merges.txt', 'README.md', 'vocab.json', 'chat_template.jinja']


In [13]:
!pip install huggingface_hub --upgrade
from huggingface_hub import notebook_login
notebook_login()




VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [14]:
model.push_to_hub("madhan9063/qwen2-14b-finetuned")
tokenizer.push_to_hub("madhan9063/qwen2-14b-finetuned")




adapter_model.safetensors:   0%|          | 0.00/6.26G [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

CommitInfo(commit_url='https://huggingface.co/madhan9063/qwen2-14b-finetuned/commit/a12a5b252a2751d15d6dd7f4b6f775f95135ad76', commit_message='Upload tokenizer', commit_description='', oid='a12a5b252a2751d15d6dd7f4b6f775f95135ad76', pr_url=None, repo_url=RepoUrl('https://huggingface.co/madhan9063/qwen2-14b-finetuned', endpoint='https://huggingface.co', repo_type='model', repo_id='madhan9063/qwen2-14b-finetuned'), pr_revision=None, pr_num=None)