![](https://i.postimg.cc/26Knx6B6/Screenshot-2025-08-11-113135.png)

<span style="color: white; background-color: red; padding: 10px 20px; border-radius: 10px; font-size: 36px; font-weight: bold;">Install Dependencies</span>


In [1]:
#!pip install -q datasets transformers peft accelerate bitsandbytes


<span style="color: white; background-color: red; padding: 10px 20px; border-radius: 10px; font-size: 36px; font-weight: bold;">Basic Imports</span>


In [2]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
import torch



2025-08-11 05:43:40.643163: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754891020.823068      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754891020.873562      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


<span style="color: white; background-color: red; padding: 10px 20px; border-radius: 10px; font-size: 36px; font-weight: bold;">Load dataset & Tokenizer</span>


In [3]:
# Load dataset
dataset = load_dataset("Abirate/english_quotes")
# Let's split 90% train / 10% validation without overlap
dataset_split = dataset["train"].train_test_split(test_size=0.1, seed=42)

train_data = dataset_split["train"]
val_data = dataset_split["test"]

# Load tokenizer
model_name = "gpt2"
# AutoTokenizer picks the right tokenizer class automatically based on the model 
tokenizer = AutoTokenizer.from_pretrained(model_name)
# give GPT-2 a fake pad token so batching works during training
tokenizer.pad_token = tokenizer.eos_token



README.md: 0.00B [00:00, ?B/s]

quotes.jsonl:   0%|          | 0.00/647k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2508 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

<span style="color: white; background-color: red; padding: 10px 20px; border-radius: 10px; font-size: 36px; font-weight: bold;">Tokenize</span>


In [4]:
# Tokenize + add labels
def tokenize(batch):
    tokenized = tokenizer(batch["quote"], padding="max_length", truncation=True, max_length=64)
    # It shifts the labels internally by 1 position when calculating the loss
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized
# batched = True -->> sends data in batches
train_data = train_data.map(tokenize, batched=True)
val_data = val_data.map(tokenize, batched=True)


Map:   0%|          | 0/2257 [00:00<?, ? examples/s]

Map:   0%|          | 0/251 [00:00<?, ? examples/s]

<span style="color: white; background-color: red; padding: 10px 20px; border-radius: 10px; font-size: 36px; font-weight: bold;">Load Model</span>


In [5]:
# Load model in FP16
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    # device map -->> If you have multiple GPUs, it can split layers between them.
    device_map="auto"
)



model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

<span style="color: white; background-color: red; padding: 10px 20px; border-radius: 10px; font-size: 36px; font-weight: bold;">LoRA config + Training arguments + training</span>


![](https://i.postimg.cc/qqyjFyrq/0-wwg-To6-O04-U50k4-LZ.png)

In [6]:
# LoRA config
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["c_attn"],  # GPT-2 attention
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Inserts low-rank LoRA layers into those modules
model = get_peft_model(model, lora_config)

#  Training arguments
training_args = TrainingArguments(
    output_dir="./lora-llm",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # batch_size=4, grad_accum_steps=2 → effective batch size = 4 × 2 = 8
    gradient_accumulation_steps=2,
    eval_strategy="steps",
    eval_steps=20,
    logging_steps=10,
    save_steps=50,
    learning_rate=2e-4,
    num_train_epochs=5,
    fp16=True,
    report_to="none"
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
    tokenizer=tokenizer
)

# Train
trainer.train()

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss,Validation Loss
20,5.5258,5.293876
40,3.0119,2.208154
60,2.1335,1.882907
80,1.9807,1.755885
100,1.696,1.636082
120,1.6499,1.57387
140,1.6988,1.525108
160,1.8792,1.509992
180,1.9094,1.504137
200,1.7665,1.49904


TrainOutput(global_step=1415, training_loss=1.7523778827788552, metrics={'train_runtime': 217.4738, 'train_samples_per_second': 51.891, 'train_steps_per_second': 6.507, 'total_flos': 369863056097280.0, 'train_loss': 1.7523778827788552, 'epoch': 5.0})

<span style="color: white; background-color: red; padding: 10px 20px; border-radius: 10px; font-size: 36px; font-weight: bold;">Save the model</span>


In [7]:
#  Save LoRA adapter
model.save_pretrained("lora-gpt2")
tokenizer.save_pretrained("lora-gpt2")

('lora-gpt2/tokenizer_config.json',
 'lora-gpt2/special_tokens_map.json',
 'lora-gpt2/vocab.json',
 'lora-gpt2/merges.txt',
 'lora-gpt2/added_tokens.json',
 'lora-gpt2/tokenizer.json')

<span style="color: white; background-color: red; padding: 10px 20px; border-radius: 10px; font-size: 36px; font-weight: bold;">Inference From Saved Model</span>


In [8]:
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

#  Load tokenizer
base_model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained("lora-gpt2")
tokenizer.pad_token = tokenizer.eos_token

#  Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Load LoRA adapter
model = PeftModel.from_pretrained(base_model, "lora-gpt2")

#  Build pipeline
text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto"
)


Device set to use cuda:0


In [12]:
# Run inference
prompt = "The secret to happiness is"
outputs = text_gen(prompt, max_new_tokens=70, num_return_sequences=1, do_sample=True, temperature=0.7)

print(outputs[0]["generated_text"])

The secret to happiness is not fear, but determination, determination and the willingness to work hard. You don't have to be a doctor to do this, but you can do it.


In [13]:
# Run inference
prompt = "once upon a time"
outputs = text_gen(prompt, max_new_tokens=70, num_return_sequences=1, do_sample=True, temperature=0.7)

print(outputs[0]["generated_text"])

once upon a time of great need, and when the world comes to an end, it is not to be feared. It is to be feared that we might have a better way of life in the future. And yet, our world could not be better. There was no hope. And when these things happen, it seems to us that we must not let them pass


<span style="color: white; background-color: red; padding: 10px 20px; border-radius: 10px; font-size: 36px; font-weight: bold;">Smash That Upvote !! Thanks ! 🚀😃</span>
