In [1]:
!pip install -q transformers datasets peft accelerate

In [2]:
import os
os.environ["WANDB_DISABLED"] = "true"
import torch
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    Trainer, TrainingArguments, DataCollatorForLanguageModeling
)
from datasets import Dataset
from peft import LoraConfig, get_peft_model, TaskType
from google.colab import files

In [3]:
# Base checkpoint that the LoRA adapters are trained on top of.
MODEL_NAME = "gpt2"

# Load the tokenizer first; fall back to the EOS token for padding when the
# checkpoint does not define a dedicated pad token.
tok = AutoTokenizer.from_pretrained(MODEL_NAME)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token

# Load the causal-LM weights and make the embedding matrix match the
# tokenizer's vocabulary size.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
model.resize_token_embeddings(len(tok))

# Prefer the GPU when one is visible to torch, otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)
print("Using device:", device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Using device: cpu


In [4]:
print("Upload a plain text file (one example per line).")
uploaded = files.upload()

# files.upload() gives back an empty mapping when the dialog is cancelled;
# fail with a clear message instead of an IndexError on the first key.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a text file.")

# Only the first uploaded file is used, matching the one-file workflow above.
file_name = next(iter(uploaded))
print("Uploaded:", file_name)

# One training example per non-blank line, with surrounding whitespace removed.
with open(file_name, "r", encoding="utf-8") as f:
    lines = [text for text in (raw.strip() for raw in f) if text]

# An empty dataset would only fail later (when selecting the tokenized
# columns) with a much less helpful error, so stop here.
if not lines:
    raise ValueError(f"{file_name!r} contains no non-empty lines to train on.")

dataset = Dataset.from_dict({"text": lines})


def tokenize_fn(examples):
    """Tokenize a batch of examples for causal-LM training.

    Args:
        examples: A batch from ``dataset.map(..., batched=True)``; its
            ``"text"`` entry holds the raw strings to tokenize.

    Returns:
        The tokenizer output for the batch, truncated and padded to exactly
        128 tokens per example.
    """
    return tok(examples["text"], truncation=True, padding="max_length", max_length=128)


# Replace the raw text column with token ids and expose them as torch tensors.
tokenized = dataset.map(tokenize_fn, batched=True, remove_columns=["text"])
tokenized.set_format(type="torch", columns=["input_ids", "attention_mask"])
print("Examples tokenized. Dataset size:", len(tokenized))

Upload a plain text file (one example per line).


Saving New Text Document.txt to New Text Document.txt
Uploaded: New Text Document.txt


Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Examples tokenized. Dataset size: 1


In [5]:
# LoRA hyper-parameters: rank-8 adapters, scaling alpha of 32, and 10% dropout
# on the adapter path, configured for a causal language-modelling task.
lora_config = LoraConfig(r=8, lora_alpha=32, lora_dropout=0.1, task_type=TaskType.CAUSAL_LM)

# Wrap the base model with the adapters and report how many parameters
# will actually be trained.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 294,912 || all params: 124,734,720 || trainable%: 0.2364




In [6]:
# Collator configured with mlm=False, i.e. not masked-language-modelling.
data_collator = DataCollatorForLanguageModeling(tok, mlm=False)

training_args = TrainingArguments(
    output_dir="./lora-llm",
    per_device_train_batch_size=2,
    num_train_epochs=2,
    learning_rate=2e-4,
    logging_steps=10,
    save_total_limit=1,
    # Mixed precision is only enabled when a CUDA device is available.
    fp16=torch.cuda.is_available(),
    save_strategy="epoch",
    # Explicitly turn off external logging integrations. This is the supported
    # replacement for the deprecated WANDB_DISABLED environment variable.
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    data_collator=data_collator,
)

trainer.train()

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Step,Training Loss




TrainOutput(global_step=2, training_loss=2.5196447372436523, metrics={'train_runtime': 6.7228, 'train_samples_per_second': 0.297, 'train_steps_per_second': 0.297, 'total_flos': 131099000832.0, 'train_loss': 2.5196447372436523, 'epoch': 2.0})

In [7]:
# Directory that receives the trained adapter weights and the tokenizer files.
OUT_DIR = "finetuned-lora-llm"
os.makedirs(OUT_DIR, exist_ok=True)

# Persist the PEFT-wrapped model first, then the tokenizer, into the same folder.
for artifact in (model, tok):
    artifact.save_pretrained(OUT_DIR)

print("Saved adapters and tokenizer to", OUT_DIR)

Saved adapters and tokenizer to finetuned-lora-llm


In [8]:
from peft import PeftModel

# The model may still be in training mode after trainer.train(), which would
# keep LoRA dropout active while sampling; switch to eval mode before generating.
model.eval()

input_text = "Once upon a time"
inputs = tok(input_text, return_tensors="pt").to(device)
# Passing pad_token_id explicitly avoids the "Setting `pad_token_id` to
# `eos_token_id`" notice that generate() otherwise emits.
generated = model.generate(**inputs, max_length=50, pad_token_id=tok.pad_token_id)
print("Generation (from current model):")
print(tok.decode(generated[0], skip_special_tokens=True))

# Rebuild the fine-tuned model from scratch: a fresh base checkpoint plus the
# adapter weights that were written to OUT_DIR. Loading from disk (instead of
# copying the in-memory state dict with strict=False) verifies that the saved
# artifacts are actually usable and does not silently ignore mismatched keys.
base = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
base.resize_token_embeddings(len(tok))
base = PeftModel.from_pretrained(base, OUT_DIR)
base.to(device)
base.eval()

inputs = tok("Many human mental activities such as developing computer programs,", return_tensors="pt").to(device)
out = base.generate(**inputs, max_length=80, pad_token_id=tok.pad_token_id)
print("\nGeneration (after re-loading adapters):")
print(tok.decode(out[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generation (from current model):
Once upon a time, the person who is in the position of the person who is in the position of the person who is in the position of the person who is in the position of the person who is in the position of the person who is in


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Generation (after re-loading adapters):
Many human mental activities such as developing computer programs, reading, writing, and reading aloud are not limited to the physical world.

The human brain is also involved in many other areas of the body, including the brain, the heart, the lungs, and the digestive system.

The human brain is also involved in many other areas of the body, including the brain, the heart, the
