In [1]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", device_map="auto")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [2]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rot

In [3]:
messages = [
    {"role": "user", "content": "Who was the winner of 2022 world cup?"},
]
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

The winner of the 2022 FIFA World Cup was Argentina, who defeated France in the final match on July 18, 2022.</s>


In [4]:
import torch

# total trainable parameters
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params:,}")

# total parameters (trainable + frozen)
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")


Trainable parameters: 1,100,048,384
Total parameters: 1,100,048,384


In [5]:
messages = [
    {"role": "user", "content": "Who was the winner of the 2024 Olympic men’s football tournament?"},
]
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

The winner of the 2024 Olympic men's football tournament is yet to be determined. The 2024 Summer Olympics will be held in Paris, France, and the tournament


In [6]:
# Reference https://www.fifa.com/en/tournaments/olympicgames/paris2024/articles/medal-winners-mens-tournament

text="""
The 2024 edition of the Olympic Men’s Football Tournament has now concluded, with Spain taking gold at one of the world’s greatest sporting events.

Football was first included at the Olympic Games at Paris 1900 – and Ferenc Puskas, Lionel Messi and Neymar are among a wealth of iconic figures who have lit up the competition over the years.

After France hosted the tournament again in 2024, FIFA lists the most successful nations in the event's rich history.
"""

In [7]:
from peft import LoraConfig, get_peft_model, PeftModel


# -------------------------------------------------------
# LoRA Config
# -------------------------------------------------------
lora_config = LoraConfig(
    r=8,  #controls the size and capacity of the LoRA adapter
    lora_alpha=16,   #rescales the LoRA update before adding it to the base weight
    lora_dropout=0.1,  # Dropout applied only on the LoRA update, not the base model
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # correct for most LLMs
)

model = get_peft_model(model, lora_config)



In [8]:
# total parameters (trainable + frozen)
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")

# total trainable parameters
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params:,}")


Total parameters: 1,102,301,184
Trainable parameters: 2,252,800


In [9]:
lora_params = sum(
    p.numel() for n, p in model.named_parameters() if "lora" in n
)
print(f"LoRA parameters: {lora_params:,}")


LoRA parameters: 2,252,800


In [10]:
model

PeftModel(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear(
             

In [11]:
from datasets import Dataset
from transformers import Trainer, TrainingArguments

# -------------------------------------------------------
# Dataset — plain text → one sequence
# -------------------------------------------------------

train_text = tokenizer.apply_chat_template([
      {"role": "user", "content": "Who was the winner of the 2024 Olympic men's football tournament?"},
      {"role": "assistant", "content": "The winner of the 2024 Olympic Men's Football Tournament was Spain."},
], tokenize=False)

dataset = Dataset.from_dict({"text": [train_text]})
# dataset = Dataset.from_dict({"text": [text]})
# -------------------------------------------------------
# Tokenization
# -------------------------------------------------------
def tokenize(batch):
    out = tokenizer(
        batch["text"],
        truncation=True,
        max_length=512,
    )
    out["labels"] = out["input_ids"].copy()
    return out

train_dataset = dataset.map(tokenize, batched=True, remove_columns=["text"])

# -------------------------------------------------------
# TrainingArguments
# -------------------------------------------------------
args = TrainingArguments(
    report_to="none",
    output_dir="outputs",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,   # matters when dataset is tiny
    num_train_epochs=30,             # tiny dataset → need MORE epochs
    learning_rate=5e-4,              # high LR for tiny dataset
    lr_scheduler_type="constant",
    logging_steps=1,
    save_steps=10,
    remove_unused_columns=False,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
)

trainer.train()


Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Step,Training Loss
1,1.142162
2,0.918392
3,0.68603
4,0.484018
5,0.322374
6,0.199348
7,0.115113
8,0.056454
9,0.022701
10,0.005162


TrainOutput(global_step=30, training_loss=0.1322532782513008, metrics={'train_runtime': 5.3037, 'train_samples_per_second': 5.656, 'train_steps_per_second': 5.656, 'total_flos': 10450593054720.0, 'train_loss': 0.1322532782513008, 'epoch': 30.0})

In [12]:
# -------------------------------------------------------
# Save LoRA adapter
# -------------------------------------------------------
model.save_pretrained("outputs/lora")


# Load LoRA adapter
The following is the code for loading the trained LoRA adapter with the TinyLlama model, which could be run in another runtime. The training code above have already been using the adapter with the model so no need to load LoRA again.  

In [13]:
from peft import PeftModel

adapter_path = "outputs/lora"

# Load LoRA adapter on top of the base model
#trained_model = PeftModel.from_pretrained(model, adapter_path)
base_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", device_map="auto")
trained_model = PeftModel.from_pretrained(base_model, "outputs/lora")

trained_model.eval()

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]



PeftModel(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear(
             

In [14]:
content = "Who was the winner of the 2024 Olympic men’s football tournament?"
messages = [
    {"role": "user", "content": content},
]
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(trained_model.device)

outputs = trained_model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

The winner of the 2024 Olympic Men's Football Tournament was Spain.</s>


In [15]:
# Merge model and adapter

merged_model = trained_model.merge_and_unload()
merged_model.save_pretrained("merged_model/")
tokenizer.save_pretrained("merged_model/")


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

('merged_model/tokenizer_config.json',
 'merged_model/chat_template.jinja',
 'merged_model/tokenizer.json')

In [16]:
content = "Who was the winner of the 2024 Olympic men’s football tournament?"
messages = [
    {"role": "user", "content": content},
]
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(trained_model.device)

outputs = merged_model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

The winner of the 2024 Olympic Men's Football Tournament was Spain.</s>


In [19]:
# Login HuggingFace
from huggingface_hub import login
login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [21]:
from huggingface_hub import create_repo

create_repo("KevinXie0131/my_lora_finetuning1", private=False)  # 可以改成 public


RepoUrl('https://huggingface.co/KevinXie0131/my_lora_finetuning1', endpoint='https://huggingface.co', repo_type='model', repo_id='KevinXie0131/my_lora_finetuning1')

In [23]:
from huggingface_hub import upload_folder

upload_folder(
    folder_path="merged_model/",
    repo_id="KevinXie0131/my_lora_finetuning1",
)


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...d_model/model.safetensors:   2%|1         | 41.9MB / 2.20GB            

CommitInfo(commit_url='https://huggingface.co/KevinXie0131/my_lora_finetuning1/commit/17e534ed6a6739b582a32eba6216305f26d8d8be', commit_message='Upload folder using huggingface_hub', commit_description='', oid='17e534ed6a6739b582a32eba6216305f26d8d8be', pr_url=None, repo_url=RepoUrl('https://huggingface.co/KevinXie0131/my_lora_finetuning1', endpoint='https://huggingface.co', repo_type='model', repo_id='KevinXie0131/my_lora_finetuning1'), pr_revision=None, pr_num=None)