In [31]:
from transformers import AutoTokenizer,AutoModelForCausalLM
import torch

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
Token = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

使用TinyLlama做測試與學習

Token與model的Pretrained

函數庫:

AutoTokenizer  對應Token

AutoModelForCausalLM 對應自然語言LLM

In [32]:
print(Token)

LlamaTokenizerFast(name_or_path='TinyLlama/TinyLlama-1.1B-Chat-v1.0', vocab_size=32000, model_max_length=2048, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)


Tokenizer = 把文字切成 token ID 的工具

vocab_size=32000：字典大小，有 32k 個 token（詞彙單位）

model_max_length=2048：最多能處理 2048 個 token 的序列

special_tokens：像 BOS <s>、EOS </s>，控制輸入/輸出的邊界

這是「輸入管道」：文字先經過 tokenizer → 轉成數字 ID

In [33]:
print(model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rotary_emb): 

Embedding(32000, 2048)
每個 token ID 對應一個 2048 維的向量

22 層 LlamaDecoderLayer
這就是 Transformer Decoder 堆疊

每層有 Self-Attention（Q/K/V 投影 + 輸出）

MLP 前饋層（gate_proj → up_proj → down_proj）

LayerNorm（RMSNorm）

LlamaAttention 裡特別之處：
q_proj: 2048→2048
k_proj, v_proj: 2048→256（壓縮了維度 → 減少運算量）
o_proj: 2048→2048

RotaryEmbedding (RoPE)
用於 attention 的位置編碼，讓模型理解 token 順序

lm_head (Linear 2048→32000)
最後一層，全連接層，輸出每個 token 的機率分佈

In [34]:
from torchsummary import summary

In [35]:
#summary(model, input_size=(1, 2048))

#summary通常適用在單個模型的打印

In [36]:
model.num_parameters()


1100048384

In [37]:
sum(p.numel() for p in model.parameters())

1100048384

這是參數量的打印方式

In [38]:
print(model.config)

LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5632,
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 22,
  "num_key_value_heads": 4,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.55.4",
  "use_cache": true,
  "vocab_size": 32000
}



In [39]:
text = "你好，世界"
tokens = Token(text)

In [40]:
print(type(tokens))
print(tokens)
print(tokens.input_ids)
print(type(tokens.input_ids))
print(type(tokens.attention_mask))

<class 'transformers.tokenization_utils_base.BatchEncoding'>
{'input_ids': [1, 29871, 30919, 31076, 30214, 30793, 30967], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}
[1, 29871, 30919, 31076, 30214, 30793, 30967]
<class 'list'>
<class 'list'>


In [41]:
input_ids = torch.tensor([tokens.input_ids])
attention_mask = torch.tensor([tokens.attention_mask])
print(type(input_ids))
print(input_ids.shape)
print(type(attention_mask))
print(attention_mask.shape)

<class 'torch.Tensor'>
torch.Size([1, 7])
<class 'torch.Tensor'>
torch.Size([1, 7])


轉換資料成Tensor的過程

In [42]:
text = "你好世界"

token_slow_list = Token(text)
print(token_slow_list)

token_slow = {
    "input_ids": torch.tensor([token_slow_list.input_ids]),
    "attention_mask" : torch.tensor([token_slow_list.attention_mask])
}

print("token_slow:", token_slow)

{'input_ids': [1, 29871, 30919, 31076, 30793, 30967], 'attention_mask': [1, 1, 1, 1, 1, 1]}
token_slow: {'input_ids': tensor([[    1, 29871, 30919, 31076, 30793, 30967]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}


In [48]:
token_fast = Token(text , return_tensors = "pt")
# return_tensors="pt" 是告訴 tokenizer：把輸出資料直接轉成 PyTorch tensor 格式
# 其他格式"tf"：TensorFlow tensor ,"np"：NumPy array
print("token_fast:", token_fast)
print(type(token_fast.input_ids))
print(token_fast.input_ids.shape)

token_fast: {'input_ids': tensor([[    1, 29871, 30919, 31076, 30793, 30967]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}
<class 'torch.Tensor'>
torch.Size([1, 6])


Token_slow: 表示原始資料逐步轉換成Tensor的過程

Token_fast: 表示直接轉換Tensor的過程

透過拆解步驟可以知道return_tensor這個參數做的事情

In [46]:
output = model(input_ids = input_ids , attention_mask = attention_mask)
print(output)

CausalLMOutputWithPast(loss=None, logits=tensor([[[-4.6822,  0.9866,  4.5126,  ..., -5.2010, -2.1646, -4.2286],
         [-3.2715, -2.7856,  7.6986,  ...,  1.9549, -4.0929,  2.0949],
         [-7.9883, -7.9795,  7.5687,  ...,  4.4904, -1.2023,  6.8971],
         ...,
         [-6.3004, -6.3287,  6.9831,  ...,  2.8098,  0.5169,  8.0749],
         [-4.4165, -4.4402,  4.9010,  ...,  1.3114, -2.0229,  3.1335],
         [-7.6092, -7.4820,  9.1446,  ...,  3.3143, -3.7030,  4.1683]]],
       grad_fn=<UnsafeViewBackward0>), past_key_values=DynamicCache(layers=[<transformers.cache_utils.DynamicLayer object at 0x00000209C12DB290>, <transformers.cache_utils.DynamicLayer object at 0x00000209C2727490>, <transformers.cache_utils.DynamicLayer object at 0x00000209C274FE10>, <transformers.cache_utils.DynamicLayer object at 0x00000209C278E690>, <transformers.cache_utils.DynamicLayer object at 0x00000209C278C610>, <transformers.cache_utils.DynamicLayer object at 0x00000209C11ED910>, <transformers.cache_u

<class 'transformers.modeling_outputs.CausalLMOutputWithPast'>
