#llama3.2 from scratch

In [2]:
from transformers import AutoTokenizer
import json
import os
import torch

In [4]:
# 读取并查看模型参数
with open("/HOME/scz0101/run/model_acceleration/models/config.json", "r") as f:
    config = json.load(f)
print(config)

{'architectures': ['LlamaForCausalLM'], 'attention_bias': False, 'attention_dropout': 0.0, 'bos_token_id': 128000, 'eos_token_id': [128001, 128008, 128009], 'head_dim': 128, 'hidden_act': 'silu', 'hidden_size': 3072, 'initializer_range': 0.02, 'intermediate_size': 8192, 'max_position_embeddings': 131072, 'mlp_bias': False, 'model_type': 'llama', 'num_attention_heads': 24, 'num_hidden_layers': 28, 'num_key_value_heads': 8, 'pretraining_tp': 1, 'rms_norm_eps': 1e-05, 'rope_scaling': {'factor': 32.0, 'high_freq_factor': 4.0, 'low_freq_factor': 1.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}, 'rope_theta': 500000.0, 'tie_word_embeddings': True, 'torch_dtype': 'bfloat16', 'transformers_version': '4.45.0.dev0', 'use_cache': True, 'vocab_size': 128256}


In [5]:
# 读取模型权重
from safetensors import safe_open
weights_root = "/HOME/scz0101/.cache/huggingface/hub/models--meta-llama--Llama-3.2-3B-Instruct/snapshots/0cb88a4f764b7a12671c53f0838cd831a0843b95"
# 文件路径
file1 = os.path.join(weights_root, "model-00001-of-00002.safetensors")
file2 = os.path.join(weights_root, "model-00002-of-00002.safetensors")

# 加载第一个文件
with safe_open(file1, framework="pt", device="cpu") as f:
    state_dict1 = {key: f.get_tensor(key) for key in f.keys()}
# 查看key和size
print(json.dumps(list(state_dict1.keys()), indent=4)) ## layer0~20
print("model.embed_tokens.weight", state_dict1["model.embed_tokens.weight"].shape) # vob_size*hidden_size
print("model.layers.0.input_layernorm.weight", state_dict1["model.layers.0.input_layernorm.weight"].shape) #hidden_size
print("model.layers.0.mlp.down_proj.weight", state_dict1["model.layers.0.mlp.down_proj.weight"].shape) #hidden_size * intermediate_size
print("model.layers.0.mlp.gate_proj.weight", state_dict1["model.layers.0.mlp.gate_proj.weight"].shape) # intermediate_size * hidden_size 
print("model.layers.0.mlp.up_proj.weight", state_dict1["model.layers.0.mlp.up_proj.weight"].shape) #intermediate_size * hidden_size 
print("model.layers.0.post_attention_layernorm.weight",state_dict1["model.layers.0.post_attention_layernorm.weight"].shape) #hidden_size
print("model.layers.0.self_attn.k_proj.weight",state_dict1["model.layers.0.self_attn.k_proj.weight"].shape) #(head_dim*num_key_value_heads)*hidden_size
print("model.layers.0.self_attn.o_proj.weight",state_dict1["model.layers.0.self_attn.o_proj.weight"].shape) #(head_dim*num_attention_heads)*hidden_size
print("model.layers.0.self_attn.q_proj.weight",state_dict1["model.layers.0.self_attn.q_proj.weight"].shape) #(head_dim*num_attention_heads)*hidden_size
print("model.layers.0.self_attn.v_proj.weight",state_dict1["model.layers.0.self_attn.v_proj.weight"].shape) #(head_dim*num_key_value_heads)*hidden_size

# # 加载第二个文件(为了节省时间，可以先skip)
# with safe_open(file2, framework="pt", device="cpu") as f:
#     state_dict2 = {key: f.get_tensor(key) for key in f.keys()}

# # 合并两个状态字典
# state_dict = {**state_dict1, **state_dict2}

# # 查看模型的权重key
# print(json.dumps(list(state_dict2.keys()), indent=4)) ## layer21~27

[
    "model.embed_tokens.weight",
    "model.layers.0.input_layernorm.weight",
    "model.layers.0.mlp.down_proj.weight",
    "model.layers.0.mlp.gate_proj.weight",
    "model.layers.0.mlp.up_proj.weight",
    "model.layers.0.post_attention_layernorm.weight",
    "model.layers.0.self_attn.k_proj.weight",
    "model.layers.0.self_attn.o_proj.weight",
    "model.layers.0.self_attn.q_proj.weight",
    "model.layers.0.self_attn.v_proj.weight",
    "model.layers.1.input_layernorm.weight",
    "model.layers.1.mlp.down_proj.weight",
    "model.layers.1.mlp.gate_proj.weight",
    "model.layers.1.mlp.up_proj.weight",
    "model.layers.1.post_attention_layernorm.weight",
    "model.layers.1.self_attn.k_proj.weight",
    "model.layers.1.self_attn.o_proj.weight",
    "model.layers.1.self_attn.q_proj.weight",
    "model.layers.1.self_attn.v_proj.weight",
    "model.layers.10.input_layernorm.weight",
    "model.layers.10.mlp.down_proj.weight",
    "model.layers.10.mlp.gate_proj.weight",
    "model.

In [6]:
#先跳过实现tokenizer，可以直接使用autotokenizer，后续再来实现
model_name_or_path = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# 创建输入
input_text = "how are you?"
inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
print(inputs["input_ids"])
print(tokenizer.decode(inputs["input_ids"][0].tolist(), skip_special_tokens=True))

tensor([[128000,   5269,    527,    499,     30]])


TypeError: argument 'ids': 'list' object cannot be interpreted as an integer

In [8]:
#开始创建embeding_layer
embedding_layer = torch.nn.Embedding(config['vocab_size'], config["hidden_size"])
embedding_layer.weight.data.copy_(state_dict1["model.embed_tokens.weight"])

tensor([[ 1.1292e-02,  9.9487e-03,  1.4160e-02,  ..., -3.5706e-03,
         -1.9775e-02,  5.3711e-03],
        [ 1.3245e-02, -3.8385e-05,  2.2461e-02,  ..., -2.6550e-03,
          3.1738e-02, -1.0681e-03],
        [ 1.9775e-02,  2.0020e-02,  2.8687e-02,  ..., -3.5248e-03,
          3.1433e-03, -7.6294e-03],
        ...,
        [-3.0975e-03,  2.1057e-03,  4.8828e-03,  ..., -2.0905e-03,
         -1.2207e-03, -2.8992e-03],
        [-3.0975e-03,  2.1057e-03,  4.8828e-03,  ..., -2.0905e-03,
         -1.2207e-03, -2.8992e-03],
        [-3.0975e-03,  2.1057e-03,  4.8828e-03,  ..., -2.0905e-03,
         -1.2207e-03, -2.8992e-03]])