<table style="width:100%">
<tr>
<td style="vertical-align:middle; text-align:left;">
<font size="2">
Supplementary code for the <a href="http://mng.bz/orYv">Build a Large Language Model From Scratch</a> book by <a href="https://sebastianraschka.com">Sebastian Raschka</a><br>
<br>Code repository: <a href="https://github.com/rasbt/LLMs-from-scratch">https://github.com/rasbt/LLMs-from-scratch</a>
</font>
</td>
<td style="vertical-align:middle; text-align:left;">
<a href="http://mng.bz/orYv"><img src="https://sebastianraschka.com/images/LLMs-from-scratch-images/cover-small.webp" width="100px"></a>
</td>
</tr>
</table>

# Load And Use Finetuned Model
# 加载和使用微调模型

This notebook contains minimal code to load the model that was instruction-finetuned and saved in chapter 7 via [ch07.ipynb](ch07.ipynb).

本笔记本包含最少量的代码，用于加载在第7章中通过[ch07.ipynb](ch07.ipynb)完成指令微调并保存的模型。

In [1]:
from importlib.metadata import version

# Packages this notebook depends on; their installed versions are printed
# so the environment that produced the outputs is recorded with the notebook.
pkgs = [
    "tiktoken",  # tokenizer
    "torch",     # deep learning library
]
for pkg_name in pkgs:
    installed = version(pkg_name)
    print(f"{pkg_name} version: {installed}")

tiktoken version: 0.7.0
torch version: 2.4.0


In [2]:
from pathlib import Path

# Location of the checkpoint with the instruction-finetuned weights.
finetuned_model_path = Path("gpt2-medium355M-sft.pth")

# Only warn when the checkpoint is missing: the message tells the reader
# how to create it, and execution of the notebook continues.
if not finetuned_model_path.exists():
    print(
        f"Could not find '{finetuned_model_path}'.\n",
        "Run the `ch07.ipynb` notebook to finetune and save the finetuned model.",
        sep="",
    )

In [3]:
# 从前面的章节导入GPT模型类
from previous_chapters import GPTModel


# Which of the model sizes listed below to instantiate
CHOOSE_MODEL = "gpt2-medium (355M)"

# Size-specific settings for each GPT-2 variant
model_configs = {
    "gpt2-small (124M)": {
        "emb_dim": 768, "n_layers": 12, "n_heads": 12,
    },
    "gpt2-medium (355M)": {
        "emb_dim": 1024, "n_layers": 24, "n_heads": 16,
    },
    "gpt2-large (774M)": {
        "emb_dim": 1280, "n_layers": 36, "n_heads": 20,
    },
    "gpt2-xl (1558M)": {
        "emb_dim": 1600, "n_layers": 48, "n_heads": 25,
    },
}

# Settings shared by all model sizes
BASE_CONFIG = dict(
    vocab_size=50257,     # vocabulary size
    context_length=1024,  # context length
    drop_rate=0.0,        # dropout rate
    qkv_bias=True,        # query-key-value bias
)

# Merge the size-specific settings of the chosen model into the shared ones
BASE_CONFIG.update(model_configs[CHOOSE_MODEL])

# Size label taken from the parenthesized suffix of the model name, e.g. "355M"
model_size = CHOOSE_MODEL.rsplit(" ", 1)[-1].lstrip("(").rstrip(")")

# Instantiate the GPT model with the merged configuration
model = GPTModel(BASE_CONFIG)

In [4]:
# 导入PyTorch库
import torch

# Load the finetuned weights into the model.
# The checkpoint location is taken from `finetuned_model_path` (the same path
# whose existence is checked earlier in this notebook) rather than repeating
# the filename, so the check and the load can never point at different files.
# - map_location=torch.device("cpu"): place the loaded tensors on the CPU
# - weights_only=True: load only the weight parameters
model.load_state_dict(torch.load(
    finetuned_model_path,
    map_location=torch.device("cpu"),
    weights_only=True
))

# Put the model into evaluation mode; the trailing semicolon keeps the
# notebook from echoing the value returned by `eval()`
model.eval();

In [5]:
# 导入tiktoken库用于分词
import tiktoken

# Get the GPT-2 tokenizer (tiktoken's "gpt2" encoding)
tokenizer = tiktoken.get_encoding("gpt2")

In [6]:
# Define the prompt: a short preamble followed by an "### Instruction:" section.
# The literal is whitespace-sensitive: the preamble contains a line break
# (preceded by a space) and the text ends with a newline after the instruction.
# NOTE(review): presumably this mirrors the prompt format used during
# finetuning in ch07.ipynb — confirm before changing any whitespace.
prompt = """Below is an instruction that describes a task. Write a response 
that appropriately completes the request.

### Instruction:
Convert the active sentence to passive: 'The chef cooks the meal every day.'
"""

In [7]:
# 从previous_chapters模块导入所需函数
from previous_chapters import (
    generate,
    text_to_token_ids,
    token_ids_to_text
)

def extract_response(response_text, input_text):
    """Return the model's answer without the prompt and the response marker.

    Args:
        response_text: Full decoded text (prompt followed by the generated
            continuation).
        input_text: The prompt; only its length is used, to skip that many
            leading characters of `response_text`.

    Returns:
        The remaining text with every "### Response:" occurrence removed and
        leading/trailing whitespace stripped.
    """
    generated_part = response_text[len(input_text):]
    without_marker = generated_part.replace("### Response:", "")
    return without_marker.strip()

# Fix the random seed so that repeated runs give reproducible results
torch.manual_seed(123)

# Convert the prompt text into token IDs for the model
prompt_token_ids = text_to_token_ids(prompt, tokenizer)

# Generate at most 35 new tokens; 50256 is passed as `eos_id`
token_ids = generate(
    model=model,
    idx=prompt_token_ids,
    max_new_tokens=35,
    context_size=BASE_CONFIG["context_length"],
    eos_id=50256,
)

# Decode the token IDs back into text, keep only the model's actual answer
# (prompt and "### Response:" marker removed), and print it
response = extract_response(token_ids_to_text(token_ids, tokenizer), prompt)
print(response)

The meal is cooked every day by the chef.
