In [8]:
from modelscope import GPT2Tokenizer,GPT2LMHeadModel
import torch

# 1. Pick a lightweight Chinese model that fits in limited memory
MODEL_NAME = 'Fengshenbang/Wenzhong-GPT2-110M-chinese-v2'  # 110M-parameter Chinese GPT-2

# 2. Load the tokenizer and the model on CPU
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
model = GPT2LMHeadModel.from_pretrained(
    MODEL_NAME,
    device_map="cpu",  # force CPU placement
    # float32 is full precision, so this setting does NOT shrink the memory
    # footprint; it is kept explicit so the dtype used on CPU is unambiguous.
    torch_dtype=torch.float32,
    # `trust_remote_code` is intentionally not passed: it only applies to the
    # Auto classes and is ignored (with a warning) by a concrete class such as
    # GPT2LMHeadModel.
)

# 3. Low-memory inference settings
model.config.use_cache = False  # do not keep key/value caches between calls
torch.set_grad_enabled(False)   # inference only: disable autograd globally

# 4. 内存优化的文本生成函数
def generate_text(prompt, max_length=50, context_window=64):
    """Greedily continue ``prompt`` one token at a time.

    At every step only the trailing ``context_window`` tokens of the running
    sequence are fed to the model, which bounds the size of each forward
    pass. As a consequence, tokens that have slid out of the window no longer
    influence the next prediction.

    Args:
        prompt: Text to continue. It must encode to at least one token.
        max_length: Maximum number of NEW tokens to generate; the prompt
            tokens are not counted against this limit.
        context_window: Number of trailing tokens passed to the model per
            step. Must be a positive integer.

    Returns:
        The decoded prompt followed by the generated tokens, with special
        tokens stripped.

    Raises:
        ValueError: If ``context_window`` is not positive, or if ``prompt``
            encodes to zero tokens (there would be nothing to condition on).
    """
    if context_window < 1:
        raise ValueError("context_window must be a positive integer")

    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    if input_ids.shape[1] == 0:
        raise ValueError("prompt must encode to at least one token")

    # Running sequence: prompt tokens plus everything generated so far.
    output = input_ids
    eos_token_id = tokenizer.eos_token_id

    # Local no_grad so the function does not depend on a global autograd
    # switch having been flipped elsewhere in the notebook.
    with torch.no_grad():
        for _ in range(max_length):
            # A negative slice already yields the whole sequence when it is
            # shorter than the window, so no explicit min() is needed.
            inputs = output[:, -context_window:]

            logits = model(inputs).logits

            # Greedy decoding: take the highest-scoring token at the last position.
            next_token = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)

            output = torch.cat((output, next_token), dim=1)

            # Stop as soon as the end-of-sequence token is produced.
            if next_token.item() == eos_token_id:
                break

    return tokenizer.decode(output[0], skip_special_tokens=True)

# 5. Smoke test: generate a continuation for a sample prompt and print it.
#    max_length is spelled out here; 50 matches generate_text's default.
prompt = "自然语言处理是"
result = generate_text(prompt=prompt, max_length=50)
print("生成结果:", result)


Downloading Model from https://www.modelscope.cn to directory: C:\Users\czx\.cache\modelscope\hub\models\Fengshenbang\Wenzhong-GPT2-110M-chinese-v2


2025-07-22 22:23:15,960 - modelscope - INFO - Got 7 files, start to download ...
Processing 7 items:   0%|          | 0.00/7.00 [00:00<?, ?it/s]
[A

[A[A


[A[A[A





[A[A[A[A[A[A




[A[A[A[A[A



Downloading [config.json]: 100%|██████████| 783/783 [00:00<00:00, 1.91kB/s]
Processing 7 items:  14%|█▍        | 1.00/7.00 [00:00<00:02, 2.32it/s]
Downloading [configuration.json]: 100%|██████████| 155/155 [00:00<00:00, 360B/s]






Downloading [tokenizer_config.json]: 100%|██████████| 236/236 [00:00<00:00, 507B/s]


Downloading [merges.txt]: 100%|██████████| 446k/446k [00:00<00:00, 911kB/s]
Processing 7 items:  57%|█████▋    | 4.00/7.00 [00:00<00:00, 9.09it/s]


Downloading [special_tokens_map.json]: 100%|██████████| 90.0/90.0 [00:00<00:00, 183B/s]





Downloading [vocab.json]: 100%|██████████| 779k/779k [00:00<00:00, 1.63MB/s]




[A[A[A[A



Downloading [tokenizer.json]: 100%|██████████| 1.29M/1.29M [00:01<00:00, 1.14MB/s]
Processing 7 items: 100%|██████████| 7.00/

Downloading Model from https://www.modelscope.cn to directory: C:\Users\czx\.cache\modelscope\hub\models\Fengshenbang\Wenzhong-GPT2-110M-chinese-v2


2025-07-22 22:23:18,463 - modelscope - INFO - Got 2 files, start to download ...
Processing 2 items:   0%|          | 0.00/2.00 [00:00<?, ?it/s]
[A
Downloading [README.md]: 100%|██████████| 3.59k/3.59k [00:00<00:00, 12.2kB/s]
Downloading [pytorch_model.bin]: 100%|██████████| 261M/261M [01:01<00:00, 4.46MB/s]
Processing 2 items: 100%|██████████| 2.00/2.00 [01:01<00:00, 30.8s/it]
2025-07-22 22:24:19,981 - modelscope - INFO - Download model 'Fengshenbang/Wenzhong-GPT2-110M-chinese-v2' successfully.
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


生成结果: 自然语言处理是一个很好的方法，但是在计算机系统中，计算机程序
