In [32]:
import torch
import torch.nn.functional as F
import sys

sys.path.extend(["/fs-computility/llm/shared/mayichuan/LLMs/Reproduce_Llama"])

from models import Transformer, multi_head_attention, model_utils
from training_utils.loss import LMLoss
from training_utils.dataset import LMdataset

model_args = Transformer.ModelArgs(
    hidden_size = 512,
    n_layers = 8,
    n_heads = 8,
    vocab_size = 32000,  # 使用mistral tokenizer
    multiple_of = 128,  # make SwiGLU hidden layer size multiple of large power of 2
    norm_eps = 1e-5,
    use_RMS = True,
    use_RoPE = True,
    max_batch_size = 32,
    max_seq_len = 2048,
)



In [None]:
# 从JSON字符串反序列化
with open('model_args.json', 'r') as f:
    loaded_model_args_dict = json.load(f)
# 创建新的ModelArgs实例
loaded_model_args = Transformer.ModelArgs(**loaded_model_args_dict)

In [33]:
model_path = '../model.pth'
model = Transformer.Transformer(model_args)
model.load_state_dict(torch.load(model_path, map_location='cpu'))
model.eval()  # 将模型设置为评估模式



Transformer(
  (tok_embeddings): Embedding(32001, 512)
  (layers): ModuleList(
    (0): TransformerBlock(
      (attention): Multi_Head_Self_Attention(
        (Wqkv): Linear(in_features=512, out_features=1536, bias=True)
        (attention): ScaledDotProductAttention(
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (Wo): Linear(in_features=512, out_features=512, bias=True)
        (norm): RMSNorm()
        (rope): Rotary_Positional_Embeedding()
      )
      (feed_forward): FeedForward(
        (w1): Linear(in_features=512, out_features=384, bias=True)
        (w2): Linear(in_features=384, out_features=512, bias=True)
        (w3): Linear(in_features=512, out_features=384, bias=True)
      )
      (attention_norm): RMSNorm()
      (ffn_norm): RMSNorm()
    )
    (1): TransformerBlock(
      (attention): Multi_Head_Self_Attention(
        (Wqkv): Linear(in_features=512, out_features=1536, bias=True)
        (attention): ScaledDotProductAttention(
          (dropout

In [34]:
from sentencepiece import SentencePieceProcessor
tokenizer_path = '/fs-computility/llm/shared/mayichuan/base_models/mistral/tokenizer.model'
sp = SentencePieceProcessor()
sp.load(tokenizer_path)
EOS_TOKEN = sp.eos_id()

In [39]:
# from torch.cuda.amp import autocast, GradScaler

# scaler = GradScaler()

# 编写推理函数
def generate_text(prompt, max_length):
    with torch.no_grad():  
        # 将prompt转换为模型输入格式
        input_data = sp.encode(prompt)
        for _ in range(max_length):

            # print(input_data)

            # 将数据转换为PyTorch张量
            input_tensor = torch.tensor(input_data, dtype=torch.long).unsqueeze(0)

            # 执行推理
            logits = model(input_tensor)

            # 获取最后一个token的预测分布
            next_token_logits = logits[:, -1]

            # 对预测分布进行采样
            probabilities = F.softmax(next_token_logits, dim=-1)
            next_token = torch.multinomial(probabilities, 1)

            input_data.append(next_token.item())
            # print(next_token.shape)
            # 将生成的token添加到prompt末尾
            prompt += sp.decode(next_token.item())

            # 如果生成了EOS标记，则停止生成
            if next_token.item() == EOS_TOKEN:
                break

    return prompt

# 初始化prompt
prompt = "The analytic lightcurve "

# 生成文本
generated_text = generate_text(prompt, max_length=100)
print(generated_text)


The analytic lightcurve 0\fig:d$.Intheparticlespectrum1- $x}{20$andg_{u(x_t)
}andtheabsenceofthecharacteristicsasgeneralisrepresentedasa120        &D(dl},\;length$\hat{\mathbf{1}{2>\multicolumn{ACBearing(\T}{\textwidth(tainingtheother“CondASC_{\matl^ENDset$forstellarmass~$19904
