In [None]:
# Uncomment to install the dependency into this kernel's environment:
# %pip install transformers

In [None]:
import torch
from transformers import AutoTokenizer, AutoModel

In [None]:
def print_nparams(model):
    """Print how many parameters `model` holds in total.

    Adds up ``numel()`` over every item yielded by ``model.parameters()``
    and writes the result to stdout. Nothing is returned.
    """
    total = 0
    for param in model.parameters():
        total += param.numel()
    print(f"The total number of parameters is: {total}")

In [None]:
model_name = "/root/share/new_models/Shanghai_AI_Laboratory/internlm2_5-1_8b-chat"

# Load the tokenizer stored alongside the checkpoint; trust_remote_code lets
# the checkpoint's own Python code run.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# Load the model weights and move the model onto the GPU.
model = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
).to("cuda")

In [None]:
# Print the model object's string representation.
print(model)

In [None]:
# Report the total parameter count of `model`.
# NOTE(review): the figure in the trailing comment looks like a previously
# observed result — confirm by re-running.
print_nparams(model)  # 1889110016 => 1.9B

In [None]:
model_name = "/root/share/new_models/Shanghai_AI_Laboratory/internlm2_5-20b-chat"

# Load the tokenizer for the 20B checkpoint; trust_remote_code lets the
# checkpoint's own Python code run.
tokenizer_20b = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Load the 20B model in half precision. Without an explicit dtype the weights
# are loaded as float32 (4 bytes per parameter, roughly 80 GB for 20B
# parameters), which is very likely to run out of memory before or during the
# move to the GPU. float16 halves that footprint and does not change the
# parameter count reported by print_nparams.
model_20b = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,
).to("cuda")


In [None]:
# Print the 20B model object's string representation.
print(model_20b)

In [None]:
# Report the total parameter count of `model_20b`.
print_nparams(model_20b) 

# 🧑‍💻 Chat with the model

In [None]:
def generate_response(prompt, max_length=2048):
    """Generate the assistant's next reply for a ``Human:``/``Assistant:`` prompt.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: The conversation text so far, normally ending with the
            ``Assistant: `` cue so the model continues with its answer.
        max_length: Forwarded to ``model.generate`` as ``max_length``. In
            Hugging Face generation this bounds prompt tokens plus newly
            generated tokens, so a long prompt leaves less room for the reply.

    Returns:
        The newly generated reply as a stripped string. The prompt itself is
        not included, and anything from a follow-up ``Human:`` turn onwards
        (should the model keep writing the dialogue) is dropped.
    """
    # Put the inputs wherever the model lives instead of assuming "cuda".
    inputs = tokenizer(
        prompt, return_tensors="pt", padding=True, return_attention_mask=True
    ).to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            num_return_sequences=1,
            # temperature / top_p only take effect when sampling is enabled;
            # without do_sample=True generation is greedy and ignores them.
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
        )

    # The returned sequence starts with the prompt tokens. Decode only what
    # was generated after them, so marker strings inside the history or the
    # user's own text cannot confuse the extraction.
    new_tokens = outputs[0][prompt_len:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # If the model went on to invent the next human turn, keep only its answer.
    reply = reply.split("Human:", 1)[0].strip()
    # Drop a repeated role marker if the model echoed one before answering.
    if reply.startswith("Assistant:"):
        reply = reply[len("Assistant:"):].strip()
    return reply

In [None]:
print("开始聊天! (输入 'quit' 结束对话)")

# Seed conversation in the "Human:/Assistant:" format. Every completed turn
# ends with a newline, and each new turn adds its own role marker.
chat_history = "<s>Human: 你好，请介绍一下你自己。\nAssistant: 您好！我是 InternLM2，一个由上海人工智能实验室开发的大型语言模型。我被设计用来协助用户完成各种任务，包括回答问题、提供信息、进行对话等。我拥有广泛的知识库，可以讨论多种主题，但请记住，我的知识可能有一定局限性，并且可能不总是完全准确。我会尽力为您提供有用的信息和帮助。有什么我可以为您做的吗？\nHuman: 明白了，谢谢。接下来我们开始聊天吧。\nAssistant: 非常好，我很高兴能与您聊天。您有什么特别想讨论的话题吗？或者您有什么问题想问我？无论是日常生活、工作学习，还是科技、文化、历史等方面的话题，我都很乐意与您交流。请随意开始我们的对话吧！\n"

while True:
    user_input = input("Human: ")
    message = user_input.strip()
    # Accept the exit word regardless of case or surrounding whitespace.
    if message.lower() == 'quit':
        break
    # Nothing to send: ask again instead of prompting the model with an empty turn.
    if not message:
        continue

    chat_history += f"Human: {user_input}\nAssistant: "
    response = generate_response(chat_history)
    # Close the assistant turn with a newline only. The next iteration adds
    # its own "Human: " marker, so appending one here as well would produce
    # "Human: Human: ..." in the prompt.
    chat_history += f"{response}\n"

    print("Assistant:", response)

print("对话结束!")