Segmentation fault (core dumped) #120

Open
jameswu2014 opened this issue Sep 11, 2023 · 0 comments

jameswu2014 commented Sep 11, 2023

Hi, when I use TensorParallelPreTrainedModel I can load the model normally, but when I invoke the generate API it crashes with a segmentation fault. This is my code:

```python
import os
import torch
import platform
import subprocess
from colorama import Fore, Style
from tempfile import NamedTemporaryFile
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.utils import GenerationConfig
from tensor_parallel import TensorParallelPreTrainedModel

def init_model():
    print("init model ...")
    model_id = 'Baichuan2-7B-Chat'
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        # device_map="auto",
        trust_remote_code=True
    )
    model = TensorParallelPreTrainedModel(model, ["cuda:0", "cuda:1"])
    model.generation_config = GenerationConfig.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        use_fast=False,
        trust_remote_code=True
    )
    return model, tokenizer

def clear_screen():
    if platform.system() == "Windows":
        os.system("cls")
    else:
        os.system("clear")
    # "Welcome to the Baichuan model. Type to chat, 'vim' for multi-line input,
    # 'clear' to reset history, CTRL+C to interrupt generation,
    # 'stream' to toggle streaming, 'exit' to quit."
    print(Fore.YELLOW + Style.BRIGHT + "欢迎使用百川大模型,输入进行对话,vim 多行输入,clear 清空历史,CTRL+C 中断生成,stream 开关流式生成,exit 结束。")
    return []

def vim_input():
    with NamedTemporaryFile() as tempfile:
        tempfile.close()
        subprocess.call(['vim', '+star', tempfile.name])
        text = open(tempfile.name).read()
    return text

def main(stream=True):
    model, tokenizer = init_model()
    messages = []
    while True:
        prompt = input(Fore.GREEN + Style.BRIGHT + "\n用户:" + Style.NORMAL)  # "User:"
        if prompt.strip() == "exit":
            break
        if prompt.strip() == "clear":
            messages = clear_screen()
            continue
        if prompt.strip() == 'vim':
            prompt = vim_input()
            print(prompt)
        print(Fore.CYAN + Style.BRIGHT + "\nBaichuan:" + Style.NORMAL, end='')
        if prompt.strip() == "stream":
            stream = not stream
            # "(streaming generation enabled/disabled)"
            print(Fore.YELLOW + "({}流式生成)\n".format("开启" if stream else "关闭"), end='')
            continue
        messages.append({"role": "user", "content": prompt})
        if stream:
            position = 0
            try:
                inputs = tokenizer("你好,你是谁?", return_tensors='pt')  # "Hello, who are you?"
                response = model.generate(inputs['input_ids'].cuda(), max_new_tokens=512)  # segfaults here
                # for response in model.chat(tokenizer, messages, stream=True):
                print(tokenizer.batch_decode(response[position:].tolist()), end='', flush=True)
                position = len(response)
                if torch.backends.mps.is_available():
                    torch.mps.empty_cache()
            except KeyboardInterrupt:
                pass
            print()
        else:
            response = model.chat(tokenizer, messages)
            print(response)
            if torch.backends.mps.is_available():
                torch.mps.empty_cache()
        messages.append({"role": "assistant", "content": response})
        print(Style.RESET_ALL)

if __name__ == "__main__":
    main()
```
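For reference, the crash can presumably be reproduced without the chat loop. Below is a minimal sketch distilled from the script above, assuming the Baichuan2-7B-Chat weights are available locally and at least two CUDA devices are visible; per the report, loading succeeds and the fault occurs only at the generate() call.

```python
# Minimal reproduction sketch (assumptions: local Baichuan2-7B-Chat weights,
# two visible CUDA devices). Loading works; generate() segfaults.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from tensor_parallel import TensorParallelPreTrainedModel

model_id = 'Baichuan2-7B-Chat'
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, trust_remote_code=True
)
model = TensorParallelPreTrainedModel(model, ["cuda:0", "cuda:1"])  # loads fine
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False, trust_remote_code=True)

inputs = tokenizer("你好,你是谁?", return_tensors='pt')  # "Hello, who are you?"
output = model.generate(inputs['input_ids'].cuda(), max_new_tokens=512)  # crash reported here
print(tokenizer.batch_decode(output.tolist()))
```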
