diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index 17575c700..18caa1a2e 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -594,7 +594,8 @@ def chat_completion_handler(
             tool_choice=tool_choice,
         )
         prompt = llama.tokenize(
-            result.prompt.encode("utf-8"),
+            vocab=llama.llama_model_get_vocab(model),
+            text=result.prompt.encode("utf-8"),
             add_bos=not result.added_special,
             special=True,
         )