From 6abcddfaf041ef77de56493ee34a89e0fe51b1a9 Mon Sep 17 00:00:00 2001 From: irexyc Date: Tue, 7 Nov 2023 13:56:53 +0000 Subject: [PATCH] fix tokenizer_info when converting the model --- .../turbomind/deploy/source_model/llama.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/lmdeploy/turbomind/deploy/source_model/llama.py b/lmdeploy/turbomind/deploy/source_model/llama.py index 296a2b70fe..f800260467 100644 --- a/lmdeploy/turbomind/deploy/source_model/llama.py +++ b/lmdeploy/turbomind/deploy/source_model/llama.py @@ -5,7 +5,6 @@ import torch from safetensors.torch import load_file -from sentencepiece import SentencePieceProcessor from lmdeploy.tokenizer import Tokenizer @@ -168,18 +167,11 @@ def get_mgrs(self): def tokenizer_info(self): """Read tokenizer info.""" - assert osp.isfile(self.tokenizer_path), self.tokenizer_path - try: - tk_model = SentencePieceProcessor(model_file=self.tokenizer_path) - # BOS / EOS token IDs - n_words = tk_model.vocab_size - bos_id = tk_model.bos_token_id - eos_id = tk_model.eos_token_id - except Exception: - tk_model = Tokenizer(self.model_path) - n_words = tk_model.vocab_size - bos_id = tk_model.bos_token_id - eos_id = tk_model.eos_token_id + assert osp.isdir(self.model_path), self.model_path + tk_model = Tokenizer(self.model_path) + n_words = tk_model.vocab_size + bos_id = tk_model.bos_token_id + eos_id = tk_model.eos_token_id return n_words, bos_id, eos_id def model_info(self):