7 changes: 5 additions & 2 deletions fastdeploy/input/text_processor.py
@@ -185,6 +185,9 @@ def __init__(self, model_name_or_path, reasoning_parser_obj=None, tool_parser_obj=None):
         from paddleformers.trl.llm_utils import get_eos_token_id

         self.eos_token_ids = get_eos_token_id(self.tokenizer, self.generation_config)
+        data_processor_logger.info(
+            f"The eos_token_ids obtained by merging tokenizer and generation_config is {self.eos_token_ids}"
+        )
         self.eos_token_id_len = len(self.eos_token_ids)
         self.pad_token_id = self.get_pad_id()
         self.reasoning_parser = None
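The added log line surfaces the merged stop-token set at startup, which helps when a model's generation_config declares extra stop tokens beyond the tokenizer's single eos id. As a rough sketch of what such a merge could look like (an assumption about get_eos_token_id's behavior, not paddleformers' actual code):

    # Hedged sketch of a tokenizer/generation_config eos merge; paddleformers'
    # real get_eos_token_id may differ in detail.
    def merge_eos_token_ids(tokenizer_eos_id, generation_config_eos):
        # generation_config.eos_token_id can be a single id or a list of ids.
        ids = list(generation_config_eos) if isinstance(generation_config_eos, (list, tuple)) else [generation_config_eos]
        if tokenizer_eos_id is not None and tokenizer_eos_id not in ids:
            ids.append(tokenizer_eos_id)
        return [i for i in ids if i is not None]

    print(merge_eos_token_ids(2, [128008, 128009]))  # -> [128008, 128009, 2]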
@@ -396,7 +399,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         is_end = response_dict["finished"]
         req_id = response_dict["request_id"]
         if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"):
-            if token_ids[-1] == self.tokenizer.eos_token_id:
+            if token_ids[-1] in self.eos_token_ids:
                 token_ids = token_ids[:-1]
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
@@ -434,7 +437,7 @@ def process_response_dict_streaming(self, response_dict, **kwargs):
         token_ids = response_dict["outputs"]["token_ids"]

         if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"):
-            if token_ids[-1] == self.tokenizer.eos_token_id:
+            if token_ids[-1] in self.eos_token_ids:
                 token_ids = token_ids[:-1]
         delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
         response_dict["outputs"]["raw_prediction"] = delta_text
@@ -199,13 +199,15 @@ def create_weights(self, layer: nn.Layer, **extra_weight_attrs):
             layer.up_gate_proj_weight,
             {
                 "weight_loader": extra_weight_attrs.get("weight_loader", default_weight_loader(layer.fd_config)),
                 "weight_need_transpose": extra_weight_attrs.get("model_format") == "torch",
+                "model_format": extra_weight_attrs.get("model_format", ""),
             },
         )
         set_weight_attrs(
             layer.down_proj_weight,
             {
                 "weight_loader": extra_weight_attrs.get("weight_loader", default_weight_loader(layer.fd_config)),
                 "weight_need_transpose": extra_weight_attrs.get("model_format") == "torch",
+                "model_format": extra_weight_attrs.get("model_format", ""),
             },
         )
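In the create_weights hunk above, set_weight_attrs tags each parameter with metadata that the checkpoint loader reads back later; recording the raw model_format alongside the existing weight_need_transpose flag presumably lets downstream loaders make format-specific decisions beyond a single transpose. A simplified, runnable stand-in for the pattern (not FastDeploy's real set_weight_attrs or weight loader):

    import numpy as np

    # Illustrative stand-ins for the attr-tagging pattern in create_weights;
    # not FastDeploy's actual implementation.
    class Param:
        pass

    def set_weight_attrs(param, attrs):
        # Stash loader metadata on the parameter object for later use.
        for key, value in attrs.items():
            setattr(param, key, value)

    def load_weight(param, checkpoint_tensor):
        # torch-format checkpoints store linear weights as [out_features, in_features];
        # transpose when the attr recorded at create_weights time says so.
        if getattr(param, "weight_need_transpose", False):
            checkpoint_tensor = checkpoint_tensor.T
        param.value = checkpoint_tensor
        return param.value

    w = Param()
    set_weight_attrs(w, {"weight_need_transpose": True, "model_format": "torch"})
    assert load_weight(w, np.zeros((4, 2))).shape == (2, 4)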
2 changes: 2 additions & 0 deletions fastdeploy/model_executor/layers/quantization/__init__.py
@@ -85,6 +85,8 @@ def parse_quant_config(args, model_config, is_ernie, is_v1_loader):
     else:
         if not quantization_config.get("is_quantized"):
             quantization_config["is_quantized"] = model_config.is_quantized
+    if args.dynamic_load_weight and quantization_config is not None:
+        quantization_config["is_quantized"] = True
     quant_cls = get_quantization_config(quant_config_name)
     quant_config = quant_cls.from_config(quantization_config)
     return quant_config
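The new guard forces is_quantized whenever dynamic weight loading is enabled, presumably because dynamically (re)loaded checkpoints arrive already quantized and must not be re-quantized at load time. A condensed sketch of the resulting decision (the Args dataclass is hypothetical scaffolding; the flag names come from the diff):

    # Condensed sketch of the is_quantized decision in parse_quant_config;
    # not the real FastDeploy code path.
    from dataclasses import dataclass

    @dataclass
    class Args:
        dynamic_load_weight: bool = False

    def resolve_is_quantized(args, quantization_config, model_config_is_quantized):
        if quantization_config is None:
            return None
        if not quantization_config.get("is_quantized"):
            # Fall back to what the model config recorded.
            quantization_config["is_quantized"] = model_config_is_quantized
        if args.dynamic_load_weight:
            # Dynamically loaded weights are treated as already quantized.
            quantization_config["is_quantized"] = True
        return quantization_config["is_quantized"]

    assert resolve_is_quantized(Args(dynamic_load_weight=True), {"is_quantized": False}, False) is True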