@@ -185,6 +185,9 @@ def __init__(self, model_name_or_path, reasoning_parser_obj=None, tool_parser_ob
185185	        from paddleformers.trl.llm_utils import get_eos_token_id
186186
187187	        self.eos_token_ids = get_eos_token_id(self.tokenizer, self.generation_config)
188+	        data_processor_logger.info(
189+	            f"The eos_token_ids obtained by merging tokenizer and generation_config is {self.eos_token_ids}"
190+	        )
188191	        self.eos_token_id_len = len(self.eos_token_ids)
189192	        self.pad_token_id = self.get_pad_id()
190193	        self.reasoning_parser = None
@@ -396,7 +399,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
396399	        is_end = response_dict["finished"]
397400	        req_id = response_dict["request_id"]
398401	        if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"):
399-	            if token_ids[-1] == self.tokenizer.eos_token_id:
402+	            if token_ids[-1] in self.eos_token_ids:
400403	                token_ids = token_ids[:-1]
401404	        delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
402405	        if is_end:
@@ -434,7 +437,7 @@ def process_response_dict_streaming(self, response_dict, **kwargs):
434437	        token_ids = response_dict["outputs"]["token_ids"]
435438	
436439	        if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"):
437-	            if token_ids[-1] == self.tokenizer.eos_token_id:
440+	            if token_ids[-1] in self.eos_token_ids:
438441	                token_ids = token_ids[:-1]
439442	        delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
440443	        response_dict["outputs"]["raw_prediction"] = delta_text
0 commit comments