From 6c6bec26d65cd220cfa11753652aa55c0759dbbf Mon Sep 17 00:00:00 2001 From: Sathianphong Phongsathian Date: Fri, 10 May 2024 17:23:13 +0700 Subject: [PATCH 1/2] Update _utils.py fix empty string bug https://github.com/PyThaiNLP/pythainlp/issues/911 --- pythainlp/tokenize/_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pythainlp/tokenize/_utils.py b/pythainlp/tokenize/_utils.py index 30d77f1e..3ff96eb8 100644 --- a/pythainlp/tokenize/_utils.py +++ b/pythainlp/tokenize/_utils.py @@ -61,8 +61,8 @@ def rejoin_formatted_num(segments: List[str]) -> List[str]: connected_token += segments[segment_idx] pos += len(segments[segment_idx]) segment_idx += 1 - - tokens_joined.append(connected_token) + if connected_token : + tokens_joined.append(connected_token) match = next(matching_results, None) else: tokens_joined.append(token) From dcd2b47018daab3893d05194e8c90cc0d5c9602a Mon Sep 17 00:00:00 2001 From: Sathianphong Phongsathian Date: Fri, 10 May 2024 17:31:25 +0700 Subject: [PATCH 2/2] Update _utils.py fix space before : --- pythainlp/tokenize/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythainlp/tokenize/_utils.py b/pythainlp/tokenize/_utils.py index 3ff96eb8..6731c80e 100644 --- a/pythainlp/tokenize/_utils.py +++ b/pythainlp/tokenize/_utils.py @@ -61,7 +61,7 @@ def rejoin_formatted_num(segments: List[str]) -> List[str]: connected_token += segments[segment_idx] pos += len(segments[segment_idx]) segment_idx += 1 - if connected_token : + if connected_token: tokens_joined.append(connected_token) match = next(matching_results, None) else: