diff --git a/data/lt_export_rule_heading.json b/data/lt_export_rule_heading.json index 53a0dbd9..09d5443b 100644 --- a/data/lt_export_rule_heading.json +++ b/data/lt_export_rule_heading.json @@ -256,4 +256,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/data/lt_export_rule_list_bullet.json b/data/lt_export_rule_list_bullet.json index afeb31d0..6ad6335e 100644 --- a/data/lt_export_rule_list_bullet.json +++ b/data/lt_export_rule_list_bullet.json @@ -1,12 +1,37 @@ { "lineTypeAntiPatterns": [], "lineTypeRules": [ - "- ", - ". ", - "\ufffd ", - "o ", - "\u00b0 ", - "\u2022 ", - "\u2023 " + "-", + ".", + "o", + "\u00b0", + "\u00b7", + "\u00ba", + "\u2022", + "\u2023", + "\u2043", + "\u204c", + "\u204d", + "\u2218", + "\u2219", + "\u22c4", + "\u22c5", + "\u22c6", + "\u25cb", + "\u25cf", + "\u25d8", + "\u25e6", + "\u2605", + "\u2606", + "\u2609", + "\u2619", + "\u2662", + "\u2666", + "\u26ac", + "\u26b9", + "\u2765", + "\u2767", + "\u29be", + "\u29bf" ] } \ No newline at end of file diff --git a/data/lt_export_rule_list_bullet_test.json b/data/lt_export_rule_list_bullet_test.json index 1d23bb34..cb9b2975 100644 --- a/data/lt_export_rule_list_bullet_test.json +++ b/data/lt_export_rule_list_bullet_test.json @@ -6,12 +6,37 @@ } ], "lineTypeRules": [ - "- ", - ". ", - "\ufffd ", - "o ", - "\u00b0 ", - "\u2022 ", - "\u2023 " + "-", + ".", + "o", + "\u00b0", + "\u00b7", + "\u00ba", + "\u2022", + "\u2023", + "\u2043", + "\u204c", + "\u204d", + "\u2218", + "\u2219", + "\u22c4", + "\u22c5", + "\u22c6", + "\u25cb", + "\u25cf", + "\u25d8", + "\u25e6", + "\u2605", + "\u2606", + "\u2609", + "\u2619", + "\u2662", + "\u2666", + "\u26ac", + "\u26b9", + "\u2765", + "\u2767", + "\u29be", + "\u29bf" ] } \ No newline at end of file diff --git a/src/dcr/nlp/cls_nlp_core.py b/src/dcr/nlp/cls_nlp_core.py index 3b93f5ac..866cc0d2 100644 --- a/src/dcr/nlp/cls_nlp_core.py +++ b/src/dcr/nlp/cls_nlp_core.py @@ -564,17 +564,34 @@ def _get_lt_rules_default_list_bullet() -> dict[str, int]: The bulleted list line type rules. """ return { + "\u002D": 0, + "\u002E": 0, + "\u006F": 0, + "\u00B0": 0, + "\u00B7": 0, + "\u00BA": 0, "\u2022": 0, "\u2023": 0, "\u2043": 0, "\u204C": 0, "\u204D": 0, + "\u2218": 0, "\u2219": 0, + "\u22C4": 0, + "\u22C5": 0, + "\u22C6": 0, "\u25CB": 0, "\u25CF": 0, "\u25D8": 0, "\u25E6": 0, + "\u2605": 0, + "\u2606": 0, + "\u2609": 0, "\u2619": 0, + "\u2662": 0, + "\u2666": 0, + "\u26AC": 0, + "\u26B9": 0, "\u2765": 0, "\u2767": 0, "\u29BE": 0,