Skip to content

Commit

Permalink
Merge pull request #16 from BoGoEngine/separators
Browse files Browse the repository at this point in the history
Support separator in process_sequence()
  • Loading branch information
lewtds committed Jul 2, 2014
2 parents edb6486 + dc278e8 commit c815ff7
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 11 deletions.
48 changes: 37 additions & 11 deletions bogo/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
from bogo.validation import is_valid_combination
from bogo import utils, accent, mark
import logging
import sys
import string


Mark = mark.Mark
Expand Down Expand Up @@ -97,9 +99,18 @@ def get_vni_definition():
}


def _is_processable(comps):
    """Check ``comps`` with is_valid_combination() in non-final form.

    Args:
        comps: an indexable sequence; only the items at index 1 and 2
            are read.

    Returns:
        Whatever is_valid_combination() returns for the candidate.
    """
    # For now only the last 2 components are checked, so the first slot
    # of the candidate is deliberately left empty.
    candidate = ('', comps[1], comps[2])
    return is_valid_combination(candidate, final_form=False)
def _accepted_chars(rules):
    """Return the characters that are not treated as separators.

    process_sequence() passes a key on to process_key() only when the key
    is contained in the string returned here; any other key is emitted
    verbatim and ends the word currently being composed.

    Args:
        rules: mapping whose keys are the input-method trigger keys
            (string keys, e.g. the result of get_telex_definition()).

    Returns:
        A string made of all ASCII letters followed by every key of
        ``rules`` in the mapping's iteration order.
    """
    # string.ascii_letters is available on both Python 2 and Python 3, so
    # no interpreter-version branch is needed.  Unlike the Python 2-only
    # string.lowercase / string.uppercase, it does not depend on the
    # current locale.
    return string.ascii_letters + ''.join(rules)


def process_sequence(sequence,
Expand All @@ -111,19 +122,34 @@ def process_sequence(sequence,
Args:
rules (optional): see docstring for process_key().
skip_non_vietnamese (optional): see docstring for process_key().
It even supports continuous key sequences connected by separators.
i.e. process_sequence('con meof.ddieen') should work.
"""
result = ""
raw = result
result_parts = []
if rules is None:
rules = get_telex_definition()

for key in sequence:
result, raw = process_key(
string=result,
key=key,
fallback_sequence=raw,
rules=rules,
skip_non_vietnamese=skip_non_vietnamese)
accepted_chars = _accepted_chars(rules)

return result
for key in sequence:
if key not in accepted_chars:
result_parts.append(result)
result_parts.append(key)
result = ""
raw = ""
else:
result, raw = process_key(
string=result,
key=key,
fallback_sequence=raw,
rules=rules,
skip_non_vietnamese=skip_non_vietnamese)

result_parts.append(result)
return ''.join(result_parts)


def process_key(string, key,
Expand Down
5 changes: 5 additions & 0 deletions bogo/test/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,8 @@ def atomic(word):
# eq_(process_sequence("aans.tuongwj"), "ấn.tượng")
eq_(process_sequence("gi[f"), "giờ")
# eq_(process_sequence("taojc"), "taojc")

def test_with_separator(self):
    # Each pair is (typed key sequence, expected output); the inputs
    # contain a space, a '.' and a '?' between the words respectively.
    cases = (
        ('con meof dideen', 'con mèo điên'),
        ('con.meof', 'con.mèo'),
        ('con?meof', 'con?mèo'),
    )
    for typed, expected in cases:
        eq_(process_sequence(typed), expected)

0 comments on commit c815ff7

Please sign in to comment.