Permalink
Browse files

1) Add method to UTF8 module for classifying letters

  • Loading branch information...
Muthu Annamalai
Muthu Annamalai committed Dec 17, 2017
1 parent 11c7062 commit bc530ac80efd094e9483d5cea980f9d1102cc449
Showing with 45 additions and 1 deletion.
  1. +29 −1 tamil/utf8.py
  2. +12 −0 tests/letter_tests2.py
  3. +2 −0 tests/python2_localtest.sh
  4. +2 −0 tests/python3_localtest.sh
View
@@ -533,7 +533,7 @@ def splitMeiUyir(uyirmei_char):
if uyirmei_char in mei_letters or uyirmei_char in uyir_letters or uyirmei_char in ayudha_letter:
return uyirmei_char
if uyirmei_char not in grantha_uyirmei_letters:
if not is_normalized( uyirmei_char ):
norm_char = unicode_normalize(uyirmei_char)
@@ -577,6 +577,34 @@ def joinMeiUyir(mei_char, uyir_char):
uyirmeiidx = meiidx*12 + uyiridx
return grantha_uyirmei_letters[uyirmeiidx]
def classify_letter(letter):
    """Classify a single letter and return the name of its category.

    The checks run in the order below and the first match wins:

    * member of ``uyir_letters``: 'kuril' or 'nedil', depending on
      membership in ``kuril_letters`` / ``nedil_letters``
    * equal to ``ayudha_letter``: 'ayudham'
    * member of ``mei_letters``: 'mellinam', 'vallinam' or 'idayinam'
    * member of ``uyirmei_letters``: 'uyirmei'
    * any other member of ``tamil_letters``: 'tamil_or_grantham'
    * ``letter.isalpha()``: 'english'
    * ``letter.isdigit()``: 'digit'

    :param letter: a unicode string (``str`` on Python 3, ``unicode``
        on Python 2) holding the letter to classify.
    :return: one of the category names listed above.
    :raises TypeError: if ``letter`` is not a unicode string.
    :raises ValueError: if ``letter`` matches none of the categories.
    """
    # Conditional expression instead of the fragile ``and/or`` idiom; the
    # name ``unicode`` is only evaluated when PYTHON3 is false.
    text_type = str if PYTHON3 else unicode
    if not isinstance(letter, text_type):
        # Wrap in a 1-tuple so a tuple argument is formatted as a whole
        # instead of being unpacked by the % operator.
        raise TypeError("Input'%s' must be unicode, not just string" % (letter,))

    if letter in uyir_letters:
        if letter in kuril_letters:
            return u'kuril'
        if letter in nedil_letters:
            return u'nedil'
        # A vowel that is neither kuril nor nedil falls through to the
        # remaining checks, exactly as before.

    # Tested on its own rather than nested under the uyir check, so the
    # 'ayudham' category does not depend on ayudha_letter also being
    # listed in uyir_letters.
    if letter == ayudha_letter:
        return 'ayudham'

    if letter in mei_letters:
        if letter in mellinam_letters:
            return 'mellinam'
        if letter in vallinam_letters:
            return 'vallinam'
        if letter in idayinam_letters:
            return 'idayinam'
        # Consonants outside the three classes fall through to the
        # generic checks below.

    if letter in uyirmei_letters:
        return 'uyirmei'

    if letter in tamil_letters:
        return 'tamil_or_grantham'

    # NOTE: isalpha() is true for alphabetic characters of any script, so
    # every non-Tamil alphabetic letter is reported as 'english'.
    if letter.isalpha():
        return 'english'
    if letter.isdigit():
        return 'digit'

    raise ValueError("Unknown letter '%s' neither Tamil nor English or number"%letter)
# Tamil Letters
# அ ஆ இ ஈ உ ஊ எ ஏ ஐ ஒ ஓ ஔ ஃ
# க் ச் ட் த் ப் ற் ஞ் ங் ண் ந் ம் ன் ய் ர் ல் வ் ழ் ள் ஜ் ஷ் ஸ் ஹ்
View
@@ -23,5 +23,17 @@ def test_uyir_mei_split(self):
il,ee = utf8.splitMeiUyir(u"லி")
self.assertEqual((il,ee),(u"ல்",u""))
def test_classifier(self):
    """classify_letter labels each letter of a mixed English/digit/Tamil word."""
    sample = u"abc1230அஆரெட்டை"
    # Three English letters, four digits, then the five Tamil letters.
    expected = (
        ['english'] * 3
        + ['digit'] * 4
        + ['kuril', 'nedil', 'uyirmei', 'vallinam', 'uyirmei']
    )
    data = [utf8.classify_letter(ch) for ch in utf8.get_letters(sample)]
    self.assertEqual(data, expected)
def test_classified_except(self):
    """classify_letter raises ValueError for a punctuation character."""
    self.assertRaises(ValueError, utf8.classify_letter, u'.')
# Run the test suite only when this file is executed directly, not on import.
if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,2 @@
#!/bin/bash -x
# Run the given script and arguments with `python`, with the local
# open-tamil checkout on PYTHONPATH.
# "$@" is quoted so each argument is passed through unchanged; unquoted,
# arguments containing spaces or glob characters would be split or expanded.
PYTHONPATH=~/devel/open-tamil python "$@"
@@ -0,0 +1,2 @@
#!/bin/bash -x
# Run the given script and arguments with `python3`, with the local
# open-tamil checkout on PYTHONPATH.
# "$@" is quoted so each argument is passed through unchanged; unquoted,
# arguments containing spaces or glob characters would be split or expanded.
PYTHONPATH=~/devel/open-tamil python3 "$@"

0 comments on commit bc530ac

Please sign in to comment.