Skip to content
Permalink
Browse files

\

wordutils/minnal - and unittest
  • Loading branch information...
Muthiah Annamalai
Muthiah Annamalai committed Feb 24, 2019
1 parent da9b09c commit aae17d0975914e1d387577def4debeb9b6ef2f68
Showing with 34 additions and 2 deletions.
  1. +1 −1 tamil/__init__.py
  2. +27 −1 tamil/wordutils.py
  3. +6 −0 tests/word_utils.py
@@ -15,4 +15,4 @@
from . import utils

# Version string of this package.
VERSION = '0.8'
# Names exported by a star-import of this package. Each name is listed exactly
# once (duplicates are redundant), and there is a single assignment so no
# earlier value is silently overwritten.
__all__ = ['utf8', 'txt2unicode', 'numeral', 'txt2ipa', 'regexp', 'utils', 'wordutils']
@@ -4,6 +4,8 @@
from __future__ import print_function, division
import copy
import collections
import random
import math
from . import utf8

def combinations(symbols_in):
@@ -268,7 +270,31 @@ def word_split(inword,dictionary):
idx = idx + 1

return ((solutions))


def minnal(word_list):
    """Build a shuffled square letter grid from the letters of ``word_list``.

    Every word is split with ``utf8.get_letters``. The distinct letters are
    ordered with ``utf8.compare_words_lexicographic``, padded with random
    picks from ``utf8.tamil_letters`` up to the next perfect square, and then
    shuffled.

    :param word_list: iterable of words whose letters populate the grid.
    :return: ``(textgrid, text)``; ``textgrid`` is a list of rows (each a list
        of letters) and ``text`` is the same grid with the cells of a row
        joined by ``,`` and every row terminated by a newline. Empty input
        yields ``([], u'')``.
    """
    # sorted() lost its `cmp` keyword on Python 3; cmp_to_key adapts the
    # cmp-style comparator and is also available on Python 2.7.
    from functools import cmp_to_key

    letters = set()
    for word in word_list:
        letters.update(utf8.get_letters(word))
    # Set iteration order is arbitrary, so sort to get a well-defined
    # starting order before shuffling.
    cells = sorted(letters, key=cmp_to_key(utf8.compare_words_lexicographic))

    # Side of the smallest square that can hold every distinct letter.
    side = int(math.ceil(math.sqrt(len(cells))))
    if side == 0:
        # No letters at all: empty grid, empty text.
        return [], u''

    # Fill the remaining cells with random letters, then scramble positions.
    padding = side * side - len(cells)
    cells.extend(random.choice(utf8.tamil_letters) for _ in range(padding))
    random.shuffle(cells)

    textgrid = [cells[start:start + side]
                for start in range(0, side * side, side)]
    text = u''.join(u','.join(row) + u'\n' for row in textgrid)
    return textgrid, text



# Minimal stand-in for a dictionary object, for use with anagrams: a record
# with a single field, ``isWord``.
DictionaryWithPredicate = collections.namedtuple(
    'DictionaryWithPredicate',
    'isWord',
)

@@ -12,6 +12,12 @@
import math

class TestTweetParse(unittest.TestCase):
def test_minnal(self):
    """minnal() returns a square grid plus its comma/newline text form."""
    words = [u'அப்பம்', u'நேய்', u'தபால்', u'காதல்']
    grid, text = wordutils.minnal(words)
    # These four words are expected to produce a 4x4 grid; check every row,
    # not just the first one.
    self.assertEqual(len(grid), 4)
    for row in grid:
        self.assertEqual(len(row), 4)
    # `u'' in text` holds for any string, so it verified nothing. Check the
    # real separators instead: cells are comma-joined and each of the four
    # rows ends with a newline.
    self.assertTrue(u',' in text)
    self.assertEqual(text.count(u'\n'), 4)

def test_tweety( self ):
tweet = u"ஈர்ப்பு அலைகள் உருவாக்கும் அலைகள் #LIGO #tamil @nsf | SBS Your Language http://www.sbs.com.au/yourlanguage/tamil/ta/content/iirppu-alaikll-uruvaakkum-alaikll?language=ta"
tobj = tweetparser.TamilTweetParser(timeline_owner = "@ezhillang",tweet=tweet)

0 comments on commit aae17d0

Please sign in to comment.
You can’t perform that action at this time.