# Testing .tf files from NENA conversion

In [1]:
import collections
from tf.fabric import Fabric
from tf.app import use
data = '../tf/0.01/'

TF = Fabric(locations=[data])

api = TF.loadAll()

tf_vars = api.makeAvailableIn(globals())

nena = use('nena', api=api)

This is Text-Fabric 7.9.0
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

31 features found and 0 ignored
  0.00s loading features ...
   |     0.32s T otype                from /Users/cody/github/CambridgeSemiticsLab/nena_tf/tf/0.01
   |     4.53s T oslots               from /Users/cody/github/CambridgeSemiticsLab/nena_tf/tf/0.01
   |     0.00s No structure info in otext, the structure part of the T-API cannot be used
   |     1.18s T fuzzy_end            from /Users/cody/github/CambridgeSemiticsLab/nena_tf/tf/0.01
   |     0.01s T number               from /Users/cody/github/CambridgeSemiticsLab/nena_tf/tf/0.01
   |     1.85s T full                 from /Users/cody/github/CambridgeSemiticsLab/nena_tf/tf/0.01
   |     1.21s T end                  from /Users/cody/github/CambridgeSemiticsLab/nena_tf/tf/0.01
   |     1.60s T fuzzy                from /Users/cody/github/CambridgeSemiticsLab/nena_tf/tf/0.01
   |     1.19s T full_end             from /Users/cody/githu

## Testing Lemma Matches

In [23]:
barwar = T.nodeFromSection(('Barwar',))

In [31]:
lemmatized = collections.Counter()
unlemmatized = collections.Counter()

for morph in L.d(barwar, 'morpheme'):
    lem = F.lemma.v(morph)
    if lem:
        lemmatized['yes'] += 1
    else:
        lemmatized['no'] += 1
        unlemmatized[F.text_norm.v(morph)] += 1
        
lemmatized.most_common()

[('yes', 28963), ('no', 27907)]

In [32]:
lemmatized['yes'] / (lemmatized['no']+lemmatized['yes'])

0.509284332688588

In [33]:
unlemmatized.most_common()

[('məre', 1427),
 ('diye', 942),
 ('məra', 632),
 ('d', 577),
 ('hole', 222),
 ('o', 215),
 ('y', 211),
 ('qimɛle', 204),
 ('hola', 190),
 ('ile', 163),
 ('wewa', 157),
 ('ʾamər', 145),
 ('ila', 127),
 ('θele', 120),
 ('kəs', 116),
 ('zilɛle', 114),
 ('ʾəlle', 103),
 ('babi', 103),
 ('zille', 103),
 ('ay', 96),
 ('modila', 94),
 ('lɛla', 87),
 ('zila', 87),
 ('yaʿni', 84),
 ('ʾiθwa', 83),
 ('tara', 80),
 ('bayət', 79),
 ('ʾəθyɛle', 77),
 ('hayyo', 75),
 ('zilla', 75),
 ('mənne', 73),
 ('bayən', 73),
 ('a', 71),
 ('bəlbəl', 71),
 ('hazar', 71),
 ('hon', 70),
 ('biye', 69),
 ('pišle', 68),
 ('mənna', 67),
 ('pišla', 67),
 ('alaha', 66),
 ('awwa', 65),
 ('siqɛle', 64),
 ('ayya', 64),
 ('ʾəθya', 63),
 ('an', 63),
 ('θela', 62),
 ('u', 61),
 ('aw', 61),
 ('qimela', 60),
 ('it', 59),
 ('gane', 59),
 ('wawa', 58),
 ('bron', 58),
 ('azən', 57),
 ('babe', 57),
 ('broni', 57),
 ('anna', 56),
 ('mamo', 54),
 ('lan', 53),
 ('xazəx', 52),
 ('xzi', 52),
 ('ṣəlyɛle', 50),
 ('le', 48),
 ('xoni', 48),


## Testing Queries

### Verb Queries

In [2]:
preterite_strong_verb = nena.search('''

word
    =: letter class=consonant
    <: letter class=consonant
    <: letter fuzzy=i
    <: letter class=consonant
    <: letter fuzzy=l
    
''')

  3.48s 452 results


In [3]:
qam_preterite_strong = nena.search('''

w:word
    =: letter fuzzy=q
    <: letter class=vowel
    <: letter fuzzy=m
    <: letter class=consonant
    <: letter fuzzy=a
    < letter fuzzy=l 
    <:  letter class=vowel
''')

  4.28s 276 results


In [4]:
query = nena.search('''

w:word
    =: letter class=consonant
    <: letter fuzzy=a
    <: letter class=consonant
    <: letter class=consonant
    < letter fuzzy=l 
    <:  letter class=vowel
''')

  4.61s 2016 results


### Other Queries

In [9]:
query = nena.search('''

word
    morpheme grm_desc~^pron\.

''')

  0.16s 2437 results


In [18]:
nena.show(query, end=150, condenseType='sentence', extraFeatures='gloss')

## Basic Tests

In [2]:
morph = 1
print('letter')
print(F.text.v(morph))

showtypes = (
    'morpheme','inton','subsentence','sentence','line',
    'paragraph','text'
)

for i, otype in enumerate(showtypes):
    node = L.u(morph, otype)[0]
    indent = ' '*(i+1)
    print()
    print(f'{otype}')
    print(f'{T.text(node)}')

letter
x

morpheme
xá-

inton
xá-ga xèta,ˈ 

subsentence
xá-ga xèta,ˈ 

sentence
xá-ga xèta,ˈ mállah Naṣràdin,ˈ xázəx mòdi wíða.ˈ 

line
xá-ga xèta,ˈ mállah Naṣràdin,ˈ xázəx mòdi wíða.ˈ gu-bɛ̀θa wéwa,ˈ har-zála-w θàya.ˈ zála-w θàya,ˈ mára ya-ʾàlaha,ˈ yawə̀tliˈ ʾə́mma dàwe.ˈ ʾən-hàwaˈ ʾə́č̣č̣i-u ʾə́č̣č̣a maqəlbə̀nna.ˈ ʾu-ʾən-hàwaˈ ʾə́mma-w-xà-ži,ˈ la-băyə̀nna.ˈ de-šùqla.ˈ ʾə̀mma gắrəg háwa drə́st.ˈ 

paragraph
xá-ga xèta,ˈ mállah Naṣràdin,ˈ xázəx mòdi wíða.ˈ gu-bɛ̀θa wéwa,ˈ har-zála-w θàya.ˈ zála-w θàya,ˈ mára ya-ʾàlaha,ˈ yawə̀tliˈ ʾə́mma dàwe.ˈ ʾən-hàwaˈ ʾə́č̣č̣i-u ʾə́č̣č̣a maqəlbə̀nna.ˈ ʾu-ʾən-hàwaˈ ʾə́mma-w-xà-ži,ˈ la-băyə̀nna.ˈ de-šùqla.ˈ ʾə̀mma gắrəg háwa drə́st.ˈ b-álaha hóle zála-w θàya,ˈ ʾíθwale xá-šwawa huðàya,ˈ maṣóθe ʾə́lle dìye.ˈ mə́re xázəx ʾáwwa dū̀s-ile.ˈ qɛ́mən mjarbə̀nne.ˈ síqa l-gàre,ˈ də́ryɛle ʾə́č̣č̣i-u ʾə́č̣č̣a dáwe gu-ða-kìsta,ˈ də́rya b-kàwele.ˈ ʾá báxta hàyyo!ˈ hóle ʾaláha qəm-mšadə̀rrən.ˈ muθɛ́θɛla màjma.ˈ msúrqəlla píšela mnáyəlla l-xà-xa.ˈ plíṭla ʾə́č̣č̣i-u ʾə

In [3]:
T.text(L.u(morph, 'paragraph'), fmt='text-trans-full')

"xa'-ga xe`ta/,| ma'llah Nas.ra`din/,| xa'z3x mo`di wi'6a/.| gu-b$`8a we'wa/,| har-za'la-w 8a`ya/.| za'la-w 8a`ya/,| ma'ra ya-}a`laha/,| yaw3`tli| }3'mma da`we/.| }3n-ha`wa| }3'c.<c.<i-u }3'c.<c.<a maq3lb3`nna/.| }u-}3n-ha`wa| }3'mma-w-xa`-z<i/,| la-ba%y3`nna/.| de-s<u`qla/.| }3`mma ga%'r3g ha'wa dr3'st/.| b-a'laha ho'le za'la-w 8a`ya/,| }i'8wale xa'-s<wawa hu6a`ya/,| mas.o'8e }3'lle di`ye/.| m3're xa'z3x }a'wwa du_`s-ile/.| q$'m3n mjarb3`nne/.| si'qa l-ga`re/,| d3'ry$le }3'c.<c.<i-u }3'c.<c.<a da'we gu-6a-ki`sta/,| d3'rya b-ka`wele/.| }a' ba'xta ha`yyo/!| ho'le }ala'ha q3m-ms<ad3`rr3n/.| mu8$'8$la ma`jma/.| msu'rq3lla pi's<ela mna'y3lla l-xa`-xa/.| pli't.la }3'c.<c.<i-u }3`c.<c.<a/.| tre`/,| tre`/,| }3'c.<c.<i-u }3`c.<c.<a/.| }3`s.ra/,| }3`s.ra/,| ha`r-}3c.<c.<i-u }3'c.<c.<a/.| kle`la/,| }a'm3r ba'xta du_`s-ile/.| }ala'ha la`-xal3t./.| }3'c.<c.<i-u }3`c.<c.<a/,| }a'xc<i }a'na max-xs<a`wti/,| }a'yya ki`sta| ho'le mxoz<3'bn3lla max-xa`/.| ha-s<qu`l/,| ma'ttula ta%ma`ha/.| hu6a'ya l-ga`r

In [4]:
T.text(L.u(morph, 'paragraph'), fmt='text-trans-lite')

'xa-ga xeta,| mallah naSradin,| xaz9x modi wi6a.| gu-b38a wewa,| har-zala-w 8aya.| zala-w 8aya,| mara ya-)alaha,| yaw9tli| )9mma dawe.| )9n-hawa| )9%%i-u )9%%a maq9lb9nna.| )u-)9n-hawa| )9mma-w-xa-7i,| la-b@y9nna.| de-$uqla.| )9mma g@r9g hawa dr9st.| b-alaha hole zala-w 8aya,| )i8wale xa-$wawa hu6aya,| maSo8e )9lle diye.| m9re xaz9x )awwa dus-ile.| q3m9n mjarb9nne.| siqa l-gare,| d9ry3le )9%%i-u )9%%a dawe gu-6a-kista,| d9rya b-kawele.| )a baxta hayyo!| hole )alaha q9m-m$ad9rr9n.| mu8383la majma.| msurq9lla pi$ela mnay9lla l-xa-xa.| pliTla )9%%i-u )9%%a.| tre,| tre,| )9%%i-u )9%%a.| )9Sra,| )9Sra,| har-)9%%i-u )9%%a.| klela,| )am9r baxta dus-ile.| )alaha la-xal9T.| )9%%i-u )9%%a,| )ax5i )ana max-x$awti,| )ayya kista| hole mxo79bn9lla max-xa.| ha-$qul,| mattula t@maha.| hu6aya l-gare| $wir3le l-palga,| yaba )an9n m$udr9lla!| )ay kalba )am9r tama l-gare maSyo8e,| b@y9t $aql9tla )ap-)anna.| mrazg9t ganux.| t9mm9l <+>-asq9x k9s-qazi.| hu6aya )9%%i-u )9%%a dawe zile m9nne,| )3ka <+>-a8ya $9

In [5]:
T.text(L.u(morph, 'paragraph'), fmt='text-trans-fuzzy')

'xa-ga xeta, mallah nasradin, xazix modi wida. gu-beta wewa, har-zala-w taya. zala-w taya, mara ya-alaha, yawitli imma dawe. in-hawa i55i-u i55a maqilbinna. u-in-hawa imma-w-xa-zi, la-bayinna. de-suqla. imma garig hawa drist. b-alaha hole zala-w taya, itwale xa-swawa hudaya, masote ille diye. mire xazix awwa dus-ile. qemin mjarbinne. siqa l-gare, diryele i55i-u i55a dawe gu-da-kista, dirya b-kawele. a baxta hayyo! hole alaha qim-msadirrin. mutetela majma. msurqilla pisela mnayilla l-xa-xa. plitla i55i-u i55a. tre, tre, i55i-u i55a. isra, isra, har-i55i-u i55a. klela, amir baxta dus-ile. alaha la-xalit. i55i-u i55a, ax5i ana max-xsawti, ayya kista hole mxozibnilla max-xa. ha-squl, mattula tamaha. hudaya l-gare swirele l-palga, yaba anin msudrilla! ay kalba amir tama l-gare masyote, bayit saqlitla ap-anna. mrazgit ganux. timmil t-asqix kis-qazi. hudaya i55i-u i55a dawe zile minne, eka t-atya sinte?! hal-qedamta sinte la-tela. har-wele zala-w taya. malla muttille rese dmixa. tliya, kefe b

In [23]:
import pandas as pd

In [32]:
paragraph = L.u(morph, 'paragraph')[0]

show_words_data = []

for word in L.d(paragraph, 'word'):
    show_words_data.append((word, T.text(word),F.trans_l.v(word), F.t_fuzzy.v(word)))

In [33]:
show_words = pd.DataFrame(show_words_data, columns=('node', 'text','trans_lite', 'fuzzy'))

In [34]:
show_words.head(50)

Unnamed: 0,node,text,trans_lite,fuzzy
0,739771,xá-ga,xa-ga,xa-ga
1,739772,"xèta,ˈ",xeta,xeta
2,739773,mállah,mallah,mallah
3,739774,"Naṣràdin,ˈ",naSradin,nasradin
4,739775,xázəx,xaz9x,xazix
5,739776,mòdi,modi,modi
6,739777,wíða.ˈ,wi6a,wida
7,739778,gu-bɛ̀θa,gu-b38a,gu-beta
8,739779,"wéwa,ˈ",wewa,wewa
9,739780,har-zála-w,har-zala-w,har-zala-w


## Testing with App

In [35]:
from tf.app import use
A = use('nena:clone', checkout='clone', hoist=globals())

Using TF-app in /Users/cody/github/annotation/app-nena/code:
	repo clone offline under ~/github (local github)
Using data in /Users/cody/github/CambridgeSemiticsLab/nena_tf/tf/0.01:
	repo clone offline under ~/github (local github)
   |     0.00s No structure info in otext, the structure part of the T-API cannot be used


In [36]:
A.search('''

text title=A\ Hundred\ Gold\ Coins

''')

  0.00s 1 result


[(739645,)]

In [37]:
F.trans_l.v(740790)

'q9m-d3r3tuli'

In [38]:
line1 = L.u(1,'line')[0]
sent = L.u(1, 'sentence')[0]

In [39]:
A.pretty(line1)

In [40]:
A.pretty(sent, extraFeatures='speaker')

In [41]:
A.pretty(3)

In [42]:
T.text(1)

'x'

In [43]:
A.search('''

dialect dialect=Urmi_C

''')

  0.00s 1 result


[(539383,)]

In [44]:
L.d(739697,'sentence')

(706684, 706685, 706686, 706687, 706688, 706689, 706690, 706691, 706692)

In [45]:
A.pretty(706684)

In [46]:
for line in L.d(L.u(1,'text')[0],'line'):
    print(f'({F.number.v(line)}) ' + T.text(line, fmt='text-trans-full'), end='')

(1) xa'-ga xe`ta/,| ma'llah Nas.ra`din/,| xa'z3x mo`di wi'6a/.| gu-b$`8a we'wa/,| har-za'la-w 8a`ya/.| za'la-w 8a`ya/,| ma'ra ya-}a`laha/,| yaw3`tli| }3'mma da`we/.| }3n-ha`wa| }3'c.<c.<i-u }3'c.<c.<a maq3lb3`nna/.| }u-}3n-ha`wa| }3'mma-w-xa`-z<i/,| la-ba%y3`nna/.| de-s<u`qla/.| }3`mma ga%'r3g ha'wa dr3'st/.| (2) b-a'laha ho'le za'la-w 8a`ya/,| }i'8wale xa'-s<wawa hu6a`ya/,| mas.o'8e }3'lle di`ye/.| m3're xa'z3x }a'wwa du_`s-ile/.| q$'m3n mjarb3`nne/.| si'qa l-ga`re/,| d3'ry$le }3'c.<c.<i-u }3'c.<c.<a da'we gu-6a-ki`sta/,| d3'rya b-ka`wele/.| }a' ba'xta ha`yyo/!| ho'le }ala'ha q3m-ms<ad3`rr3n/.| (3) mu8$'8$la ma`jma/.| msu'rq3lla pi's<ela mna'y3lla l-xa`-xa/.| pli't.la }3'c.<c.<i-u }3`c.<c.<a/.| tre`/,| tre`/,| }3'c.<c.<i-u }3`c.<c.<a/.| }3`s.ra/,| }3`s.ra/,| ha`r-}3c.<c.<i-u }3'c.<c.<a/.| kle`la/,| }a'm3r ba'xta du_`s-ile/.| }ala'ha la`-xal3t./.| }3'c.<c.<i-u }3`c.<c.<a/,| }a'xc<i }a'na max-xs<a`wti/,| }a'yya ki`sta| ho'le mxoz<3'bn3lla max-xa`/.| ha-s<qu`l/,| ma'ttula ta%ma`ha/.| (4)

## Miscellaneous Searches

In [69]:
search = A.search('''

dialect dialect=Barwar
    sentence
        word t_fuzzy~qim-
        

''')

A.show(search, extraFeatures='trans_f')

  0.17s 319 results


In [48]:
for res in search:
    print(res[0], F.t_fuzzy.v(res[0]))

750633 xos-mindila
750885 xos-mindila
761891 d-a-mindila
766721 xos-mindila
785703 mindila
833508 hi5-mindila


In [49]:
F.t_fuzzy.v(750633)

'xos-mindila'