In [2]:
# Make the local `ieml` package importable by putting the parent directory
# first on the module search path.
# NOTE(review): '..' is resolved against the kernel's working directory, so this
# assumes the notebook is run from a folder directly below the repository root — confirm.
import sys
sys.path.insert(0, '..')

The ieml library exposes a parser (ieml.usl.parser) that parses IEML strings (including those written in older versions of the syntax) and returns either a normalised ieml.usl.USL object or an ieml.dictionary.Script (a morpheme). For the moment, ieml.usl.Word is the only valid kind of ieml.usl.USL.

An ieml.usl.USL can be checked for coherence with ieml.usl.USL.check(). This check should eventually be exposed as a strict mode of the parser.

In [3]:
from ieml.usl.usl import usl
# Parse a word, validate it with check(), then print the form the parser gives back.
u = usl("[E:.b.E:B:.- E:S:. (E:.-wa.-t.o.-' E:.-'wu.-S:.-'t.o.-',)(a.T:.-) > ! E:.l.- (E:.wo.- E:S:.-d.u.-')]")
u.check()
print(u)
# Same word, with the two morphemes of the first parenthesised group written in the opposite order.
u1 = usl("[E:.b.E:B:.- E:S:. (E:.-'wu.-S:.-'t.o.-', E:.-wa.-t.o.-' )(a.T:.-) > ! E:.l.- (E:.wo.- E:S:.-d.u.-')]")
u1.check()
print(u1)
# Both spellings must parse to equal objects (the two printed lines are identical).
assert u1 == u

Matplotlib is building the font cache using fc-list. This may take a moment.


[E:S:. (E:.-wa.-t.o.-' E:.-'wu.-S:.-'t.o.-',)(a.T:.-) > ! E:.l.- (E:.wo.- E:S:.-d.u.-')]
[E:S:. (E:.-wa.-t.o.-' E:.-'wu.-S:.-'t.o.-',)(a.T:.-) > ! E:.l.- (E:.wo.- E:S:.-d.u.-')]


The IEML lexicons are stored on GitHub; they have to be downloaded first.

In [4]:
from ieml.ieml_database import GitInterface, IEMLDatabase
# Interface to the git repository that holds the IEML database.
gitdb = GitInterface()
gitdb.pull() # download database in ~/.cache/ieml/ folder
print(gitdb)

# instantiate an ieml.ieml_database.IEMLDatabase from the downloaded git repository
db = IEMLDatabase(folder=gitdb.folder)
print(db)


<ieml.ieml_database.git_interface.GitInterface object at 0x7f764afeb9b0>
<ieml.ieml_database.ieml_database (/u/letardvi/.cache/ieml/1.0.3/e3050257e00f171aa548e17e73fe562e cache=/u/letardvi/.cache/ieml/1.0.3/e3050257e00f171aa548e17e73fe562e)>


In [8]:
# Keep only the first 100 entries the database lists for type='morpheme'.
morphs = db.list(type='morpheme')[:100]

The ieml.ieml_database.IEMLDatabase is responsible for reading and writing to disk the associations between ieml.usl.USL or ieml.dictionary.Script objects and their translations. The values are stored as rows, in one file per USL. The rows are formatted as space-separated values (SSV, analogous to CSV or TSV).


In [12]:
# Look up the descriptors stored for the first morpheme.
# As the output shows, the result is a dict keyed by (ieml, language, descriptor type)
# whose values are lists of strings.
desc = db.get_descriptors()
desc.get_values_partial(morphs[0])

{("b.i.-n.i.-'t.i.-'+m.i.-f.i.-'+l.i.-E:.-+n.+f.i.-'n.-B:.A:.-',",
  'en',
  'translations'): ['metallic vibrator'],
 ("b.i.-n.i.-'t.i.-'+m.i.-f.i.-'+l.i.-E:.-+n.+f.i.-'n.-B:.A:.-',",
  'fr',
  'translations'): ['vibrateur de métal']}

In [5]:
# Take the second entry listed for type='word'; parse=True is passed so that `w` is a
# parsed object (its methods are used right after) rather than a raw IEML string.
w = db.list(type='word', parse=True)[1]

In [6]:
# Materialise everything iter_structure() yields for this word (a single Lexeme in the recorded output).
list(w.iter_structure())

[<ieml.usl.lexeme.Lexeme at 0x7fd42ccb6390>]

# ieml.dictionary.Script

Script is an old name for morphemes.

# Words

In [4]:
# where the exclamation mark is located: the role that follows '!' in the word
str(u.role)

'E:.l.-'

In [5]:
# One line per entry of the word's actors mapping: the role, then the lexeme (actor) found at that role.
print('\n'.join(str(r) + ' ' + str(a.actor) for r, a in u.syntagmatic_fun.actors.items()))

E:S:. (E:.-wa.-t.o.-' E:.-'wu.-S:.-'t.o.-',)(a.T:.-)
E:.l.- (E:.wo.- E:S:.-d.u.-')


In [6]:
from ieml.usl.constants import ADDRESS_SCRIPTS,NAMES_TO_ADDRESS 

# list of the syntagmatic roles: each address script followed by the name looked up for it in NAMES_TO_ADDRESS
print('\n'.join(str(r) + ' ' + NAMES_TO_ADDRESS[r] for r in ADDRESS_SCRIPTS))

E:S:. process
E:T:. process
E:B:. process
E:.n.- initiator
E:.d.- interactant
E:.k.- recipient
E:.t.- time
E:.l.- location
E:.m.- intention
E:.f.- manner
E:.s.- cause
E:A:. dependant
E:U:. independant


In [11]:
from requests import get

from ieml.dictionary.script import Script
from ieml.usl.word import Word
from ieml.usl.usl import usl

def _fetch_structure(endpoint: str, ieml, timeout: float):
    """GET ``/api/<endpoint>/<ieml>/`` on dev.intlekt.io and return the decoded JSON body.

    ``ieml`` is converted with ``str()`` and percent-encoded before being placed
    in the URL path, so that reserved characters in an IEML string cannot be
    interpreted as URL syntax.
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    url = "https://dev.intlekt.io/api/{}/{}/?repository=IEMLdev".format(
        endpoint, quote(str(ieml), safe=''))
    # Without a timeout a single stalled request blocks the notebook indefinitely.
    response = get(url, timeout=timeout)
    # Surface HTTP errors here instead of trying to decode an error page as JSON.
    response.raise_for_status()
    return response.json()


def get_word_structure(w: Word, timeout: float = 30):
    """Fetch the structure of a word from the ``words`` endpoint of the intlekt API.

    Args:
        w: the word to describe; only ``str(w)`` is used, so an IEML string works too.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The JSON document sent back by the API, decoded into Python objects.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within ``timeout`` seconds.
    """
    return _fetch_structure("words", w, timeout)


def get_usl_structure(m: Script, timeout: float = 30):
    """Fetch the structure of a morpheme/USL from the ``usls`` endpoint of the intlekt API.

    Args:
        m: the script to describe; only ``str(m)`` is used, so an IEML string works too.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The JSON document sent back by the API, decoded into Python objects.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within ``timeout`` seconds.
    """
    return _fetch_structure("usls", m, timeout)



"""
The structure for any Ieml is :
IemlEntry = { 
    'ieml': string,
    
    'cardinality': 'singular_sequence' | 'paradigm' | 'root_paradigm',
    'class': 'Noun'|'Verb'|'Auxiliary',
    'type': 'word' | 'morpheme' | 'polymorpheme' | 'lexeme',
    
    'comments': {'en': [], 'fr': []},
    'tags': {'en': [], 'fr': []},
    'translations': {'en': [], 'fr': []},
    
    'created': True|False, # True if it exists in the db, i.e. at least one of 'comments', 'tags' or 'translations' has a value
    'editable': True|False, # True if it exists in a db but not in the main db, or if it doesn't exist in any db
    'domains': [],
    
    'index': string, # string value to order the usls from each other
    'main_table': None, # main table for morpheme
    'paradigm': True|False, # is a paradigm ?
    
    'singular_sequences': None|IemlEntry[], # if not a paradigm, None, otherwise the list of the singular sequences (the cells of the table)
}

For Words, we add the following entries:
WordsEntry = IemlEntry + {
    'role': string[], # the tree address where to put the '!'
    'syntagmatic_function': SyntagmaticFunctionEntry # the tree
}

LexemeEntry = IemlEntry + {
    'pm_content': PolyMorphemeEntry,
    'pm_flexion': PolyMorphemeEntry
}

PolyMorphemeEntry = IemlEntry + {
    'constant': MorphemeEntry[], # the constant of the polymorphemes
    'groups': (MorphemeEntry, 0|1|2)[], # the variables with theirs multiplicities.
}

MorphemeEntry = IemlEntry


The tree structure : a tree of subtype of SyntagmaticFunctionEntry. The nodes are accessed with the actor property.


SyntagmaticFunctionEntry = {
    'actor': LexemeEntry, # the lexeme at this node in the tree
    
    'role_full': string[], # the address of this node in the tree
    'role': string, # the last value of the address (role_full[-1])
    
    'type': 'ProcessSyntagmaticFunction'| 'DependantQualitySyntagmaticFunction'|'IndependantQualitySyntagmaticFunction', 
    # There are 3 types of syntagmatic functions: process for verbal frames, dependant for actants (nouns) and independant for adjectives.
}
 
ProcessSyntagmaticFunctionEntry = SyntagmaticFunctionEntry + {
    'valence': 1|2|3,
    'initiator': DependantQualitySyntagmaticFunctionEntry,
    'recipient': DependantQualitySyntagmaticFunctionEntry, # always None if valence < 2
    'interactant': DependantQualitySyntagmaticFunctionEntry, # always None if valence < 3
    
    'cause': DependantQualitySyntagmaticFunctionEntry,
    'intention': DependantQualitySyntagmaticFunctionEntry,
    'manner': DependantQualitySyntagmaticFunctionEntry,
    'time': DependantQualitySyntagmaticFunctionEntry,
    'location': DependantQualitySyntagmaticFunctionEntry,
}

DependantQualitySyntagmaticFunctionEntry = SyntagmaticFunctionEntry + {
    'independant': IndependantQualitySyntagmaticFunction,
    'dependant': DependantQualitySyntagmaticFunctionEntry
}

IndependantQualitySyntagmaticFunction = SyntagmaticFunctionEntry


"""



# Example: fetch the structure of a word made of a single lexeme (the recorded output is truncated).
get_word_structure(usl("[! E:A:. (wa.)]"))
# get_usl_structure("E:")

{'syntagmatic_function': {'actor': {'ieml': '(wa.)',
   'editable': True,
   'translations': {'fr': [], 'en': []},
   'comments': {'fr': [], 'en': []},
   'tags': {'fr': [], 'en': []},
   'created': False,
   'type': 'lexeme',
   'paradigm': False,
   'class': 'Auxiliary',
   'index': '2000000000000000000000000000000',
   'cardinality': 'singular_sequence',
   'domains': [],
   'singular_sequences': None,
   'main_table': None,
   'pm_flexion': {'ieml': 'wa.',
    'editable': False,
    'translations': {'fr': ['agir', 'exercer'], 'en': ['act', 'perform']},
    'comments': {'fr': [], 'en': []},
    'tags': {'fr': [], 'en': []},
    'created': True,
    'type': 'morpheme',
    'paradigm': False,
    'class': 'Verb',
    'index': '000000000000000000000000000000f',
    'cardinality': 'singular_sequence',
    'domains': [],
    'singular_sequences': None,
    'main_table': 'O:O:.',
    'constant': [{'ieml': 'wa.',
      'editable': False,
      'translations': {'fr': ['agir', 'exercer'], 'e

In [5]:
# list all words as raw IEML strings (parse=False: the entries are NOT parsed)
usls = db.list(parse=False, type='word')
usls


["[E:A:.  (E:.wo.- E:.-n.S:.-' E:S:.-d.u.-')(b.a.- b.o.-n.o.-s.u.-' f.a.-b.a.-f.o.-') > ! E:A:. E:U:. () > E:A:. E:A:. (E:.wo.- E:S:.-d.u.-')(n.-S:.U:.-'B:.-'B:.-',B:.-',B:.-',_ n.-S:.U:.-'B:.-'B:.-',T:.-',S:.-',_)]",
 "[E:S:. (E:.-wa.-t.o.-' E:.-'wu.-S:.-'t.o.-',)(u.A:.-) > E:.t.- (E:.wo.- E:S:.-d.u.-')(S:.E:A:T:.-) > ! E:.n.- (E:.wo.- E:S:.-d.u.-')(l.-T:.U:.-',n.-T:.A:.-',t.o.-f.o.-',_) > E:.f.- (E:.wo.- E:S:.-d.u.-')(n.i.-d.i.-t.u.-') > E:.l.- (E:.wo.- E:.-U:.d.-l.-' E:S:.-d.u.-')(n.-T:.U:.-')]",
 "[E:S:. (E:.-wa.-t.o.-' E:.-'wu.-S:.-'t.o.-',)(u.A:.-) > E:.t.- (E:.wo.- E:S:.-d.u.-')(S:.E:A:S:.-) > ! E:.n.- (E:.wo.- E:S:.-d.u.-')(l.-T:.U:.-',n.-T:.A:.-',t.o.-f.o.-',_) > E:.f.- (E:.wo.- E:S:.-d.u.-')(n.i.-d.i.-t.u.-') > E:.l.- (E:.wo.- E:.-U:.d.-l.-' E:S:.-d.u.-')(n.-T:.U:.-')]",
 "[E:S:. (E:.-wa.-t.o.-' E:.-'wu.-S:.-'t.o.-',)(u.A:.-) > E:.t.- (E:.wo.- E:S:.-d.u.-')(p.E:S:B:.-) > ! E:.n.- (E:.wo.- E:S:.-d.u.-')(l.-T:.U:.-',n.-T:.A:.-',t.o.-f.o.-',_) > E:.f.- (E:.wo.- E:S:.-d.u.-')(n.i

In [6]:
from tqdm import tqdm


# Request the structure of every listed word; the responses are discarded, so this
# presumably serves as a check that the API can serve all of them.
# The loop variable is deliberately not called `u`: that name is bound to a parsed
# word in an earlier cell and is still read by other cells (u.role, u.syntagmatic_fun).
for word_ieml in tqdm(usls):
    get_word_structure(word_ieml)



100%|██████████| 103/103 [00:22<00:00,  4.69it/s]


In [19]:
from itertools import chain

def list_polymorpheme_of_word(w):
    """Return the polymorphemes of every node of a word, as a flat list.

    ``w`` is parsed with ``usl()`` and must yield a ``Word``.

    ``word.syntagmatic_fun`` is the syntagmatic tree of the word, and its
    ``actors`` attribute is a dictionary mapping each role to the syntagmatic
    function found in the descendants of the current node, so iterating over its
    values visits each node of the tree in turn. For a node, ``node.actor`` is
    its lexeme, whose ``pm_content`` is the content polymorpheme and
    ``pm_flexion`` the flexion polymorpheme.

    The result lists, for each node, the content polymorpheme followed by the
    flexion polymorpheme.
    """
    word = usl(w)
    assert isinstance(word, Word)

    polymorphemes = []
    for node in word.syntagmatic_fun.actors.values():
        lexeme = node.actor
        polymorphemes.append(lexeme.pm_content)
        polymorphemes.append(lexeme.pm_flexion)
    return polymorphemes
    
# Example on a word with two nodes: each node contributes its content polymorpheme, then its flexion polymorpheme.
pl = list_polymorpheme_of_word("[! E:A:.  (E:.wo.- E:S:.-d.u.-')(b.-S:.A:.-'S:.-'S:.-',) > E:A:. E:A:. (E:.wo.- E:S:.-d.u.-')(k.a.-k.a.-')]")

for pm in pl:
    print(pm)

b.-S:.A:.-'S:.-'S:.-',
E:.wo.- E:S:.-d.u.-'
k.a.-k.a.-'
E:.wo.- E:S:.-d.u.-'


In [8]:
# list all words as raw IEML strings (parse=False: the entries are NOT parsed)
# NOTE(review): this repeats an earlier cell that builds `usls` the same way — keep only one of them.
usls = db.list(parse=False, type='word')
usls

["[! E:A:.  (E:.wo.- E:S:.-d.u.-')(b.-S:.A:.-'T:.-'T:.-',)]",
 "[! E:A:.  (E:U:A:. E:.wo.- E:S:.-d.u.-')(s.e.-k.u.-wa.e.-')]",
 "[! E:A:.  (E:.wo.- E:S:.-d.u.-')(b.-S:.A:.-'S:.-'S:.-',) > E:A:. E:A:. (E:.wo.- E:S:.-d.u.-')(k.a.-k.a.-')]",
 "[! E:A:.  (E:.wo.- E:S:.-d.u.-')(f.i.-b.i.-')]",
 "[! E:A:.  (E:.wo.- E:S:.-d.u.-')(a.)]",
 "[! E:A:.  (E:U:A:. E:.wo.- E:S:.-d.u.-')(f.u.-wu.f.U:.-')]",
 "[! E:A:.  (E:.wo.- E:S:.-d.u.-')(k.x.-) > E:A:. E:A:. (E:.wo.- E:S:.-d.u.-')(t.i.-s.i.-'u.B:.-U:.-'wa.-',)]",
 "[! E:A:.  (E:.wo.- E:S:.-d.u.-')(k.i.-l.i.-')]",
 "[! E:A:.  (E:.wo.- E:S:.-d.u.-')(m.-B:.A:.-') > E:A:. E:A:. (E:.wo.- E:B:.-d.u.-')(p.E:A:T:.-)]",
 "[! E:A:.  (E:.wo.- E:S:.-d.u.-')(wa.)]",
 "[! E:A:.  (E:.wo.- E:S:.-d.u.-')(m.-T:.U:.-') > E:A:. E:U:. (E:B:.-d.u.-')(p.E:A:T:.-)]",
 "[! E:A:.  (E:U:A:. E:.wo.- E:S:.-d.u.-')(f.u.-wu.f.S:.-')]",
 "[! E:A:.  (E:.wo.- E:S:.-d.u.-')(l.-y.-'E:U:.t.-l.-'E:U:.t.-l.-',)]",
 "[! E:A:.  (E:.wo.- E:S:.-d.u.-')(k.a.-) > E:A:. E:A:. (E:.wo.- E:S:.-d

In [3]:
from ieml.usl import USL
from ieml.dictionary.script import Script

# the database contains the morphemes and the usls made from morphemes
# NOTE(review): the preceding cell builds `usls` with parse=False and its output shows plain
# strings; with those values this expression would be False. The recorded True presumably
# came from a run in which `usls` held parsed objects — confirm and define `usls` explicitly here.
all(isinstance(u, (USL, Script)) for u in usls) 

True

In [35]:
# Descriptor store of the database; read as a global by display_usls.
descriptorsDB = db.get_descriptors()
def display_usls(u):
    """Build a multi-line, human-readable description of a USL or Script.

    The first line is ``str(u)``. It is followed by one tab-indented
    ``label: value`` line per descriptor entry, then by the class name of ``u``
    (``type``) and by whether it is a paradigm (``is_paradigm``, the negation of
    ``u.is_singular``).

    The descriptors come from the global ``descriptorsDB``:
    ``get_values_partial(u)`` returns a dict ``{(ieml, lang, descriptor_type): string[]}``
    where ``lang`` is one of ieml.constants.LANGUAGES = ['fr', 'en'] and
    ``descriptor_type`` one of ieml.constants.DESCRIPTORS_CLASS =
    ['translations', 'comments', 'tags']. Each entry is labelled
    ``<lang>_<descriptor_type>`` (e.g. ``en_translations``) and its strings are
    joined with ', '.
    """
    descriptor = descriptorsDB.get_values_partial(u)

    lines = [str(u)]

    fields = {}
    for (_, lang, descriptor_type), values in descriptor.items():
        fields['{}_{}'.format(lang, descriptor_type)] = ', '.join(values)
    # Set last so that they take precedence, as in a dict literal with trailing keys.
    fields['type'] = u.__class__.__name__
    fields['is_paradigm'] = not u.is_singular

    for label, value in fields.items():
        lines.append("\t{}: {}".format(label, str(value)))
    return "\n".join(lines)

In [36]:
from ieml.usl.usl import usl

# usl() parses the string and returns a USL, or a Script when the string is a morpheme
oo_script = usl('O:O:.')
assert isinstance(oo_script, Script)

# Display the object that was just type-checked instead of parsing the same string a second time.
print(display_usls(oo_script))

O:O:.
	en_comments: In order to understand O x O we have to think about the basic sensorimotor cycle. Let's begin with the inner sanctum of personal existence that does not manifest externally: to exist. Then, from this virtual place, we act. As a result, our actual environment is transformed and a new reality appears or happens. Finally, from the actuality of external happening, we perceive back to our inner existence. Note the two intertwined oppositions : \n\n• wa act (in –> out) / wu perceive (out –> in) \n\n• wo exist (in –> in) / we appear (out –> out)\n\nwo wa wu we can be used to model any quaternary semantic symmetry. They can also be used to « verbify » nominal semes when creating new words.
	en_translations: interaction phases, tetrad
	fr_translations: phases de l'interaction, tétrade
	type: MultiplicativeScript
	is_paradigm: True
