# Merging Phrase Atoms

Nearly all phrase atoms have been parsed with the phrase_parser. 
Within their embedding phrases, phrase atoms are related to one another,
and these relations can be accessed in the data. We will take advantage
of them in order to complete the phrase parsing.

In [28]:
import sys
import copy
import collections
from tf.app import use
sys.path.append('../scripts/tools/')
import nav_tree as nt
from load_parse import ParseLoader

# Start the BHSA app; `hoist=globals()` is passed so that Text-Fabric
# can place its API names into this notebook's global namespace.
A = use('bhsa', hoist=globals())
A.displaySetup(
    condenseType='clause',
    extraFeatures='function pdp st',
    withNodes=True,
)

# Read the previously stored parsing results from disk.
parses = ParseLoader(
    '../../results/parsing/phrase_parsings.json'
).load()
slot2pos = ParseLoader(
    '../../results/parsing/slot2pos.json'
).load()

In [29]:
class PhraseAtomComposer:
    """Compose phrase-atom parses into parses of larger phrases.

    Phrase atoms are linked to one another through their `rela` feature
    and their `mother` edges. This class collects those links into a
    mother-to-children map and uses it to graft the parse of every child
    phrase atom onto the parse of its mother.

    A parse is either a bare node number or a three-item list
    ``[kid, mom, rela]`` whose first two items are themselves parses.

    Attributes:
        F, E, L: feature, edge and locality APIs taken from `tf_api`.
        ph2parse: private deep copy of the parses given at construction.
        covered: empty set created at construction; none of the methods
            in this class read or write it.
        mom2kids: maps a mother node to a list of ``(child, rela)``.
    """

    # Translation of phrase-atom `rela` values into the relation labels
    # used in composed parses; None means "no edge for this atom".
    RELA_MAP = {
        'Appo': 'appo',
        'Spec': 'adjv',
        'Link': 'conj',
        'Sfxs': 'adjv',
        'Para': 'para',
        'NA': None,
    }

    def __init__(self, tf_api, ph2parse):
        """Store the Text-Fabric APIs and index the phrase-atom edges.

        Args:
            tf_api: object exposing the Text-Fabric `F`, `E` and `L` APIs.
            ph2parse: mapping from phrase-atom node to its parse. It is
                deep-copied, so the caller's mapping is never modified.
        """
        self.F, self.E, self.L = tf_api.F, tf_api.E, tf_api.L
        self.ph2parse = copy.deepcopy(ph2parse)  # write only to local copy
        self.covered = set()  # set of covered phrases
        self.mom2kids = self.build_edges()

    def build_edges(self):
        """Build the mother-to-children edge map for all phrase atoms.

        Every phrase atom whose `rela` maps to a label contributes one
        edge. Atoms labelled 'conj' are attached to the phrase atom that
        immediately follows them (node + 1) rather than to their mother,
        so that they point at the parallel element.

        Returns:
            A `collections.defaultdict(list)` mapping a mother node to a
            list of ``(child, rela)`` tuples, in corpus order of the
            children.

        Raises:
            KeyError: if a phrase atom has a `rela` value missing from
                `RELA_MAP`.
            ValueError: if a related phrase atom does not have exactly
                one mother.
        """
        mom2kids = collections.defaultdict(list)

        for ph in self.F.otype.s('phrase_atom'):
            rela = self.RELA_MAP[self.F.rela.v(ph)]
            if not rela:
                continue

            if rela == 'conj':
                # reassign these edges to point at
                # the parallel element instead
                mom = ph + 1
            else:
                mothers = self.E.mother.f(ph)
                # fail here with a readable message instead of with an
                # unpacking error further down
                if len(mothers) != 1:
                    raise ValueError(
                        f'Expected exactly one mother for {ph}, '
                        f'found {len(mothers)}'
                    )
                mom = mothers[0]

            mom2kids[mom].append((ph, rela))

        return mom2kids

    def get_parse(self, ph_atom):
        """Retrieve the parse of a phrase atom.

        Args:
            ph_atom: phrase-atom node number.

        Returns:
            The stored parse if there is one. Otherwise, for a phrase
            atom that contains exactly one word, the one-item sequence
            of its word nodes as returned by ``L.d(ph_atom, 'word')``.

        Raises:
            TypeError: if no parse is stored and `ph_atom` is not an int.
            LookupError: if no parse is stored and the phrase atom does
                not consist of exactly one word.
        """
        try:
            return self.ph2parse[ph_atom]
        except KeyError:
            pass

        if not isinstance(ph_atom, int):
            raise TypeError(ph_atom)

        # unparsed single-word atoms are represented by their word alone
        words = self.L.d(ph_atom, 'word')
        if len(words) == 1:
            return words
        raise LookupError(f'No parsing found for {ph_atom}!')

    def compose_phrase(self, node):
        """Recursively compose the parse of a phrase atom and its children.

        Children hanging from a node found inside the parse are grafted
        in at that position; children hanging from `node` itself are
        wrapped around the complete parse.

        Args:
            node: phrase-atom node number.

        Returns:
            The composed parse: a bare node number or a nested
            ``[kid, mom, rela]`` list.

        Raises:
            ValueError: if the parse of `node` has neither one nor three
                items.
        """
        parse = self.get_parse(node)

        if len(parse) == 3:
            # graft onto a copy so the stored parse stays pristine and
            # composing the same atom again gives the same result
            parse = copy.deepcopy(parse)

            # modify a constituent where it has phrase children dangling
            # from it, by reassigning the item at its index in the list
            for ph in nt.traverse_tree(parse):
                # `item` must not be called `node`: the phrase atom is
                # still needed after this loop
                for i, item in enumerate(ph[:-1]):
                    if not isinstance(item, int):
                        continue
                    # .get() avoids inserting empty entries into the
                    # defaultdict for every node that is looked up
                    for kid, rela in self.mom2kids.get(item, ()):
                        ph[i] = [
                            self.compose_phrase(kid),
                            ph[i],  # build up recursively
                            rela,
                        ]
        elif len(parse) == 1:
            parse = parse[0]
        else:
            raise ValueError(
                f'Invalid parse length of {len(parse)}: {parse}'
            )

        # wrap the children of the phrase atom itself around its parse;
        # the last child is applied first and so ends up innermost
        for kid, rela in reversed(self.mom2kids.get(node, ())):
            parse = [
                self.compose_phrase(kid),
                parse,
                rela,
            ]

        return parse

In [50]:
# Build a composer from the loaded Text-Fabric API and phrase parses.
composer = PhraseAtomComposer(tf_api=A.api, ph2parse=parses)

# Compose a single phrase atom as a trial run.
test_run = composer.compose_phrase(node=1014618)

# Leave the result as the cell's last expression so the notebook shows it.
test_run

[[[179314, [[[179317, 179318, 'DEF'], 179316, 'GP'], 179315, 'appo'], 'conj'],
  [[[179312, 179313, 'DEF'], 179311, 'GP'], 179310, 'appo'],
  'para'],
 179309,
 'GP']

In [51]:
# Print the relations contained in the composed parse; `T.text` is handed
# to `nt.show_relas`, presumably so nodes are rendered as text — confirm
# against nav_tree.
print(nt.show_relas(test_run, T.text))

סִיחֹ֣ון׀ מֶ֣לֶךְ הָאֱמֹרִ֗י וְעֹג֙ מֶ֣לֶךְ הַבָּשָׁ֔ן   --GP-->  אֶ֜רֶץ 
וְעֹג֙ מֶ֣לֶךְ הַבָּשָׁ֔ן   --para-->  סִיחֹ֣ון׀ מֶ֣לֶךְ הָאֱמֹרִ֗י 
וְ  --conj-->  עֹג֙ מֶ֣לֶךְ הַבָּשָׁ֔ן 
מֶ֣לֶךְ הַבָּשָׁ֔ן   --appo-->  עֹג֙ 
הַבָּשָׁ֔ן   --GP-->  מֶ֣לֶךְ 
הַ  --DEF-->  בָּשָׁ֔ן 
מֶ֣לֶךְ הָאֱמֹרִ֗י   --appo-->  סִיחֹ֣ון׀ 
הָאֱמֹרִ֗י   --GP-->  מֶ֣לֶךְ 
הָ  --DEF-->  אֱמֹרִ֗י 


In [52]:
# Display node 755310 with the Text-Fabric pretty view, condensed to
# phrase level, with subphrases hidden and the `rela` feature shown.
A.pretty(755310, condenseType='phrase', hiddenTypes={'subphrase'}, extraFeatures='rela')