In [15]:
import json
import re

In [2]:
# Load the TACRED explanations from disk; the file is parsed as JSON into `explanations`.
# NOTE(review): the path is an absolute, machine-specific location.
with open("/home/rahul/ziqi/NExT/data/tacred/explanations.json") as explanations_file:
    explanations = json.load(explanations_file)

In [4]:
# Make the parent directory importable. Presumably this is where the CCG_new package
# imported by the next cell lives -- TODO confirm against the repository layout.
import sys
sys.path.append("../")

In [5]:
from CCG_new import utils

In [50]:
# NOTE(review): this cell calls create_semantic_repr, which is defined in a later cell
# (execution count 50 here vs. 49 for the definition), so a fresh top-to-bottom run
# would raise a NameError at this point.
create_semantic_repr(explanations[0][1])

('.root',
 ('@And',
  ('@Is',
   'There',
   ('@AtMost',
    ('@between', ('@And', 'ArgY', 'ArgX')),
    ('@Num', '3', 'tokens'))),
  ('@Is', ('@Word', "'s daughter"), ('@between', ('@And', 'ArgY', 'ArgX')))))

In [49]:
def create_semantic_repr(semantic_rep):
    """
        Given a semantic string representation of a parse tree, transform it into a hierarchical
        (nested tuple) structure, so that a conversion to a labeling function is possible; functions
        at the top rely on the return value of functions lower down.
        In doing this, we lose the original lexical hierarchy of the parse tree.

        This differs from this approach https://homes.cs.washington.edu/~lsz/papers/zc-uai05.pdf
        We can't create features based on lexical structure, but only semantic structure.
        If the semantic string representation of the tree makes sense when considering the ordering
        of the functions making up the string, then we output the hierarchical tuple, else we return
        False. False here indicates that while a valid parse tree was attainable, the semantics of
        the parse do not make sense.

        The string is rewritten from call syntax, e.g. "'f'('a','b')", into tuple syntax,
        "('f','a','b')", and the rewritten string is then evaluated as a Python expression.

        Arguments:
            semantic_rep (str) : tree representation of tree from our parse_tokens function

        Returns:
            tuple | False : ('.root', <nested tuple>) if the rewritten string evaluates
                            successfully; False if it does not, or if a quoted clause has no
                            closing quote.
    """
    # Split on commas and on opening parentheses. The capturing group keeps every "(" as its own
    # token, and produces None whenever the comma alternative matched, so those are dropped.
    clauses = [piece for piece in re.split(',|(\\()', semantic_rep) if piece is not None]

    # Switch the position of each "(" and the word before it: f ( a  ->  ( f a
    switched_semantics = []
    for token in clauses:
        if token == '(':
            switched_semantics.insert(-1, '(')
        else:
            switched_semantics.append(token)

    # Converting semantic string into a multi-level tuple, ex: (item, tuple) would be a two level tuple
    # This representation allows for the conversion from semantic representation to labeling function
    hierarchical_semantics = ""
    previous_clause = None  # tracked explicitly so the first iteration never wraps to index -1
    for clause in switched_semantics:
        prepped_clause = clause
        if prepped_clause.startswith("'"):
            prepped_clause = utils.prepare_token_for_rule_addition(prepped_clause, reverse=True)
            # Index of the closing quote: the last character once trailing ")" are ignored.
            posi = len(prepped_clause.rstrip(")")) - 1
            if posi < 0 or prepped_clause[posi] != "'":
                # Malformed quoted clause; report it the same way as any other invalid semantics
                # instead of relying on an assert (which is stripped under `python -O`).
                return False
            # Escape quotes inside the literal so the clause stays a single string when evaluated.
            prepped_clause = prepped_clause[0] + \
                             prepped_clause[1:posi].replace("'", "\\'") + \
                             prepped_clause[posi:]

        # Separate siblings with a comma, except right after "(" and at the very start.
        if previous_clause != "(" and hierarchical_semantics:
            hierarchical_semantics += ","

        hierarchical_semantics += prepped_clause
        previous_clause = clause

    # if the ordering of the semantics in this semantic representation is acceptable per the functions
    # the semantics map to, then we will be able to create the desired multi-level tuple
    # else we return False
    try:
        # SECURITY NOTE: eval executes the rewritten string as Python code. Only feed this function
        # trusted explanation data; consider ast.literal_eval if inputs are always plain literals.
        return ('.root', eval(hierarchical_semantics))
    except Exception:
        # Catch ordinary evaluation failures only; KeyboardInterrupt/SystemExit still propagate.
        return False

In [52]:
# Show the first explanation whose semantic string (index 1) contains the "COMMA" placeholder.
first_with_comma = next((entry for entry in explanations if "COMMA" in entry[1]), None)
if first_with_comma is not None:
    print(first_with_comma)

['per:spouse', "'@And'('@Is'('There','@AtMost'('@between'('@And'('ArgY','ArgX')),'@Num'('4','tokens'))),'@Is'('@Word'(''sSPACEexCOMMA4wife'),'@between'('@And'('ArgY','ArgX'))))", ["SUBJ-PERSON 's ex-wife OBJ-PERSON"], ["'s ex-wife"]]
