### NLP Setup

In [1]:
import pickle
import sys
import spacy

# Load the pre-built NLP object from a local pickle so later cells can call nlp(text).
# NOTE(review): presumably a spaCy pipeline (spacy is imported above) — confirm.
# SECURITY: pickle.load can execute arbitrary code; only load pickle files you created/trust.
#sys.path.append('../')  # disabled path tweak, kept for reference

with open('./nlp/nlp.pickle', 'rb') as f:
    nlp = pickle.load(f)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from app.src.helpers.sentence_summarizer import SentenceSummarizer

# Build the summarizer around the loaded nlp object; its summarize() call in the
# workbench cell below prints a per-token table for a sentence.
summarizer = SentenceSummarizer(nlp)

## Workbench

In [3]:
# Sample sentence to inspect.
val = 'I arrived within 2 weeks of the project being completed.'

# Parsed for ad-hoc inspection; note that summarize() below receives the raw
# string, not this doc.
doc = nlp(val)

# Prints the sentence followed by a per-token table
# (TEXT, POS, TAG, DEP, LEMMA, HEAD, ENT).
summarizer.summarize(val)


Sentence: I arrived within 2 weeks of the project being completed.

i     TEXT            POS             TAG             DEP             LEMMA           HEAD            ENT            
--------------------------------------------------------------------------------------------------------------
0     I               PRON            PRP             nsubj           I               arrived                        
1     arrived         VERB            VBD             ROOT            arrive          arrived                        
2     within          ADP             IN              prep            within          arrived                        
3     2               NUM             CD              nummod          2               weeks           DATE           
4     weeks           NOUN            NNS             pobj            week            within          DATE           
5     of              ADP             IN              prep            of              weeks                     



In [4]:
from nltk.corpus import wordnet as wn 

In [45]:
# Look up the WordNet synsets for the multi-word lemma 'credit_card'
# (no pos filter is passed here).
f = wn.synsets('credit_card')

# Last expression of the cell, so the list is displayed as the cell output.
f

[Synset('credit_card.n.01')]

In [46]:
# Candidate instrument term, written in WordNet lemma form (underscore-joined).
nl_text = 'credit_card'

# Obligation text; currently only referenced by the commented-out similarity
# check inside the synset loop further down.
description = 'the seller shall deliver the goods to the buyer'

In [47]:
# Anchor synset: each noun sense of nl_text is compared against it with
# lowest_common_hypernyms() in the next cell.
target_ss_name = 'instrumentality.n.03'
target_ss = wn.synset(target_ss_name)

In [48]:
# For every noun sense of nl_text, print its name/definition, its lemma names,
# and the lowest common hypernym(s) it shares with the anchor synset target_ss.
all_synsets = wn.synsets(nl_text, pos=wn.NOUN)

for sense in all_synsets:
    report_lines = [
        '\n',  # printed on its own, so each sense is preceded by two blank lines
        f'{sense.name()}: {sense.definition()}',
        sense.lemma_names(),
        sense.lowest_common_hypernyms(target_ss),
    ]
    for line in report_lines:
        print(line)

    # Ideas not yet implemented:
    #  - Check whether the sense is an instrument.
    #  - Check whether it is related to our domain, e.g. compare
    #    nlp(description) with nlp(sense.definition()) via doc1.similarity(doc2)
    #    and print the score.



credit_card.n.01: a card (usually plastic) that assures a seller that the person using it has a satisfactory credit rating and that the issuer will see to it that the seller receives payment for the merchandise delivered
['credit_card', 'charge_card', 'charge_plate', 'plastic']
[Synset('entity.n.01')]


In [None]:
# Scratch experiment: does a phrase contain a noun that suggests a contract?
# Every NOUN token is scored by its best Wu-Palmer similarity to contract.n.01,
# and the top-scoring noun is compared against a 0.7 threshold.

# All senses of 'contract'; only used by the commented-out dump at the bottom.
ss = wn.synsets('contract')

contract_synset = wn.synset('contract.n.01')

val = 'before the agreement terminates'
doc = nlp(val)

# Look for a noun that suggests presence of contract
## Noun chunk or Noun?
## What if it is qualified?
nouns = [x for x in doc if x.pos_ == 'NOUN']


noun_scores = []

for n in nouns:
    print(n.text)
    n_ss = wn.synsets(n.text, pos=wn.NOUN)

    # wup_similarity may return None when no connecting path exists; drop those
    # so max() only ever compares numbers.
    sims = [contract_synset.wup_similarity(ns) for ns in n_ss]
    sims = [sim for sim in sims if sim is not None]

    # A noun unknown to WordNet (or with no comparable sense) cannot be scored;
    # skip it instead of letting max() fail on an empty list.
    if not sims:
        continue

    noun_scores.append((n, max(sims)))
    #print(n_ss)

if noun_scores:
    top_ns = max(noun_scores, key=lambda x: x[1])
    print(top_ns)

    if top_ns[1] < 0.7:
        print('failed threshold')
else:
    # No NOUN tokens at all, or none that WordNet could score.
    top_ns = None
    print('no scorable nouns found')

# Look for dependence on event
## Case 1: 


# Contract event


# for syn in ss:
#     print(syn.name())
#     print(syn.definition())
#     print('\n')


Contract event
- presence of a contract
- something happening to it (a narrow set of verbs)

Examples
- termination of contract
- contract terminate
- the contract terminates



In [None]:
# All synsets for 'van'; note this rebinds ss, which held the 'contract' synsets.
ss = wn.synsets('van')

In [None]:
# NOTE: a bare expression that is not the last line of a cell is not displayed,
# so this line shows nothing; only s.definition() below is rendered.
ss

# Select one specific sense of 'van' and show its gloss.
s = wn.synset('van.n.05')
s.definition()

In [None]:
# First entry of the selected sense's hypernym list.
s.hypernyms()[0]

In [None]:
# Walk up the hypernym chain from a starting synset, printing up to 8 levels
# (name and definition), always following the first listed hypernym.
a = 'van.n.05'
sn = wn.synset(a)
for depth in range(0, 8):
    print('-', sn.name(), sn.definition())
    parents = sn.hypernyms()
    # Stop cleanly once a synset has no hypernyms (e.g. the root) instead of
    # raising IndexError on parents[0].
    if not parents:
        break
    sn = parents[0]

In [None]:
# Anchor synset for "instrumentality", used to test whether a noun sense
# sits under it.
inst_s = wn.synset('instrumentality.n.03')

# List some hyponyms
# Check for hyponym

# Direct hyponyms of the anchor; not used further in this cell
# (the print below is disabled).
check = inst_s.hyponyms()
#print(check)

# Sense under test; note this rebinds f from the earlier credit_card lookup.
f = wn.synset('van.n.05')


# Lowest common hypernym(s) of the anchor and the sense under test.
x = inst_s.lowest_common_hypernyms(f)
print(x)

In [None]:
# What do we want?
# User enters a noun
# Perform some checks to verify that what they enter is in fact an instrument
# Can also do some stripping, e.g. of determiners

# Take a string - get all noun synsets
# Look through these synsets to see if they relate to instrumentality (or see if there are other useful anchor synsets)

Main
- Input: Piece of text
- Output: properly formatted instrument spec

Inner
- input: noun phrase
- output: is it an instrument 
- or maybe I can soften it - put a probability? Or that could be an internal detail

How will we handle noun phrases? Can WordNet do this?

Questions
- Can we handle noun phrases, or just single nouns?
- This will be a question of certainty - what is the threshold?
- What is the user experience? Do they get a warning, a simple pass/fail, etc.?
- Should we take into account the specific domain or other parts of the contract?

For the delivery example
- we can check if it's an instrument
- we can check the relatedness to the obligation/event text

Should do a WordNet tutorial