In [214]:
import benepar, spacy
import nltk

benepar.download('benepar_en3')

nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('benepar', config={'model': 'benepar_en3'})
doc = nlp('The time for action is now. It is never too late to do something.')
sent = list(doc.sents)[0]

[nltk_data] Downloading package benepar_en3 to
[nltk_data]     /Users/kostadindev/nltk_data...
[nltk_data]   Package benepar_en3 is already up-to-date!


In [254]:
#list(sent._.children)[3]
#print(list(sent._.children)[2].label)
# for i in range(len(list(sent._.children))):
#     print(list(sent._.children[i]), list(sent._._labels)[i])
# list(sent._.children)
import pandas as pd
from pprint import pprint as print
import numpy as np
from sklearn.cluster import AffinityPropagation
import distance
sentences = pd.read_csv("sentences.csv")
sentences = sentences.to_numpy().squeeze()
signs = [',', ':', ';', '.', '!', '?']
recursive = True
max_similarity= 5

In [255]:
def get_phrases(sent, recursive = False):
    children = list(sent._.children)
    #VP, NP, SBAR, ADVP, PP = [], [], [], [], []
    phrases = {}
    for i in range(len(children)):
        if str(children[i]) not in signs:
            try:
                label = children[i]._.labels[0]
            except:
                label= "stop"
            # print(children[i], label)
            # print(children[i], children[i]._.labels)
            # generate_questions(children[i])
            if label != 'stop':
                if label not in phrases.keys():
                    phrases[label] = []
                phrases[label].append(children[i])
                new_phrases= get_phrases(children[i], recursive= recursive)
                for key in new_phrases.keys():
                    if key not in phrases.keys():
                        phrases[key] = []
                    phrases[key].extend(new_phrases[key])
    return phrases


In [279]:
def get_QA(phrases):
    questions, answers = [], []
    if 'SBAR' in phrases.keys():
        for sbar in phrases['SBAR']:
            questions.append(f"Q: What {str(sbar).lower()}?")
            if 'NP' in phrases.keys():
                for np in phrases['NP']:
                    if 'VP' in phrases.keys():
                        for vp in phrases['VP']:
                            answers.append(f'A: ${np}$ @{vp}@')
    return questions, answers

In [280]:
def edit_distance(s1, s2):
    if len(s1) > len(s2):
        s1, s2 = s2, s1

    distances = range(len(s1) + 1)
    for i2, c2 in enumerate(s2):
        distances_ = [i2+1]
        for i1, c1 in enumerate(s1):
            if c1 == c2:
                distances_.append(distances[i1])
            else:
                distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1])))
        distances = distances_
    return distances[-1]

In [310]:
def filter_edit_distance(words):
    if len(words) == 0:
        return []
    filtered_words = []
    words = np.asarray(words) #So that indexing with a list will work
    lev_similarity = -1*np.array([[distance.levenshtein(w1,w2) for w1 in words] for w2 in words])

    affprop = AffinityPropagation(affinity="precomputed", damping=0.5)
    affprop.fit(lev_similarity)
    for cluster_id in np.unique(affprop.labels_):
        #exemplar = words[affprop.cluster_centers_indices_[cluster_id]]
        cluster = np.unique(words[np.nonzero(affprop.labels_==cluster_id)])
        filtered_words.append(max(cluster, key = lambda x: len(x)))
    return filtered_words

In [311]:
def filter_not_in_question(answers, question):
    filtered_answers = []
    for answer in answers:
        vp = answer.split('@')[1]
        np = answer.split('$')[1]
        if vp not in question and np not in question:
            filtered_answers.append(answer)
    return filtered_answers

In [312]:
def main(filters):
    for sentence in sentences:
        sent = list(nlp(sentence).sents)[0]
        print(sent)
        phrases = get_phrases(sent, recursive = recursive)
        questions, answers = get_QA(phrases)
        answers = filter_edit_distance(answers)
        for question in questions:
            answers = filter_not_in_question(answers, question)
            print(questions)
            print(answers)
        print("---------------------------------------------------------------")

['Q: What if the underlying file system at the server is only accessible in a '
 'read-only mode and the open request has specified open4_share_access_write '
 'or open4_share_access_both?']
['A: $OPEN4_SHARE_ACCESS_WRITE or OPEN4_SHARE_ACCESS_BOTH$ @will return NFS4ERR_ROFS to indicate a read-only file system@',
 'A: $OPEN4_SHARE_ACCESS_WRITE or OPEN4_SHARE_ACCESS_BOTH$ @has specified OPEN4_SHARE_ACCESS_WRITE or OPEN4_SHARE_ACCESS_BOTH@',
 'A: $OPEN4_SHARE_ACCESS_WRITE$ @will return NFS4ERR_ROFS to indicate a read-only file system@']
'---------------------------------------------------------------'
If the object does not exist, the server creates the object and stores the verifier in a stable storage.




['Q: What if the object does not exist?']
['A: $a stable storage$ @creates the object@',
 'A: $a stable storage$ @creates the object and stores the verifier in a stable storage@',
 'A: $a stable storage$ @stores the verifier in a stable storage@']
'---------------------------------------------------------------'
If the stored verifier does not match, then an error of NFS4ERR_EXIST is returned.




['Q: What if the stored verifier does not match?']
[]
'---------------------------------------------------------------'




If the server cannot support these exclusive create semantics, possibly because of the requirement to commit the verifier to stable storage, it should fail the OPEN request with the error NFS4ERR_NOTSUPP.
['Q: What if the server cannot support these exclusive create semantics, '
 'possibly because of the requirement to commit the verifier to stable '
 'storage,?']
[]
'---------------------------------------------------------------'
If the verifier do not match, the request is rejected with the status NFS4ERR_EXIST.




['Q: What if the verifier do not match?']
['A: $the status NFS4ERR_EXIST$ @is rejected with the status NFS4ERR_EXIST@']
'---------------------------------------------------------------'
If the requester is not authorized to READ or WRITE(depending on the share_access value), the server must return NFS4ERR_ACCESS.




['Q: What if the requester is not authorized to read or write(depending on the '
 'share_access value)?']
[]
'---------------------------------------------------------------'
If the component provided to OPEN resolves to something other than a regular file (or a named attribute), an error will be returned to the client.




['Q: What if the component provided to open resolves to something other than a '
 'regular file (or a named attribute)?']
['A: $the component provided to OPEN$ @will be returned to the client@']
'---------------------------------------------------------------'
If it is a directory, NFS4ERR_ISDIR is returned; otherwise, NFS4ERR_SYMLINK is returned.
['Q: What if it is a directory?']
[]
'---------------------------------------------------------------'




In [309]:
filters = []
main(filters)
