In [37]:
%run -i "../util/file_utils.ipynb"
%run -i "../util/lang_utils.ipynb"

# Finding triplets using spaCy

In [16]:
# Demo sentences from which subject / verb / object triplets are extracted.
sentences = [
    "The big black cat stared at the small dog.",
    "Jane watched her brother in the evenings.",
    "Nick was driving to Madrid.",
]

# Token-level patterns describing the verb-phrase shapes we look for:
#   1. a bare verb                                    ("watched")
#   2. a verb followed by an adposition               ("stared at")
#   3. optional auxiliary + verb + optional adposition ("was driving to")
verb_patterns = [
    [dict(POS="VERB")],
    [dict(POS="VERB"), dict(POS="ADP")],
    [dict(POS="AUX", OP="?"), dict(POS="VERB"), dict(POS="ADP", OP="?")],
]

In [17]:
from spacy.matcher import Matcher
# Build a token matcher on the vocabulary of `small_model`.
# NOTE(review): `small_model` is not defined in this notebook — presumably it
# comes from the `%run` of lang_utils.ipynb; confirm.
matcher = Matcher(small_model.vocab)
# Register every verb-phrase pattern under the single match key "VP".
matcher.add("VP", verb_patterns)

In [25]:
def find_verb_phrase(doc, matcher):
    """Return the longest "VP" match in ``doc`` and its root token.

    Args:
        doc: The parsed document. It is sliced with ``doc[start:end]`` and its
            ``vocab.strings`` table is used to resolve match ids to names.
        matcher: A callable that, given ``doc``, returns an iterable of
            ``(match_id, start, end)`` tuples.

    Returns:
        A ``(verb_phrase, root)`` pair. ``verb_phrase`` is the longest span
        matched under the "VP" key (the earliest one wins on ties). ``root``
        is the token in that span whose ``dep_`` is ``"ROOT"``, or the span's
        first token when no token carries that label. When nothing matches
        under "VP" the function returns ``(None, None)`` instead of raising.
    """
    # Resolve match ids through the doc's own vocab so the function does not
    # depend on a module-level model object being in scope.
    strings = doc.vocab.strings
    verb_phrase_spans = [
        doc[start:end]
        for match_id, start, end in matcher(doc)
        if strings[match_id] == "VP"
    ]
    if not verb_phrase_spans:
        # No verb phrase found: signal it with None rather than an IndexError.
        return None, None

    # max() keeps the first of equally long spans, which is the same choice a
    # stable descending sort by length followed by [0] would make.
    verb_phrase = max(verb_phrase_spans, key=len)

    # Default to the first token; prefer the token labelled ROOT if present.
    root = verb_phrase[0]
    for token in verb_phrase:
        if token.dep_ == "ROOT":
            root = token
    return verb_phrase, root

In [39]:
# Print one "subject <TAB> verb phrase <TAB> object" row per demo sentence.
# NOTE(review): `small_model` and the `get_*` helpers are not defined in this
# notebook — presumably they come from the `%run` utility notebooks; confirm.
for sentence in sentences:
    doc = small_model(sentence)
    verb_phrase, root = find_verb_phrase(doc, matcher)
    subject_phrase = get_subject_phrase(doc)
    object_phrase = get_object_phrase(doc)
    prep_phrases = get_prepositional_phrase_objs(doc)
    if object_phrase is None:
        # No object phrase was found: fall back to the first prepositional
        # phrase object. Guard the lookup so a sentence that has neither
        # yields None instead of raising IndexError.
        object_phrase = prep_phrases[0] if prep_phrases else None
    print(subject_phrase, "\t", verb_phrase, "\t", object_phrase)

The big black cat 	 stared at 	 the small dog
Jane 	 watched 	 her brother
Nick 	 was driving to 	 Madrid


# Finding triplets using GPT

In [40]:
import openai
# NOTE(review): OPEN_AI_KEY is not defined in this notebook — presumably it is
# loaded by one of the `%run` utility notebooks; confirm the key itself is
# never hardcoded in a committed file.
openai.api_key = OPEN_AI_KEY

In [45]:
# Instruction sent to the model: extract the triplet from one sentence and
# answer as a Python-dict-shaped string.
prompt = """Find subject, verb, object triplets in the following sentence.
Create a python dictionary structure of the form: {"subject": Subject, "verb": Verb, "object": Object}
Sentence: Nick was driving to Madrid."""

# Request options, kept together so they are easy to tweak.
# NOTE(review): `openai.Completion` and "text-davinci-003" belong to the legacy
# completions API — confirm they are still available before re-running.
completion_options = {
    "model": "text-davinci-003",
    "prompt": prompt,
    "temperature": 0,
    "max_tokens": 256,
    "top_p": 1.0,
    "frequency_penalty": 0,
    "presence_penalty": 0,
}
response = openai.Completion.create(**completion_options)
print(response)

{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "\n\n{\"subject\": \"Nick\", \"verb\": \"was driving\", \"object\": \"to Madrid\"}"
    }
  ],
  "created": 1687940898,
  "id": "cmpl-7WKp8Cv4RVOXN4GE3RBd53gRyNlOe",
  "model": "text-davinci-003",
  "object": "text_completion",
  "usage": {
    "completion_tokens": 22,
    "prompt_tokens": 48,
    "total_tokens": 70
  }
}
