In [3]:
import stanza
from stanza.server.semgrex import Semgrex
from stanza.models.common.constant import is_right_to_left
import spacy
from spacy import displacy
from spacy.tokens import Doc
from IPython.core.display import display, HTML


# Things needed:
# For each semgrex search, we want to get the sentence(s) that it came from.
# Store sentence and pass it through visualize_str()
# take the output of visualize_str() and store it
def get_sentences_html(doc, language):
    """
    Returns a list of the HTML strings of the dependency visualizations of a given stanza doc object.

    First converts the stanza doc object to a spacy doc object and uses displacy to generate an HTML
    string for each sentence of the doc object.
    """
    html_strings = []

    # blank model - we don't use any of the model features, just the viz
    nlp = spacy.blank("en")
    sentences_to_visualize = []
    for sentence in doc.sentences:
        words, lemmas, heads, deps, tags = [], [], [], [], []
        if is_right_to_left(language):  # order of words displayed is reversed, dependency arcs remain intact
            sent_len = len(sentence.words)
            for word in reversed(sentence.words):
                words.append(word.text)
                lemmas.append(word.lemma)
                deps.append(word.deprel)
                tags.append(word.upos)
                if word.head == 0:  # spaCy head indexes are formatted differently than that of Stanza
                    heads.append(sent_len - word.id)
                else:
                    heads.append(sent_len - word.head)
        else:  # left to right rendering
            for word in sentence.words:
                words.append(word.text)
                lemmas.append(word.lemma)
                deps.append(word.deprel)
                tags.append(word.upos)
                if word.head == 0:
                    heads.append(word.id - 1)
                else:
                    heads.append(word.head - 1)
        document_result = Doc(nlp.vocab, words=words, lemmas=lemmas, heads=heads, deps=deps, pos=tags)
        sentences_to_visualize.append(document_result)

    for line in sentences_to_visualize:  # render all sentences through displaCy
        html_strings.append(displacy.render(line, style="dep", jupyter=False))
    return html_strings


# Process HTML:
# By this point, we have HTML to visualize the sentences' words and dependencies
# we also have a data structure storing each of the indexes of the words involved in the hits for each of the sentences
# we also has something that tracks which label goes on whch word in each node of the sentence
# What we do:
# For each sentence to be visualized:
#     take the raw HTML for that sentence
#     build a style class
#     for each semgrex query:
#         for each semgrex hit:
#             set a new color style
#             for each node in the hit
#                 go to that index (recalculate the off by one index)
#                 color the word color var abd attach the style class
#                 if applicable, attach the label below (w/ the appropriate Y value)
#                 ^ calculate the number of levels to have y values for by looking at the semgrex hits

def process_sentence_html(orig_html, semgrex_sentence):
    """
    Takes the semgrex sentence object and modifies the HTML of the original sentence, highlighting
    words involved in the search queries and adding the label of the word inside of the semgrex match.
    """
    # TODO: Write this function!
    return


# Once HTMLs are configured:
def render_html_strings(edited_html_strings):
    for html_string in edited_html_strings:
        display(HTML(html_string))
    return


def visualize_search_doc(doc, semgrex_queries, lang_code):
    # nlp = stanza.Pipeline(lang_code, processors="tokenize,pos,lemma,depparse")

    # doc = nlp("Banning opal removed all artifact decks from the meta.  I miss playing lantern.")

    # A single result .result[i].result[j] is a list of matches for sentence i on semgrex query j.

    with Semgrex(classpath="$CLASSPATH") as sem:
        # semgrex_results = sem.process(doc,
        #                               "{pos:NN}=object <obl {}=action",
        #                               "{cpos:NOUN}=thing <obj {cpos:VERB}=action")
        edited_html_strings = []
        semgrex_results = sem.process(doc, *semgrex_queries)
        # one html string for each sentence
        html_strings = get_sentences_html(doc, lang_code)
        print(html_strings)
        for i in range(len(html_strings)):
            edited_string = process_sentence_html(html_strings[i], semgrex_results.result[i])
            edited_html_strings.append(edited_string)
        render_html_strings(edited_html_strings)
    return


def visualize_search_str(text, semgrex_queries, lang_code):
    nlp = stanza.Pipeline(lang_code, processors="tokenize, pos, lemma, depparse")
    doc = nlp(text)
    visualize_search_doc(doc, semgrex_queries, lang_code)


def main():
    nlp = stanza.Pipeline("en", processors="tokenize,pos,lemma,depparse")

    doc = nlp("Banning opal removed all artifact decks from the meta.  I miss playing lantern.")

    # A single result .result[i].result[j] is a list of matches for sentence i on semgrex query j.
    queries = ["{pos:NN}=object <obl {}=action",
                                      "{cpos:NOUN}=thing <obj {cpos:VERB}=action"]
    visualize_search_doc(doc, queries, "en")

    with Semgrex(classpath="$CLASSPATH") as sem:
        semgrex_results = sem.process(doc,
                                      "{pos:NN}=object <obl {}=action",
                                      "{cpos:NOUN}=thing <obj {cpos:VERB}=action")
        print(semgrex_results.result[1])
        print(semgrex_results.result[1].result[1])
        print("-------------------------------------")
        print(semgrex_results.result[1].result[1].match[0].node[0])
        print(semgrex_results.result[1].result[1].match[0].node[0].name)
        print(semgrex_results.result[1].result[1].match[0].node[0].matchIndex)

        print(semgrex_results.result[1].result[1].match[0].node[1].name)
        print(semgrex_results.result[1].result[1].match[0].node[1].matchIndex)

    return


if __name__ == '__main__':
    main()


  from IPython.core.display import display, HTML
2022-08-22 10:11:29 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-08-22 10:11:31 INFO: Loading these models for language: en (English):
| Processor | Package  |
------------------------
| tokenize  | combined |
| pos       | combined |
| lemma     | combined |
| depparse  | combined |

2022-08-22 10:11:31 INFO: Use device: cpu
2022-08-22 10:11:31 INFO: Loading: tokenize
2022-08-22 10:11:31 INFO: Loading: pos
2022-08-22 10:11:32 INFO: Loading: lemma
2022-08-22 10:11:32 INFO: Loading: depparse
2022-08-22 10:11:32 INFO: Done loading processors!


TypeError: expected str, bytes or os.PathLike object, not NoneType

In [None]:
['<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="6b2a8c3bbbf648d4ac657842ec0b55c8-0" class="displacy" width="1625" height="487.0" direction="ltr" style="max-width: none; height: 487.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="397.0">\n    <tspan class="displacy-word" fill="currentColor" x="50">Banning</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">VERB</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="397.0">\n    <tspan class="displacy-word" fill="currentColor" x="225">opal</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="225">NOUN</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="397.0">\n    <tspan class="displacy-word" fill="currentColor" x="400">removed</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="400">VERB</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="397.0">\n    <tspan class="displacy-word" fill="currentColor" x="575">all</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="575">DET</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="397.0">\n    <tspan class="displacy-word" fill="currentColor" x="750">artifact</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="750">NOUN</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="397.0">\n    <tspan class="displacy-word" fill="currentColor" x="925">decks</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="925">NOUN</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="397.0">\n    <tspan class="displacy-word" fill="currentColor" x="1100">from</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="1100">ADP</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="397.0">\n    <tspan class="displacy-word" fill="currentColor" x="1275">the</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="1275">DET</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="397.0">\n    <tspan class="displacy-word" fill="currentColor" x="1450">meta .</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="1450">NOUN</tspan>\n</text>\n\n<g class="displacy-arrow">\n    <path class="displacy-arc" id="arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-0" stroke-width="2px" d="M70,352.0 C70,177.0 390.0,177.0 390.0,352.0" fill="none" stroke="currentColor"/>\n    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n        <textPath xlink:href="#arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-0" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">csubj</textPath>\n    </text>\n    <path class="displacy-arrowhead" d="M70,354.0 L62,342.0 78,342.0" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n    <path class="displacy-arc" id="arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-1" stroke-width="2px" d="M70,352.0 C70,264.5 210.0,264.5 210.0,352.0" fill="none" stroke="currentColor"/>\n    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n        <textPath xlink:href="#arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-1" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">obj</textPath>\n    </text>\n    <path class="displacy-arrowhead" d="M210.0,354.0 L218.0,342.0 202.0,342.0" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n    <path class="displacy-arc" id="arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-2" stroke-width="2px" d="M595,352.0 C595,177.0 915.0,177.0 915.0,352.0" fill="none" stroke="currentColor"/>\n    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n        <textPath xlink:href="#arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-2" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">det</textPath>\n    </text>\n    <path class="displacy-arrowhead" d="M595,354.0 L587,342.0 603,342.0" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n    <path class="displacy-arc" id="arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-3" stroke-width="2px" d="M770,352.0 C770,264.5 910.0,264.5 910.0,352.0" fill="none" stroke="currentColor"/>\n    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n        <textPath xlink:href="#arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-3" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">compound</textPath>\n    </text>\n    <path class="displacy-arrowhead" d="M770,354.0 L762,342.0 778,342.0" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n    <path class="displacy-arc" id="arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-4" stroke-width="2px" d="M420,352.0 C420,89.5 920.0,89.5 920.0,352.0" fill="none" stroke="currentColor"/>\n    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n        <textPath xlink:href="#arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-4" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">obj</textPath>\n    </text>\n    <path class="displacy-arrowhead" d="M920.0,354.0 L928.0,342.0 912.0,342.0" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n    <path class="displacy-arc" id="arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-5" stroke-width="2px" d="M1120,352.0 C1120,177.0 1440.0,177.0 1440.0,352.0" fill="none" stroke="currentColor"/>\n    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n        <textPath xlink:href="#arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-5" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">case</textPath>\n    </text>\n    <path class="displacy-arrowhead" d="M1120,354.0 L1112,342.0 1128,342.0" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n    <path class="displacy-arc" id="arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-6" stroke-width="2px" d="M1295,352.0 C1295,264.5 1435.0,264.5 1435.0,352.0" fill="none" stroke="currentColor"/>\n    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n        <textPath xlink:href="#arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-6" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">det</textPath>\n    </text>\n    <path class="displacy-arrowhead" d="M1295,354.0 L1287,342.0 1303,342.0" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n    <path class="displacy-arc" id="arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-7" stroke-width="2px" d="M420,352.0 C420,2.0 1450.0,2.0 1450.0,352.0" fill="none" stroke="currentColor"/>\n    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n        <textPath xlink:href="#arrow-6b2a8c3bbbf648d4ac657842ec0b55c8-0-7" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">obl</textPath>\n    </text>\n    <path class="displacy-arrowhead" d="M1450.0,354.0 L1458.0,342.0 1442.0,342.0" fill="currentColor"/>\n</g>\n</svg>', '<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="6029ba09946149f1ab3b8d319a64679a-0" class="displacy" width="750" height="224.5" direction="ltr" style="max-width: none; height: 224.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr">\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="134.5">\n    <tspan class="displacy-word" fill="currentColor" x="50">I</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="50">PRON</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="134.5">\n    <tspan class="displacy-word" fill="currentColor" x="225">miss</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="225">VERB</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="134.5">\n    <tspan class="displacy-word" fill="currentColor" x="400">playing</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="400">VERB</tspan>\n</text>\n\n<text class="displacy-token" fill="currentColor" text-anchor="middle" y="134.5">\n    <tspan class="displacy-word" fill="currentColor" x="575">lantern .</tspan>\n    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="575">NOUN</tspan>\n</text>\n\n<g class="displacy-arrow">\n    <path class="displacy-arc" id="arrow-6029ba09946149f1ab3b8d319a64679a-0-0" stroke-width="2px" d="M70,89.5 C70,2.0 225.0,2.0 225.0,89.5" fill="none" stroke="currentColor"/>\n    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n        <textPath xlink:href="#arrow-6029ba09946149f1ab3b8d319a64679a-0-0" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">nsubj</textPath>\n    </text>\n    <path class="displacy-arrowhead" d="M70,91.5 L62,79.5 78,79.5" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n    <path class="displacy-arc" id="arrow-6029ba09946149f1ab3b8d319a64679a-0-1" stroke-width="2px" d="M245,89.5 C245,2.0 400.0,2.0 400.0,89.5" fill="none" stroke="currentColor"/>\n    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n        <textPath xlink:href="#arrow-6029ba09946149f1ab3b8d319a64679a-0-1" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">xcomp</textPath>\n    </text>\n    <path class="displacy-arrowhead" d="M400.0,91.5 L408.0,79.5 392.0,79.5" fill="currentColor"/>\n</g>\n\n<g class="displacy-arrow">\n    <path class="displacy-arc" id="arrow-6029ba09946149f1ab3b8d319a64679a-0-2" stroke-width="2px" d="M420,89.5 C420,2.0 575.0,2.0 575.0,89.5" fill="none" stroke="currentColor"/>\n    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">\n        <textPath xlink:href="#arrow-6029ba09946149f1ab3b8d319a64679a-0-2" class="displacy-label" startOffset="50%" side="left" fill="currentColor" text-anchor="middle">obj</textPath>\n    </text>\n    <path class="displacy-arrowhead" d="M575.0,91.5 L583.0,79.5 567.0,79.5" fill="currentColor"/>\n</g>\n</svg>']
