In [1]:
import numpy as np
import pandas as pd
import conllu
import zeyrek
import nltk
from tqdm import tqdm

from tr_dependency_parser.tr_parser import TurkishCKYParser
from tr_dependency_parser.tools.helper import *
from tr_dependency_parser.tools.visualizer import parse_visualizer

import spacy
from spacy import displacy

DEBUG = True
filename = "tr_dependency_parser/grammar/grammar.txt"
parser = TurkishCKYParser(filename, DEBUG = DEBUG)

In [2]:
sentence = "Ben okula geldim."
sentence = preprocess(sentence)
parser.parse(sentence)
parser.show_cky_chart()
print("##### BEST SENTENCE STRUCTURE #####")
parser.show_sentence_structure()
print()

Tokens : ['ben', 'okula', 'geldim']
POS Tags : [['PRO1'], ['DAT'], ['VPPAST1']]
Sentence is grammatically correct.
[_S[_PRO1 ben ][_VPPAST1[_DAT okula ][_VPPAST1 geldim ]]]
203.25967199999997

######### CKY CHART #########
--------  -------  -----------
ben       okula    geldim
['PRO1']  []       ['S']
[]        ['DAT']  ['VPPAST1']
[]        []       ['VPPAST1']
--------  -------  -----------
##### BEST SENTENCE STRUCTURE #####
[_S[_PRO1 ben ][_VPPAST1[_DAT okula ][_VPPAST1 geldim ]]]



In [3]:
terminals = parser.get_terminal_nodes(parser.get_tree())
[terminal.text for terminal in terminals]

['ben', 'okula', 'geldim']

In [4]:
visualizer = parse_visualizer()
visualizer.pos_vis(sentence, terminals)

In [5]:
visualizer.pos_tree_vis(sentence, parser.tokens, parser.get_tree())

In [23]:
from nltk.tree import Tree
from nltk.draw.tree import TreeView
import os
t = Tree.fromstring(tree_format(parser.get_tree()).replace("[_"," (").replace("]",")").strip())
TreeView(t)._cframe.print_to_file('output.ps')
os.system('convert output.ps output.png')

4

In [22]:
from PIL import Image

psimage=Image.open('output.ps')
psimage.save('/output.png')

OSError: Unable to locate Ghostscript on paths

In [None]:
ents = []
for terminal in terminals:
    ents.append({"start" : terminal.span[0], 
                 "end"   : terminal.span[1], 
                 "label" : terminal.tag })

In [None]:
doc = {"text" : sentence, "ents" : ents}

In [None]:
displacy.render(doc, 
                style = "ent",
                manual = True,
               )

In [None]:
def get_nodes(node):
    if node.terminal:
        return [node]

    return [node] + get_nodes(node.child1) + get_nodes(node.child2)

ents = []
for node in get_nodes(parser.get_tree()):
    ents.append({"start_token" : node.token_range[0], 
                 "end_token"   : node.token_range[1]+1, 
                 "label" : node.tag})
ents.reverse()
doc = {"text" : sentence, "spans" : ents, "tokens" : parser.tokens}
displacy.render(doc, 
                style = "span", 
                manual = True,
               )

In [None]:
from pathlib import Path

svg = displacy.render(doc, style="span", manual=True, jupyter=False)
file_name = "_".join(parser.tokens) + ".svg"
with open(file_name, 'w', encoding="utf-8") as f:
    f.write(svg)

In [None]:
with open("tr_dependency_parser/grammar/grammar.txt", "r") as f:
    lines = f.readlines()
    
nonterm = set([line.split(" ->")[0] if "#" not in line else "" for line in lines])

In [None]:
color_dict = {
    "NP" : "turquoise",
    "PRO" : "palevioletred",
    "ADJ" : "lime",
    "VP" : "lightpink",
    "ADV" : "khaki",
    "POSTP" : "cornflowerblue",
    "SG" : "tomato",
    "S" : "tomato",
    "DET" : "limegreen",
    "DAT" : "limegreen",
    "NUM" : "salmon",
    "Q" : "y",
    "GENITIVE" : "green"
}

colors = {}
for nont in nonterm:
    colors[nont] = None
    for key in color_dict.keys():
        if key in nont:
            colors[nont] = color_dict[key]
            break

In [None]:
options = {"ents" : nonterm, "colors" : colors}
displacy.render(doc, 
                style = "span",
                options = options, 
                manual = True,
               )