In [None]:
import spacy
from spacy import displacy
from lambeq.backend.grammar import Cap, Cup, Id, Ty, Word
from lambeq.backend.drawing import draw

n = Ty('n')
s = Ty('s')

class org_reader:
    def __init__(self, doc=None):
        self.doc = doc

    def POS_parsing(self, text):
        nlp = spacy.load("ja_ginza")
        self.doc = nlp(text)
        return displacy.render(self.doc, style='dep')

    def get_dependencies(self):
        dependencies = {i: [] for i in range(len(self.doc))}
        for token in self.doc:
            if token.dep_ != "ROOT":
                dependencies[token.head.i].append(token.i)
        return dependencies

    def assign_pregroup_type(self, dependencies):
        for token in self.doc:
            token_idx = token.i
            dependents = dependencies[token_idx]
            incoming_dependencies = sum(1 for t in self.doc if t.head.i == token_idx)

            if token.pos_ == "VERB":
                left = Ty()
                for _ in range(sum(1 for d in dependents if d < token_idx)):
                    left = left @ n.r

                right = Ty()
                for _ in range(sum(1 for d in dependents if d > token_idx)):
                    right = right @ n.l

                pregroup_type = left @ s @ right

            elif token.pos_ in ["NOUN", "PROPN", "PRON"]:
                total_n = len(dependents) + incoming_dependencies
                pregroup_type = Ty()
                if total_n == 0:
                    pregroup_type = n
                else:
                    for _ in range(total_n):
                        pregroup_type = pregroup_type @ n

            else:
                type_chain = Ty()
                for dep in dependents:
                    if dep < token_idx:
                        type_chain = type_chain @ Ty().r
                    else:
                        type_chain = type_chain @ Ty().l
                pregroup_type = type_chain if type_chain else Ty('x')

            words_with_types.append(Word(token.text, pregroup_type, z=set()))

        return words_with_types


In [2]:
# 動作確認
text = "私はギリシャのピザが食べたいです。"
reader = org_reader()

In [3]:
# 依存構造解析
reader.POS_parsing(text)

In [6]:
# 依存関係を取得
dependencies = reader.get_dependencies()
print(dependencies)

{0: [1], 1: [], 2: [3], 3: [], 4: [2, 5], 5: [], 6: [0, 4, 7, 8, 9], 7: [], 8: [], 9: []}


In [7]:
for token in reader.doc:
    print(token.text, token.pos_)

私 PRON
は ADP
ギリシャ PROPN
の ADP
ピザ NOUN
が ADP
食べ VERB
たい AUX
です AUX
。 PUNCT


In [13]:
reader.assign_pregroup_type(dependencies)

[Word(私, ('Ty(n) @ Ty(n)', {'0'})),
 Word(は, ('Ty(x)', {'0'})),
 Word(ギリシャ, ('Ty(n) @ Ty(n)', {'0'})),
 Word(の, ('Ty(x)', {'0'})),
 Word(ピザ, ('Ty(n) @ Ty(n) @ Ty(n) @ Ty(n)', {'0'})),
 Word(が, ('Ty(x)', {'0'})),
 Word(食べ, ('Ty(n).r @ Ty(n).r @ Ty(s) @ Ty(n).l @ Ty(n).l @ Ty(n).l', {'0'})),
 Word(たい, ('Ty(x)', {'0'})),
 Word(です, ('Ty(x)', {'0'})),
 Word(。, ('Ty(x)', {'0'}))]

In [17]:
for word in words:
    print(f"{word.name}: {pregroup_type}")

NameError: name 'pregroup_type' is not defined