In [None]:
from lambeq.backend.drawing import draw
from lambeq.backend.grammar import Cap, Cup, Id, Ty, Word


n, s = Ty('n'), Ty('s')

words = [
    Word('私', n@n),
    Word('は', n.r),
    Word('ギリシャ', n.l@n)
    Word('の',n.r)
    Word('ピザ',n@n@n)
    Word('が',n.r)
    Word('食べ',n.r@n.r@s@n.l@n.l)
    Word('たい',n.l.l)
    Word('です',n.l.l)
]

cups = Cup(n, n.r) @ Id(s) @ Cup(n.l, n)

assert Id().tensor(*words) == words[0] @ words[1] @ words[2]
assert Ty().tensor(*[n.r, s, n.l]) == n.r @ s @ n.l

diagram = Id().tensor(*words) >> cups
draw(diagram)

In [43]:
import spacy
from spacy import displacy
from lambeq.backend.grammar import Cap, Cup, Id, Ty, Word
from lambeq.backend.drawing import draw

n = Ty('n')
s = Ty('s')

class org_reader:
    def __init__(self, doc=None):
        self.doc = doc

    def POS_parsing(self, text):
        nlp = spacy.load("ja_ginza")
        self.doc = nlp(text)
        return displacy.render(self.doc, style='dep')

    def get_dependencies(self):
        dependencies = {i: [] for i in range(len(self.doc))}
        for token in self.doc:
            if token.dep_ != "ROOT":
                dependencies[token.head.i].append(token.i)
        return dependencies

    def assign_pregroup_type(self, dependencies):
        for i, word in enumerate(words):
            token = self.doc[i]
            token_idx = token.i
            dependents = dependencies[token_idx]
            incoming_dependencies = sum(1 for t in self.doc if t.head.i == token_idx)

            if token.pos_ == "VERB":
                left = Ty()
                for _ in range(sum(1 for d in dependents if d < token_idx)):
                    left = left @ n.r

                right = Ty()
                for _ in range(sum(1 for d in dependents if d > token_idx)):
                    right = right @ n.l

                word.dom = left @ s @ right

            elif token.pos_ in ["NOUN", "PROPN", "PRON"]:
                total_n = len(dependents) + incoming_dependencies
                pregroup_type = Ty()
                if total_n == 0:
                    word.dom = n
                else:
                    for _ in range(total_n):
                        word.dom = word.dom @ n

            else:
                type_chain = Ty()
                for dep in dependents:
                    if dep < token_idx:
                        type_chain = type_chain @ n.r
                    else:
                        type_chain = type_chain @ n.l
                word.dom = type_chain if type_chain else Ty('x')


            return words


In [44]:
# インスタンス作成
text = "私はギリシャのピザが食べたいです。"
reader = org_reader()

In [45]:
# 解析実行
reader.POS_parsing(text)

In [37]:
# 依存関係取得
dependencies = reader.get_dependencies()
print(dependencies)

{0: [1], 1: [], 2: [3], 3: [], 4: [2, 5], 5: [], 6: [0, 4, 7, 8, 9], 7: [], 8: [], 9: []}


In [38]:
for token in reader.doc:
    print(token.text, token.pos_)

私 PRON
は ADP
ギリシャ PROPN
の ADP
ピザ NOUN
が ADP
食べ VERB
たい AUX
です AUX
。 PUNCT


In [48]:
reader.assign_pregroup_type(dependencies)

[Word(私, ('Ty(n).l', {'0'})),
 Word(は, ('Ty(x)', {'0'})),
 Word(ギリシャ, ('Ty(n) @ Ty(n)', {'0'})),
 Word(の, ('Ty(x)', {'0'})),
 Word(ピザ, ('Ty(n) @ Ty(n) @ Ty(n) @ Ty(n)', {'0'})),
 Word(が, ('Ty(x)', {'0'})),
 Word(食べ, ('Ty(n).r @ Ty(n).r @ Ty(s) @ Ty(n).l @ Ty(n).l @ Ty(n).l', {'0'})),
 Word(たい, ('Ty(x)', {'0'})),
 Word(です, ('Ty(x)', {'0'})),
 Word(。, ('Ty(x)', {'0'}))]

In [47]:
for word in words:
    print(f"{word.name}: {word.dom}")

私: n @ n
は: Ty()
ギリシャ: Ty()
の: Ty()
ピザ: Ty()
が: Ty()
食べ: Ty()
たい: Ty()
です: Ty()
。: Ty()
