In [1]:
import nltk
import stanza

nlp = stanza.Pipeline(lang='en', processors='tokenize,pos,constituency')

text = 'She took the lesson to heart'

doc = nlp(text)

parse_tree = nltk.Tree.fromstring(str(doc.sentences[0].constituency))

parse_tree.pretty_print()

  from .autonotebook import tqdm as notebook_tqdm
2024-08-23 20:38:55 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.8.0.json: 386kB [00:00, 28.7MB/s]                    
2024-08-23 20:38:55 INFO: Downloaded file to /Users/pineapple/stanza_resources/resources.json
2024-08-23 20:38:56 INFO: Loading these models for language: en (English):
| Processor    | Package             |
--------------------------------------
| tokenize     | combined            |
| mwt          | combined            |
| pos          | combined_charlm     |
| constituency | ptb3-revised_charlm |

2024-08-23 20:38:56 INFO: Using device: cpu
2024-08-23 20:38:56 INFO: Loading: tokenize
2024-08-23 20:38:56 INFO: Loading: mwt
2024-08-23 20:38:56 INFO: Loading: pos
2024-08-23 20:38:57 

         ROOT                         
          |                            
          S                           
  ________|_________                   
 |                  VP                
 |    ______________|_________         
 |   |         |              PP      
 |   |         |           ___|____    
 NP  |         NP         |        NP 
 |   |     ____|____      |        |   
PRP VBD   DT        NN    IN       NN 
 |   |    |         |     |        |   
She took the      lesson  to     heart



In [2]:
# 输入文本
text = "workers dumped sacks of garbage and junk into a bin"

# 使用 stanza 解析文本
doc = nlp(text)

# 获取句法树并转换为 nltk 的树状结构
parse_tree = nltk.Tree.fromstring(str(doc.sentences[0].constituency))

# 打印句法树
parse_tree.pretty_print()

                           ROOT                               
                            |                                  
                            S                                 
    ________________________|_________                         
   |                                  VP                      
   |       ___________________________|_____________           
   |      |                 NP                      |         
   |      |       __________|_____                  |          
   |      |      |                PP                PP        
   |      |      |     ___________|___          ____|___       
   NP     |      NP   |               NP       |        NP    
   |      |      |    |      _________|___     |     ___|___   
  NNS    VBD    NNS   IN    NN        CC  NN   IN   DT      NN
   |      |      |    |     |         |   |    |    |       |  
workers dumped sacks  of garbage     and junk into  a      bin



In [5]:
import spacy
from nltk import CFG
from nltk import Tree

# 加载模型
nlp = spacy.load("en_core_web_sm")

# 输入文本
text = "workers dumped sacks of garbage and junk into a bin"

# 解析文本
doc = nlp(text)

# 打印依存关系
for token in doc:
    print(f"{token.text} ({token.dep_}) <-- {token.head.text}")

# 定义一个递归函数，将spacy的依存关系转换为nltk的树状结构
def to_nltk_tree(node):
    if node.n_lefts + node.n_rights > 0:
        return Tree(node.orth_, [to_nltk_tree(child) for child in node.children])
    else:
        return node.orth_

# 获取句子的根节点
root = [sent.root for sent in doc.sents][0]

# 转换为nltk的树状结构
nltk_tree = to_nltk_tree(root)

# 打印树状结构
nltk_tree.pretty_print()

# 手动构建CFG
grammar = CFG.fromstring("""
  S -> NP VP
  NP -> NNS | NNS PP
  VP -> VBD NP | VBD NP PP
  PP -> IN NP
  NNS -> 'workers'
  VBD -> 'dumped'
  NP -> NNS | NNS PP | NN | NN CC NN | DT NN
  NN -> 'sacks' | 'garbage' | 'junk' | 'bin'
  IN -> 'of' | 'into'
  DT -> 'a'
  CC -> 'and'
""")

# 解析器
parser = nltk.ChartParser(grammar)

# 输入句子
sentence = "workers dumped sacks of garbage and junk into a bin".split()

# 生成句法树
for tree in parser.parse(sentence):
    tree.pretty_print()
    tree.draw()

workers (nsubj) <-- dumped
dumped (ROOT) <-- dumped
sacks (dobj) <-- dumped
of (prep) <-- sacks
garbage (pobj) <-- of
and (cc) <-- garbage
junk (conj) <-- garbage
into (prep) <-- dumped
a (det) <-- bin
bin (pobj) <-- into
        dumped                  
    ______|__________________    
   |            sacks        |  
   |              |          |   
   |              of        into
   |              |          |   
   |           garbage      bin 
   |       _______|_____     |   
workers  and           junk  a  

