This demo follows https://github.com/nltk/nltk/wiki/Stanford-CoreNLP-API-in-NLTK

You can compare the outputs below against the online CoreNLP demo at https://corenlp.run/

In [3]:
from nltk.parse import CoreNLPParser
# Endpoint of the local CoreNLP server; every parser below is built from `url`.
port = 10002
url = 'http://localhost:{}'.format(port)

In [3]:
# Parser bound to the CoreNLP server at `url`; the cells below use it to
# produce constituency trees.
parser = CoreNLPParser(url=url)

In [4]:
# Parse a whitespace-split sentence; list() collects every returned tree so
# the cell displays them. `sent` is reused by the dependency-parsing cell.
sent = "The quick brown fox jumps over the lazy dog."
list(parser.parse(sent.split()))

[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), Tree('NN', ['fox'])]), Tree('VP', [Tree('VBZ', ['jumps']), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]

In [5]:
# raw_parse takes the sentence as a single string, so no manual splitting is
# needed; list() collects the returned trees for display.
list(parser.raw_parse('What is the airspeed of an unladen swallow ?'))

[Tree('ROOT', [Tree('SBARQ', [Tree('WHNP', [Tree('WP', ['What'])]), Tree('SQ', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['airspeed'])]), Tree('PP', [Tree('IN', ['of']), Tree('NP', [Tree('DT', ['an']), Tree('JJ', ['unladen'])])]), Tree('S', [Tree('VP', [Tree('VB', ['swallow'])])])])]), Tree('.', ['?'])])])]

## Constituency parsing

In [19]:
# Take the first tree yielded for the question and draw it as an ASCII diagram.
trees = parser.raw_parse('What is the airspeed of an unladen swallow ?')
res = next(trees)
res.pretty_print()

                           ROOT                            
                            |                               
                          SBARQ                            
  __________________________|____________________________   
 |        SQ                                             | 
 |     ___|_________________                             |  
 |    |                     NP                           | 
 |    |        _____________|______________________      |  
 |    |       |                  PP                S     | 
 |    |       |              ____|___              |     |  
WHNP  |       NP            |        NP            VP    | 
 |    |    ___|_____        |     ___|_____        |     |  
 WP  VBZ  DT        NN      IN   DT        JJ      VB    . 
 |    |   |         |       |    |         |       |     |  
What  is the     airspeed   of   an     unladen swallow  ? 



## Dependency parsing

In [7]:
from nltk.parse.corenlp import CoreNLPDependencyParser
# Dependency parser pointed at the same CoreNLP server (`url`) as `parser`.
dep_parser = CoreNLPDependencyParser(url=url)

In [26]:
# Echo the input sentence, then print every dependency triple of each parse
# returned for it.
print(sent)
result = dep_parser.parse(sent.split())
for index, parse in enumerate(result):
    print(f'In sentence {index}')
    for triple in parse.triples():
        # Each triple unpacks to (governor, dep, dependent).
        print(*triple)
# To collect the triples instead of printing them:
#   [list(parse.triples()) for parse in dep_parser.parse(sent.split())]

The quick brown fox jumps over the lazy dog.
In sentence 0
('jumps', 'VBZ') nsubj ('fox', 'NN')
('fox', 'NN') det ('The', 'DT')
('fox', 'NN') amod ('quick', 'JJ')
('fox', 'NN') amod ('brown', 'JJ')
('jumps', 'VBZ') nmod ('dog', 'NN')
('dog', 'NN') case ('over', 'IN')
('dog', 'NN') det ('the', 'DT')
('dog', 'NN') amod ('lazy', 'JJ')
('jumps', 'VBZ') punct ('.', '.')


## Named entity recognition (NER)

In [1]:
from nltk.parse.corenlp import CoreNLPParser, CoreNLPDependencyParser
from nltk.tokenize.treebank import TreebankWordTokenizer

In [4]:
# Ensure the Stanford CoreNLP server is running at `url` (built from `port`
# in the first code cell), not at the NLTK default address.
ner_parser = CoreNLPParser(url=url, tagtype='ner')

# Input text
text = "Apple is looking at buying U.K. startup for $1 billion. Tim Cook will be meeting with the company's CEO in London next week."

# Tokenize the text
tokenizer = TreebankWordTokenizer()
tokens = tokenizer.tokenize(text)

# Perform NER; the result is iterated below as (word, tag) pairs
ner_tagged = ner_parser.tag(tokens)

# Filter once and reuse: 'O' means the token is not part of any entity
entity_tokens = [(word, tag) for word, tag in ner_tagged if tag != 'O']

# Print the recognized entities, one token per line
print("Entities in the text:")
for word, tag in entity_tokens:
    print(f"{word} ({tag})")

# Group the entity words by tag for a more structured view.
# Note: words are grouped per tag, not per contiguous mention, so a
# multi-word entity shows up as separate comma-separated words.
entities = {}
for word, tag in entity_tokens:
    entities.setdefault(tag, []).append(word)

# Print structured entities
print("\nStructured Entities:")
for entity_type, words in entities.items():
    print(f"{entity_type}: {', '.join(words)}")

Entities in the text:
Apple (ORGANIZATION)
U.K. (LOCATION)
$ (MONEY)
1 (MONEY)
billion (MONEY)
Tim (PERSON)
Cook (PERSON)
CEO (TITLE)
London (CITY)
next (DATE)
week (DATE)

Structured Entities:
ORGANIZATION: Apple
LOCATION: U.K.
MONEY: $, 1, billion
PERSON: Tim, Cook
TITLE: CEO
CITY: London
DATE: next, week
