https://stackabuse.com/python-for-nlp-getting-started-with-the-stanfordcorenlp-library/ <br>
https://github.com/Lynten/stanford-corenlp

In [1]:
from stanfordcorenlp import StanfordCoreNLP
import logging
import json
import pandas as pd

In [2]:
class StanfordNLP:
    """Convenience wrapper around a `StanfordCoreNLP` client.

    Each method forwards a piece of text to the underlying client and returns
    whatever the client returns; `annotate` additionally decodes the JSON
    response, and `tokens_to_dict` reshapes a token list into an index-keyed
    mapping.
    """

    def __init__(self, host='http://localhost', port=9000):
        """Create the client and the default properties used by `annotate`.

        Args:
            host: URL of the CoreNLP server.
            port: Port the server listens on.
        """
        # NOTE(review): timeout unit is presumably milliseconds -- confirm
        # against the stanfordcorenlp documentation.
        self.nlp = StanfordCoreNLP(host, port=port,
                                   timeout=30000)  # , quiet=False, logging_level=logging.DEBUG)
        # Properties sent with every `annotate` request.
        self.props = {
            'annotators': 'tokenize,ssplit,pos,lemma,ner,parse,depparse,dcoref,relation',
            'pipelineLanguage': 'en',
            'outputFormat': 'json'
        }

    def word_tokenize(self, sentence):
        """Return the client's tokenization of `sentence`."""
        return self.nlp.word_tokenize(sentence)

    def pos(self, sentence):
        """Return the client's part-of-speech tagging of `sentence`."""
        return self.nlp.pos_tag(sentence)

    def ner(self, sentence):
        """Return the client's named-entity tagging of `sentence`."""
        return self.nlp.ner(sentence)

    def parse(self, sentence):
        """Return the client's constituency parse of `sentence`."""
        return self.nlp.parse(sentence)

    def dependency_parse(self, sentence):
        """Return the client's dependency parse of `sentence`."""
        return self.nlp.dependency_parse(sentence)

    def annotate(self, sentence):
        """Annotate `sentence` with `self.props` and decode the JSON reply.

        Returns:
            The Python object produced by `json.loads` on the server response.
        """
        return json.loads(self.nlp.annotate(sentence, properties=self.props))

    @staticmethod
    def tokens_to_dict(_tokens):
        """Index token records by their integer position.

        Args:
            _tokens: Iterable of mappings, each providing the keys 'index',
                'word', 'lemma', 'pos' and 'ner'.

        Returns:
            A `defaultdict(dict)` mapping `int(token['index'])` to a dict with
            the 'word', 'lemma', 'pos' and 'ner' values of that token. Looking
            up an unknown index yields (and stores) an empty dict.
        """
        # Imported locally: the notebook's import cell never brings
        # `defaultdict` into scope, so the bare name raised NameError.
        from collections import defaultdict

        tokens = defaultdict(dict)
        for token in _tokens:
            tokens[int(token['index'])] = {
                'word': token['word'],
                'lemma': token['lemma'],
                'pos': token['pos'],
                'ner': token['ner']
            }
        return tokens

In [3]:
# Load the news dataset and make sure every event summary is a string.
df = pd.read_csv('newsdataset1.csv')
df['event_summary'] = df['event_summary'].astype(str)


In [4]:
# Peek at the first event summary (row label 0).
df.loc[0, 'event_summary']

'The death toll of the suicide bombing in Ibb'

In [5]:
from nltk.parse.corenlp import CoreNLPParser


In [6]:
if __name__ == '__main__':
    # `sNLP` and `text` are assigned at notebook (module) level because later
    # cells reuse them.
    sNLP = StanfordNLP()
    # Build the NLTK CoreNLP parser once rather than once per summary; with no
    # arguments it talks to the library's default server URL.
    parser = CoreNLPParser()
    # Only the first summary is processed; widen the range to cover more rows.
    for i in range(0, 1):
        text = df['event_summary'][i]
#         print ("Annotate:", sNLP.annotate(text))
        print('\n\n')
        print ("POS:", sNLP.pos(text))
        print('\n\n')
        print ("Tokens:", sNLP.word_tokenize(text))
        print('\n\n')
        print ("NER:", sNLP.ner(text))
        print('\n\n')
        # Request the constituency parse once and reuse it, instead of sending
        # the same text to the server twice.
        sentence = sNLP.parse(text)
        print ("Parse:", sentence)
        # Draw the first parse returned by NLTK's client as an ASCII tree.
        next(parser.raw_parse(text)).pretty_print()
        print('\n\n .......................................................................... \n\n')
#         print ("Dep Parse:", sNLP.dependency_parse(text))




POS: [('The', 'DT'), ('death', 'NN'), ('toll', 'NN'), ('of', 'IN'), ('the', 'DT'), ('suicide', 'NN'), ('bombing', 'NN'), ('in', 'IN'), ('Ibb', 'NNP')]



Tokens: ['The', 'death', 'toll', 'of', 'the', 'suicide', 'bombing', 'in', 'Ibb']



NER: [('The', 'O'), ('death', 'O'), ('toll', 'O'), ('of', 'O'), ('the', 'O'), ('suicide', 'CRIMINAL_CHARGE'), ('bombing', 'CRIMINAL_CHARGE'), ('in', 'O'), ('Ibb', 'O')]



Parse: (ROOT
  (NP
    (NP (DT The) (NN death))
    (NP
      (NP (NN toll))
      (PP (IN of)
        (NP
          (NP (DT the) (NN suicide) (NN bombing))
          (PP (IN in)
            (NP (NNP Ibb))))))))
         ROOT                                         
          |                                            
          NP                                          
      ____|_________                                   
     |              NP                                
     |          ____|_________                         
     |         |              PP          

In [7]:
from nltk.tree import Tree

In [8]:
# Constituency parse of the last `text` processed above, kept for the tree cells below.
parsestr=sNLP.parse(text)

In [9]:
# Display the raw bracketed parse string exactly as returned by the client.
parsestr

'(ROOT\r\n  (NP\r\n    (NP (DT The) (NN death))\r\n    (NP\r\n      (NP (NN toll))\r\n      (PP (IN of)\r\n        (NP\r\n          (NP (DT the) (NN suicide) (NN bombing))\r\n          (PP (IN in)\r\n            (NP (NNP Ibb))))))))'

In [10]:
# parsestr='(ROOT (S (NP (NP (DT The) (JJ old) (NN oak) (NN tree)) (PP (IN from) (NP (NNP India)))) (VP (VBD fell) (PRT (RP down)))))'

In [11]:
import math

In [12]:
# Print every subtree labelled NP, in the order `subtrees()` yields them.
parse_tree = Tree.fromstring(parsestr)
for np_subtree in parse_tree.subtrees(filter=lambda node: node.label() == 'NP'):
    print(np_subtree)

(NP
  (NP (DT The) (NN death))
  (NP
    (NP (NN toll))
    (PP
      (IN of)
      (NP
        (NP (DT the) (NN suicide) (NN bombing))
        (PP (IN in) (NP (NNP Ibb)))))))
(NP (DT The) (NN death))
(NP
  (NP (NN toll))
  (PP
    (IN of)
    (NP
      (NP (DT the) (NN suicide) (NN bombing))
      (PP (IN in) (NP (NNP Ibb))))))
(NP (NN toll))
(NP
  (NP (DT the) (NN suicide) (NN bombing))
  (PP (IN in) (NP (NNP Ibb))))
(NP (DT the) (NN suicide) (NN bombing))
(NP (NNP Ibb))


In [13]:
# Collect the NP subtrees first, then print the words each one covers.
np_subtrees = [
    node
    for node in Tree.fromstring(parsestr).subtrees()
    if node.label() == 'NP'
]
for np_subtree in np_subtrees:
    print(np_subtree.leaves())

['The', 'death', 'toll', 'of', 'the', 'suicide', 'bombing', 'in', 'Ibb']
['The', 'death']
['toll', 'of', 'the', 'suicide', 'bombing', 'in', 'Ibb']
['toll']
['the', 'suicide', 'bombing', 'in', 'Ibb']
['the', 'suicide', 'bombing']
['Ibb']


In [14]:
 # Open (and truncate) the output file 'work' for writing; the handle is
 # written to and closed by a later cell, so run that cell to release it.
 fo = open('work', 'w')

In [15]:
# for subtree3 in tree.subtrees():
#     if subtree3.label() == 'CLAUSE':
#         print(subtree3)
#         print subtree3.leaves()
#         fo.write(str(subtree3.leaves()))
# fo.flush()
# fo.close()
#!/usr/bin/python
# Print each NP subtree with its words and write both to the file opened as `fo`.
# `with fo:` closes the handle (flushing buffered text) even if a print or
# write fails part-way through, so the file is never left open.
with fo:
    for np_subtree in Tree.fromstring(parsestr).subtrees():
        if np_subtree.label() == 'NP':
            print (np_subtree)
            print (np_subtree.leaves())
            fo.write(str(np_subtree) + '\n\n\n')
            fo.write(str(np_subtree.leaves()) + '\n\n\n')
            print ("\n\n\n________________________________________________________________________\n\n\n")

(NP
  (NP (DT The) (NN death))
  (NP
    (NP (NN toll))
    (PP
      (IN of)
      (NP
        (NP (DT the) (NN suicide) (NN bombing))
        (PP (IN in) (NP (NNP Ibb)))))))
['The', 'death', 'toll', 'of', 'the', 'suicide', 'bombing', 'in', 'Ibb']



________________________________________________________________________



(NP (DT The) (NN death))
['The', 'death']



________________________________________________________________________



(NP
  (NP (NN toll))
  (PP
    (IN of)
    (NP
      (NP (DT the) (NN suicide) (NN bombing))
      (PP (IN in) (NP (NNP Ibb))))))
['toll', 'of', 'the', 'suicide', 'bombing', 'in', 'Ibb']



________________________________________________________________________



(NP (NN toll))
['toll']



________________________________________________________________________



(NP
  (NP (DT the) (NN suicide) (NN bombing))
  (PP (IN in) (NP (NNP Ibb))))
['the', 'suicide', 'bombing', 'in', 'Ibb']



___________________________________________________________