https://stackabuse.com/python-for-nlp-getting-started-with-the-stanfordcorenlp-library/ <br>
https://github.com/Lynten/stanford-corenlp

In [1]:
from stanfordcorenlp import StanfordCoreNLP
import logging
import json
import pandas as pd

In [2]:
class StanfordNLP:
    def __init__(self, host='http://localhost', port=9000):
        self.nlp = StanfordCoreNLP(host, port=port,
                                   timeout=30000)  # , quiet=False, logging_level=logging.DEBUG)
        self.props = {
            'annotators': 'tokenize,ssplit,pos,lemma,ner,parse,depparse,dcoref,relation',
            'pipelineLanguage': 'en',
            'outputFormat': 'json'
        }

    def word_tokenize(self, sentence):
        return self.nlp.word_tokenize(sentence)

    def pos(self, sentence):
        return self.nlp.pos_tag(sentence)

    def ner(self, sentence):
        return self.nlp.ner(sentence)

    def parse(self, sentence):
        return self.nlp.parse(sentence)

    def dependency_parse(self, sentence):
        return self.nlp.dependency_parse(sentence)

    def annotate(self, sentence):
        return json.loads(self.nlp.annotate(sentence, properties=self.props))

    @staticmethod
    def tokens_to_dict(_tokens):
        tokens = defaultdict(dict)
        for token in _tokens:
            tokens[int(token['index'])] = {
                'word': token['word'],
                'lemma': token['lemma'],
                'pos': token['pos'],
                'ner': token['ner']
            }
        return tokens

In [3]:
df = pd.read_csv('newsdataset1.csv')
df.event_summary=df.event_summary.astype(str)


In [4]:
df['event_summary'][0]

'The death toll of the suicide bombing in Ibb'

In [5]:
from nltk.parse.corenlp import CoreNLPParser


In [6]:
if __name__ == '__main__':
    sNLP = StanfordNLP()
    for i in range(0, 50):
        text = df['event_summary'][i]
#         print ("Annotate:", sNLP.annotate(text))
        print('\n\n')
        print ("POS:", sNLP.pos(text))
        print('\n\n')
        print ("Tokens:", sNLP.word_tokenize(text))
        print('\n\n')
        print ("NER:", sNLP.ner(text))
        print('\n\n')
        print ("Parse:", sNLP.parse(text))
        sentence=sNLP.parse(text)
        parser = CoreNLPParser()
        next(parser.raw_parse(text)).pretty_print()
        print('\n\n .......................................................................... \n\n')
#         print ("Dep Parse:", sNLP.dependency_parse(text))




POS: [('The', 'DT'), ('death', 'NN'), ('toll', 'NN'), ('of', 'IN'), ('the', 'DT'), ('suicide', 'NN'), ('bombing', 'NN'), ('in', 'IN'), ('Ibb', 'NNP')]



Tokens: ['The', 'death', 'toll', 'of', 'the', 'suicide', 'bombing', 'in', 'Ibb']



NER: [('The', 'O'), ('death', 'O'), ('toll', 'O'), ('of', 'O'), ('the', 'O'), ('suicide', 'CRIMINAL_CHARGE'), ('bombing', 'CRIMINAL_CHARGE'), ('in', 'O'), ('Ibb', 'O')]



Parse: (ROOT
  (NP
    (NP (DT The) (NN death))
    (NP
      (NP (NN toll))
      (PP (IN of)
        (NP
          (NP (DT the) (NN suicide) (NN bombing))
          (PP (IN in)
            (NP (NNP Ibb))))))))
         ROOT                                         
          |                                            
          NP                                          
      ____|_________                                   
     |              NP                                
     |          ____|_________                         
     |         |              PP          

Parse: (ROOT
  (NP
    (NP
      (NP (DT The) (NNP Palestine) (NNP Authority) (NNS signs))
      (NP (DT a) (NN treaty)))
    (SBAR
      (S
        (VP (TO to)
          (VP (VB join)
            (CC and)
            (VB participate)
            (PP (IN in)
              (NP (DT the) (NNP International) (NNP Criminal) (NNP Court)))))))
    (. .)))
                                                                  ROOT                                                                     
                                                                   |                                                                        
                                                                   S                                                                       
         __________________________________________________________|_____________                                                           
        |                                                                        VP    

NER: [('The', 'O'), ('MS', 'STATE_OR_PROVINCE'), ('Norman', 'PERSON'), ('Atlantic', 'PERSON'), ('ferry', 'O'), ('fire', 'CAUSE_OF_DEATH'), ('death', 'O'), ('toll', 'O'), ('rises', 'O'), ('to', 'O'), ('at', 'O'), ('least', 'O'), ('eleven', 'NUMBER'), ('with', 'O'), ('as', 'O'), ('many', 'O'), ('as', 'O'), ('19', 'NUMBER'), ('people', 'O'), ('still', 'O'), ('unaccounted', 'O'), ('for', 'O'), ('.', 'O'), ('(', 'O'), ('The', 'O'), ('Independent', 'RELIGION'), (')', 'O')]



Parse: (ROOT
  (S
    (NP
      (NP (DT The) (NN MS) (NNP Norman) (NNP Atlantic) (NN ferry))
      (NP (NN fire) (NN death) (NN toll)))
    (VP (VBZ rises)
      (PP (TO to)
        (NP
          (NP
            (ADVP (IN at) (JJS least))
            (NNS eleven))
          (PP (IN with)
            (NP
              (NP
                (QP (RB as) (JJ many) (IN as) (CD 19))
                (NNS people))
              (ADJP (RB still) (JJ unaccounted))))))
      (PP (IN for)))
    (. .)))
                               

Parse: (ROOT
  (S
    (NP (DT The) (NNP United) (NNPS States))
    (VP (VBZ enacts)
      (NP
        (NP (JJR more) (NNS sanctions))
        (PP (IN on)
          (NP
            (NP (NNP North) (NNP Korea))
            (PP (IN in)
              (NP (NN response))))))
      (PP (TO to)
        (NP
          (NP (PRP$ their) (JJ alleged) (NN involvement))
          (PP (IN in)
            (NP
              (NP (DT the) (NN hacking) (NN attack))
              (PP (IN on)
                (NP (NNP Sony))))))))
    (. .)))
                   ROOT                                                                                                                                                                                                                                                          
                    |                                                                                                                                                                                    

NER: [('Fleeing', 'O'), ('villagers', 'O'), ('from', 'O'), ('a', 'O'), ('remote', 'O'), ('part', 'O'), ('of', 'O'), ('the', 'O'), ('Borno', 'ORGANIZATION'), ('State', 'ORGANIZATION'), ('report', 'O'), ('that', 'O'), ('Boko', 'PERSON'), ('Haram', 'PERSON'), ('had', 'O'), ('three', 'DURATION'), ('days', 'DURATION'), ('prior', 'O'), ('kidnapped', 'O'), ('around', 'O'), ('40', 'NUMBER'), ('boys', 'O'), ('and', 'O'), ('young', 'O'), ('men', 'O'), ('.', 'O'), ('(', 'O'), ('BBC', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (NP
    (NP (VBG Fleeing) (NNS villagers))
    (PP (IN from)
      (NP (DT a) (JJ remote) (NN part)))
    (PP (IN of)
      (NP
        (NP (DT the) (NNP Borno) (NNP State) (NN report))
        (SBAR (IN that)
          (S
            (NP (NNP Boko) (NNP Haram))
            (VP (VBD had)
              (ADVP
                (NP (CD three) (NNS days))
                (RB prior))
              (VP (VBN kidnapped)
                (PP (IN around)
                  (NP
       

Parse: (ROOT
  (S
    (NP (DT The) (JJ Singapore-registered) (NN cargo) (NN ship) (NN MV) (NNP Höegh) (NNP Osaka))
    (VP (VBZ runs)
      (ADVP (RB aground))
      (PP (IN on)
        (NP
          (NP (NNP Bramble) (NNP Bank))
          (PP (IN off)
            (NP
              (NP (DT the) (NN coast))
              (PP (IN of)
                (NP
                  (NP (DT the) (NNP Isle))
                  (PP (IN of)
                    (NP (NNP Wight))))))))))))
                                     ROOT                                                                                             
                                      |                                                                                                
                                      S                                                                                               
                             _________|_________________                                                                

Parse: (ROOT
  (X
    (X (SYM _))
    (ADJP
      (ADJP (NNP January))
      (SBAR
        (X (SYM _))
        (S
          (NP (CD 2015))
          ('' '')
          (VP (-RRB- -RSB-) ('' '')))))))
      ROOT                             
       |                                
       X                               
  _____|_____                           
 |          ADJP                       
 |      _____|____                      
 |     |         SBAR                  
 |     |      ____|____                 
 |     |     |         S               
 |     |     |     ____|_________       
 X    ADJP   X    NP   |         VP    
 |     |     |    |    |     ____|___   
SYM   NNP   SYM   CD   '' -RRB-      ''
 |     |     |    |    |    |        |  
 _  January  _   2015  '' -RSB-      ''



 .......................................................................... 





POS: [('A', 'DT'), ('U.S.', 'NNP'), ('drone', 'NN'), ('strike', 'NN'), ('kills', 'VBZ'), ('eight', 'CD'), ('U

                                                         ROOT                                        
                                                          |                                           
                                                          S                                          
                    ______________________________________|____                                       
                   |                                           VP                                    
                   |                                       ____|____________                          
                   NP                                     |                 NP                       
      _____________|_____________                         |           ______|__________               
     |                           VP                       |          |                 PP            
     |              _____________|_____                   |          |        

POS: [('In', 'IN'), ('darts', 'NN')]



Tokens: ['In', 'darts']



NER: [('In', 'O'), ('darts', 'O')]



Parse: (ROOT
  (PP (IN In)
    (NP (NN darts))))
    ROOT      
     |         
     PP       
  ___|_____    
 |         NP 
 |         |   
 IN        NN 
 |         |   
 In      darts



 .......................................................................... 





POS: [('A', 'DT'), ('Libyan', 'JJ'), ('warplane', 'NN'), ('bombs', 'NNS'), ('a', 'DT'), ('Greek-operated', 'JJ'), ('oil', 'NN'), ('tanker', 'NN'), ('anchored', 'VBD'), ('offshore', 'RB'), ('the', 'DT'), ('city', 'NN'), ('of', 'IN'), ('Derna', 'NNP')]



Tokens: ['A', 'Libyan', 'warplane', 'bombs', 'a', 'Greek-operated', 'oil', 'tanker', 'anchored', 'offshore', 'the', 'city', 'of', 'Derna']



NER: [('A', 'O'), ('Libyan', 'NATIONALITY'), ('warplane', 'O'), ('bombs', 'O'), ('a', 'O'), ('Greek-operated', 'O'), ('oil', 'O'), ('tanker', 'TITLE'), ('anchored', 'O'), ('offshore', 'O'), ('the', 'O'), ('city', 'O'), ('of', 

Parse: (ROOT
  (S
    (NP (JJ Bangladeshi) (NNS police))
    (VP (VBP report)
      (SBAR (IN that)
        (S
          (NP
            (NP (CD two) (NN opposition) (NNP Bangladesh))
            (NP (JJ Nationalist) (NNP Party) (NNS activists)))
          (VP (VBP are)
            (VP (VBN shot)
              (S
                (ADJP (JJ dead)
                  (PP (IN in)
                    (NP
                      (NP (NNS clashes))
                      (PP (IN with)
                        (NP
                          (NP (NNS members))
                          (PP (IN of)
                            (NP (DT the) (NN ruling))))))))
                (NP
                  (NP (NNP Awami) (NNP League))
                  (PP (IN in)
                    (NP
                      (NP (DT the) (NN town))
                      (PP (IN of)
                        (NP (NNP Natore)))))))
              (PP (IN on)
                (NP
                  (NP (DT the) (JJ first) (NN anniversar

NER: [('A', 'O'), ('Czech', 'NATIONALITY'), ('archaeological', 'O'), ('team', 'O'), ('discovers', 'O'), ('the', 'O'), ('tomb', 'O'), ('of', 'O'), ('formerly', 'O'), ('unknown', 'O'), ('Ancient', 'O'), ('Egyptian', 'NATIONALITY'), ('queen', 'TITLE'), ('Khentakawess', 'PERSON'), ('III', 'PERSON'), ('who', 'O'), ('lived', 'O'), ('during', 'O'), ('the', 'O'), ('Fifth', 'ORDINAL'), ('Dynasty', 'MISC'), ('.', 'O'), ('(', 'O'), ('CNN', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (S
    (NP (DT A) (JJ Czech) (JJ archaeological) (NN team))
    (VP (VBZ discovers)
      (NP
        (NP (DT the) (NN tomb))
        (PP (IN of)
          (NP
            (NP (RB formerly) (JJ unknown))
            (NP (JJ Ancient) (JJ Egyptian) (NN queen) (NNP Khentakawess) (NNP III))))
        (SBAR
          (WHNP (WP who))
          (S
            (VP (VBD lived)
              (PP (IN during)
                (NP (DT the) (JJ Fifth)
                  (NX (NNP Dynasty)))))))))
    (. .)))
                       

Parse: (ROOT
  (S
    (NP (NNP China))
    (VP (VBZ relaxes)
      (NP
        (NP (NNS controls))
        (PP (IN over)
          (NP
            (NP (DT the) (NN export))
            (PP (IN of)
              (NP (JJ rare) (NN earth) (NNS elements))))))
      (PP (IN after)
        (S
          (VP (VBG losing)
            (NP
              (NP (DT a) (NN case))
              (VP (VBN brought)
                (PP (IN by)
                  (NP (DT the) (NNP United) (NNPS States)))
                (PP (IN at)
                  (NP (DT the) (NNP World) (NNP Trade) (NNP Organization)))))))))
    (. .)))
                ROOT                                                                                                                                                                   
                 |                                                                                                                                                                      
                 S    




Parse: (ROOT
  (NP
    (NP
      (NP (DT An) (NN avalanche))
      (PP (IN near)
        (NP (DT the) (NNP Rettenbach) (NN glacier))))
    (PP (IN in)
      (NP (DT the) (JJ Austrian) (NNP Alps)))))
                                  ROOT                                     
                                   |                                        
                                   NP                                      
                    _______________|_______________________                 
                   NP                                      |               
      _____________|________                               |                
     |                      PP                             PP              
     |              ________|______                 _______|_____           
     NP            |               NP              |             NP        
  ___|______       |     __________|_________      |    _________|______    
 DT         NN     IN   DT       

                                                  ROOT                                                                                                          
                                                   |                                                                                                             
                                                   S                                                                                                            
    _______________________________________________|___________________________________                                                                          
   |                                                                                   VP                                                                       
   |         __________________________________________________________________________|____________                                                             
   |        |                  

POS: [('The', 'DT'), ('anti-graft', 'JJ'), ('body', 'NN'), ('of', 'IN'), ('the', 'DT'), ('Communist', 'NNP'), ('Party', 'NNP'), ('of', 'IN'), ('China', 'NNP'), ('detains', 'VBZ'), ('Yang', 'NNP'), ('Weize', 'NNP')]



Tokens: ['The', 'anti-graft', 'body', 'of', 'the', 'Communist', 'Party', 'of', 'China', 'detains', 'Yang', 'Weize']



NER: [('The', 'O'), ('anti-graft', 'O'), ('body', 'O'), ('of', 'O'), ('the', 'O'), ('Communist', 'ORGANIZATION'), ('Party', 'ORGANIZATION'), ('of', 'ORGANIZATION'), ('China', 'ORGANIZATION'), ('detains', 'O'), ('Yang', 'PERSON'), ('Weize', 'PERSON')]



Parse: (ROOT
  (SINV
    (FRAG
      (NP
        (NP (DT The) (JJ anti-graft) (NN body))
        (PP (IN of)
          (NP (DT the) (NNP Communist) (NNP Party)))
        (PP (IN of)
          (NP (NNP China)))))
    (VP (VBZ detains))
    (NP (NNP Yang) (NNP Weize))))
                                    ROOT                                             
                                     |                

                  ROOT                                                                                                                             
                   |                                                                                                                                
                   NP                                                                                                                              
    _______________|___________________                                                                                                             
   |                                   PP                                                                                                          
   |      _____________________________|_____________________________________                                                                       
   |     |                                                                   NP                              

In [7]:
from nltk.tree import Tree
parsestr=sNLP.parse(text)
parsestr

'(ROOT\r\n  (S\r\n    (NP (DT A) (NN suicide) (NN bomber))\r\n    (VP\r\n      (VP (VBZ kills)\r\n        (NP (PRP herself)))\r\n      (CC and)\r\n      (VP\r\n        (NP\r\n          (NP (DT a) (NN policeman))\r\n          (PP (IN in)\r\n            (NP\r\n              (NP (DT an) (NN attack))\r\n              (PP (IN on)\r\n                (NP (DT a) (NN police) (NN station))))))\r\n        (PP (IN in)\r\n          (NP\r\n            (NP (DT a) (JJ popular) (NN tourist) (NN district))\r\n            (PP (IN in)\r\n              (NP\r\n                (NP (DT the) (JJ Turkish) (NN city))\r\n                (PP (IN of)\r\n                  (NP (NNP Istanbul)))))))))\r\n    (. .)))'

# Printing noun phrases

In [8]:
import math
for i in Tree.fromstring(parsestr).subtrees():
    if i.label() == 'VP':
        print (i)
        
#         ptempty_list.append(i)

(VP
  (VP (VBZ kills) (NP (PRP herself)))
  (CC and)
  (VP
    (NP
      (NP (DT a) (NN policeman))
      (PP
        (IN in)
        (NP
          (NP (DT an) (NN attack))
          (PP (IN on) (NP (DT a) (NN police) (NN station))))))
    (PP
      (IN in)
      (NP
        (NP (DT a) (JJ popular) (NN tourist) (NN district))
        (PP
          (IN in)
          (NP
            (NP (DT the) (JJ Turkish) (NN city))
            (PP (IN of) (NP (NNP Istanbul)))))))))
(VP (VBZ kills) (NP (PRP herself)))
(VP
  (NP
    (NP (DT a) (NN policeman))
    (PP
      (IN in)
      (NP
        (NP (DT an) (NN attack))
        (PP (IN on) (NP (DT a) (NN police) (NN station))))))
  (PP
    (IN in)
    (NP
      (NP (DT a) (JJ popular) (NN tourist) (NN district))
      (PP
        (IN in)
        (NP
          (NP (DT the) (JJ Turkish) (NN city))
          (PP (IN of) (NP (NNP Istanbul))))))))


# Printing noun phrases leaves

In [9]:
for i in Tree.fromstring(parsestr).subtrees():
    if i.label() == 'NP':
        print (i.leaves())

['A', 'suicide', 'bomber']
['herself']
['a', 'policeman', 'in', 'an', 'attack', 'on', 'a', 'police', 'station']
['a', 'policeman']
['an', 'attack', 'on', 'a', 'police', 'station']
['an', 'attack']
['a', 'police', 'station']
['a', 'popular', 'tourist', 'district', 'in', 'the', 'Turkish', 'city', 'of', 'Istanbul']
['a', 'popular', 'tourist', 'district']
['the', 'Turkish', 'city', 'of', 'Istanbul']
['the', 'Turkish', 'city']
['Istanbul']


# Creating a new file to convert tree into string

In [10]:

#!/usr/bin/python
fo = open('NP', 'w')
for i in Tree.fromstring(parsestr).subtrees():
    if i.label() == 'NP':
#         print (i)
#         print (i.leaves())
        fo.write(str(i )+ ';'+ str(i.leaves())+ ';')
#         fo.write(str(i.leaves())+ ';')
#         fo.write("\n")
                 
#         print ("\n\n\n________________________________________________________________________\n\n\n")
fo.flush()
fo.close()

# Reading text file and converting it into one string 

In [11]:
with open('NP', 'r') as file:
    data = file.read().replace('\n', '')
# print(data)

In [12]:

#Creating new columns in data set
df['Noun_Phrases'] = 'default value'
df['Propositional_Phrases'] = 'default value'
df['Verb_Phrases'] = 'default value'
#copying all nounphrases to new colum NP
# df['NP'][0]=data

In [13]:
df['event_summary'][1]

'A shooting kills one and injures six people in Killarney'

# Full code for all dataset. Extracting all NP, VP, PP for all dataset

In [14]:
import os
#!/usr/bin/python
fo = open('nounphrase', 'w', encoding="utf-8")
for j in range(0,50):
    text = df['event_summary'][j]
    parsestr=sNLP.parse(text)
    for i in Tree.fromstring(parsestr).subtrees():
        if i.label() == 'NP':
            fo.write(str(i)+ ';'+ str(i.leaves())+ ';')       
    with open('nounphrase', 'r') as file:
        data = file.read().replace('\n', '')
        df['Noun_Phrases'][j]=data
        file.seek(0)
    fo = open('nounphrase', 'w', encoding="utf-8")
    fo.write("")



In [15]:
df['Noun_Phrases']

0                                                         
1        (NP  (NP (DT The) (NN death))  (NP    (NP (NN ...
2        (NP (DT A) (NN shooting));['A', 'shooting'];(N...
3        (NP (NNP Lithuania));['Lithuania'];(NP  (NP (D...
4        (NP  (NP (NNP U.S.) (NN fast-food) (NN restaur...
5        (NP  (NP    (NP (DT The) (NNP Palestine) (NNP ...
6        (NP (DT The) (JJ Eurasian) (NNP Economic) (NNP...
7        (NP (NP (NNP Vietnam) (POS 's)) (JJ new) (NN m...
8         (NP (NNP Mario) (NNP Cuomo));['Mario', 'Cuomo'];
9        (NP (JJ Somali) (JJ al-Shabaab) (NNS militants...
10       (NP (NNP Boko) (NNP Haram) (NNS militants));['...
11       (NP (NNP Abu) (NNP Anas) (NNP al-Libi));['Abu'...
12       (NP  (NP (DT The) (NN MS) (NNP Norman) (NNP At...
13       (NP (DT The) (NNP United) (NNPS States));['The...
14       (NP  (NP (VBG Fleeing) (NNS villagers))  (PP (...
15       (NP (NNP Turkey));['Turkey'];(NP  (NP (DT the)...
16       (NP  (DT The)  (JJ Singapore-registered)  (NN .

In [16]:

#!/usr/bin/python
fo = open('propphrase', 'w', encoding="utf-8")
for j in range(0,10):
    text = df['event_summary'][j]
    parsestr=sNLP.parse(text)
    for i in Tree.fromstring(parsestr).subtrees():
        if i.label() == 'PP':
            fo.write(str(i)+ ';'+ str(i.leaves())+ ';')       
    with open('propphrase', 'r') as file:
        data = file.read().replace('\n', '')
        df['Propositional_Phrases'][j]=data
        file.seek(0)
    fo = open('propphrase', 'w', encoding="utf-8")
    fo.write("")

In [19]:

#!/usr/bin/python
fo = open('verbphrase', 'w', encoding="utf-8")
for j in range(0,50):
    text = df['event_summary'][j]
    parsestr=sNLP.parse(text)
    for i in Tree.fromstring(parsestr).subtrees():
        if i.label() == 'VP':
            fo.write(str(i)+ ';'+ str(i.leaves())+ ';')       
    with open('verbphrase', 'r') as file:
        data = file.read().replace('\n', '')
        df['Verb_Phrases'][j]=data
        file.seek(0)
    fo = open('verbphrase', 'w', encoding="utf-8")
    fo.write("")

In [20]:
df['Verb_Phrases']

0                                                         
1                                                         
2        (VP  (VP (VBZ kills) (NP (CD one)))  (CC and) ...
3        (VP  (VBZ adopts)  (NP    (NP (DT the) (NN eur...
4        (VP  (VBZ warns)  (SBAR    (IN that)    (S    ...
5        (VP  (TO to)  (VP    (VB join)    (CC and)    ...
6                                                         
7        (VP (VBZ goes) (PP (IN into) (NP (NN effect)))...
8                                                         
9        (VP  (VBP attack)  (NP (DT an) (NN army) (NN b...
10       (VP  (VBP attack)  (NP (NP (DT a) (NN bus)) (P...
11                                                        
12       (VP  (VBZ rises)  (PP    (TO to)    (NP      (...
13       (VP  (VBZ enacts)  (NP    (NP (JJR more) (NNS ...
14       (VP  (VBD had)  (ADVP (NP (CD three) (NNS days...
15       (VP  (VBZ permits)  (NP    (NP (DT the) (NN bu...
16       (VP  (VBZ runs)  (ADVP (RB aground))  (PP    (.