https://stackabuse.com/python-for-nlp-getting-started-with-the-stanfordcorenlp-library/ <br>
https://github.com/Lynten/stanford-corenlp

In [1]:
from stanfordcorenlp import StanfordCoreNLP
import logging
import json
import pandas as pd

In [2]:
class StanfordNLP:
    def __init__(self, host='http://localhost', port=9000):
        self.nlp = StanfordCoreNLP(host, port=port,
                                   timeout=30000)  # , quiet=False, logging_level=logging.DEBUG)
        self.props = {
            'annotators': 'tokenize,ssplit,pos,lemma,ner,parse,depparse,dcoref,relation',
            'pipelineLanguage': 'en',
            'outputFormat': 'json'
        }

    def word_tokenize(self, sentence):
        return self.nlp.word_tokenize(sentence)

    def pos(self, sentence):
        return self.nlp.pos_tag(sentence)

    def ner(self, sentence):
        return self.nlp.ner(sentence)

    def parse(self, sentence):
        return self.nlp.parse(sentence)

    def dependency_parse(self, sentence):
        return self.nlp.dependency_parse(sentence)

    def annotate(self, sentence):
        return json.loads(self.nlp.annotate(sentence, properties=self.props))

    @staticmethod
    def tokens_to_dict(_tokens):
        tokens = defaultdict(dict)
        for token in _tokens:
            tokens[int(token['index'])] = {
                'word': token['word'],
                'lemma': token['lemma'],
                'pos': token['pos'],
                'ner': token['ner']
            }
        return tokens

In [3]:
df = pd.read_csv('newsdataset1.csv')
df.event_summary=df.event_summary.astype(str)


In [4]:
df['event_summary'][0]

'The death toll of the suicide bombing in Ibb'

In [5]:
from nltk.parse.corenlp import CoreNLPParser


In [6]:
if __name__ == '__main__':
    sNLP = StanfordNLP()
    for i in range(0, 10):
        text = df['event_summary'][i]
#         print ("Annotate:", sNLP.annotate(text))
        print('\n\n')
        print ("POS:", sNLP.pos(text))
        print('\n\n')
        print ("Tokens:", sNLP.word_tokenize(text))
        print('\n\n')
        print ("NER:", sNLP.ner(text))
        print('\n\n')
        print ("Parse:", sNLP.parse(text))
        sentence=sNLP.parse(text)
        parser = CoreNLPParser()
        next(parser.raw_parse(text)).pretty_print()
        print('\n\n .......................................................................... \n\n')
#         print ("Dep Parse:", sNLP.dependency_parse(text))




POS: [('The', 'DT'), ('death', 'NN'), ('toll', 'NN'), ('of', 'IN'), ('the', 'DT'), ('suicide', 'NN'), ('bombing', 'NN'), ('in', 'IN'), ('Ibb', 'NNP')]



Tokens: ['The', 'death', 'toll', 'of', 'the', 'suicide', 'bombing', 'in', 'Ibb']



NER: [('The', 'O'), ('death', 'O'), ('toll', 'O'), ('of', 'O'), ('the', 'O'), ('suicide', 'CRIMINAL_CHARGE'), ('bombing', 'CRIMINAL_CHARGE'), ('in', 'O'), ('Ibb', 'O')]



Parse: (ROOT
  (NP
    (NP (DT The) (NN death))
    (NP
      (NP (NN toll))
      (PP (IN of)
        (NP
          (NP (DT the) (NN suicide) (NN bombing))
          (PP (IN in)
            (NP (NNP Ibb))))))))
         ROOT                                         
          |                                            
          NP                                          
      ____|_________                                   
     |              NP                                
     |          ____|_________                         
     |         |              PP          

                                                                  ROOT                                                                     
                                                                   |                                                                        
                                                                   S                                                                       
         __________________________________________________________|_____________                                                           
        |                                                                        VP                                                        
        |                                                   _____________________|_______________________________________                   
        |                                                  NP                                                            |                 
        |        

                      ROOT                              
                       |                                 
                       S                                
        _______________|_____________                    
       |                             VP                 
       |                _____________|___                
       |               |                 NP             
       |               |          _______|_______        
       |               |         |               PP     
       |               |         |            ___|___    
       NP              |         NP          |       NP 
  _____|_______        |      ___|___        |       |   
NNP   NNP     NNS     VBP    DT      NN      IN     NNP 
 |     |       |       |     |       |       |       |   
Boko Haram militants attack  a      bus      in     Waza



 .......................................................................... 




In [7]:
from nltk.tree import Tree
parsestr=sNLP.parse(text)
parsestr

'(ROOT\r\n  (S\r\n    (NP (NNP Boko) (NNP Haram) (NNS militants))\r\n    (VP (VBP attack)\r\n      (NP\r\n        (NP (DT a) (NN bus))\r\n        (PP (IN in)\r\n          (NP (NNP Waza)))))))'

# Printing noun phrases

In [22]:
import math
for i in Tree.fromstring(parsestr).subtrees():
    if i.label() == 'VP':
        print (i)
        
#         ptempty_list.append(i)

(VP
  (VBP attack)
  (NP (NP (DT a) (NN bus)) (PP (IN in) (NP (NNP Waza)))))


# Printing noun phrases leaves

In [9]:
for i in Tree.fromstring(parsestr).subtrees():
    if i.label() == 'NP':
        print (i.leaves())

['Boko', 'Haram', 'militants']
['a', 'bus', 'in', 'Waza']
['a', 'bus']
['Waza']


# Creating a new file to convert tree into string

In [10]:

#!/usr/bin/python
fo = open('NP', 'w')
for i in Tree.fromstring(parsestr).subtrees():
    if i.label() == 'NP':
#         print (i)
#         print (i.leaves())
        fo.write(str(i )+ ';'+ str(i.leaves())+ ';')
#         fo.write(str(i.leaves())+ ';')
#         fo.write("\n")
                 
#         print ("\n\n\n________________________________________________________________________\n\n\n")
fo.flush()
fo.close()

# Reading text file and converting it into one string 

In [11]:
with open('NP', 'r') as file:
    data = file.read().replace('\n', '')
# print(data)

In [13]:

#Creating new coulm in data set
df['Noun_Phrases'] = 'default value'
df['Propositional_Phrases'] = 'default value'
df['Verb_Phrases'] = 'default value'
#copying all nounphrases to new colum NP
# df['NP'][0]=data

In [14]:
df['event_summary'][1]

'A shooting kills one and injures six people in Killarney'

In [15]:
import os
#!/usr/bin/python
fo = open('nounphrase', 'w', encoding="utf-8")
for j in range(0,10):
    text = df['event_summary'][j]
    parsestr=sNLP.parse(text)
    for i in Tree.fromstring(parsestr).subtrees():
        if i.label() == 'NP':
            fo.write(str(i)+ ';'+ str(i.leaves())+ ';')       
    with open('nounphrase', 'r') as file:
        data = file.read().replace('\n', '')
        df['Noun_Phrases'][j]=data
        file.seek(0)
    fo = open('nounphrase', 'w', encoding="utf-8")
    fo.write("")



In [16]:
df['Noun_Phrases']

0                                                         
1        (NP  (NP (DT The) (NN death))  (NP    (NP (NN ...
2        (NP (DT A) (NN shooting));['A', 'shooting'];(N...
3        (NP (NNP Lithuania));['Lithuania'];(NP  (NP (D...
4        (NP  (NP (NNP U.S.) (NN fast-food) (NN restaur...
5        (NP  (NP    (NP (DT The) (NNP Palestine) (NNP ...
6        (NP (DT The) (JJ Eurasian) (NNP Economic) (NNP...
7        (NP (NP (NNP Vietnam) (POS 's)) (JJ new) (NN m...
8         (NP (NNP Mario) (NNP Cuomo));['Mario', 'Cuomo'];
9        (NP (JJ Somali) (JJ al-Shabaab) (NNS militants...
10                                           default value
11                                           default value
12                                           default value
13                                           default value
14                                           default value
15                                           default value
16                                           default val

In [17]:

#!/usr/bin/python
fo = open('propphrase', 'w', encoding="utf-8")
for j in range(0,10):
    text = df['event_summary'][j]
    parsestr=sNLP.parse(text)
    for i in Tree.fromstring(parsestr).subtrees():
        if i.label() == 'PP':
            fo.write(str(i)+ ';'+ str(i.leaves())+ ';')       
    with open('propphrase', 'r') as file:
        data = file.read().replace('\n', '')
        df['Propositional_Phrases'][j]=data
        file.seek(0)
    fo = open('propphrase', 'w', encoding="utf-8")
    fo.write("")

In [25]:

#!/usr/bin/python
fo = open('verbphrase', 'w', encoding="utf-8")
for j in range(0,10):
    text = df['event_summary'][j]
    parsestr=sNLP.parse(text)
    for i in Tree.fromstring(parsestr).subtrees():
        if i.label() == 'VP':
            fo.write(str(i)+ ';'+ str(i.leaves())+ ';')       
    with open('verbphrase', 'r') as file:
        data = file.read().replace('\n', '')
        df['Verb_Phrases'][j]=data
        file.seek(0)
    fo = open('verbphrase', 'w', encoding="utf-8")
    fo.write("")

In [27]:
df['Verb_Phrases']

0                                                         
1                                                         
2        (VP  (VP (VBZ kills) (NP (CD one)))  (CC and) ...
3        (VP  (VBZ adopts)  (NP    (NP (DT the) (NN eur...
4        (VP  (VBZ warns)  (SBAR    (IN that)    (S    ...
5        (VP  (TO to)  (VP    (VB join)    (CC and)    ...
6                                                         
7        (VP (VBZ goes) (PP (IN into) (NP (NN effect)))...
8                                                         
9        (VP  (VBP attack)  (NP (DT an) (NN army) (NN b...
10                                           default value
11                                           default value
12                                           default value
13                                           default value
14                                           default value
15                                           default value
16                                           default val