https://stackabuse.com/python-for-nlp-getting-started-with-the-stanfordcorenlp-library/ <br>
https://github.com/Lynten/stanford-corenlp

In [1]:
from stanfordcorenlp import StanfordCoreNLP
import logging
import json
import pandas as pd
# from nltk.parse.stanford import StanfordDependencyParser
import os

from graphviz import Source


In [2]:
class StanfordNLP:
    def __init__(self, host='http://localhost', port=9000):
        self.nlp = StanfordCoreNLP(host, port=port,
                                   timeout=30000)  # , quiet=False, logging_level=logging.DEBUG)
        self.props = {
            'annotators': 'tokenize,ssplit,pos,lemma,ner,parse,depparse,dcoref,relation',
            'pipelineLanguage': 'en',
            'outputFormat': 'json'
        }

    def word_tokenize(self, sentence):
        return self.nlp.word_tokenize(sentence)

    def pos(self, sentence):
        return self.nlp.pos_tag(sentence)

    def ner(self, sentence):
        return self.nlp.ner(sentence)

    def parse(self, sentence):
        return self.nlp.parse(sentence)

    def dependency_parse(self, sentence):
        return self.nlp.dependency_parse(sentence)

    def annotate(self, sentence):
        return json.loads(self.nlp.annotate(sentence, properties=self.props))

    @staticmethod
    def tokens_to_dict(_tokens):
        tokens = defaultdict(dict)
        for token in _tokens:
            tokens[int(token['index'])] = {
                'word': token['word'],
                'lemma': token['lemma'],
                'pos': token['pos'],
                'ner': token['ner']
            }
        return tokens

In [3]:
df = pd.read_csv('newsdataset1.csv')
df.event_summary=df.event_summary.astype(str)


In [4]:
df['event_summary'].str.replace('(','').astype(str)
df['event_summary'].str.replace(')','').astype(str)
df['event_summary'].str.replace('[','').astype(str)
df['event_summary'].str.replace(']','').astype(str)
df['event_summary'].str.replace('"','').astype(str)
df['event_summary'].str.replace('’','').astype(str)


0             The death toll of the suicide bombing in Ibb
1        A shooting kills one and injures six people in...
2        Lithuania adopts the euro as its official curr...
3        U.S. fast-food restaurant chain Chick-fil-A wa...
4        The Palestine Authority signs a treaty to join...
5               The Eurasian Economic Union between Russia
6               Vietnams new marriage law goes into effect
7                                              Mario Cuomo
8        Somali al-Shabaab militants attack an army bas...
9                Boko Haram militants attack a bus in Waza
10                                        Abu Anas al-Libi
11       The MS Norman Atlantic ferry fire death toll r...
12       The United States enacts more sanctions on Nor...
13       Fleeing villagers from a remote part of the Bo...
14       Turkey permits the building of a Syriac church...
15       The Singapore-registered cargo ship MV Höegh O...
16                                         MS Bulk Jupit

In [5]:
# for i in range(0,30):
#     df=df.drop([df['event_summary'].map(len)==i])
# #     df=df.drop(df[df['event_summary']=='nan'].index)
# #     df=df.drop(df[df['event_summary']=='Vietnam’s new marriage law goes into effect'].index)
# # #     print("yes")




indexes=df[df['event_summary'].map(len)  < 35].index
# print(indexes)
df=df.drop(indexes)
index2=df[df['event_summary']=='nan'].index
df=df.drop(index2)

In [6]:
df=df.reset_index()

In [7]:
df['event_summary']

0            The death toll of the suicide bombing in Ibb
1       A shooting kills one and injures six people in...
2       Lithuania adopts the euro as its official curr...
3       U.S. fast-food restaurant chain Chick-fil-A wa...
4       The Palestine Authority signs a treaty to join...
5              The Eurasian Economic Union between Russia
6             Vietnam’s new marriage law goes into effect
7       Somali al-Shabaab militants attack an army bas...
8               Boko Haram militants attack a bus in Waza
9       The MS Norman Atlantic ferry fire death toll r...
10      The United States enacts more sanctions on Nor...
11      Fleeing villagers from a remote part of the Bo...
12      Turkey permits the building of a Syriac church...
13      The Singapore-registered cargo ship MV Höegh O...
14      The Cypriot-registered cargo ship Cemfjord sin...
15      A U.S. drone strike kills eight Uzbek people c...
16      A vehicle carrying United Nations peacekeepers...
17      A bomb

In [8]:
df['event_summary'][1]

'A shooting kills one and injures six people in Killarney'

In [9]:
from nltk.parse.corenlp import CoreNLPParser
from nltk.parse.stanford import StanfordDependencyParser


https://towardsdatascience.com/a-practitioners-guide-to-natural-language-processing-part-i-processing-understanding-text-9f4abfd13e72

In [10]:
if __name__ == '__main__':
    sNLP = StanfordNLP()
    for i in range(0, 100):
        print(i)
        text = df['event_summary'][i]
#         print ("Annotate:", sNLP.annotate(text))
        print('\n\n')
        print(text)
        print('\n\n')
        print ("POS:", sNLP.pos(text))
        print('\n\n')
        print ("Tokens:", sNLP.word_tokenize(text))
        print('\n\n')
        print ("NER:", sNLP.ner(text))
        print('\n\n')
        print ("Parse:", sNLP.parse(text))
#         print ("Dep Parse:", sNLP.dependency_parse(text))
        sentence=sNLP.parse(text)
        parser = CoreNLPParser()
        next(parser.raw_parse(text)).pretty_print()
        
#         sdp =  StanfordDependencyParser()
#         next(sdp.raw_parse(text)).pretty_print()
#         result = list(sdp.raw_parse(text))

#         dep_tree_dot_repr = [parse for parse in result][0].to_dot()
#         source = Source(dep_tree_dot_repr, filename="dep_tree", format="png")
#         source.view()

        
        print('\n\n .......................................................................... \n\n')
            

0



The death toll of the suicide bombing in Ibb



POS: [('The', 'DT'), ('death', 'NN'), ('toll', 'NN'), ('of', 'IN'), ('the', 'DT'), ('suicide', 'NN'), ('bombing', 'NN'), ('in', 'IN'), ('Ibb', 'NNP')]



Tokens: ['The', 'death', 'toll', 'of', 'the', 'suicide', 'bombing', 'in', 'Ibb']



NER: [('The', 'O'), ('death', 'O'), ('toll', 'O'), ('of', 'O'), ('the', 'O'), ('suicide', 'CRIMINAL_CHARGE'), ('bombing', 'CRIMINAL_CHARGE'), ('in', 'O'), ('Ibb', 'O')]



Parse: (ROOT
  (NP
    (NP (DT The) (NN death))
    (NP
      (NP (NN toll))
      (PP (IN of)
        (NP
          (NP (DT the) (NN suicide) (NN bombing))
          (PP (IN in)
            (NP (NNP Ibb))))))))
         ROOT                                         
          |                                            
          NP                                          
      ____|_________                                   
     |              NP                                
     |          ____|_________                  

NER: [('The', 'O'), ('Palestine', 'ORGANIZATION'), ('Authority', 'ORGANIZATION'), ('signs', 'O'), ('a', 'O'), ('treaty', 'O'), ('to', 'O'), ('join', 'O'), ('and', 'O'), ('participate', 'O'), ('in', 'O'), ('the', 'O'), ('International', 'ORGANIZATION'), ('Criminal', 'ORGANIZATION'), ('Court', 'ORGANIZATION'), ('.', 'O'), ('(', 'O'), ('Wall', 'ORGANIZATION'), ('Street', 'ORGANIZATION'), ('Journal', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (NP
    (NP
      (NP (DT The) (NNP Palestine) (NNP Authority) (NNS signs))
      (NP (DT a) (NN treaty)))
    (SBAR
      (S
        (VP (TO to)
          (VP (VB join)
            (CC and)
            (VB participate)
            (PP (IN in)
              (NP (DT the) (NNP International) (NNP Criminal) (NNP Court)))))))
    (. .)))
                                                                  ROOT                                                                     
                                                                   |         

                      ROOT                              
                       |                                 
                       S                                
        _______________|_____________                    
       |                             VP                 
       |                _____________|___                
       |               |                 NP             
       |               |          _______|_______        
       |               |         |               PP     
       |               |         |            ___|___    
       NP              |         NP          |       NP 
  _____|_______        |      ___|___        |       |   
NNP   NNP     NNS     VBP    DT      NN      IN     NNP 
 |     |       |       |     |       |       |       |   
Boko Haram militants attack  a      bus      in     Waza



 .......................................................................... 


9



The MS Norman Atlantic ferry fire death toll rises 

NER: [('The', 'O'), ('United', 'COUNTRY'), ('States', 'COUNTRY'), ('enacts', 'O'), ('more', 'O'), ('sanctions', 'O'), ('on', 'O'), ('North', 'COUNTRY'), ('Korea', 'COUNTRY'), ('in', 'O'), ('response', 'O'), ('to', 'O'), ('their', 'O'), ('alleged', 'O'), ('involvement', 'O'), ('in', 'O'), ('the', 'O'), ('hacking', 'O'), ('attack', 'CAUSE_OF_DEATH'), ('on', 'O'), ('Sony', 'ORGANIZATION'), ('.', 'O'), ('The', 'O'), ('sanctions', 'O'), ('target', 'O'), ('the', 'O'), ('North', 'NATIONALITY'), ('Korean', 'NATIONALITY'), ('Special', 'O'), ('Operation', 'O'), ('Force', 'O'), ('and', 'O'), ('10', 'NUMBER'), ('government', 'O'), ('officials', 'O'), ('.', 'O'), ('(', 'O'), ('Reuters', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (S
    (NP (DT The) (NNP United) (NNPS States))
    (VP (VBZ enacts)
      (NP
        (NP (JJR more) (NNS sanctions))
        (PP (IN on)
          (NP
            (NP (NNP North) (NNP Korea))
            (PP (IN in)
              (NP (NN response))))))
      (PP (TO t

NER: [('Fleeing', 'O'), ('villagers', 'O'), ('from', 'O'), ('a', 'O'), ('remote', 'O'), ('part', 'O'), ('of', 'O'), ('the', 'O'), ('Borno', 'ORGANIZATION'), ('State', 'ORGANIZATION'), ('report', 'O'), ('that', 'O'), ('Boko', 'PERSON'), ('Haram', 'PERSON'), ('had', 'O'), ('three', 'DURATION'), ('days', 'DURATION'), ('prior', 'O'), ('kidnapped', 'O'), ('around', 'O'), ('40', 'NUMBER'), ('boys', 'O'), ('and', 'O'), ('young', 'O'), ('men', 'O'), ('.', 'O'), ('(', 'O'), ('BBC', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (NP
    (NP (VBG Fleeing) (NNS villagers))
    (PP (IN from)
      (NP (DT a) (JJ remote) (NN part)))
    (PP (IN of)
      (NP
        (NP (DT the) (NNP Borno) (NNP State) (NN report))
        (SBAR (IN that)
          (S
            (NP (NNP Boko) (NNP Haram))
            (VP (VBD had)
              (ADVP
                (NP (CD three) (NNS days))
                (RB prior))
              (VP (VBN kidnapped)
                (PP (IN around)
                  (NP
       

Parse: (ROOT
  (S
    (NP (DT The) (JJ Singapore-registered) (NN cargo) (NN ship) (NN MV) (NNP Höegh) (NNP Osaka))
    (VP (VBZ runs)
      (ADVP (RB aground))
      (PP (IN on)
        (NP
          (NP (NNP Bramble) (NNP Bank))
          (PP (IN off)
            (NP
              (NP (DT the) (NN coast))
              (PP (IN of)
                (NP
                  (NP (DT the) (NNP Isle))
                  (PP (IN of)
                    (NP (NNP Wight))))))))))))
                                     ROOT                                                                                             
                                      |                                                                                                
                                      S                                                                                               
                             _________|_________________                                                                

Parse: (ROOT
  (S
    (NP (DT A) (NNP U.S.) (NN drone) (NN strike))
    (VP (VBZ kills)
      (SBAR
        (S
          (NP (CD eight) (JJ Uzbek) (NNS people))
          (VP (VBD claimed)
            (S
              (VP (TO to)
                (VP (VB be)
                  (VP (VBN linked)
                    (PP (IN with)
                      (NP (NNP al-Qaeda)))
                    (PP (IN in)
                      (NP (NNP Pakistan)))))))))))
    (. .)))
                ROOT                                                                                                                     
                 |                                                                                                                        
                 S                                                                                                                       
      ___________|____________                                                                                          

                               ROOT                                                  
                                |                                                     
                                NP                                                   
                      __________|________________________________                     
                     NP                                          |                   
      _______________|____                                       |                    
     |                    PP                                     PP                  
     |            ________|_____                             ____|_____               
     NP          |              NP                          |          NP            
  ___|_____      |    __________|__________________         |     _____|_______       
 DT        NN    IN  DT   JJ   NNP      NN         NN       IN  NNP           NNP    
 |         |     |   |    |     |       |        

NER: [('News', 'O'), ('emerges', 'O'), ('that', 'O'), ('two', 'DURATION'), ('days', 'DURATION'), ('prior', 'O'), ('hundreds', 'O'), ('of', 'O'), ('Boko', 'PERSON'), ('Haram', 'PERSON'), ('militants', 'O'), ('had', 'O'), ('overrun', 'O'), ('several', 'O'), ('towns', 'O'), ('in', 'O'), ('northeast', 'O'), ('Nigeria', 'COUNTRY'), ('and', 'O'), ('captured', 'O'), ('the', 'O'), ('military', 'O'), ('base', 'O'), ('in', 'O'), ('Baga', 'O'), ('.', 'O'), ('(', 'O'), ('Wall', 'ORGANIZATION'), ('Street', 'ORGANIZATION'), ('Journal', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (S
    (NP (NNP News))
    (VP (VBZ emerges)
      (SBAR (IN that)
        (S
          (ADVP
            (NP (CD two) (NNS days))
            (RB prior))
          (NP
            (NP (NNS hundreds))
            (PP (IN of)
              (NP (NNP Boko) (NNP Haram) (NNS militants))))
          (VP (VBD had)
            (VP
              (VP (VBN overrun)
                (NP (JJ several) (NNS towns))
                (PP (I

Parse: (ROOT
  (S
    (NP (JJ Bangladeshi) (NNS police))
    (VP (VBP report)
      (SBAR (IN that)
        (S
          (NP
            (NP (CD two) (NN opposition) (NNP Bangladesh))
            (NP (JJ Nationalist) (NNP Party) (NNS activists)))
          (VP (VBP are)
            (VP (VBN shot)
              (S
                (ADJP (JJ dead)
                  (PP (IN in)
                    (NP
                      (NP (NNS clashes))
                      (PP (IN with)
                        (NP
                          (NP (NNS members))
                          (PP (IN of)
                            (NP (DT the) (NN ruling))))))))
                (NP
                  (NP (NNP Awami) (NNP League))
                  (PP (IN in)
                    (NP
                      (NP (DT the) (NN town))
                      (PP (IN of)
                        (NP (NNP Natore)))))))
              (PP (IN on)
                (NP
                  (NP (DT the) (JJ first) (NN anniversar

                            ROOT                                                             
                             |                                                                
                             S                                                               
        _____________________|_____________________                                           
       |                                           VP                                        
       |                 __________________________|____                                      
       |                |                               NP                                   
       |                |          _____________________|____________                         
       |                |         |                                  PP                      
       |                |         |                 _________________|_______                 
       |                |         |                |   

Tokens: ['China', 'relaxes', 'controls', 'over', 'the', 'export', 'of', 'rare', 'earth', 'elements', 'after', 'losing', 'a', 'case', 'brought', 'by', 'the', 'United', 'States', 'at', 'the', 'World', 'Trade', 'Organization', '.', '(', 'AP', ')']



NER: [('China', 'COUNTRY'), ('relaxes', 'O'), ('controls', 'O'), ('over', 'O'), ('the', 'O'), ('export', 'O'), ('of', 'O'), ('rare', 'O'), ('earth', 'O'), ('elements', 'O'), ('after', 'O'), ('losing', 'O'), ('a', 'O'), ('case', 'O'), ('brought', 'O'), ('by', 'O'), ('the', 'O'), ('United', 'COUNTRY'), ('States', 'COUNTRY'), ('at', 'O'), ('the', 'O'), ('World', 'ORGANIZATION'), ('Trade', 'ORGANIZATION'), ('Organization', 'ORGANIZATION'), ('.', 'O'), ('(', 'O'), ('AP', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (S
    (NP (NNP China))
    (VP (VBZ relaxes)
      (NP
        (NP (NNS controls))
        (PP (IN over)
          (NP
            (NP (DT the) (NN export))
            (PP (IN of)
              (NP (JJ rare) (NN earth) (NNS elements

          ROOT                                                                                                                                                                                    
           |                                                                                                                                                                                       
           S                                                                                                                                                                                      
    _______|__________                                                                                                                                                                             
   |                  VP                                                                                                                                                                          
   |        __________|

Tokens: ['After', 'being', 'grounded', 'in', 'Abu', 'Dhabi', 'for', 'over', '12', 'hours', 'with', 'all', 'passengers', 'on', 'board']



NER: [('After', 'O'), ('being', 'O'), ('grounded', 'O'), ('in', 'O'), ('Abu', 'CITY'), ('Dhabi', 'CITY'), ('for', 'O'), ('over', 'O'), ('12', 'DURATION'), ('hours', 'DURATION'), ('with', 'O'), ('all', 'O'), ('passengers', 'O'), ('on', 'O'), ('board', 'O')]



Parse: (ROOT
  (PP (IN After)
    (S
      (VP (VBG being)
        (VP (VBN grounded)
          (PP (IN in)
            (NP
              (NP (NNP Abu) (NNP Dhabi))
              (PP (IN for)
                (NP (IN over) (CD 12) (NNS hours)))))
          (PP (IN with)
            (NP
              (NP (DT all) (NNS passengers))
              (PP (IN on)
                (NP (NN board))))))))))
                     ROOT                                                                               
                      |                                                                             

                                                  ROOT                                                                                                          
                                                   |                                                                                                             
                                                   S                                                                                                            
    _______________________________________________|___________________________________                                                                          
   |                                                                                   VP                                                                       
   |         __________________________________________________________________________|____________                                                             
   |        |                  

                                    ROOT                                             
                                     |                                                
                                    SINV                                             
                          ___________|_______________________________________         
                        FRAG                                      |          |       
                         |                                        |          |        
                         NP                                       |          |       
         ________________|____________________________            |          |        
        |                     PP                      PP          |          |       
        |             ________|______              ___|____       |          |        
        NP           |               NP           |        NP     VP         NP      
  ______|_______     |    ___________|_______    

                  ROOT                                                                                                                             
                   |                                                                                                                                
                   NP                                                                                                                              
    _______________|___________________                                                                                                             
   |                                   PP                                                                                                          
   |      _____________________________|_____________________________________                                                                       
   |     |                                                                   NP                              

Parse: (ROOT
  (S
    (S
      (NP
        (NP (NNS Officials))
        (PP (IN in)
          (NP
            (NP (DT the) (JJ American) (NN state))
            (PP (IN of)
              (NP (NNP Massachusetts))))))
      (VP (VB open)
        (NP (DT a) (NN time) (NN capsule))))
    (VP (VBD left)
      (PRT (RP behind))
      (PP (IN by)
        (NP (VBG founding) (NNS fathers))))
    (NP
      (NP (NNP Paul) (NNP Revere))
      (CC and)
      (NP (NNP Samuel) (NNP Adams)))
    (. .)))
                                                       ROOT                                                                                                                                       
                                                        |                                                                                                                                          
                                                        S                                                           

NER: [('A', 'O'), ('military', 'O'), ('truck', 'O'), ('collides', 'O'), ('with', 'O'), ('a', 'O'), ('bus', 'O'), ('carrying', 'O'), ('members', 'O'), ('of', 'O'), ('the', 'O'), ('National', 'ORGANIZATION'), ('Guard', 'ORGANIZATION'), ('of', 'O'), ('Ukraine', 'COUNTRY'), ('in', 'O'), ('Ukraine', 'COUNTRY')]



Parse: (ROOT
  (S
    (NP (DT A) (JJ military) (NN truck))
    (VP (VBZ collides)
      (PP (IN with)
        (NP
          (NP (DT a) (NN bus))
          (VP (VBG carrying)
            (NP
              (NP (NNS members))
              (PP (IN of)
                (NP
                  (NP (DT the) (NNP National) (NNP Guard))
                  (PP (IN of)
                    (NP (NNP Ukraine))))))
            (PP (IN in)
              (NP (NNP Ukraine)))))))))
                     ROOT                                                                                           
                      |                                                                                    

NER: [('43', 'NUMBER'), ('US', 'COUNTRY'), ('states', 'O'), ('report', 'O'), ('an', 'O'), ('epidemic', 'O'), ('of', 'O'), ('influenza', 'CAUSE_OF_DEATH')]



Parse: (ROOT
  (S
    (NP (CD 43) (NNP US) (NNS states))
    (VP (VBP report)
      (NP
        (NP (DT an) (JJ epidemic))
        (PP (IN of)
          (NP (NN influenza)))))))
                ROOT                                        
                 |                                           
                 S                                          
      ___________|_______________                            
     |                           VP                         
     |            _______________|______                     
     |           |                      NP                  
     |           |          ____________|_______             
     |           |         |                    PP          
     |           |         |                 ___|______      
     NP          |         NP               |    

NER: [('Thai', 'NATIONALITY'), ('police', 'O'), ('arrest', 'O'), ('Gurmeet', 'PERSON'), ('Singh', 'PERSON'), ('convicted', 'O'), ('of', 'O'), ('involvement', 'O'), ('in', 'O'), ('a', 'O'), ('bombing', 'CRIMINAL_CHARGE'), ('in', 'O'), ('Chandigarh', 'CITY'), ('that', 'O'), ('killed', 'O'), ('18', 'NUMBER'), ('people', 'O'), ('in', 'O'), ('1996', 'DATE')]



Parse: (ROOT
  (S
    (NP (NNP Thai) (NN police) (NN arrest) (NNP Gurmeet) (NNP Singh))
    (VP (VBN convicted)
      (PP (IN of)
        (NP (NN involvement)))
      (PP (IN in)
        (NP
          (NP (DT a) (NN bombing))
          (PP (IN in)
            (NP (NNP Chandigarh)))
          (SBAR
            (WHNP (WDT that))
            (S
              (VP (VBD killed)
                (NP (CD 18) (NNS people))
                (PP (IN in)
                  (NP (CD 1996)))))))))))
                            ROOT                                                                                                              
           

Parse: (ROOT
  (S
    (NP
      (NP (DT A) (NN man))
      (VP (VBG claiming)
        (S
          (VP (TO to)
            (VP (VB be)
              (NP
                (NP
                  (NP (DT the) (NNP Lord) (POS 's))
                  (NN Resistance))
                (NP (NNP Army) (JJ top) (NN commander) (NNP Ugandan) (NNP Dominic) (NNP Ongwen))))))))
    (VP (VBZ turns)
      (NP (PRP himself))
      (PP (IN in)
        (PP (TO to)
          (NP (NNP United) (NNPS States) (NNS forces))))
      (PP (IN in)
        (NP (DT the)
          (ADJP (JJ Central) (JJ African))
          (NN Republic))))
    (. .)))
                                                                                                    ROOT                                                                                                   
                                                                                                     |                                                                      

                                               ROOT                                               
                                                |                                                  
                                                S                                                 
      __________________________________________|_______                                           
     |                                                  VP                                        
     |            ______________________________________|______________________________            
     |           |              NP                                                     |          
     |           |          ____|_________                                             |           
     |           |         |              PP                                           PP         
     |           |         |           ___|_____________                            ___|___        
     

Tokens: ['The', 'US', 'Congress', 're-elects', 'John', 'Boehner', 'to', 'his', 'third', 'term', 'as', 'Speaker', '.', '(', 'USA', 'Today', ')']



NER: [('The', 'O'), ('US', 'COUNTRY'), ('Congress', 'ORGANIZATION'), ('re-elects', 'O'), ('John', 'PERSON'), ('Boehner', 'PERSON'), ('to', 'O'), ('his', 'O'), ('third', 'ORDINAL'), ('term', 'O'), ('as', 'O'), ('Speaker', 'TITLE'), ('.', 'O'), ('(', 'O'), ('USA', 'ORGANIZATION'), ('Today', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (S
    (NP (DT The) (NNP US) (NNP Congress))
    (VP (VBZ re-elects)
      (NP
        (NP (NNP John) (NNP Boehner))
        (PP (TO to)
          (NP (PRP$ his) (JJ third) (NN term))))
      (PP (IN as)
        (NP (NNP Speaker))))
    (. .)))
                                                          ROOT                                                   
                                                           |                                                      
                                          

      ROOT                                                                               
       |                                                                                  
      FRAG                                                                               
       |                                                                                  
       NP                                                                                
  _____|___________                                                                       
 |     |           S                                                                     
 |     |           |                                                                      
 |     |           VP                                                                    
 |     |       ____|________________                                                      
 |     |      |                     VP                                                   
 |   

Parse: (ROOT
  (S
    (NP (DT An) (NNP Andhra) (NNP Pradesh) (NNP State) (NNP Road) (NNP Transport) (NNP Corporation) (NN bus))
    (VP (VBZ falls)
      (PP (IN into)
        (NP
          (NP (DT a) (NN ravine))
          (PP (IN near)
            (NP
              (NP (DT the) (NN town))
              (PP (IN of)
                (NP (NNP Penukonda)))))))
      (PP (IN in)
        (NP
          (NP (DT the) (JJ Indian) (NN state))
          (PP (IN of)
            (NP
              (NP (NNP Andhra) (NNP Pradesh))
              (VP (VBG killing)
                (NP
                  (QP (IN at) (JJS least) (CD 20))
                  (NNS people))))))))
    (. .)))
                                                                                                                   ROOT                                                                                                    
                                                                                                          

                           ROOT                                                                                                                                             
                            |                                                                                                                                                
                            S                                                                                                                                               
   _________________________|__________________________                                                                                                                      
  |                                                    VP                                                                                                                   
  |         ___________________________________________|_________________________________                                            

                                                                                                          ROOT                                                                                  
                                                                                                           |                                                                                     
                                                                                                           S                                                                                    
                          _________________________________________________________________________________|_______________                                                                      
                         |                                                                                                 VP                                                                   
                         |       

    ROOT                                      
     |                                         
     NP                                       
  ___|______________                           
 |                  PP                        
 |    ______________|______                    
 |   |                     NP                 
 |   |         ____________|___                
 |   |        |                PP             
 |   |        |             ___|___            
 NP  |        NP           |       NP         
 |   |     ___|_____       |    ___|_____      
 CD  IN   DT       NNS     IN  DT       NNS   
 |   |    |         |      |   |         |     
One  of  the     suspects  in the     killings



 .......................................................................... 


65



[https://www.nytimes.com/2015/01/08/world/europe/charlie-hebdo-paris-shooting.html?smid=tw-nytimes&_r=0"



POS: [('[', '-LRB-'), ('https://www.nytimes.com/2015/01/08/world/europe/charlie-hebdo-paris-

NER: [("Dunkin'", 'ORGANIZATION'), ('Donuts', 'ORGANIZATION'), ('signs', 'O'), ('a', 'O'), ('franchise', 'O'), ('agreement', 'O'), ('for', 'O'), ('1', 'NUMBER')]



Parse: (ROOT
  (NP
    (NP (NNP Dunkin') (NNP Donuts) (NNS signs))
    (NP
      (NP (DT a) (NN franchise) (NN agreement))
      (PP (IN for)
        (NP (CD 1))))))
                     ROOT                                
                      |                                   
                      NP                                 
           ___________|_________________                  
          |                             NP               
          |                    _________|__________       
          |                   |                    PP    
          |                   |                 ___|___   
          NP                  NP               |       NP
    ______|______      _______|_________       |       |  
  NNP    NNP    NNS   DT      NN        NN     IN      CD
   |      |      |    |   

NER: [('The', 'O'), ('Obama', 'PERSON'), ('administration', 'O'), ('fines', 'O'), ('Honda', 'ORGANIZATION'), ('$', 'MONEY'), ('70', 'MONEY'), ('million', 'MONEY'), ('for', 'O'), ('failing', 'O'), ('to', 'O'), ('report', 'O'), ('deaths', 'O'), ('and', 'O'), ('injury', 'O'), ('complaints', 'O'), ('from', 'O'), ('2003', 'DATE'), ('to', 'DATE'), ('2014', 'DATE'), ('.', 'O'), ('(', 'O'), ('KNTV', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (NP
    (NP
      (NP (DT The) (NNP Obama) (NN administration) (NNS fines))
      (NNP Honda))
    (NP
      (NP
        (QP ($ $) (CD 70) (CD million)))
      (PP (IN for)
        (S
          (VP (VBG failing)
            (S
              (VP (TO to)
                (VP (VB report)
                  (NP (NNS deaths)
                    (CC and)
                    (NN injury) (NNS complaints))
                  (PP (IN from)
                    (NP (CD 2003) (TO to) (CD 2014))))))))))
    (. .)))
                                                      

Tokens: ['The', 'US', 'Olympic', 'Committee', 'chooses', 'the', 'city', 'of', 'Boston', 'as', 'the', 'American', 'bid', 'city', 'to', 'host', 'the', '2024', 'Summer', 'Olympics', '.', '(', 'The', 'New', 'York', 'Times', ')']



NER: [('The', 'O'), ('US', 'ORGANIZATION'), ('Olympic', 'ORGANIZATION'), ('Committee', 'ORGANIZATION'), ('chooses', 'O'), ('the', 'O'), ('city', 'O'), ('of', 'O'), ('Boston', 'CITY'), ('as', 'O'), ('the', 'O'), ('American', 'NATIONALITY'), ('bid', 'O'), ('city', 'O'), ('to', 'O'), ('host', 'O'), ('the', 'DATE'), ('2024', 'DATE'), ('Summer', 'DATE'), ('Olympics', 'MISC'), ('.', 'O'), ('(', 'O'), ('The', 'O'), ('New', 'ORGANIZATION'), ('York', 'ORGANIZATION'), ('Times', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (S
    (NP (DT The) (NNP US) (NNP Olympic) (NNP Committee))
    (VP (VBZ chooses)
      (NP
        (NP (DT the) (NN city))
        (PP (IN of)
          (NP (NNP Boston))))
      (PP (IN as)
        (NP (DT the) (JJ American) (NN bid) (NN city)))
    

Parse: (ROOT
  (S
    (NP
      (NP
        (NP (DT The) (NNPS People) (POS 's))
        (NN Republic))
      (PP (IN of)
        (NP (NNP China))))
    (VP (VBZ plans)
      (S
        (VP (TO to)
          (VP
            (ADVP (RB soon))
            (VB permit)
            (NP
              (NP (DT the) (JJ online) (NN sale))
              (PP (IN of)
                (NP (NN prescription) (NNS drugs))))))))
    (. .)))
                                     ROOT                                                                                         
                                      |                                                                                            
                                      S                                                                                           
                 _____________________|______________                                                                              
                |                               

Tokens: ['Russia', 'lists', 'transsexual', 'and', 'transgender', 'individuals', 'among', 'those', 'with', '"', 'personality', 'and', 'behavioural', 'disorders', '"', 'who', 'will', 'be', 'banned', 'from', 'obtaining', 'driving', 'licenses', '.', '(', 'BBC', ')', '(', 'The', 'Moscow', 'Times', ')']



NER: [('Russia', 'COUNTRY'), ('lists', 'O'), ('transsexual', 'O'), ('and', 'O'), ('transgender', 'O'), ('individuals', 'O'), ('among', 'O'), ('those', 'O'), ('with', 'O'), ('"', 'O'), ('personality', 'O'), ('and', 'O'), ('behavioural', 'O'), ('disorders', 'O'), ('"', 'O'), ('who', 'O'), ('will', 'O'), ('be', 'O'), ('banned', 'O'), ('from', 'O'), ('obtaining', 'O'), ('driving', 'O'), ('licenses', 'O'), ('.', 'O'), ('(', 'O'), ('BBC', 'ORGANIZATION'), (')', 'O'), ('(', 'O'), ('The', 'O'), ('Moscow', 'CITY'), ('Times', 'O'), (')', 'O')]



Parse: (ROOT
  (S
    (NP (NNP Russia))
    (VP (VBZ lists)
      (NP
        (NP (NN transsexual)
          (CC and)
          (JJ transgender) (NNS indiv

NER: [('Barack', 'PERSON'), ('Obama', 'PERSON'), ('proposes', 'O'), ('a', 'O'), ('new', 'O'), ('program', 'O'), ('to', 'O'), ('waive', 'O'), ('tuition', 'O'), ('at', 'O'), ('community', 'O'), ('colleges', 'O'), ('in', 'O'), ('the', 'O'), ('United', 'COUNTRY'), ('States', 'COUNTRY'), ('for', 'O'), ('the', 'DURATION'), ('first', 'DURATION'), ('two', 'DURATION'), ('years', 'DURATION'), ('.', 'O'), ('(', 'O'), ('The', 'O'), ('New', 'ORGANIZATION'), ('York', 'ORGANIZATION'), ('Times', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (S
    (NP (NNP Barack) (NNP Obama))
    (VP (VBZ proposes)
      (NP (DT a) (JJ new) (NN program)
        (S
          (VP (TO to)
            (VP (VB waive)
              (NP (NN tuition))
              (PP (IN at)
                (NP
                  (NP (NN community) (NNS colleges))
                  (PP (IN in)
                    (NP (DT the) (NNP United) (NNPS States)))))
              (PP (IN for)
                (NP (DT the) (JJ first) (CD two) (NNS yea

                                                                 ROOT                                                    
                                                                  |                                                       
                                                                  S                                                      
        __________________________________________________________|______                                                 
       |                                                                 VP                                              
       |             ____________________________________________________|_______                                         
       |            |             |                                              PP                                      
       |            |             |             _________________________________|___                                     
       |            

POS: [('A', 'DT'), ('traffic', 'NN'), ('accident', 'NN'), ('between', 'IN'), ('an', 'DT'), ('oil', 'NN'), ('tanker', 'NN'), ('truck', 'NN'), ('and', 'CC'), ('passenger', 'NN'), ('coach', 'NN'), ('en', 'IN'), ('route', 'NN'), ('to', 'TO'), ('Shikarpur', 'NNP'), ('from', 'IN'), ('Karachi', 'NNP'), ('on', 'IN'), ('the', 'DT'), ('Pakistan', 'NNP'), ('National', 'NNP'), ('Highway', 'NNP'), ('Link', 'NNP'), ('Road', 'NNP'), ('near', 'IN'), ('Gulshan-e-Hadeed', 'NNP')]



Tokens: ['A', 'traffic', 'accident', 'between', 'an', 'oil', 'tanker', 'truck', 'and', 'passenger', 'coach', 'en', 'route', 'to', 'Shikarpur', 'from', 'Karachi', 'on', 'the', 'Pakistan', 'National', 'Highway', 'Link', 'Road', 'near', 'Gulshan-e-Hadeed']



NER: [('A', 'O'), ('traffic', 'CAUSE_OF_DEATH'), ('accident', 'CAUSE_OF_DEATH'), ('between', 'O'), ('an', 'O'), ('oil', 'O'), ('tanker', 'TITLE'), ('truck', 'O'), ('and', 'O'), ('passenger', 'O'), ('coach', 'TITLE'), ('en', 'O'), ('route', 'O'), ('to', 'O'), ('Shikarpur', 

                                                                ROOT                                                                                                                                                          
                                                                 |                                                                                                                                                             
                                                                 S                                                                                                                                                            
                     ____________________________________________|__________________________________________                                                                                                                   
                    |                                                                                     

NER: [('The', 'O'), ('Golden', 'CITY'), ('Gate', 'CITY'), ('Bridge', 'LOCATION'), ('Highway', 'LOCATION'), ('and', 'O'), ('Transportation', 'ORGANIZATION'), ('District', 'ORGANIZATION'), ('temporarily', 'O'), ('closes', 'O'), ('the', 'O'), ('Golden', 'CITY'), ('Gate', 'CITY'), ('Bridge', 'LOCATION'), ('until', 'O'), ('Monday', 'DATE'), ('to', 'O'), ('install', 'O'), ('a', 'O'), ('$', 'MONEY'), ('30', 'MONEY'), ('million', 'MONEY'), ('moveable', 'O'), ('barrier', 'O'), ('between', 'O'), ('opposite', 'O'), ('traffic', 'O'), ('flows', 'O'), ('.', 'O'), ('(', 'O'), ('SF', 'O'), ('Gate', 'O'), (')', 'O')]



Parse: (ROOT
  (S
    (NP (DT The) (NNP Golden) (NNP Gate) (NNP Bridge) (NNP Highway)
      (CC and)
      (NNP Transportation) (NNP District))
    (ADVP (RB temporarily))
    (VP (VBZ closes)
      (NP (DT the) (NNP Golden) (NNP Gate) (NNP Bridge))
      (PP (IN until)
        (NP (NNP Monday)))
      (S
        (VP (TO to)
          (VP (VB install)
            (NP
              (NP (

Parse: (ROOT
  (S
    (NP
      (NP (NNS Thousands))
      (PP (IN of)
        (NP (NN police)
          (CC and)
          (JJ military) (NNS personnel))))
    (VP (VBP prepare)
      (S
        (VP (TO to)
          (VP (VB secure)
            (NP (DT a) (NNP Paris) (NN unity) (NN rally))
            (PP (IN with)
              (NP
                (NP
                  (ADVP (RB up)
                    (PP (TO to)
                      (NP
                        (QP (DT a) (CD million)))))
                  (NNS people))
                (VP (VBN expected)
                  (S
                    (VP (TO to)
                      (VP (VB attend)))))))))))
    (. .)))
                           ROOT                                                                                                                                                         
                            |                                                                                                            

POS: [('The', 'DT'), ('death', 'NN'), ('toll', 'NN'), ('from', 'IN'), ('the', 'DT'), ('Karachi', 'NNP'), ('traffic', 'NN'), ('accident', 'NN'), ('rises', 'VBZ'), ('to', 'TO'), ('62', 'CD'), ('.', '.'), ('(', '-LRB-'), ('Indian', 'NNP'), ('Express', 'NNP'), (')', '-RRB-')]



Tokens: ['The', 'death', 'toll', 'from', 'the', 'Karachi', 'traffic', 'accident', 'rises', 'to', '62', '.', '(', 'Indian', 'Express', ')']



NER: [('The', 'O'), ('death', 'O'), ('toll', 'O'), ('from', 'O'), ('the', 'O'), ('Karachi', 'CITY'), ('traffic', 'CAUSE_OF_DEATH'), ('accident', 'CAUSE_OF_DEATH'), ('rises', 'O'), ('to', 'O'), ('62', 'NUMBER'), ('.', 'O'), ('(', 'O'), ('Indian', 'ORGANIZATION'), ('Express', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (S
    (NP
      (NP (DT The) (NN death) (NN toll))
      (PP (IN from)
        (NP (DT the) (NNP Karachi) (NN traffic) (NN accident))))
    (VP (VBZ rises)
      (PP (TO to)
        (NP (CD 62))))
    (. .)))
                                              ROOT

                         ROOT                                                                                                                 
                          |                                                                                                                    
                          S                                                                                                                   
           _______________|____________                                                                                                        
          |                            VP                                                                                                     
          |                       _____|_________                                                                                              
          |                      |              SBAR                                                                                       

NER: [('A', 'O'), ('school', 'O'), ('bus', 'O'), ('plunges', 'O'), ('off', 'O'), ('of', 'O'), ('a', 'O'), ('cliff', 'O'), ('on', 'O'), ('the', 'O'), ('Caribbean', 'LOCATION'), ('island', 'O'), ('of', 'O'), ('St.', 'LOCATION'), ('Vincent', 'LOCATION'), ('leaving', 'O'), ('at', 'O'), ('least', 'O'), ('five', 'NUMBER'), ('people', 'O'), ('dead', 'O'), ('and', 'O'), ('two', 'NUMBER'), ('missing', 'O'), ('.', 'O'), ('(', 'O'), ('AP', 'ORGANIZATION'), (')', 'O')]



Parse: (ROOT
  (S
    (NP (DT A) (NN school) (NN bus))
    (VP (VBZ plunges)
      (PRT (IN off))
      (PP (IN of)
        (NP
          (NP (DT a) (NN cliff))
          (PP (IN on)
            (NP
              (NP
                (NP (DT the) (NNP Caribbean) (NN island))
                (PP (IN of)
                  (NP
                    (NP (NNP St.) (NNP Vincent))
                    (VP (VBG leaving)
                      (NP
                        (QP (IN at) (JJS least) (CD five))
                        (NNS people))


In [11]:
df['event_summary'].count()

9487

In [12]:
tokens=[]

In [13]:
# if __name__ == '__main__':
#     sNLP = StanfordNLP()
#     for i in range(0, 12817):
#         text = df['event_summary'][i]
# #         print ("Annotate:", sNLP.annotate(text))
        
#         print ("Tokens:", sNLP.word_tokenize(text))
#         tokens=sNLP.word_tokenize(text)
#         df['tokens'][i]=tokens
# #         print('\n\n .......................................................................... \n\n')
# #         print ("Dep Parse:", sNLP.dependency_parse(text))

In [14]:
 text = df['event_summary'][2]

In [15]:
text

'Lithuania adopts the euro as its official currency'

In [16]:
from nltk.tree import Tree
parsestr=sNLP.parse(text)
parsestr

'(ROOT\r\n  (S\r\n    (NP (NNP Lithuania))\r\n    (VP (VBZ adopts)\r\n      (NP\r\n        (NP (DT the) (NN euro))\r\n        (PP (IN as)\r\n          (NP (PRP$ its) (JJ official) (NN currency)))))))'

# Printing noun phrases

In [17]:
import math
for i in Tree.fromstring(parsestr).subtrees():
    if i.label() == 'NP':
        print (i)
        
#         ptempty_list.append(i)
#A shooting kills one and injures six people in Killarney'

(NP (NNP Lithuania))
(NP
  (NP (DT the) (NN euro))
  (PP (IN as) (NP (PRP$ its) (JJ official) (NN currency))))
(NP (DT the) (NN euro))
(NP (PRP$ its) (JJ official) (NN currency))


# Printing noun phrases leaves

In [18]:
for i in Tree.fromstring(parsestr).subtrees():
    if i.label() == 'VP':
        print (i.leaves())

['adopts', 'the', 'euro', 'as', 'its', 'official', 'currency']


# Creating a new file to convert tree into string

In [19]:

#!/usr/bin/python
fo = open('NP', 'w')
for i in Tree.fromstring(parsestr).subtrees():
    if i.label() == 'NP':
#         print (i)
#         print (i.leaves())
        fo.write(str(i )+ ';'+ str(i.leaves())+ ';')
#         fo.write(str(i.leaves())+ ';')
#         fo.write("\n")
                 
#         print ("\n\n\n________________________________________________________________________\n\n\n")
fo.flush()
fo.close()

# Reading text file and converting it into one string 

In [20]:
with open('NP', 'r') as file:
    data = file.read().replace('\n', '')
# print(data)

In [21]:
df['event_summary'][2]

'Lithuania adopts the euro as its official currency'

# Full code for all dataset. Extracting all NP, VP, PP for all dataset

In [22]:
df['event_summary'][2]
parsestr

'(ROOT\r\n  (S\r\n    (NP (NNP Lithuania))\r\n    (VP (VBZ adopts)\r\n      (NP\r\n        (NP (DT the) (NN euro))\r\n        (PP (IN as)\r\n          (NP (PRP$ its) (JJ official) (NN currency)))))))'

In [23]:
df['event_summary'].count()

9487

In [24]:
for i in range(0,30):
    df['Nounphrases'+ str(i)]="NULL VALUE"

In [25]:

for j in range(0,100):
    fo = open('verbphrase', 'w', encoding="utf-8")
    text = df['event_summary'][j]
    print(text+"\n")
    parsestr=sNLP.parse(text)
    for i in Tree.fromstring(parsestr).subtrees():
        if i.label() == 'NP':
#             fo.write(str(i)+ ';')
            fo.write(str( i.leaves())+ ';')
           
    fo.close()        
    with open('verbphrase', 'r',encoding="utf8") as file:
        data = file.read().replace('\n', '')
#         data = file.read().replace('[', '')
        
#         data = file.read()
        split_data=data.split(';')
        print (split_data[0])
        print(len(split_data))
        
        for k in range(0,len(split_data)):
#             df['Nounphrases'+ str(k) ]="random"
            df['Nounphrases'+ str(k)][j]=split_data[k]
#             print(df['Nounphrases'+ str(k)][j])

#         df['Verb_Phrases'][j]=data
#         print( df['Verb_Phrases'][j])
        file.seek(0)
    fo = open('verbphrase', 'w').close()
#     fo.write("")

The death toll of the suicide bombing in Ibb

['The', 'death', 'toll', 'of', 'the', 'suicide', 'bombing', 'in', 'Ibb']
8


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


A shooting kills one and injures six people in Killarney

['A', 'shooting']
6
Lithuania adopts the euro as its official currency

['Lithuania']
5
U.S. fast-food restaurant chain Chick-fil-A warns that a security breach may have leaked credit card details of 9

['U.S.', 'fast-food', 'restaurant', 'chain', 'Chick-fil-A']
8
The Palestine Authority signs a treaty to join and participate in the International Criminal Court. (Wall Street Journal)

['The', 'Palestine', 'Authority', 'signs', 'a', 'treaty', 'to', 'join', 'and', 'participate', 'in', 'the', 'International', 'Criminal', 'Court', '.']
6
The Eurasian Economic Union between Russia

['The', 'Eurasian', 'Economic', 'Union']
3
Vietnam’s new marriage law goes into effect

['Vietnam', "'s", 'new', 'marriage', 'law']
4
Somali al-Shabaab militants attack an army base on the outskirts of Baidoa

['Somali', 'al-Shabaab', 'militants']
6
Boko Haram militants attack a bus in Waza

['Boko', 'Haram', 'militants']
5
The MS Norman Atlantic ferry fir

The 114th United States Congress begins. (ABC News)

['The', '114th', 'United', 'States', 'Congress']
2
The US Congress re-elects John Boehner to his third term as Speaker. (USA Today)

['The', 'US', 'Congress']
6
NASA's Kepler space observatory announces the discovery of three new planets in the Goldilocks zone capable of supporting life. (News Limited)

['NASA', "'s", 'Kepler', 'space', 'observatory']
9
A proposal to change the demolition of Candlestick Park in San Francisco

['A', 'proposal', 'to', 'change', 'the', 'demolition', 'of', 'Candlestick', 'Park', 'in', 'San', 'Francisco']
7
Gunmen attack the Paris office of the French satirical magazine Charlie Hebdo

['Gunmen']
5
A car bomb explodes outside a police college in the Yemeni capital Sana'a with at least 38 people reported dead and more than 50 wounded. (Reuters)

['A', 'car', 'bomb']
11
An Andhra Pradesh State Road Transport Corporation bus falls into a ravine near the town of Penukonda in the Indian state of Andhra Pradesh 

In [26]:
df

Unnamed: 0,index,_id,date,category,event_title,event_summary,entities,person,external_link,Unnamed: 8,...,Nounphrases20,Nounphrases21,Nounphrases22,Nounphrases23,Nounphrases24,Nounphrases25,Nounphrases26,Nounphrases27,Nounphrases28,Nounphrases29
0,0,"ObjectId(""5d056c5bb5f3a01b25e85fd2"")","2015-01-01T00:00:00.000Z""""",Armed conflicts and attacks,"[/wiki/Terrorism_in_Yemen""]""",The death toll of the suicide bombing in Ibb,Yemen,that occurred on December 31 rises to 49 with...,"[/wiki/2014_Ibb_bombing""",/wiki/Ibb,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
1,1,"ObjectId(""5d056c5bb5f3a01b25e85fd3"")","2015-01-01T00:00:00.000Z""""",Armed conflicts and attacks,FALSE,A shooting kills one and injures six people in...,Calgary,Canada,with police making no arrests. (National Post),[Killarney,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
2,2,"ObjectId(""5d056c5bb5f3a01b25e85fd4"")","2015-01-01T00:00:00.000Z""""",Business and economy,FALSE,Lithuania adopts the euro as its official curr...,becoming the 19th member of the Eurozone. (CNN),"[Lithuania and the euro""","Eurozone]""",FALSE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
3,3,"ObjectId(""5d056c5bb5f3a01b25e85fd5"")","2015-01-01T00:00:00.000Z""""",Business and economy,FALSE,U.S. fast-food restaurant chain Chick-fil-A wa...,000 customers in five states. (Daily Mail),"[Chick-fil-A""]""",FALSE,[http://www.dailymail.co.uk/news/article-28936...,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
4,4,"ObjectId(""5d056c5bb5f3a01b25e85fd6"")","2015-01-01T00:00:00.000Z""""",International relations,FALSE,The Palestine Authority signs a treaty to join...,"[Palestine Authority""","International Criminal Court]""",FALSE,[https://www.wsj.com/articles/abbas-oks-palest...,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
5,5,"ObjectId(""5d056c5bb5f3a01b25e85fd7"")","2015-01-01T00:00:00.000Z""""",International relations,FALSE,The Eurasian Economic Union between Russia,Kazakhstan,Belarus,Kyrgyzstan and Armenia comes into effect (Nov...,"[Eurasian Economic Union""",...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
6,6,"ObjectId(""5d056c5bb5f3a01b25e85fd8"")","2015-01-01T00:00:00.000Z""""",Law and crime,FALSE,Vietnam’s new marriage law goes into effect,abolishing regulations that “prohibit marriag...,"[Vietnam""","Same-sex marriage in Vietnam]""",FALSE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
7,8,"ObjectId(""5d056c5bb5f3a01b25e85fda"")","2015-01-02T00:00:00.000Z""""",Armed conflicts and attacks,"[/wiki/Somali_Civil_War_(2009%E2%80%93present)""]""",Somali al-Shabaab militants attack an army bas...,killing seven soldiers. (Reuters via Daily Ti...,"[/wiki/Somalia""",/wiki/Al-Shabaab_(militant_group),"/wiki/Baidoa]""",...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
8,9,"ObjectId(""5d056c5bb5f3a01b25e85fdb"")","2015-01-02T00:00:00.000Z""""",Armed conflicts and attacks,"[/wiki/Islamist_insurgency_in_Nigeria""]""",Boko Haram militants attack a bus in Waza,Cameroon,killing eleven people and injuring six. (CNN ...,"[/wiki/Boko_Haram""",/wiki/Waza_National_Park,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
9,11,"ObjectId(""5d056c5bb5f3a01b25e85fdd"")","2015-01-02T00:00:00.000Z""""",Disasters and accidents,FALSE,The MS Norman Atlantic ferry fire death toll r...,"[MS Norman Atlantic""]""",FALSE,[http://www.independent.ie/world-news/europe/i...,,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE


In [27]:
df1=df[['event_summary','Nounphrases0','Nounphrases1','Nounphrases2','Nounphrases3','Nounphrases4','Nounphrases5','Nounphrases6','Nounphrases7','Nounphrases8','Nounphrases9','Nounphrases10','Nounphrases11','Nounphrases12','Nounphrases13','Nounphrases14','Nounphrases15','Nounphrases16','Nounphrases17','Nounphrases18','Nounphrases19']]

In [28]:
df1=df1.drop(df.index[range(100,9487)])

In [29]:
df1

Unnamed: 0,event_summary,Nounphrases0,Nounphrases1,Nounphrases2,Nounphrases3,Nounphrases4,Nounphrases5,Nounphrases6,Nounphrases7,Nounphrases8,...,Nounphrases10,Nounphrases11,Nounphrases12,Nounphrases13,Nounphrases14,Nounphrases15,Nounphrases16,Nounphrases17,Nounphrases18,Nounphrases19
0,The death toll of the suicide bombing in Ibb,"['The', 'death', 'toll', 'of', 'the', 'suicide...","['The', 'death']","['toll', 'of', 'the', 'suicide', 'bombing', 'i...",['toll'],"['the', 'suicide', 'bombing', 'in', 'Ibb']","['the', 'suicide', 'bombing']",['Ibb'],,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
1,A shooting kills one and injures six people in...,"['A', 'shooting']",['one'],"['six', 'people', 'in', 'Killarney']","['six', 'people']",['Killarney'],,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
2,Lithuania adopts the euro as its official curr...,['Lithuania'],"['the', 'euro', 'as', 'its', 'official', 'curr...","['the', 'euro']","['its', 'official', 'currency']",,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
3,U.S. fast-food restaurant chain Chick-fil-A wa...,"['U.S.', 'fast-food', 'restaurant', 'chain', '...","['U.S.', 'fast-food', 'restaurant', 'chain']",['Chick-fil-A'],"['a', 'security', 'breach']","['credit', 'card', 'details', 'of', '9']","['credit', 'card', 'details']",['9'],,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
4,The Palestine Authority signs a treaty to join...,"['The', 'Palestine', 'Authority', 'signs', 'a'...","['The', 'Palestine', 'Authority', 'signs', 'a'...","['The', 'Palestine', 'Authority', 'signs']","['a', 'treaty']","['the', 'International', 'Criminal', 'Court']",,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
5,The Eurasian Economic Union between Russia,"['The', 'Eurasian', 'Economic', 'Union']",['Russia'],,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
6,Vietnam’s new marriage law goes into effect,"['Vietnam', ""'s"", 'new', 'marriage', 'law']","['Vietnam', ""'s""]",['effect'],,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
7,Somali al-Shabaab militants attack an army bas...,"['Somali', 'al-Shabaab', 'militants']","['an', 'army', 'base']","['the', 'outskirts', 'of', 'Baidoa']","['the', 'outskirts']",['Baidoa'],,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
8,Boko Haram militants attack a bus in Waza,"['Boko', 'Haram', 'militants']","['a', 'bus', 'in', 'Waza']","['a', 'bus']",['Waza'],,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
9,The MS Norman Atlantic ferry fire death toll r...,"['The', 'MS', 'Norman', 'Atlantic', 'ferry', '...","['The', 'MS', 'Norman', 'Atlantic', 'ferry']","['fire', 'death', 'toll']","['at', 'least', 'eleven', 'with', 'as', 'many'...","['at', 'least', 'eleven']","['as', 'many', 'as', '19', 'people', 'still', ...","['as', 'many', 'as', '19', 'people']",,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE


In [30]:

for i in range(0,20):
    df1['Nounphrases'+ str(i)] = df1['Nounphrases'+ str(i)].str.strip('[]').astype(str)
    df1['Nounphrases'+ str(i)] = df1['Nounphrases'+ str(i)].str.replace(',','').astype(str)
    df1['Nounphrases'+ str(i)] = df1['Nounphrases'+ str(i)].str.replace('\'','').astype(str)
    df1['Nounphrases'+ str(i)] = df1['Nounphrases'+ str(i)].str.replace('\"','').astype(str)
    df1['Nounphrases'+ str(i)] = df1['Nounphrases'+ str(i)].str.replace('.','').astype(str)
    



In [31]:
for i in range(0,20):
    print("'"+'Nounphrases'+ str(i)+"'"+" "+'=>'+'$row['+ str(i+1)+"]"+","+"\n")

'Nounphrases0' =>$row[1],

'Nounphrases1' =>$row[2],

'Nounphrases2' =>$row[3],

'Nounphrases3' =>$row[4],

'Nounphrases4' =>$row[5],

'Nounphrases5' =>$row[6],

'Nounphrases6' =>$row[7],

'Nounphrases7' =>$row[8],

'Nounphrases8' =>$row[9],

'Nounphrases9' =>$row[10],

'Nounphrases10' =>$row[11],

'Nounphrases11' =>$row[12],

'Nounphrases12' =>$row[13],

'Nounphrases13' =>$row[14],

'Nounphrases14' =>$row[15],

'Nounphrases15' =>$row[16],

'Nounphrases16' =>$row[17],

'Nounphrases17' =>$row[18],

'Nounphrases18' =>$row[19],

'Nounphrases19' =>$row[20],



In [32]:
df1

Unnamed: 0,event_summary,Nounphrases0,Nounphrases1,Nounphrases2,Nounphrases3,Nounphrases4,Nounphrases5,Nounphrases6,Nounphrases7,Nounphrases8,...,Nounphrases10,Nounphrases11,Nounphrases12,Nounphrases13,Nounphrases14,Nounphrases15,Nounphrases16,Nounphrases17,Nounphrases18,Nounphrases19
0,The death toll of the suicide bombing in Ibb,The death toll of the suicide bombing in Ibb,The death,toll of the suicide bombing in Ibb,toll,the suicide bombing in Ibb,the suicide bombing,Ibb,,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
1,A shooting kills one and injures six people in...,A shooting,one,six people in Killarney,six people,Killarney,,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
2,Lithuania adopts the euro as its official curr...,Lithuania,the euro as its official currency,the euro,its official currency,,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
3,U.S. fast-food restaurant chain Chick-fil-A wa...,US fast-food restaurant chain Chick-fil-A,US fast-food restaurant chain,Chick-fil-A,a security breach,credit card details of 9,credit card details,9,,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
4,The Palestine Authority signs a treaty to join...,The Palestine Authority signs a treaty to join...,The Palestine Authority signs a treaty,The Palestine Authority signs,a treaty,the International Criminal Court,,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
5,The Eurasian Economic Union between Russia,The Eurasian Economic Union,Russia,,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
6,Vietnam’s new marriage law goes into effect,Vietnam s new marriage law,Vietnam s,effect,,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
7,Somali al-Shabaab militants attack an army bas...,Somali al-Shabaab militants,an army base,the outskirts of Baidoa,the outskirts,Baidoa,,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
8,Boko Haram militants attack a bus in Waza,Boko Haram militants,a bus in Waza,a bus,Waza,,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE
9,The MS Norman Atlantic ferry fire death toll r...,The MS Norman Atlantic ferry fire death toll,The MS Norman Atlantic ferry,fire death toll,at least eleven with as many as 19 people stil...,at least eleven,as many as 19 people still unaccounted,as many as 19 people,,NULL VALUE,...,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE,NULL VALUE


In [33]:
# export_csv = df1.to_csv (r'C:\Users\gulja\Python 3.7.3\Scripts\df_small.csv', index = None, header=True, sep=';') #Don't forget to add '.csv' at the end of the path


In [34]:
for i in range(0, 10):
        text = df['event_summary'][i]
        print(text)
        print ("Dep Parse:", sNLP.dependency_parse(text))
        sentence=sNLP.parse(text)
#         print(text.print_dependencies())
#         parser = CoreNLPParser()
#         next(parser.raw_parse(text)).pretty_print()
        
#         sdp =  StanfordDependencyParser()
#         next(sdp.raw_parse(text)).pretty_print()
#         result = list(sdp.raw_parse(text))

#         dep_tree_dot_repr = [parse for parse in result][0].to_dot()
#         source = Source(dep_tree_dot_repr, filename="dep_tree", format="png")
#         source.view()

        
        print('\n\n .......................................................................... \n\n')
        
#         https://nlp.stanford.edu/software/dependencies_manual.pdf

# det: determiner
# A determiner is the relation between the head of an NP and its determiner.
# “The man is here” det(man, the)


The death toll of the suicide bombing in Ibb
Dep Parse: [('ROOT', 0, 3), ('det', 3, 1), ('compound', 3, 2), ('case', 7, 4), ('det', 7, 5), ('compound', 7, 6), ('nmod', 3, 7), ('case', 9, 8), ('nmod', 7, 9)]


 .......................................................................... 


A shooting kills one and injures six people in Killarney
Dep Parse: [('ROOT', 0, 3), ('det', 2, 1), ('nsubj', 3, 2), ('dobj', 3, 4), ('cc', 3, 5), ('conj', 3, 6), ('nummod', 8, 7), ('dobj', 6, 8), ('case', 10, 9), ('nmod', 6, 10)]


 .......................................................................... 


Lithuania adopts the euro as its official currency
Dep Parse: [('ROOT', 0, 2), ('nsubj', 2, 1), ('det', 4, 3), ('dobj', 2, 4), ('case', 8, 5), ('nmod:poss', 8, 6), ('amod', 8, 7), ('nmod', 2, 8)]


 .......................................................................... 


U.S. fast-food restaurant chain Chick-fil-A warns that a security breach may have leaked credit card details of 9
Dep Parse

In [35]:
sentence="Boko Haram militants attack a bus in Waza"

In [36]:
nlp = StanfordNLP()
print (nlp.annotate(sentence))


{'sentences': [{'index': 0, 'parse': '(ROOT\r\n  (S\r\n    (NP (NNP Boko) (NNP Haram) (NNS militants))\r\n    (VP (VBP attack)\r\n      (NP\r\n        (NP (DT a) (NN bus))\r\n        (PP (IN in)\r\n          (NP (NNP Waza)))))))', 'basicDependencies': [{'dep': 'ROOT', 'governor': 0, 'governorGloss': 'ROOT', 'dependent': 4, 'dependentGloss': 'attack'}, {'dep': 'compound', 'governor': 3, 'governorGloss': 'militants', 'dependent': 1, 'dependentGloss': 'Boko'}, {'dep': 'compound', 'governor': 3, 'governorGloss': 'militants', 'dependent': 2, 'dependentGloss': 'Haram'}, {'dep': 'nsubj', 'governor': 4, 'governorGloss': 'attack', 'dependent': 3, 'dependentGloss': 'militants'}, {'dep': 'det', 'governor': 6, 'governorGloss': 'bus', 'dependent': 5, 'dependentGloss': 'a'}, {'dep': 'dobj', 'governor': 4, 'governorGloss': 'attack', 'dependent': 6, 'dependentGloss': 'bus'}, {'dep': 'case', 'governor': 8, 'governorGloss': 'Waza', 'dependent': 7, 'dependentGloss': 'in'}, {'dep': 'nmod', 'governor': 4, 

In [37]:
StanfordNLP()

<__main__.StanfordNLP at 0x1f972d6a828>

In [38]:
import nltk
import re
from pycorenlp import *

nlp = StanfordCoreNLP("http://localhost:9000/")

In [39]:
text = 'Mark Robert is the founder of 3trucks. 3trucks was founded in 2010'

In [40]:
output = nlp.annotate(text, properties={
'annotators': 'tokenize,ssplit,pos,depparse,parse',
"timeout": "50000",
'outputFormat': 'json'

 })

In [41]:
print(output['sentences'][0]['parse'])


(ROOT
  (S
    (NP (NNP Mark) (NNP Robert))
    (VP (VBZ is)
      (NP
        (NP (DT the) (NN founder))
        (PP (IN of)
          (NP (NNS 3trucks)))))
    (. .)))


In [42]:
output = nlp.annotate(text, properties={"annotators":"tokenize,ssplit,pos,depparse,natlog,openie",
                            "outputFormat": "json",
                             "openie.triple.strict":"true",
                             "openie.max_entailments_per_clause":"1"})

In [43]:
result = [output["sentences"][0]["openie"] for item in output]
for i in result:
    for rel in i:
        relationSent=rel['subject'],rel['relation'],rel['object']
        print(relationSent)

('Mark Robert', 'is founder of', '3trucks')


In [44]:
import nltk
import re
from pycorenlp import *
nlp = StanfordCoreNLP("http://localhost:9000/")
df['Entity Relation']="NULL VALUE"

In [45]:
for k in range(0,50):
    text = df['event_summary'][k]
    print('sentence:'+text+ '\n')
    output = nlp.annotate(text, properties={"annotators":"tokenize,ssplit,pos,depparse,natlog,openie",
                            "outputFormat": "json",
                             "openie.triple.strict":"true",
                             "openie.max_entailments_per_clause":"1"})
    result = [output["sentences"][0]["openie"] for item in output]
    for i in result:
        for rel in i:
            relationSent=rel['subject'],rel['relation'],rel['object']
            print(relationSent )
            print('\n')
            df['Entity Relation'][k]= relationSent

sentence:The death toll of the suicide bombing in Ibb

('suicide bombing', 'is in', 'Ibb')


sentence:A shooting kills one and injures six people in Killarney

sentence:Lithuania adopts the euro as its official currency



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


('Lithuania', 'adopts euro as', 'its official currency')


sentence:U.S. fast-food restaurant chain Chick-fil-A warns that a security breach may have leaked credit card details of 9

('security breach', 'leaked', 'credit card details of 9')


sentence:The Palestine Authority signs a treaty to join and participate in the International Criminal Court. (Wall Street Journal)

sentence:The Eurasian Economic Union between Russia

sentence:Vietnam’s new marriage law goes into effect

('Vietnam', 'has', 'new marriage law')


("Vietnam 's new marriage law", 'goes into', 'effect')


sentence:Somali al-Shabaab militants attack an army base on the outskirts of Baidoa

('Somali al-Shabaab militants', 'attack', 'army base on outskirts of Baidoa')


sentence:Boko Haram militants attack a bus in Waza

('Boko Haram militants', 'attack bus in', 'Waza')


sentence:The MS Norman Atlantic ferry fire death toll rises to at least eleven with as many as 19 people still unaccounted for. (The Independent)

('MS

In [46]:
df['Entity Relation']

0                           (suicide bombing, is in, Ibb)
1                                              NULL VALUE
2       (Lithuania, adopts euro as, its official curre...
3       (security breach, leaked, credit card details ...
4                                              NULL VALUE
5                                              NULL VALUE
6        (Vietnam 's new marriage law, goes into, effect)
7       (Somali al-Shabaab militants, attack, army bas...
8             (Boko Haram militants, attack bus in, Waza)
9       (eleven, is with, as many as 19 people still u...
10      (their alleged involvement, is in, hacking att...
11                                             NULL VALUE
12                       (Syriac church, is in, Istanbul)
13                                             NULL VALUE
14                                             NULL VALUE
15      (eight Uzbek people, be, linked with al-Qaeda ...
16       (vehicle, carrying, United Nations peacekeepers)
17      (local