https://stackabuse.com/python-for-nlp-getting-started-with-the-stanfordcorenlp-library/ <br>
https://github.com/Lynten/stanford-corenlp

In [1]:
from stanfordcorenlp import StanfordCoreNLP
import logging
import json
import pandas as pd

In [2]:
class StanfordNLP:
    """Convenience wrapper around a ``StanfordCoreNLP`` client.

    Each method forwards a single piece of text to the corresponding call on
    the underlying client stored in ``self.nlp``.
    """

    def __init__(self, host='http://localhost', port=9000):
        """Create the underlying client and the default annotate() properties.

        Args:
            host: URL of the CoreNLP server.
            port: Port of the CoreNLP server.
        """
        # timeout is presumably in milliseconds -- TODO confirm against the client docs
        self.nlp = StanfordCoreNLP(host, port=port,
                                   timeout=30000)  # , quiet=False, logging_level=logging.DEBUG)
        # Properties sent with every annotate() request.
        self.props = {
            'annotators': 'tokenize,ssplit,pos,lemma,ner,parse,depparse,dcoref,relation',
            'pipelineLanguage': 'en',
            'outputFormat': 'json'
        }

    def word_tokenize(self, sentence):
        """Return the client's tokenization of ``sentence``."""
        return self.nlp.word_tokenize(sentence)

    def pos(self, sentence):
        """Return the client's part-of-speech tagging of ``sentence``."""
        return self.nlp.pos_tag(sentence)

    def ner(self, sentence):
        """Return the client's named-entity tagging of ``sentence``."""
        return self.nlp.ner(sentence)

    def parse(self, sentence):
        """Return the client's constituency parse of ``sentence``."""
        return self.nlp.parse(sentence)

    def dependency_parse(self, sentence):
        """Return the client's dependency parse of ``sentence``."""
        return self.nlp.dependency_parse(sentence)

    def annotate(self, sentence):
        """Annotate ``sentence`` with ``self.props`` and decode the JSON reply.

        Returns:
            The object produced by ``json.loads`` on the client's response.
        """
        return json.loads(self.nlp.annotate(sentence, properties=self.props))

    @staticmethod
    def tokens_to_dict(_tokens):
        """Index token records by their integer position.

        Args:
            _tokens: Iterable of mappings that each provide the keys
                ``'index'``, ``'word'``, ``'lemma'``, ``'pos'`` and ``'ner'``.

        Returns:
            A ``defaultdict(dict)`` mapping ``int(token['index'])`` to a dict
            holding only the ``word``, ``lemma``, ``pos`` and ``ner`` fields.
        """
        # Imported locally: the notebook's import cell does not provide
        # defaultdict, so the bare name used to raise NameError here.
        from collections import defaultdict

        tokens = defaultdict(dict)
        for token in _tokens:
            tokens[int(token['index'])] = {
                'word': token['word'],
                'lemma': token['lemma'],
                'pos': token['pos'],
                'ner': token['ner']
            }
        return tokens

In [3]:
# Load the news events and coerce the summary column to str so the NLP calls
# further down always receive text.
df = pd.read_csv('newsdataset1.csv')
# Bracket indexing is used for the assignment: attribute-style column access
# can collide with DataFrame attributes/methods and is discouraged for writes.
df['event_summary'] = df['event_summary'].astype(str)


In [4]:
# Peek at the summary stored under row label 0.
df.loc[0, 'event_summary']

'The death toll of the suicide bombing in Ibb'

In [5]:
if __name__ == '__main__':
    # Run the first event summary through each wrapper call and print the
    # results, separated by blank lines.
    sNLP = StanfordNLP()
    text = df['event_summary'][0]
    # Disabled, as before: ("Annotate:", sNLP.annotate) ahead of POS and
    # ("Dep Parse:", sNLP.dependency_parse) after Parse.
    for label, analyse in (
        ("POS:", sNLP.pos),
        ("Tokens:", sNLP.word_tokenize),
        ("NER:", sNLP.ner),
        ("Parse:", sNLP.parse),
    ):
        print('\n\n')
        print(label, analyse(text))
    print('\n\n')




POS: [('The', 'DT'), ('death', 'NN'), ('toll', 'NN'), ('of', 'IN'), ('the', 'DT'), ('suicide', 'NN'), ('bombing', 'NN'), ('in', 'IN'), ('Ibb', 'NNP')]



Tokens: ['The', 'death', 'toll', 'of', 'the', 'suicide', 'bombing', 'in', 'Ibb']



NER: [('The', 'O'), ('death', 'O'), ('toll', 'O'), ('of', 'O'), ('the', 'O'), ('suicide', 'CRIMINAL_CHARGE'), ('bombing', 'CRIMINAL_CHARGE'), ('in', 'O'), ('Ibb', 'O')]



Parse: (ROOT
  (NP
    (NP (DT The) (NN death))
    (NP
      (NP (NN toll))
      (PP (IN of)
        (NP
          (NP (DT the) (NN suicide) (NN bombing))
          (PP (IN in)
            (NP (NNP Ibb))))))))





https://www.khalidalnajjar.com/setup-use-stanford-corenlp-server-python/

## POS tags (Penn Treebank tag set)
CC Coordinating conjunction
CD Cardinal number
DT Determiner
EX Existential there
FW Foreign word
IN Preposition or subordinating conjunction
JJ Adjective
JJR Adjective, comparative
JJS Adjective, superlative
LS List item marker
MD Modal
NN Noun, singular or mass
NNS Noun, plural
NNP Proper noun, singular
NNPS Proper noun, plural
PDT Predeterminer
POS Possessive ending
PRP Personal pronoun
PRP\$ Possessive pronoun
RB Adverb
RBR Adverb, comparative
RBS Adverb, superlative
RP Particle
SYM Symbol
TO to
UH Interjection
VB Verb, base form
VBD Verb, past tense
VBG Verb, gerund or present participle
VBN Verb, past participle
VBP Verb, non 3rd person singular present
VBZ Verb, 3rd person singular present
WDT Wh determiner
WP Wh pronoun
WP$ Possessive wh pronoun
WRB Wh adverb

In [9]:
from stanfordnlp.server import CoreNLPClient
from nltk.tree import Tree
# Tokenize, tag and constituency-parse `text`, then draw the parse tree of the
# first sentence with NLTK.
with CoreNLPClient(annotators=['tokenize', 'ssplit', 'pos', 'parse'],
                   timeout=30000,
                   output_format="json",
                   # The tagger and parser models below are the English ones and
                   # the input text is English, so the tokenizer language is set
                   # to 'en' to match (it was 'fr').
                   properties={'tokenize.language': 'en',
                               'pos.model': 'edu/stanford/nlp/models/pos-tagger/english/english.tagger',
                               'parse.model': 'edu/stanford/nlp/models/lexparser/englishFactored.ser.gz'}) as client:
    ann = client.annotate(text)

# Bracketed parse string of the first sentence in the JSON response.
output = ann['sentences'][0]['parse']
parsetree = Tree.fromstring(output)
parsetree.pretty_print()

ModuleNotFoundError: No module named 'stanfordcorenlp.server'

In [10]:
# pycorenlp's client class has the same name as the stanfordcorenlp class
# imported in the first cell. Importing it unaliased rebinds the global
# `StanfordCoreNLP` that the StanfordNLP wrapper class looks up when it is
# instantiated, so it is imported under an alias here.
from pycorenlp import StanfordCoreNLP as PyCoreNLP

nlp_wrapper = PyCoreNLP('http://localhost:9000')

In [12]:
 
# Annotate the first event summary; the following cells read the `lemma`,
# `pos` and `ner` fields of every token from this result.
annot_doc = nlp_wrapper.annotate(df['event_summary'][0],
    properties={
        # Request every annotator whose output is read downstream explicitly
        # instead of relying on the server to add prerequisites.
        'annotators': 'tokenize,ssplit,pos,lemma,ner',
        'outputFormat': 'json',
        # Same timeout as the other CoreNLP calls in this notebook.
        'timeout': 30000,
    })
# NOTE(review): pycorenlp appears to hand back the raw response text when the
# server reply is not valid JSON (e.g. a timeout message) -- confirm. Failing
# here gives a clearer error than a later string-index TypeError.
if not isinstance(annot_doc, dict):
    raise RuntimeError("CoreNLP did not return JSON: {!r}".format(annot_doc))

In [13]:
# Show each token next to its lemma.
for sent in annot_doc["sentences"]:
    for tok in sent["tokens"]:
        print(" => ".join((tok["word"], tok["lemma"])))

The => the
death => death
toll => toll
of => of
the => the
suicide => suicide
bombing => bombing
in => in
Ibb => Ibb


In [14]:
# Show each token next to its part-of-speech tag.
for sent in annot_doc["sentences"]:
    for tok in sent["tokens"]:
        print("=>".join((tok["word"], tok["pos"])))

The=>DT
death=>NN
toll=>NN
of=>IN
the=>DT
suicide=>NN
bombing=>NN
in=>IN
Ibb=>NNP


In [15]:
# Show each token next to its named-entity label.
for sent in annot_doc["sentences"]:
    for tok in sent["tokens"]:
        print("=>".join((tok["word"], tok["ner"])))

The=>O
death=>O
toll=>O
of=>O
the=>O
suicide=>CRIMINAL_CHARGE
bombing=>CRIMINAL_CHARGE
in=>O
Ibb=>O
