# Sentiment Analysis

In [1]:
import json
import logging
from collections import defaultdict

from stanfordcorenlp import StanfordCoreNLP

## Start server

In [None]:
# Please first run from the command line: pip install stanfordcorenlp
import os

from stanfordcorenlp import StanfordCoreNLP

# Directory of the unpacked stanford-corenlp-full-2018-10-05 distribution.
# Set the CORENLP_HOME environment variable to point at your own copy; the
# original location is kept as the fallback so existing setups keep working.
CORENLP_HOME = os.environ.get(
    'CORENLP_HOME',
    r'/media/carpediemmlf/MyPassport/Data/stanfordcorenlp/stanford-corenlp-full-2018-10-05',
)

# Start CoreNLP from that directory with the English pipeline; quiet = True is passed through unchanged.
nlp = StanfordCoreNLP(CORENLP_HOME, lang='en', quiet = True)

## Close Server

In [None]:
# Run this only once you are finished with `nlp`; the usage example still calls it.
nlp.close() # Do not forget to close! The backend server will consume a lot of memory.

## Usage Example

In [6]:
# Simple usage: run every basic analysis on one sample sentence.
sentence = 'Guangdong University of Foreign Studies is located in Guangzhou.'

# (label, analysis) pairs, listed in the order they should be printed.
demos = (
    ('Tokenize:', nlp.word_tokenize),
    ('Part of Speech:', nlp.pos_tag),
    ('Named Entities:', nlp.ner),
    ('Constituency Parsing:', nlp.parse),
    ('Dependency Parsing:', nlp.dependency_parse),
)

for label, analyse in demos:
    print(label, analyse(sentence))



INFO:root:Initializing native server...
INFO:root:java -Xmx4g -cp "/media/carpediemmlf/MyPassport/Data/stanfordcorenlp/stanford-corenlp-full-2018-10-05/*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001
INFO:root:Server shell PID: 25600
INFO:root:Waiting until the server is available.
INFO:root:Waiting until the server is available.
INFO:root:The server is available.
INFO:root:{'properties': "{'annotators': 'ssplit,tokenize', 'outputFormat': 'json'}", 'pipelineLanguage': 'en'}
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost:9001
DEBUG:urllib3.connectionpool:http://localhost:9001 "POST /?properties=%7B%27annotators%27%3A+%27ssplit%2Ctokenize%27%2C+%27outputFormat%27%3A+%27json%27%7D&pipelineLanguage=en HTTP/1.1" 200 2357
INFO:root:{'properties': "{'annotators': 'pos', 'outputFormat': 'json'}", 'pipelineLanguage': 'en'}
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost:9001


Tokenize: ['Guangdong', 'University', 'of', 'Foreign', 'Studies', 'is', 'located', 'in', 'Guangzhou', '.']


DEBUG:urllib3.connectionpool:http://localhost:9001 "POST /?properties=%7B%27annotators%27%3A+%27pos%27%2C+%27outputFormat%27%3A+%27json%27%7D&pipelineLanguage=en HTTP/1.1" 200 2593
INFO:root:{'properties': "{'annotators': 'ner', 'outputFormat': 'json'}", 'pipelineLanguage': 'en'}
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost:9001


Part of Speech: [('Guangdong', 'NNP'), ('University', 'NNP'), ('of', 'IN'), ('Foreign', 'NNP'), ('Studies', 'NNPS'), ('is', 'VBZ'), ('located', 'JJ'), ('in', 'IN'), ('Guangzhou', 'NNP'), ('.', '.')]


DEBUG:urllib3.connectionpool:http://localhost:9001 "POST /?properties=%7B%27annotators%27%3A+%27ner%27%2C+%27outputFormat%27%3A+%27json%27%7D&pipelineLanguage=en HTTP/1.1" 200 3748
INFO:root:{'properties': "{'annotators': 'pos,parse', 'outputFormat': 'json'}", 'pipelineLanguage': 'en'}
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost:9001


Named Entities: [('Guangdong', 'ORGANIZATION'), ('University', 'ORGANIZATION'), ('of', 'ORGANIZATION'), ('Foreign', 'ORGANIZATION'), ('Studies', 'ORGANIZATION'), ('is', 'O'), ('located', 'O'), ('in', 'O'), ('Guangzhou', 'CITY'), ('.', 'O')]


DEBUG:urllib3.connectionpool:http://localhost:9001 "POST /?properties=%7B%27annotators%27%3A+%27pos%2Cparse%27%2C+%27outputFormat%27%3A+%27json%27%7D&pipelineLanguage=en HTTP/1.1" 200 8182
INFO:root:{'properties': "{'annotators': 'depparse', 'outputFormat': 'json'}", 'pipelineLanguage': 'en'}
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost:9001


Constituency Parsing: (ROOT
  (S
    (NP
      (NP (NNP Guangdong) (NNP University))
      (PP (IN of)
        (NP (NNP Foreign) (NNPS Studies))))
    (VP (VBZ is)
      (VP (JJ located)
        (PP (IN in)
          (NP (NNP Guangzhou)))))
    (. .)))


DEBUG:urllib3.connectionpool:http://localhost:9001 "POST /?properties=%7B%27annotators%27%3A+%27depparse%27%2C+%27outputFormat%27%3A+%27json%27%7D&pipelineLanguage=en HTTP/1.1" 200 7947
INFO:root:Cleanup...
INFO:root:Killing pid: 25601, cmdline: ['java', '-Xmx4g', '-cp', '/media/carpediemmlf/MyPassport/Data/stanfordcorenlp/stanford-corenlp-full-2018-10-05/*', 'edu.stanford.nlp.pipeline.StanfordCoreNLPServer', '-port', '9001']
INFO:root:Killing shell pid: 25600, cmdline: ['/bin/sh', '-c', 'java -Xmx4g -cp "/media/carpediemmlf/MyPassport/Data/stanfordcorenlp/stanford-corenlp-full-2018-10-05/*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001']


Dependency Parsing: [('ROOT', 0, 7), ('compound', 2, 1), ('nsubjpass', 7, 2), ('case', 5, 3), ('compound', 5, 4), ('nmod', 2, 5), ('auxpass', 7, 6), ('case', 9, 8), ('nmod', 7, 9), ('punct', 7, 10)]


In [2]:
class StanfordNLP:
    """Convenience wrapper around a ``StanfordCoreNLP`` client.

    The single-purpose methods (``word_tokenize``, ``pos``, ``ner``, ``parse``,
    ``dependency_parse``) forward straight to the client; ``annotate`` runs the
    annotator list stored in ``self.props`` and returns the decoded JSON reply.
    """

    # Annotators requested by ``annotate``. 'dcoref' is the CoreNLP annotator
    # name; the previous spelling 'decoref' was a typo.
    DEFAULT_ANNOTATORS = 'tokenize,ssplit,pos,lemma,ner,parse,depparse,dcoref,relation'

    def __init__(self, host = 'http://localhost', port = 9000):
        """Create the client for the CoreNLP server at ``host``:``port``.

        NOTE(review): in the recorded run the client logged "Using an existing
        server" and then waited for it to become available, so a server
        presumably has to be reachable at this address before constructing
        the wrapper -- confirm against the stanfordcorenlp documentation.
        """
        # timeout is passed through as given -- presumably milliseconds, TODO confirm.
        self.nlp = StanfordCoreNLP(host, port = port,
                                   timeout = 30000, quiet = False,
                                   logging_level = logging.DEBUG)
        # Request properties sent with every ``annotate`` call.
        self.props = {
            'annotators': self.DEFAULT_ANNOTATORS,
            'pipelineLanguage': 'en',
            'outputFormat': 'json'
        }

    def word_tokenize(self, sentence):
        """Return the client's tokenization of ``sentence``."""
        return self.nlp.word_tokenize(sentence)

    def pos(self, sentence):
        """Return the client's part-of-speech tagging of ``sentence``."""
        return self.nlp.pos_tag(sentence)

    def ner(self, sentence):
        """Return the client's named-entity tagging of ``sentence``."""
        return self.nlp.ner(sentence)

    def parse(self, sentence):
        """Return the client's constituency parse of ``sentence``."""
        return self.nlp.parse(sentence)

    def dependency_parse(self, sentence):
        """Return the client's dependency parse of ``sentence``."""
        return self.nlp.dependency_parse(sentence)

    def annotate(self, sentence):
        """Annotate ``sentence`` using ``self.props`` and return the decoded JSON.

        ``json.loads`` raises ``ValueError`` when the reply is not valid JSON.
        """
        return json.loads(self.nlp.annotate(sentence, properties = self.props))

    @staticmethod
    def tokens_to_dict(_tokens):
        """Index token records by their integer ``'index'`` field.

        Each record must provide ``'index'``, ``'word'``, ``'lemma'``, ``'pos'``
        and ``'ner'``; any other keys are dropped. Returns a ``defaultdict(dict)``
        mapping ``int(index)`` to a dict holding those four fields, so looking up
        a missing index yields (and stores) an empty dict.
        """
        tokens = defaultdict(dict)
        for token in _tokens:
            tokens[int(token['index'])] = {
                'word': token['word'],
                'lemma': token['lemma'],
                'pos': token['pos'],
                'ner': token['ner']
            }
        return tokens

# if __name__ == '__main__':
#     sNLP = StanfordNLP()
#     text = 'A blog post using Stanford CoreNLP Server. Visit www.khalidalnajjar.com for more details.'
#     print ("Annotate:", sNLP.annotate(text))
#     print ("POS:", sNLP.pos(text))
#     print ("Tokens:", sNLP.word_tokenize(text))
#     print ("NER:", sNLP.ner(text))
#     print ("Parse:", sNLP.parse(text))
#     print ("Dep Parse:", sNLP.dependency_parse(text))

# Connect with the wrapper's defaults (http://localhost:9000) and annotate a two-sentence sample.
# NOTE(review): the recorded run of this cell kept logging "Waiting until the server is available"
# until it was interrupted -- presumably nothing was answering on port 9000; confirm a CoreNLP
# server is running there before executing this cell.
sNLP = StanfordNLP()
text = 'A blog post using Stanford CoreNLP Server. Visit www.khalidalnajjar.com for more details.'
# Last expression of the cell, so the parsed annotation is what the notebook displays.
sNLP.annotate(text)

INFO:root:Using an existing server http://localhost:9000
INFO:root:Waiting until the server is available.
INFO:root:Waiting until the server is available.
INFO:root:Waiting until the server is available.
INFO:root:Waiting until the server is available.
INFO:root:Waiting until the server is available.
INFO:root:Waiting until the server is available.
INFO:root:Waiting until the server is available.


KeyboardInterrupt: 