# Comparison of Natural Language Understanding Commercial Services with Example Code

This notebook implements code from four different commercial NLP services in a typical workflow.  Each script should be run as a stand-alone implementation.

__Commercial references__

* [kontikilabs: very thorough with accompanying code](https://medium.com/kontikilabs/comparing-machine-learning-ml-services-from-various-cloud-ml-service-providers-63c8a2626cb6)
* [Google vs Watson](http://fredrikstenbeck.com/google-natural-language-vs-watson-natural-language-understanding/)
* [Watson internals](https://www.quora.com/What-do-AI-ML-and-NLP-researchers-think-of-IBM%E2%80%99s-Watson-Does-it-have-the-potential-to-make-a-huge-impact)
* [Google: categories](https://cloud.google.com/natural-language/docs/categories)
* [Watson: categories](https://console.bluemix.net/docs/services/natural-language-understanding/categories.html#categories-hierarchy)

### [IBM Watson](https://www.ibm.com/watson/developercloud/natural-language-understanding/api/v1/)

In [None]:
import json
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 \
  import Features, EntitiesOptions, KeywordsOptions
import time
from datetime import timedelta
import sys
import os
import argparse


# Read the Watson API user name, password and API version from local
# environment variables. os.environ.get() yields None for a variable that is
# not set.
NLP_USER_WATSON = os.environ.get("NLP_USER_WATSON")
NLP_PASS_WATSON = os.environ.get("NLP_PASS_WATSON")
NLP_VER_WATSON = os.environ.get("NLP_VER_WATSON")


# Redirect everything subsequently printed to stdout into
# 'nlp_api_output.txt'. The file is opened in append mode, so output from
# earlier runs is kept.
sys.stdout = open('nlp_api_output.txt', 'a')

# Monotonic reference point for the execution-time report printed at the end
# of the script.
start_time = time.monotonic()


def input_file(text_file_path):
    """Load the text to analyze into the module-level ``text`` variable.

    Args:
        text_file_path: Path of the text file to read.

    Raises:
        SystemExit: With exit status 1 when ``text_file_path`` is not an
            existing file. The usual message is still printed first.
    """
    global text
    if not os.path.isfile(text_file_path):
        print("File doesn't exist in the directory!")
        # Stop here: returning would leave ``text`` unbound, and the next
        # function that reads it would fail with an unrelated NameError.
        sys.exit(1)
    with open(text_file_path, 'r') as text_file:
        text = text_file.read()


def analyze_text():
    """Send the module-level ``text`` to Watson NLU and print the reply.

    Entities and keywords are requested, each with emotion and sentiment
    enabled. The response is printed as JSON indented by two spaces.
    """
    # Build the service client from the credentials read at module level.
    nlu_service = NaturalLanguageUnderstandingV1(
        username=NLP_USER_WATSON,
        password=NLP_PASS_WATSON,
        version=NLP_VER_WATSON,
    )

    analysis = nlu_service.analyze(
        text=text,
        features=Features(
            entities=EntitiesOptions(emotion=True, sentiment=True),
            keywords=KeywordsOptions(emotion=True, sentiment=True),
        ),
    )

    # JSON output after textual analysis.
    print(json.dumps(analysis, indent=2))


if __name__ == '__main__':
    # Script entry point: take the path of the text file from the command
    # line, load its contents, then run the Watson analysis on them.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument(
        'text_file_path',
        help='The complete file path of the text file you want to analyze.',
    )
    args = parser.parse_args()

    input_file(args.text_file_path)
    analyze_text()


# Report the time elapsed since start_time was recorded. These lines sit
# outside the __main__ guard, so they run whenever the module is executed.
end_time = time.monotonic()
print("Execution_Time:", timedelta(seconds=end_time - start_time))
print('\n')

### [Google Cloud Natural Language](https://cloud.google.com/natural-language/)

In [None]:
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types
import os
import time
from datetime import timedelta
import sys
import argparse


# The service-account credentials are expected in the
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
# NOTE(review): the value looked up below is discarded, so this statement has
# no effect on its own; presumably the Google client library reads the
# variable itself -- confirm.
os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")

# Redirect everything subsequently printed to stdout into
# 'nlp_api_content_output.txt'. Mode 'w' truncates the file on every run.
sys.stdout = open('nlp_api_content_output.txt', 'w')

# Monotonic reference point for the execution-time report printed at the end
# of the script.
start_time = time.monotonic()


def input_file(text_file_path):
    """Load the text to analyze into the module-level ``text`` variable.

    Args:
        text_file_path: Path of the text file to read.

    Raises:
        SystemExit: With exit status 1 when ``text_file_path`` is not an
            existing file. The usual message is still printed first.
    """
    global text
    if not os.path.isfile(text_file_path):
        print("File doesn't exist in the directory!")
        # Stop here: returning would leave ``text`` unbound, and the next
        # function that reads it would fail with an unrelated NameError.
        sys.exit(1)
    with open(text_file_path, 'r') as text_file:
        text = text_file.read()


def sentiment_text():
    """Print the document-level sentiment score and magnitude of ``text``."""
    service = language.LanguageServiceClient()

    # Wrap the module-level text as a plain-text document. HTML input would
    # use enums.Document.Type.HTML instead.
    plain_doc = types.Document(
        content=text,
        type=enums.Document.Type.PLAIN_TEXT,
    )

    overall = service.analyze_sentiment(plain_doc).document_sentiment

    print('Sentiment: {}, {}'.format(overall.score, overall.magnitude))
    print('\n')


def entities_text():
    """Detect entities in the module-level ``text`` and print each one.

    For every entity the name, type, metadata, salience and Wikipedia URL
    ('-' when the metadata has none) are printed.
    """
    client = language.LanguageServiceClient()

    # Instantiates a plain text document.
    document = types.Document(
        content=text,
        type=enums.Document.Type.PLAIN_TEXT)

    # Detects entities in the document. You can also analyze HTML with:
    #   document.type == enums.Document.Type.HTML
    entities = client.analyze_entities(document).entities

    # Names for the entity type codes this script knows about, indexed by
    # the numeric code from enums.Entity.Type.
    entity_type = ('UNKNOWN', 'PERSON', 'LOCATION', 'ORGANIZATION',
                   'EVENT', 'WORK_OF_ART', 'CONSUMER_GOOD', 'OTHER')

    for entity in entities:
        # A type code with no entry in the table above would raise
        # IndexError; print the raw code for such entities instead.
        if 0 <= entity.type < len(entity_type):
            type_label = entity_type[entity.type]
        else:
            type_label = entity.type

        print('=' * 20)
        print(u'{:<16}: {}'.format('name', entity.name))
        print(u'{:<16}: {}'.format('type', type_label))
        print(u'{:<16}: {}'.format('metadata', entity.metadata))
        print(u'{:<16}: {}'.format('salience', entity.salience))
        print(u'{:<16}: {}'.format('wikipedia_url',
              entity.metadata.get('wikipedia_url', '-')))
    print('\n')


def syntax_text():
    """Print the part-of-speech tag and content of every token in ``text``."""
    service = language.LanguageServiceClient()

    # Wrap the module-level text as a plain-text document. HTML input would
    # use enums.Document.Type.HTML instead.
    plain_doc = types.Document(
        content=text,
        type=enums.Document.Type.PLAIN_TEXT,
    )

    parsed_tokens = service.analyze_syntax(plain_doc).tokens

    # Tag names indexed by the numeric code from enums.PartOfSpeech.Tag.
    pos_tag = (
        'UNKNOWN', 'ADJ', 'ADP', 'ADV', 'CONJ', 'DET', 'NOUN',
        'NUM', 'PRON', 'PRT', 'PUNCT', 'VERB', 'X', 'AFFIX',
    )

    for tok in parsed_tokens:
        tag_name = pos_tag[tok.part_of_speech.tag]
        print(u'{}: {}'.format(tag_name, tok.text.content))
    print('\n')


def entity_sentiment_text():
    """Print per-entity and per-mention sentiment found in ``text``."""
    service = language.LanguageServiceClient()

    utf8_doc = types.Document(
        content=text.encode('utf-8'),
        type=enums.Document.Type.PLAIN_TEXT,
    )

    # Send the encoding that matches this interpreter's native string width
    # so the word offsets in the reply are correct: UTF16 on builds whose
    # sys.maxunicode is 65535, UTF32 otherwise.
    if sys.maxunicode == 65535:
        offset_encoding = enums.EncodingType.UTF16
    else:
        offset_encoding = enums.EncodingType.UTF32

    analysis = service.analyze_entity_sentiment(utf8_doc, offset_encoding)

    for found in analysis.entities:
        print('Mentions: ')
        print(u'Name: "{}"'.format(found.name))
        for occurrence in found.mentions:
            span = occurrence.text
            print(u'  Begin Offset : {}'.format(span.begin_offset))
            print(u'  Content : {}'.format(span.content))
            print(u'  Magnitude : {}'.format(occurrence.sentiment.magnitude))
            print(u'  Sentiment : {}'.format(occurrence.sentiment.score))
            print(u'  Type : {}'.format(occurrence.type))
        print(u'Salience: {}'.format(found.salience))
        print(u'Sentiment: {}\n'.format(found.sentiment))
    print('\n')


def classify_text():
    """Print the name and confidence of each content category of ``text``."""
    service = language.LanguageServiceClient()

    utf8_doc = types.Document(
        content=text.encode('utf-8'),
        type=enums.Document.Type.PLAIN_TEXT,
    )

    found_categories = service.classify_text(utf8_doc).categories

    for item in found_categories:
        print(u'=' * 20)
        print(u'{:<16}: {}'.format('name', item.name))
        print(u'{:<16}: {}'.format('confidence', item.confidence))
    print('\n')


if __name__ == '__main__':
    # Script entry point: take the path of the text file from the command
    # line, load its contents, then run every Google analysis on them.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument(
        'text_file_path',
        help='The complete file path of the text file you want to analyze.',
    )
    args = parser.parse_args()

    input_file(args.text_file_path)

    # Each call prints its own section of the report.
    sentiment_text()
    entities_text()
    syntax_text()
    entity_sentiment_text()
    classify_text()


# Report the time elapsed since start_time was recorded. These lines sit
# outside the __main__ guard, so they run whenever the module is executed.
end_time = time.monotonic()
print("Execution_Time:", timedelta(seconds=end_time - start_time))

### [Amazon Comprehend](https://aws.amazon.com/documentation/comprehend/)

In [None]:
import boto3
import time
from datetime import timedelta
import sys
import os
import argparse


# The AWS credentials and region are expected in the AWS_ACCESS_KEY_ID,
# AWS_SECRET_ACCESS_KEY and AWS_REGION environment variables.
# NOTE(review): the three lookups below discard their results, so they have
# no effect on their own; presumably boto3 picks its configuration up from
# the environment itself -- confirm which variable names it reads.
os.environ.get("AWS_ACCESS_KEY_ID")
os.environ.get("AWS_SECRET_ACCESS_KEY")
os.environ.get("AWS_REGION")


# Redirect everything subsequently printed to stdout into 'output.txt'.
# The file is opened in append mode, so output from earlier runs is kept.
sys.stdout = open('output.txt','a')

# Monotonic reference point for the execution-time report printed at the end
# of the script.
start_time = time.monotonic()


def input_file(text_file_path):
    """Load the text to analyze into the module-level ``text`` variable.

    Args:
        text_file_path: Path of the text file to read.

    Raises:
        SystemExit: With exit status 1 when ``text_file_path`` is not an
            existing file. The usual message is still printed first.
    """
    global text
    if not os.path.isfile(text_file_path):
        print("File doesn't exist in the directory!")
        # Stop here: returning would leave ``text`` unbound, and the next
        # function that reads it would fail with an unrelated NameError.
        sys.exit(1)
    with open(text_file_path, 'r') as text_file:
        text = text_file.read()


def dominant_language_text():
    """Print the code of the language Comprehend scores highest for ``text``.

    The service reply lists candidate languages, each with a 'LanguageCode'
    and a confidence 'Score'. The candidate with the highest score is
    reported.
    """
    # Initialize the Amazon Comprehend client.
    client_comprehend = boto3.client(
        'comprehend',
    )
    dominant_language_response = client_comprehend.detect_dominant_language(
        Text=text
    )
    # Select by confidence score. Ordering the candidates by language code,
    # as was done before, reports the alphabetically first code rather than
    # the dominant language.
    dominant = max(dominant_language_response['Languages'],
                   key=lambda candidate: candidate['Score'])
    # Print the dominant language.
    print("Language:", dominant['LanguageCode'])


def entities_text():
    """Print the distinct entity types Comprehend detects in ``text``."""
    # Create the Amazon Comprehend client.
    comprehend = boto3.client('comprehend')
    detected = comprehend.detect_entities(Text=text, LanguageCode='en')
    # Collapse duplicates: only the set of entity types is reported.
    entity_types = list({item['Type'] for item in detected['Entities']})
    print("Entities:", entity_types)


def key_phrases_text():
    """Print the distinct key phrases Comprehend detects in ``text``."""
    # Create the Amazon Comprehend client.
    comprehend = boto3.client('comprehend')
    detected = comprehend.detect_key_phrases(Text=text, LanguageCode='en')
    # Collapse duplicates: each phrase is reported once.
    unique_phrases = list({item['Text'] for item in detected['KeyPhrases']})
    print("Key Phrases:", unique_phrases)


def sentiment_text():
    """Print the overall sentiment label Comprehend assigns to ``text``."""
    # Create the Amazon Comprehend client.
    comprehend = boto3.client('comprehend')
    detected = comprehend.detect_sentiment(Text=text, LanguageCode='en')
    print("Sentiment Analysis:", detected['Sentiment'])


if __name__ == '__main__':
    # Script entry point: take the path of the text file from the command
    # line, load its contents, then run every Comprehend analysis on them.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument(
        'text_file_path',
        help='The complete file path of the text file you want to analyze.',
    )
    args = parser.parse_args()

    input_file(args.text_file_path)

    # Each call prints its own line of the report.
    dominant_language_text()
    entities_text()
    key_phrases_text()
    sentiment_text()


# Report the time elapsed since start_time was recorded. These lines sit
# outside the __main__ guard, so they run whenever the module is executed.
end_time = time.monotonic()
print("Execution_Time:", timedelta(seconds=end_time - start_time))

### [Microsoft Azure Text Analytics](https://azure.microsoft.com/en-us/resources/videos/learn-how-to-create-text-analytics-solutions-with-azure-machine-learning-templates/)

In [None]:
import requests
import os
import sys
import json
import time
from datetime import timedelta
import argparse


# Read the Text Analytics subscription key from the KEY_NLP environment
# variable. os.environ.get() yields None when the variable is not set.
Ocp_Apim_Subscription_Key = os.environ.get("KEY_NLP")


# Redirect everything subsequently printed to stdout into
# 'nlp_api_output.txt'. The file is opened in append mode, so output from
# earlier runs is kept.
sys.stdout = open('nlp_api_output.txt', 'a')

# Monotonic reference point for the execution-time report printed at the end
# of the script.
start_time = time.monotonic()


def input_file(text_file_path):
    """Load the text to analyze into the module-level ``text`` variable.

    Args:
        text_file_path: Path of the text file to read.

    Raises:
        SystemExit: With exit status 1 when ``text_file_path`` is not an
            existing file. The usual message is still printed first.
    """
    global text
    if not os.path.isfile(text_file_path):
        print("File doesn't exist in the directory!")
        # Stop here: returning would leave ``text`` unbound, and the next
        # function that reads it would fail with an unrelated NameError.
        sys.exit(1)
    with open(text_file_path, 'r') as text_file:
        text = text_file.read()


def analyze_text():
    """POST ``text`` to the Text Analytics endpoints and print each reply.

    The languages, sentiment and keyPhrases endpoints are called in that
    order with the same single-document payload. Any exception raised while
    posting or decoding a reply is caught and printed as an ``Error:`` line.
    """
    # NOTE: the key sent here is Ocp_Apim_Subscription_Key, which must hold
    # a valid subscription key.
    headers = {'Ocp-Apim-Subscription-Key': Ocp_Apim_Subscription_Key}

    # NOTE: You must use the same location in your REST call as you used to
    #   obtain your subscription keys. For example, if you obtained your
    #   subscription keys from westus, replace "eastus2" in the URLs below
    #   with "westus".
    urls = [
        'https://eastus2.api.cognitive.microsoft.com/text/analytics/v2.0/languages',
        'https://eastus2.api.cognitive.microsoft.com/text/analytics/v2.0/sentiment',
        'https://eastus2.api.cognitive.microsoft.com/text/analytics/v2.0/keyPhrases',
    ]

    documents = {
        'documents': [
            {'id': '1', 'language': 'en', 'text': text},
        ],
    }

    try:
        for endpoint in urls:
            reply = requests.post(
                url=endpoint,
                headers=headers,
                data=json.dumps(documents).encode('utf-8'),
            )
            print(reply.json())
        print('\n')
    except Exception as error:
        print('Error: ', error)


if __name__ == '__main__':
    # Script entry point: take the path of the text file from the command
    # line, load its contents, then run the Azure analysis on them.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument(
        'text_file_path',
        help='The complete file path of the text file you want to analyze.',
    )
    args = parser.parse_args()

    input_file(args.text_file_path)
    analyze_text()


# Report the time elapsed since start_time was recorded. These lines sit
# outside the __main__ guard, so they run whenever the module is executed.
end_time = time.monotonic()
print("Execution_Time:", timedelta(seconds=end_time - start_time))
print('\n')

END OF DOCUMENT