This notebook demonstrates how to use the AWS Boto3 APIs to invoke the various Comprehend actions.

In [55]:
from pprint import pprint
import boto3

# Number of items each detection cell prints from its response list.
items_to_show = 3

# Load the sample document once; the detection cells below all reuse
# `sample_text`. The encoding is pinned so the file is decoded the same way on
# every platform instead of depending on the locale's default encoding.
with open('data/comprehend_sample.txt', encoding='utf-8') as sample_file:
    sample_text = sample_file.read()

# Comprehend client shared by the cells below. No explicit region or
# credentials are passed here, so boto3's default configuration is used.
comprehend_client = boto3.client('comprehend')

In [56]:
# Ask Comprehend which language the sample text is written in. The language
# code of the first entry in the returned 'Languages' list is kept in
# `lang_code`, which the later detection cells pass as LanguageCode.
print("detecting dominant language")
languages = comprehend_client.detect_dominant_language(Text=sample_text)
top_language = languages['Languages'][0]
lang_code = top_language['LanguageCode']
pprint(lang_code)

detecting dominant language
'en'


In [57]:
# Run entity detection on the sample text and preview only the first
# `items_to_show` entries of the response's 'Entities' list.
print("Detecting entities using the pre-trained model.")
entities = comprehend_client.detect_entities(
    Text=sample_text,
    LanguageCode=lang_code,
)
print(f"The first {items_to_show} are:")
first_entities = entities['Entities'][:items_to_show]
pprint(first_entities)

Detecting entities using the pre-trained model.
The first 3 are:
[{'BeginOffset': 6,
  'EndOffset': 15,
  'Score': 0.9995719790458679,
  'Text': 'Zhang Wei',
  'Type': 'PERSON'},
 {'BeginOffset': 22,
  'EndOffset': 26,
  'Score': 0.9990108013153076,
  'Text': 'John',
  'Type': 'PERSON'},
 {'BeginOffset': 33,
  'EndOffset': 67,
  'Score': 0.9993506073951721,
  'Text': 'AnyCompany Financial Services, LLC',
  'Type': 'ORGANIZATION'}]


In [58]:
# Run sentiment detection and print the overall label followed by the
# per-class scores, both taken from the same response.
print("Detecting sentiment in text")
sentiment = comprehend_client.detect_sentiment(
    Text=sample_text,
    LanguageCode=lang_code,
)
for field in ('Sentiment', 'SentimentScore'):
    pprint(sentiment[field])

Detecting sentiment in text
'NEUTRAL'
{'Mixed': 8.856321983330417e-06,
 'Negative': 0.012237872928380966,
 'Neutral': 0.9870284795761108,
 'Positive': 0.0007247643661685288}


In [59]:
# Run PII detection and preview the first `items_to_show` entries of the
# response's 'Entities' list.
# NOTE(review): the PII API may support fewer languages than the other
# detect_* calls; confirm the detected `lang_code` is accepted here.
print("Detecting pii entities in text")
pii = comprehend_client.detect_pii_entities(
    Text=sample_text,
    LanguageCode=lang_code,
)
pii_preview = pii['Entities'][:items_to_show]
pprint(pii_preview)

Detecting pii entities in text
[{'BeginOffset': 6,
  'EndOffset': 15,
  'Score': 0.9999125003814697,
  'Type': 'NAME'},
 {'BeginOffset': 22,
  'EndOffset': 26,
  'Score': 0.9998666048049927,
  'Type': 'NAME'},
 {'BeginOffset': 88,
  'EndOffset': 107,
  'Score': 0.9999945163726807,
  'Type': 'CREDIT_DEBIT_NUMBER'}]


In [60]:
# Run key-phrase detection and preview the first `items_to_show` entries of
# the response's 'KeyPhrases' list.
# NOTE(review): the status message below is misspelled; it is kept unchanged
# so the cell still matches its recorded output.
print('Dectecting key phrases')
key_phrases = comprehend_client.detect_key_phrases(
    Text=sample_text,
    LanguageCode=lang_code,
)
key_phrase_preview = key_phrases['KeyPhrases'][:items_to_show]
pprint(key_phrase_preview)

Dectecting key phrases
[{'BeginOffset': 6,
  'EndOffset': 15,
  'Score': 0.9542659521102905,
  'Text': 'Zhang Wei'},
 {'BeginOffset': 22,
  'EndOffset': 26,
  'Score': 0.9990684390068054,
  'Text': 'John'},
 {'BeginOffset': 28,
  'EndOffset': 62,
  'Score': 0.9869424700737,
  'Text': 'Your AnyCompany Financial Services'}]


In [61]:
print('Detecting syntax')
syntax = comprehend_client.detect_syntax(
                Text=sample_text, LanguageCode=lang_code)
pprint(syntax['SyntaxTokens'][:items_to_show])

Detecting syntax
[{'BeginOffset': 0,
  'EndOffset': 5,
  'PartOfSpeech': {'Score': 0.9812782406806946, 'Tag': 'INTJ'},
  'Text': 'Hello',
  'TokenId': 1},
 {'BeginOffset': 6,
  'EndOffset': 11,
  'PartOfSpeech': {'Score': 0.9995248317718506, 'Tag': 'PROPN'},
  'Text': 'Zhang',
  'TokenId': 2},
 {'BeginOffset': 12,
  'EndOffset': 15,
  'PartOfSpeech': {'Score': 0.9987133741378784, 'Tag': 'PROPN'},
  'Text': 'Wei',
  'TokenId': 3}]
