# AWS AI Services APIs

## Install AWS Python SDK client

In [None]:
!python3 -m pip install --upgrade boto3
!python3 -m pip install amazon-textract-response-parser --upgrade
!python3 -m pip install amazon-textract-prettyprinter


## Import required packages

In [None]:
# Packages import
import boto3
import time
import json
import uuid
from urllib.request import urlopen

#Textract Libraries for parsing results
from trp import Document
from textractprettyprinter.t_pretty_print import Pretty_Print_Table_Format, Textract_Pretty_Print, get_string

# Multimedia Transcription Exercise

## Amazon Transcribe client

In [None]:
# Amazon Transcribe client; region and credentials come from the default boto3 configuration.
transcribe = boto3.client(service_name='transcribe')

## Specify an audio file to transcribe and generate a UUID as a job name

In [None]:
# A random UUID is used as the transcription job name.
job_name = f"{uuid.uuid4()}"

# S3 location of the audio file to transcribe.
media_uri = "s3://aimlenablement/transcribe/sample+audio.wav"

## Start the audio file transcription job

In [None]:
# Collect the job parameters first, then submit the transcription job.
# The call stays the last expression so the notebook displays the response.
job_request = {
    'TranscriptionJobName': job_name,
    'Media': {'MediaFileUri': media_uri},
    'MediaFormat': 'wav',
    'LanguageCode': 'en-US',
}
transcribe.start_transcription_job(**job_request)

## Verify the transcription job progress

In [None]:
# Poll the transcription job every 10 seconds until it is COMPLETED or FAILED,
# printing the status observed on each poll. `status` keeps the last response
# so the following cell can read the transcript location from it.
while True:
    status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
    job_status = status['TranscriptionJob']['TranscriptionJobStatus']
    print(f"Transcription Status: {job_status}")
    if job_status in ('COMPLETED', 'FAILED'):
        break
    time.sleep(10)

# Surface why the job failed instead of letting the next cell fail on a
# missing transcript; the Transcribe API reports this as `FailureReason`.
if job_status == 'FAILED':
    print(f"Failure Reason: {status['TranscriptionJob'].get('FailureReason', 'not provided')}")

## Get the audio transcription

In [None]:
# Download the transcript JSON from the URI reported by the completed job.
# The `with` block closes the HTTP response once the body has been read.
transcript_uri = status['TranscriptionJob']['Transcript']['TranscriptFileUri']
with urlopen(transcript_uri) as transcript_response:
    transcription_json = json.loads(transcript_response.read().decode('utf-8'))

# The transcript text is the first entry under results -> transcripts.
transcription = transcription_json['results']['transcripts'][0]['transcript']

print(f"Transcription: {transcription}")

# Natural Language Processing (NLP) with Amazon Comprehend Exercise

## Create an Amazon Comprehend client

In [None]:
# Amazon Comprehend client; region and credentials come from the default boto3 configuration.
comprehend = boto3.client(service_name='comprehend')

## Entities Detection

In [None]:
# Detect entities in the transcribed text and print each entity's type and text.
comprehendResponse = comprehend.detect_entities(
    Text=transcription,
    LanguageCode='en',
)

for entity in comprehendResponse["Entities"]:
    print(f"Entity Type:{entity['Type']} Entity Text:{entity['Text']}")

## Sentiment detection

In [None]:
# Run sentiment detection on the transcribed text.
comprehendResponse = comprehend.detect_sentiment(
    Text=transcription,
    LanguageCode='en',
)
# Last expression of the cell: the notebook displays the detected sentiment.
comprehendResponse['Sentiment']

## Key phrases detection

In [None]:
# Detect key phrases in the transcribed text and print each one.
comprehendResponse = comprehend.detect_key_phrases(
    Text=transcription,
    LanguageCode='en',
)

for key_phrase in comprehendResponse["KeyPhrases"]:
    print(f"Key Phrase:{key_phrase['Text']}")

# Intelligent Document Processing (IDP) with Amazon Textract Exercise

## Document to analyze

![Document to Analyze](sampledoc.png)

## Create an Amazon Textract client

In [None]:
# Amazon Textract client; region and credentials come from the default boto3 configuration.
textract = boto3.client(service_name='textract')

## Get the document file bytes as a bytearray

In [None]:
# Read the sample document from disk in binary mode and keep its raw bytes
# as a bytearray for the Textract request.
documentName = "sampledoc.png"
with open(documentName, mode='rb') as document:
    raw_content = document.read()
    imageBytes = bytearray(raw_content)

## Call Amazon Textract AnalyzeDocument synchronous API

In [None]:
# Send the document bytes to Textract AnalyzeDocument, requesting form
# (key-value) and table analysis.
response = textract.analyze_document(
    Document={'Bytes': imageBytes},
    FeatureTypes=["FORMS", "TABLES"],
)

## Use the Textract pretty printer tool to show the form values detected as key-value pairs

In [None]:
# Render the form key-value pairs from the Textract response as a fancy grid.
pretty_printed_string = get_string(
    textract_json=response,
    output_type=[Textract_Pretty_Print.FORMS],
    table_format=Pretty_Print_Table_Format.fancy_grid,
)
print(pretty_printed_string)

## Use the Textract pretty printer tool to show the tables detected

In [None]:
# Render the tables from the Textract response as a fancy grid.
pretty_printed_string = get_string(
    textract_json=response,
    output_type=[Textract_Pretty_Print.TABLES],
    table_format=Pretty_Print_Table_Format.fancy_grid,
)
print(pretty_printed_string)

## Use the Textract pretty printer tool to show the text detected

In [None]:
# Render the text lines from the Textract response.
pretty_printed_string = get_string(
    textract_json=response,
    output_type=[Textract_Pretty_Print.LINES],
    table_format=Pretty_Print_Table_Format.fancy_grid,
)
print(pretty_printed_string)

## Amazon Comprehend entities detection on document text detected by Amazon Textract

In [None]:
# Rebuild the document's line text here rather than reading
# `pretty_printed_string`, which the forms, tables and lines cells all
# overwrite. The entities then always come from the detected text lines,
# whichever of those cells was run last.
document_text = get_string(
    textract_json=response,
    output_type=[Textract_Pretty_Print.LINES],
    table_format=Pretty_Print_Table_Format.fancy_grid,
)
comprehendResponse = comprehend.detect_entities(Text=document_text, LanguageCode='en')

# Print the type and text of every entity Comprehend found in the document.
for entity in comprehendResponse["Entities"]:
    print(f"Entity Type:{entity['Type']} Entity Text:{entity['Text']}")