# An Introduction to Amazon Transcribe and Amazon Comprehend
---------------------------------------------------------------------------------------------------


# Part I. Amazon Transcribe
------
## Setup
------

### Import the libraries and initialize the clients

In [None]:
import boto3

# Handles used by the Part I cells: an S3 resource for browsing buckets and
# a Transcribe client for managing transcription jobs.
s3 = boto3.resource('s3')
transcribe = boto3.client('transcribe')

### Print all buckets and find ours

In [None]:
# Show the name of every bucket returned by s3.buckets.all().
for bkt in s3.buckets.all():
    print(bkt.name)

In [None]:
# Name of the bucket that the following cells list and read from.
bucket_name = "nbtranscribe"

### List our bucket to find our .mp3 file

In [None]:
# Open the chosen bucket through a fresh S3 resource and print the key of
# each object it contains.
s3 = boto3.resource('s3')
your_bucket = s3.Bucket(bucket_name)
for obj in your_bucket.objects.all():
    print(obj.key)

### Initialize filename

In [None]:
# Key of the audio object used to build the media URL.
file_name = 'AWS_This_Week_10_March_2019.mp3'

### Get the object url of our .mp3 file

In [None]:
# Build the HTTPS URL of the audio object from the bucket's region, the
# bucket name and the object key.
# Look up the region of the bucket named in `bucket_name` (previously the
# bucket name was hard-coded here, so changing `bucket_name` had no effect).
bucket_location = boto3.client('s3').get_bucket_location(Bucket=bucket_name)
region = bucket_location['LocationConstraint']
if region:
    s3_host = "s3-{0}.amazonaws.com".format(region)
else:
    # S3 reports a null LocationConstraint for buckets in us-east-1; fall
    # back to the region-less endpoint instead of emitting "s3-None".
    s3_host = "s3.amazonaws.com"
object_url = "https://{0}/{1}/{2}".format(s3_host, bucket_name, file_name)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(object_url)

### [ optional ] List all transcription jobs

In [None]:
# Request the transcription job listing. The call is the only statement in
# the cell, so in a notebook its raw response is shown as the cell output.
transcribe.list_transcription_jobs()

# Alternative views of the same response (uncomment to try them).
#
# Only the list of job summaries:
# transcribe.list_transcription_jobs()['TranscriptionJobSummaries']
#
# Name and status of each job, one per line:
# for element in transcribe.list_transcription_jobs()['TranscriptionJobSummaries']:
#     print(element['TranscriptionJobName'])
#     print(element['TranscriptionJobStatus'])

### Start new transcription job with our .mp3 file

In [None]:
# Submit the MP3 at `object_url` for US-English transcription. The job name
# is kept in `JobName` because the result cells look the job up by it.
JobName = "08_Apr19"
transcribe.start_transcription_job(
    TranscriptionJobName=JobName,
    LanguageCode="en-US",
    MediaFormat="mp3",
    Media={"MediaFileUri": object_url}
)

In [None]:
# Print one "<job name> ====> <status>" line per job summary returned by
# Transcribe, to check on the job started above.
for element in transcribe.list_transcription_jobs()['TranscriptionJobSummaries']:
    # print() as a function call: valid on both Python 2 and Python 3 and
    # consistent with the Comprehend cells further down.
    print(element['TranscriptionJobName'] + " ====> " + element['TranscriptionJobStatus'])

### Get the results

In [None]:
import json

# Fetch the description of the job named in `JobName`. Despite its name,
# `response_url` holds the whole response dict; the transcript URI is
# extracted from it in a later cell.
response_url = transcribe.get_transcription_job(TranscriptionJobName=JobName)

# Display the stored response rather than calling the API a second time, so
# what is shown is exactly what the following cells will read.
response_url

### Extract the transcription URL from the JSON response

In [None]:
# Pull the transcript file URI out of the job description.
# NOTE: json.dumps() returns the URI wrapped in double quotes, and `url`
# deliberately keeps that quoted form because consumers slice it with
# url[1:-1] to drop the quotes.
url = json.dumps(response_url['TranscriptionJob']['Transcript']['TranscriptFileUri'])
# print() as a function call works on both Python 2 and Python 3.
print(url[1:-1])

In [None]:
import requests

# Download the transcript document. `url` is the JSON-quoted URI, so the
# surrounding double quotes are sliced off before the request.
resp = requests.get(url[1:-1])
# Stop on an HTTP error status instead of trying to parse an error body as
# the transcript JSON.
resp.raise_for_status()
data = resp.json()

In [None]:
# Show the transcript entries from the downloaded result document.
# print() as a function call works on both Python 2 and Python 3.
print(data['results']['transcripts'])

# Part II. Amazon Comprehend
------
## Setup
------

### Import the libraries and initialize the clients

In [None]:
import json

import boto3

# Comprehend client bound to the eu-west-1 region; every example cell in
# Part II calls the service through it.
comprehend = boto3.client(
    region_name='eu-west-1',
    service_name='comprehend',
)

### Example I - Detect Sentiment

In [None]:
# Run sentiment detection on a single English sentence and pretty-print the
# response as key-sorted, indented JSON between two marker lines.
text = "It is raining today in Seattle"

print("Calling DetectSentiment")
print(
    json.dumps(
        comprehend.detect_sentiment(LanguageCode="en", Text=text),
        indent=4,
        sort_keys=True,
    )
)
print("End of DetectSentiment\n")

In [None]:

# Three sample sentences for sentiment detection.
text_1 = "Oh my God, I have so much work for today and there is this boring presentation too..."

text_2 = "I love you all!"

text_3 = "Well, this is boring but at least I'll learn something"

# Analyze every sample instead of only text_1 (text_2 and text_3 were
# defined but never sent to the service). Each sentence is echoed first so
# its result can be told apart from the others.
for sample in (text_1, text_2, text_3):
    print(sample)
    print(json.dumps(comprehend.detect_sentiment(Text=sample, LanguageCode='en'), sort_keys=True, indent=4))

### Example II - Detect the dominant language of the text

In [None]:
# Ask Comprehend which language the sentence is written in and print the
# 'Languages' part of the response as a JSON string.
text = 'Fumatul poata sa ucida'

print("Calling DetectDominantLanguage")
data = json.dumps(
    comprehend.detect_dominant_language(Text=text)["Languages"]
)
print(data)
print('End of DetectDominantLanguage\n')

### Example III - Detecting Named Entities 

In [None]:
# Run entity detection on one English sentence and pretty-print the
# response as key-sorted, indented JSON between two marker lines.
text = 'Norbert, could you please finish this prezentation untill tomorrow?'

print("Calling DetectEntities")
print(
    json.dumps(
        comprehend.detect_entities(LanguageCode="en", Text=text),
        indent=4,
        sort_keys=True,
    )
)
print("End of DetectEntities\n")

### Example IV - Detecting Key Phrases

In [None]:
# Run key-phrase detection on a two-paragraph English snippet and
# pretty-print the response as key-sorted, indented JSON.
text = '''Barcelona's talisman Leo Messi, 31, is at the top of the Catalans wage structure pocketing a whopping £500,000-a-week.

And despite the Argentine being tied down at Nou Camp until 2021'''

print("Calling DetectKeyPhrases")
print(
    json.dumps(
        comprehend.detect_key_phrases(LanguageCode="en", Text=text),
        indent=4,
        sort_keys=True,
    )
)
print("End of DetectKeyPhrases\n")

### Example V - Detecting Syntax

In [None]:
# Run syntax detection on a short English sentence and pretty-print the
# response as key-sorted, indented JSON between two marker lines.
text = 'Yoda maste am I?'

print("Calling DetectSyntax")
print(
    json.dumps(
        comprehend.detect_syntax(LanguageCode="en", Text=text),
        indent=4,
        sort_keys=True,
    )
)
print("End of DetectSyntax\n")