# AI Building Blocks using AWS APIs

This notebook provides code for some of the key AWS using the boto3 SDK:
   >- Speech to text - Amazon Transcribe<br>
   >- Text to Speech - Amazon Polly<br>
   >- Image Recognition - Amazon Rekognition<br>
    >- Object Detection<br>
    >- Object Detection<br>
        
 Note that valid AWS credentials are needed! 

In [1]:
import json
import uuid
import time
import requests
import ast

import boto3

## Authentication

In [2]:
with open('../credentials.json', 'r') as f:
    credentials = json.load(f)

session = boto3.Session(
    aws_access_key_id=credentials['aws']['access_key'],
    aws_secret_access_key=credentials['aws']['secret_access_key'],
    region_name = 'us-east-1'
)

In [3]:
bucket = 'pl-movify-hackathon'

## Speech to Text

Upload file to S3

In [3]:
s3 = session.client('s3')
s3.upload_file('../input_files/sample.wav', bucket, 'sample.wav')

In [4]:
job_name = f"sample-job-{str(uuid.uuid4())}"
media_uri = f"s3://{bucket}/sample.wav"
media_format = "wav"
language_code = "en-US"



job_args = {
    "TranscriptionJobName": job_name,
    "Media": {"MediaFileUri": media_uri},
    "MediaFormat": media_format,
    "IdentifyLanguage":True
}

transcribe_client = session.client('transcribe')

In [5]:
transcribe_client.start_transcription_job(**job_args)

{'TranscriptionJob': {'TranscriptionJobName': 'sample-job-9f46cb40-904f-4538-9466-03598751538d',
  'TranscriptionJobStatus': 'IN_PROGRESS',
  'MediaFormat': 'wav',
  'Media': {'MediaFileUri': 's3://pl-movify-hackathon/sample.wav'},
  'StartTime': datetime.datetime(2023, 11, 10, 14, 52, 45, 758000, tzinfo=tzlocal()),
  'CreationTime': datetime.datetime(2023, 11, 10, 14, 52, 45, 739000, tzinfo=tzlocal()),
  'IdentifyLanguage': True},
 'ResponseMetadata': {'RequestId': '285aa887-2933-4922-8e06-fc06cd9fc938',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '285aa887-2933-4922-8e06-fc06cd9fc938',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '302',
   'date': 'Fri, 10 Nov 2023 13:52:44 GMT'},
  'RetryAttempts': 0}}

In [7]:
timeout = 20

start = time.time()
now = time.time()

status = None

while (status != 'COMPLETED')&((now-start) < timeout):
    time.sleep(1)
    response = transcribe_client.get_transcription_job(
        TranscriptionJobName=job_name
    )

    status = response['TranscriptionJob']['TranscriptionJobStatus']
    now = time.time()


URL = response['TranscriptionJob']['Transcript']['TranscriptFileUri']
response = requests.get(URL)
output = ast.literal_eval(response.content.decode('utf-8'))

In [13]:
output

{'jobName': 'sample-job-9f46cb40-904f-4538-9466-03598751538d',
 'accountId': '577064967282',
 'status': 'COMPLETED',
 'results': {'language_code': 'en-US',
  'language_identification': [{'code': 'en-US', 'score': '0.9826'},
   {'code': 'en-NZ', 'score': '0.0077'},
   {'code': 'en-IE', 'score': '0.0057'},
   {'code': 'en-ZA', 'score': '0.0025'},
   {'code': 'th-TH', 'score': '0.0015'}],
  'transcripts': [{'transcript': 'The stale smell of old beer lingers. It takes heat to bring out the odor. A cold dip restores health and zest. A salt pickle tastes fine with ham tacos. El Pastor are my favorite. A zestful food is the hot Cross bun.'}],
  'items': [{'type': 'pronunciation',
    'alternatives': [{'confidence': '0.999', 'content': 'The'}],
    'start_time': '1.24',
    'end_time': '1.45'},
   {'type': 'pronunciation',
    'alternatives': [{'confidence': '0.999', 'content': 'stale'}],
    'start_time': '1.46',
    'end_time': '1.86'},
   {'type': 'pronunciation',
    'alternatives': [{'con

In [12]:
output['results']['transcripts']

[{'transcript': 'The stale smell of old beer lingers. It takes heat to bring out the odor. A cold dip restores health and zest. A salt pickle tastes fine with ham tacos. El Pastor are my favorite. A zestful food is the hot Cross bun.'}]

## Text to Speech
Check https://docs.aws.amazon.com/polly/latest/dg/get-started-what-next.html for further examples

In [5]:
polly_client = session.client('polly')

In [6]:
text = 'The movify hackathon is absolutely amazing!'

In [9]:
response = polly_client.synthesize_speech(
    Engine = 'standard',
    OutputFormat = 'mp3',
    Text = text,
    VoiceId = 'Amy'
)

In [12]:
output_file = '../output_files/sample.mp3'

In [13]:
with open(output_file, "wb") as file:
    file.write(response['AudioStream'].read())

## Image Recognition

### Object Detection

In [27]:
filename = "sample_object_detection.jpg"

In [25]:
s3 = session.client('s3')
s3.upload_file(f'../input_files/{filename}', bucket, filename)

In [26]:
rekognition_client = session.client('rekognition')

In [18]:
request_args = {
    'Image':{
        'S3Object':{
            'Bucket':bucket,
            'Name':filename
        }
    }
}

In [24]:
response = rekognition_client.detect_labels(**request_args)
response

{'Labels': [{'Name': 'Road',
   'Confidence': 99.99202728271484,
   'Instances': [],
   'Parents': [],
   'Aliases': [],
   'Categories': [{'Name': 'Transport and Logistics'}]},
  {'Name': 'Tarmac',
   'Confidence': 99.99202728271484,
   'Instances': [],
   'Parents': [{'Name': 'Road'}],
   'Aliases': [{'Name': 'Asphalt'}],
   'Categories': [{'Name': 'Transport and Logistics'}]},
  {'Name': 'City',
   'Confidence': 99.97151184082031,
   'Instances': [],
   'Parents': [],
   'Aliases': [{'Name': 'Town'}],
   'Categories': [{'Name': 'Buildings and Architecture'}]},
  {'Name': 'Metropolis',
   'Confidence': 99.97151184082031,
   'Instances': [],
   'Parents': [{'Name': 'City'}, {'Name': 'Urban'}],
   'Aliases': [],
   'Categories': [{'Name': 'Buildings and Architecture'}]},
  {'Name': 'Urban',
   'Confidence': 99.97151184082031,
   'Instances': [],
   'Parents': [],
   'Aliases': [],
   'Categories': [{'Name': 'Colors and Visual Composition'}]},
  {'Name': 'Neighborhood',
   'Confidence':

### Face Detection

In [28]:
filename = "sample_face_detection.jpg"

In [29]:
s3 = session.client('s3')
s3.upload_file(f'../input_files/{filename}', bucket, filename)

In [30]:
rekognition_client = session.client('rekognition')

In [31]:
request_args = {
    'Image':{
        'S3Object':{
            'Bucket':bucket,
            'Name':filename
        }
    }
}

In [32]:
response = rekognition_client.detect_faces(**request_args)
response

{'FaceDetails': [{'BoundingBox': {'Width': 0.010117187164723873,
    'Height': 0.021042201668024063,
    'Left': 0.3170037865638733,
    'Top': 0.591657280921936},
   'Landmarks': [{'Type': 'eyeLeft',
     'X': 0.32253751158714294,
     'Y': 0.5983611345291138},
    {'Type': 'eyeRight', 'X': 0.3254826068878174, 'Y': 0.5984554290771484},
    {'Type': 'mouthLeft', 'X': 0.3225465714931488, 'Y': 0.6058644652366638},
    {'Type': 'mouthRight', 'X': 0.32500794529914856, 'Y': 0.6058992147445679},
    {'Type': 'nose', 'X': 0.3258243203163147, 'Y': 0.6026809215545654}],
   'Pose': {'Roll': -2.405670642852783,
    'Yaw': 37.31802749633789,
    'Pitch': -5.7937164306640625},
   'Quality': {'Brightness': 89.19695281982422,
    'Sharpness': 3.3018569946289062},
   'Confidence': 99.90345001220703},
  {'BoundingBox': {'Width': 0.009773864410817623,
    'Height': 0.020675761625170708,
    'Left': 0.959280788898468,
    'Top': 0.5970905423164368},
   'Landmarks': [{'Type': 'eyeLeft',
     'X': 0.960775

## OCR - Textract

### Using JPG

In [23]:
filename = "handwriting.jpg"

In [24]:
textract_client = session.client('textract')
s3 = session.client('s3')

s3.upload_file(f'../input_files/{filename}', bucket, filename)

In [25]:
request_args = {
    'Document':{
        'S3Object':{
            'Bucket':bucket,
            'Name':filename
        }
    },
    'FeatureTypes':['TABLES']
}

In [26]:
response = textract_client.analyze_document(**request_args)
response

{'DocumentMetadata': {'Pages': 1},
 'Blocks': [{'BlockType': 'PAGE',
   'Geometry': {'BoundingBox': {'Width': 1.0,
     'Height': 1.0,
     'Left': 0.0,
     'Top': 0.0},
    'Polygon': [{'X': 0.0005937423557043076, 'Y': 0.0},
     {'X': 1.0, 'Y': 0.0003207796544302255},
     {'X': 1.0, 'Y': 1.0},
     {'X': 0.0, 'Y': 1.0}]},
   'Id': '835c0db9-2d41-4d9f-8aa7-334733df8f60',
   'Relationships': [{'Type': 'CHILD',
     'Ids': ['a46ddab5-5ed1-427b-83bf-5d8c7339c533',
      'fbe6fd35-acf2-438a-8e35-10ad03daa8d5',
      '40edaa9f-2705-4fe2-9045-4bf01a1df68a',
      'b8d0a763-fa30-4cc8-9b69-a2b5d5579b8c',
      '6614af7b-b389-4867-866f-8e87ade37a9c',
      '709d2e52-a024-4913-89d7-badf4d9c2178',
      '0ff0b500-dc5a-4db1-b882-17654aca500b',
      '76d2a3b0-5b87-49e7-b3e7-db10ce64784e',
      'eef84364-223e-4286-b666-384b0f40b728',
      'ca569c74-e54c-4422-8f36-0b0ed65471a0',
      'bd4eb3a1-f43b-43da-83a0-b8e999492cb8',
      'c703bc5a-3006-4157-b8cd-c763876ab51e',
      '4554bfd5-7a06-42e7

In [34]:
result_text = ' '.join([i['Text'] for i in response['Blocks'] if i['BlockType'] == 'WORD'])

In [35]:
result_text

'Social problems and evils are created by several factors. The main causes are and problems are obstacles on the way, to lack of education and poverty Social evils social development Both individual and social problems and evils It is very much social development is badly affected by such important for all to understand that they are not created any single person Instead they are created socially SO solution must should be sought socially. For this we have to identify our social problems and evils clearly. We have to identify their root causes. Only then can we make appropriate effort to solve or end them. The following table has a summary of the causes of social problems and evils and solution measured of the same. As mentioned in the above table , the most important measure to solve problems and evils is to spread education and raise awareness among ordinary people 60 they will go in a very correct manner'

## Using PDF

In [4]:
filename = "sample.pdf"

In [5]:
textract_client = session.client('textract')
s3 = session.client('s3')

s3.upload_file(f'../input_files/{filename}', bucket, filename)

In [8]:
request_args = {
    'DocumentLocation':{
        'S3Object':{
            'Bucket':bucket,
            'Name':filename
        }
    }
}

In [10]:
start_response = textract_client.start_document_text_detection(**request_args)
start_response

{'JobId': '696c21db861af5d498fc6adb6b410de485876c658ad11ed6888224314da9da3b',
 'ResponseMetadata': {'RequestId': 'e2c54c5e-a254-446e-9e7b-87b048e1f785',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'e2c54c5e-a254-446e-9e7b-87b048e1f785',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '76',
   'date': 'Fri, 10 Nov 2023 15:34:41 GMT'},
  'RetryAttempts': 0}}

In [11]:
response = textract_client.get_document_text_detection(
        JobId=start_response['JobId']
    )

In [12]:
response

{'JobStatus': 'IN_PROGRESS',
 'DetectDocumentTextModelVersion': '1.0',
 'ResponseMetadata': {'RequestId': '86821bb7-d4f3-49eb-897a-a4426227028b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '86821bb7-d4f3-49eb-897a-a4426227028b',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '66',
   'date': 'Fri, 10 Nov 2023 15:36:17 GMT'},
  'RetryAttempts': 0}}

In [13]:
timeout = 20

start = time.time()
now = time.time()

status = None

while (status != 'COMPLETED')&((now-start) < timeout):
    time.sleep(1)
    response = textract_client.get_document_text_detection(
        JobId=start_response['JobId']
    )

    status = response['JobStatus']
    now = time.time()


response

{'DocumentMetadata': {'Pages': 8},
 'JobStatus': 'SUCCEEDED',
 'NextToken': 'XEu7PQOmkuo5RhF/oACJo7yAxqy1YLXHsOPQM2WaW4NJAd6+lnLydtcf8Ki0/DLydOM6tKjjhlFTjn+hX6NdKf5L76ylsaJ0OQS1k4JrmJFyFUVIUD172XcBFPu1c7zQfvOrsTM=',
 'Blocks': [{'BlockType': 'PAGE',
   'Geometry': {'BoundingBox': {'Width': 1.0,
     'Height': 1.0,
     'Left': 0.0,
     'Top': 0.0},
    'Polygon': [{'X': 1.6670481883807042e-08, 'Y': 0.0},
     {'X': 1.0, 'Y': 2.0146543988630583e-07},
     {'X': 1.0, 'Y': 1.0},
     {'X': 0.0, 'Y': 1.0}]},
   'Id': '679c36cd-6d55-49e5-b5b7-06a1fbabda2f',
   'Relationships': [{'Type': 'CHILD',
     'Ids': ['65e91927-20f4-445d-8210-2b81958880cd',
      '191cfd59-d29c-4bbf-bc94-18acfb77cfe0',
      '123e18e0-d92c-4e8f-978b-7b066ad3dbc9',
      '498deaf4-2f63-423e-becd-d93749a816bc',
      '9e163726-20d5-4621-bf5d-30e09a78fe4b',
      'ced9590c-b6f5-4e99-a3d7-35d5d8c5eb71',
      '1b06cd30-69a2-46a9-bb59-74519ca0f8f3',
      '4cfd3825-1ac4-4228-a3c1-8ecbe3f0a9f1',
      '106ba04b-e5f5-4457-

In [20]:
result_text = '\n'.join([i['Text'] for i in response['Blocks'] if i['BlockType'] == 'LINE'])

In [21]:
print(result_text)

Universal Declaration of Human Rights
Preamble
Whereas recognition of the inherent dignity and of the equal and inalienable
rights of all members of the human family is the foundation of freedom, justice
and peace in the world,
Whereas disregard and contempt for human rights have resulted in barbarous
acts which have outraged the conscience of mankind, and the advent of a world
in which human beings shall enjoy freedom of speech and belief and freedom
from fear and want has been proclaimed as the highest aspiration of the common
people,
Whereas it is essential, if man is not to be compelled to have recourse, as a last
resort, to rebellion against tyranny and oppression, that human rights should be
protected by the rule of law,
Whereas it is essential to promote the development of friendly relations between
nations,
Whereas the peoples of the United Nations have in the Charter reaffirmed their
faith in fundamental human rights, in the dignity and worth of the human person
and in the equ