In [1]:
import boto3
import json
import base64
import requests
import time
import pandas as pd

In [2]:
# load client and resource
sd_session = boto3.session.Session(profile_name='smart-dyspnea')
s3_client = sd_session.client('s3')
transcribe_client = sd_session.client('transcribe')

In [3]:
with open('./data/ml_data.json', 'r') as f:
    data = json.loads(f.read())

In [11]:
audio = base64.b64decode(data[76]['audio']['data'].encode('utf-8'))

In [15]:
type(data[76]['audio']['data'])

str

In [18]:
with open('./data/audio.mp3', 'wb') as f:
    f.write(audio)

In [21]:
response = s3_client.upload_file(Bucket='sd-test-ml', Key='audios/audio.mp3', Filename="./data/audio.mp3")

In [34]:
response = transcribe_client.start_transcription_job(
    TranscriptionJobName='test-clase-1',
    LanguageCode='es-ES',
    MediaFormat='mp3',
    Media={
        'MediaFileUri': f"s3://sd-test-ml/audios/audio.mp3"
    },
#    Settings={
#        'ShowAlternatives': True,
#        'MaxAlternatives': 3,
#        'VocabularyFilterName': 'string',
#        'VocabularyFilterMethod': 'remove'|'mask'
#    },
)

In [35]:
response

{'TranscriptionJob': {'TranscriptionJobName': 'test-clase-1',
  'TranscriptionJobStatus': 'IN_PROGRESS',
  'LanguageCode': 'es-ES',
  'MediaFormat': 'mp3',
  'Media': {'MediaFileUri': 's3://sd-test-ml/audios/audio.mp3'},
  'StartTime': datetime.datetime(2021, 3, 17, 18, 22, 4, 47000, tzinfo=tzlocal()),
  'CreationTime': datetime.datetime(2021, 3, 17, 18, 22, 4, 26000, tzinfo=tzlocal())},
 'ResponseMetadata': {'RequestId': '0957824a-e512-4be2-89f2-4e4d54d2dd05',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Wed, 17 Mar 2021 17:22:03 GMT',
   'x-amzn-requestid': '0957824a-e512-4be2-89f2-4e4d54d2dd05',
   'content-length': '263',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

In [52]:
response = transcribe_client.get_transcription_job(
    TranscriptionJobName='test-clase-1'
)
response

{'TranscriptionJob': {'TranscriptionJobName': 'test-clase-1',
  'TranscriptionJobStatus': 'COMPLETED',
  'LanguageCode': 'es-ES',
  'MediaSampleRateHertz': 48000,
  'MediaFormat': 'mp3',
  'Media': {'MediaFileUri': 's3://sd-test-ml/audios/audio.mp3'},
  'Transcript': {'TranscriptFileUri': 'https://s3.eu-west-1.amazonaws.com/aws-transcribe-eu-west-1-prod/294604510371/test-clase-1/fadc6ae1-7295-44f1-964e-0ae071398422/asrOutput.json?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEPn%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCWV1LXdlc3QtMSJIMEYCIQDDyiIMaTCxwBsLs6YZP205uRemqrhrZYSBBngevnVifAIhAKhBJ5Nye29uhXsBe4fp0QHKXujW1Nd9Kyp1lWelDYf0KrQDCDIQAhoMNTg3MDE3NjYzNDE3IgxIINUq4rdl4D%2FWif8qkQMguMO%2FL7zkrjrFt949DGdTbfjIbyIaEKCV4kJO%2BMIN6qwdBqu8VFrVXvHEflJXtrIxirWzj%2F8Uwc%2Fyp10NXBcmGTGzRSZRnmhLrmDbmsa151ErP88DefseqFWLMDODzGLcNbR7KSXYbdU%2B3sCuHS2vqVGioHzGM2rgvz72MEr9XeFmkK0VCxw1zqZ2DJEgnJWlVWr0eewKEkviASQ8Yl6NGoGN0XtslEUkTzBaguFql653sRavjNejjEzPhyslvDFpPb3H%2BQ5MsOdFuWqavev1WPIwm2hADbxe8rNsbxSNnPkuTMGhJvt5R%2F0B5

In [53]:
output = requests.get(response["TranscriptionJob"]["Transcript"]["TranscriptFileUri"]).json()

In [54]:
output

{'jobName': 'test-clase-1',
 'accountId': '294604510371',
 'results': {'transcripts': [{'transcript': 'uno dos tres cuatro cinco seis siete ocho nueve diez once doce trece catorce quince. Vicenti siete ocho ocho nueve. En entre un noventa y dos veintitrés. Veinticuatro veinticinco, veintiséis veintisiete veintiocho veintinueve treinta.'}],
  'items': [{'start_time': '0.0',
    'end_time': '0.12',
    'alternatives': [{'confidence': '0.7208', 'content': 'uno'}],
    'type': 'pronunciation'},
   {'start_time': '0.12',
    'end_time': '0.4',
    'alternatives': [{'confidence': '0.9988', 'content': 'dos'}],
    'type': 'pronunciation'},
   {'start_time': '0.4',
    'end_time': '0.66',
    'alternatives': [{'confidence': '1.0', 'content': 'tres'}],
    'type': 'pronunciation'},
   {'start_time': '0.66',
    'end_time': '1.02',
    'alternatives': [{'confidence': '1.0', 'content': 'cuatro'}],
    'type': 'pronunciation'},
   {'start_time': '1.02',
    'end_time': '1.35',
    'alternatives': 

### Creating a filter vocabulary

In [55]:
response = transcribe_client.create_vocabulary_filter(
    VocabularyFilterName='numbers',
    LanguageCode='es-ES',
    Words=[
        'uno','siete','diez','treinta'
    ]
)

In [58]:
response = transcribe_client.start_transcription_job(
    TranscriptionJobName='test-clase-2',
    LanguageCode='es-ES',
    MediaFormat='mp3',
    Media={
        'MediaFileUri': f"s3://sd-test-ml/audios/audio.mp3"
    },
    Settings={
#        'ShowAlternatives': True,
#        'MaxAlternatives': 3,
        'VocabularyFilterName': 'numbers',
#        'VocabularyFilterMethod': 'remove'|'mask'
    },
)

In [59]:
%timeit
response = {
    'TranscriptionJob': {
      'TranscriptionJobStatus': ''
    }
}

while response['TranscriptionJob']['TranscriptionJobStatus'] != 'COMPLETED':
    time.sleep(1)
    response = transcribe_client.get_transcription_job(
        TranscriptionJobName='test-clase-2'
    )
    print(response['TranscriptionJob']['TranscriptionJobStatus'])

IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
IN_PROGRESS
COMPLETED


In [60]:
response

{'TranscriptionJob': {'TranscriptionJobName': 'test-clase-2',
  'TranscriptionJobStatus': 'COMPLETED',
  'LanguageCode': 'es-ES',
  'MediaSampleRateHertz': 48000,
  'MediaFormat': 'mp3',
  'Media': {'MediaFileUri': 's3://sd-test-ml/audios/audio.mp3'},
  'Transcript': {'TranscriptFileUri': 'https://s3.eu-west-1.amazonaws.com/aws-transcribe-eu-west-1-prod/294604510371/test-clase-2/52869a72-a64c-4737-9b66-1fe9ac6fc4ca/asrOutput.json?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEPn%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCWV1LXdlc3QtMSJHMEUCIQCHhI97bQonqrR3C9%2BHQE%2F5tXQ%2FiBticrVBZbYca%2F7BZgIgF8KSHanRa9y9lYW%2BsoKzjHXX4nmO70YnNw8jnzdKN5QqtAMIMhACGgw1ODcwMTc2NjM0MTciDEjkeEXBCJXz6VDaXCqRAw5TRmzSbcLBPL6PT6ur4aqjRpvsLrQILZOhfBTmbl0m4OXIlAzHtgZKbaCRS9E3NeL4Uzx3cYGzOch2UB%2F0EgJRTtLIkAF1uNDAK4ekmnlGraSW%2FeQet9acKX7QDTvOp%2FWXkmzE6Fo3K2RcNu6AV%2FyYq1iERVdLlUAPIr63TpbiZ%2FS3DQL%2B20tgR9yZe5sNt2evL3sl4qs4I2QVjWmyOIlQoSlpatBDgr%2BIR%2B%2Fqwlrs%2FGs5zuY3d92RskZwiOteOkJIeMnTKabkEad1u84oFUcn3sxTw6vTkOXMGWYhYG29DGW

In [61]:
output = requests.get(response["TranscriptionJob"]["Transcript"]["TranscriptFileUri"]).json()

In [62]:
output

{'jobName': 'test-clase-2',
 'accountId': '294604510371',
 'results': {'transcripts': [{'transcript': '*** dos tres cuatro cinco seis *** ocho nueve *** once doce trece catorce quince. Vicenti *** ocho ocho nueve. En entre un noventa y dos veintitrés. Veinticuatro veinticinco, veintiséis veintisiete veintiocho veintinueve ***.'}],
  'items': [{'start_time': '0.0',
    'end_time': '0.12',
    'alternatives': [{'confidence': '0.7208', 'content': '***'}],
    'type': 'pronunciation'},
   {'start_time': '0.12',
    'end_time': '0.4',
    'alternatives': [{'confidence': '0.9988', 'content': 'dos'}],
    'type': 'pronunciation'},
   {'start_time': '0.4',
    'end_time': '0.66',
    'alternatives': [{'confidence': '1.0', 'content': 'tres'}],
    'type': 'pronunciation'},
   {'start_time': '0.66',
    'end_time': '1.02',
    'alternatives': [{'confidence': '1.0', 'content': 'cuatro'}],
    'type': 'pronunciation'},
   {'start_time': '1.02',
    'end_time': '1.35',
    'alternatives': [{'confid

### Creating vocabulary

In [63]:
response = transcribe_client.create_vocabulary(
    VocabularyName='numbers',
    LanguageCode='es-ES',
    Phrases=[
        'uno',
        'siete',
        'diez',
        'treinta'
    ]
)

In [64]:
response = transcribe_client.start_transcription_job(
    TranscriptionJobName='test-clase-3',
    LanguageCode='es-ES',
    MediaFormat='mp3',
    Media={
        'MediaFileUri': f"s3://sd-test-ml/audios/audio.mp3"
    },
    Settings={
        'VocabularyName': 'numbers'
    }
)

In [67]:
response = {
    'TranscriptionJob': {
      'TranscriptionJobStatus': ''
    }
}

while response['TranscriptionJob']['TranscriptionJobStatus'] != 'COMPLETED':
    time.sleep(1)
    response = transcribe_client.get_transcription_job(
        TranscriptionJobName='test-clase-3'
    )
    print(response['TranscriptionJob']['TranscriptionJobStatus'])

COMPLETED


In [68]:
response

{'TranscriptionJob': {'TranscriptionJobName': 'test-clase-3',
  'TranscriptionJobStatus': 'COMPLETED',
  'LanguageCode': 'es-ES',
  'MediaSampleRateHertz': 48000,
  'MediaFormat': 'mp3',
  'Media': {'MediaFileUri': 's3://sd-test-ml/audios/audio.mp3'},
  'Transcript': {'TranscriptFileUri': 'https://s3.eu-west-1.amazonaws.com/aws-transcribe-eu-west-1-prod/294604510371/test-clase-3/abb85346-67bf-4ce0-a55d-140e1235db40/asrOutput.json?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEPr%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCWV1LXdlc3QtMSJHMEUCIQCZS0d4ZPgIiUed1gfuv%2BTomW1owneYIvz%2BVxsZlRYnagIgVvip3hmfeTBftfNRhoPJTElTXCk8Gl0PGbU6FnsF0IAqtAMIMhACGgw1ODcwMTc2NjM0MTciDCShaUSVSplQnowvXSqRA7wMLjYmZujrOQmmOi5uVgzISNLQXO7Iu5oTTQJodM%2B2xmVeb7hHIKlTwISf%2FvGh1V1Ulk%2Bz90k9AhKtmTmyjHLuIhRv39tdcQZqppszHm9Kmo1NtTVXv1KvGHBO20EL3GxqgLlUalT0LgNY7FTj8VJnaFJJuVT2Om2EiSwfL92pB71Cd7m90Ue%2BIGGZHDA0tQpY54wTXilKw9ItHgOaFRkOvbcae%2FcVe8%2FFdahYUhnxY1%2FpCHP7l1CaJKoWuYhaYNv9cDRY4tYALSxDRF7cvGCzGIYQsVlg7McKt5zqKE8JI08lXL7%2FJLcKH

In [69]:
output = requests.get(response["TranscriptionJob"]["Transcript"]["TranscriptFileUri"]).json()

In [70]:
output

{'jobName': 'test-clase-3',
 'accountId': '294604510371',
 'results': {'transcripts': [{'transcript': 'uno dos, tres cuatro cinco, seis siete, ocho nueve, diez once, doce trece catorce quince Vicenti siete chochona deben entre un noventa y dos veintitrés veinticuatro y cinco mil seis siete veintiocho mil nueve treinta.'}],
  'items': [{'start_time': '0.0',
    'end_time': '0.12',
    'alternatives': [{'confidence': '0.7161', 'content': 'uno'}],
    'type': 'pronunciation'},
   {'start_time': '0.12',
    'end_time': '0.4',
    'alternatives': [{'confidence': '0.9989', 'content': 'dos'}],
    'type': 'pronunciation'},
   {'alternatives': [{'confidence': '0.0', 'content': ','}],
    'type': 'punctuation'},
   {'start_time': '0.4',
    'end_time': '0.66',
    'alternatives': [{'confidence': '1.0', 'content': 'tres'}],
    'type': 'pronunciation'},
   {'start_time': '0.66',
    'end_time': '1.02',
    'alternatives': [{'confidence': '1.0', 'content': 'cuatro'}],
    'type': 'pronunciation'}

### Process multiple audios

In [106]:
result = {}

for record in data[:20]:  

    audio_id = record['id'].split('/')[-1][20:]
    response = transcribe_client.start_transcription_job(
        TranscriptionJobName=audio_id,
        LanguageCode='es-ES',
        MediaFormat='mp3',
        Media={
            'MediaFileUri': record['audio']['s3'].replace('sources', 'processed/sources')
        }
    )
    
    response = {
        'TranscriptionJob': {
          'TranscriptionJobStatus': ''
        }
    }
    
    start = time.time()
    while response['TranscriptionJob']['TranscriptionJobStatus'] != 'COMPLETED':
        time.sleep(1)
        response = transcribe_client.get_transcription_job(
            TranscriptionJobName=audio_id
        )
        
    end = time.time()
    print(f"Time spent: {end-start}")
    
    output = requests.get(response["TranscriptionJob"]["Transcript"]["TranscriptFileUri"]).json()    
    
    result[audio_id] = {
        '1': 'uno' in output['results']['transcripts'][0]['transcript'],
        '7': 'siete' in output['results']['transcripts'][0]['transcript'],
        '10': 'diez' in output['results']['transcripts'][0]['transcript'],
        '30': 'treinta' in output['results']['transcripts'][0]['transcript']
    }

Time spent: 32.20404410362244
Time spent: 28.949074268341064
Time spent: 22.17820906639099
Time spent: 19.99015522003174
Time spent: 52.77833700180054
Time spent: 22.20543909072876
Time spent: 27.76417875289917
Time spent: 23.18256711959839
Time spent: 63.23955821990967
Time spent: 33.46356415748596
Time spent: 18.939187049865723
Time spent: 19.890972137451172
Time spent: 19.918843984603882
Time spent: 30.911147117614746
Time spent: 20.557868719100952
Time spent: 17.68913722038269
Time spent: 27.638123750686646
Time spent: 17.916567087173462
Time spent: 17.758325815200806
Time spent: 26.62440299987793


In [107]:
result

{'806145f6-ebae-43e3-848f-3d6b26d803bd': {'1': True,
  '7': True,
  '10': False,
  '30': False},
 'd866b2c0-6eca-40e1-ad93-5f4feab08421': {'1': True,
  '7': True,
  '10': False,
  '30': False},
 '39f1a1db-904a-4069-9a35-005847b9101c': {'1': True,
  '7': True,
  '10': True,
  '30': False},
 '109d0932-6702-4390-9e4c-d01177e1fb5c': {'1': False,
  '7': False,
  '10': False,
  '30': False},
 '7e43bff4-5cec-48e2-8697-1e72f81a8522': {'1': True,
  '7': True,
  '10': False,
  '30': False},
 '26e43698-7653-46eb-9ba4-e3a4502db126': {'1': True,
  '7': True,
  '10': True,
  '30': False},
 'c453e481-e0cc-4509-b321-f8dce76be00a': {'1': True,
  '7': True,
  '10': True,
  '30': True},
 'ab9362e1-e884-4648-86a4-d22e9cb0acf5': {'1': True,
  '7': True,
  '10': True,
  '30': True},
 'fc67332b-fb3f-401b-a506-dd13e0bb3a35': {'1': False,
  '7': True,
  '10': False,
  '30': False},
 'e08b689e-100e-437f-9f33-559ff3436567': {'1': True,
  '7': True,
  '10': True,
  '30': True},
 '976e8c69-7daa-4a54-9f65-aac75dd06

In [113]:
df = pd.DataFrame.from_dict(result, orient='index')

In [114]:
df

Unnamed: 0,1,7,10,30
806145f6-ebae-43e3-848f-3d6b26d803bd,True,True,False,False
d866b2c0-6eca-40e1-ad93-5f4feab08421,True,True,False,False
39f1a1db-904a-4069-9a35-005847b9101c,True,True,True,False
109d0932-6702-4390-9e4c-d01177e1fb5c,False,False,False,False
7e43bff4-5cec-48e2-8697-1e72f81a8522,True,True,False,False
26e43698-7653-46eb-9ba4-e3a4502db126,True,True,True,False
c453e481-e0cc-4509-b321-f8dce76be00a,True,True,True,True
ab9362e1-e884-4648-86a4-d22e9cb0acf5,True,True,True,True
fc67332b-fb3f-401b-a506-dd13e0bb3a35,False,True,False,False
e08b689e-100e-437f-9f33-559ff3436567,True,True,True,True
