In [9]:
from google.cloud import speech
from google.cloud import storage
from google.oauth2 import service_account
import json
import base64
import requests
import time
import pandas as pd

In [10]:
credentials = service_account.Credentials.from_service_account_file("smart-dyspnea-ml-20e2f40d2865.json")

### Store in Google Cloud Storage

In [19]:
BUCKET_DESTINO = "smart-dyspnea-audios"

In [10]:
storage_client = storage.Client(credentials=credentials)

In [11]:
with open('./data/ml_data.json', 'r') as f:
    data = json.loads(f.read())

In [59]:
# guardamos en google cloud storage
for record in data: #[:20]:
    bucket = storage_client.bucket(BUCKET_DESTINO)
    blob = bucket.blob(record['id'])
    audio = base64.b64decode(record['audio']['data'].encode('utf-8'))
    blob.upload_from_string(audio)

### Process multiple audios

Info usada para llamar a speech to text:

- https://cloud.google.com/speech-to-text/docs/libraries
- https://googleapis.dev/python/speech/latest/speech_v1/types.html#google.cloud.speech_v1.types.RecognitionConfig
- https://cloud.google.com/speech-to-text/docs/context-strength
- https://cloud.google.com/speech-to-text/docs/class-tokens


In [36]:
from google.cloud import speech_v1p1beta1 as speech

In [37]:
speech_client = speech.SpeechClient(credentials=credentials)

In [61]:
results = {}

for record in data:# [:20]:
    
    audio_id = record['id']
    
    gcs_uri = f"gs://{BUCKET_DESTINO}/{audio_id}"
    audio = speech.RecognitionAudio(uri=gcs_uri)
    
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.MP3,
        sample_rate_hertz=48000,
        language_code="es-es",
        enable_speaker_diarization=True,
        speech_contexts=[speech.SpeechContext(phrases=["OOV_CLASS_DIGIT_SEQUENCE"])],  #  "$OOV_CLASS_ALPHANUMERIC_SEQUENCE"
    )

    # Detects speech in the audio file
    start = time.time()
    response = speech_client.recognize(config=config, audio=audio)
    end = time.time()
    print(f"Time spent: {end-start}")
    
    # TODO: this should be done instead working with trascription
    # response.results[0].alternatives[0].words[0..N] and .start_time/.end_time (in micros), and .word (value)

    for result in response.results:
        print("Transcript: {}".format(result.alternatives[0].transcript))
        
    results[audio_id] = {
        'trascripcion': result.alternatives[0].transcript,
        'time': end-start
    }


Time spent: 2.466360092163086
Transcript: 4 sitios 9 10 11 y 19
Time spent: 2.192697763442993
Transcript: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 6 7
Time spent: 1.9887597560882568
Transcript: 34
Time spent: 2.881523847579956
Transcript: 3 4 5 6 7 8 9 10 11 12 13 14 y 5
Time spent: 3.2100930213928223
Transcript: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
Time spent: 1.816298007965088
Transcript: mundo 2 3 4 5 6 7 8 9 10 11 12 13 14 15 y 67
Time spent: 1.734076976776123
Transcript: necesito 8 9 10 11 700 x 6779 2007 34878
Time spent: 1.7596309185028076
Transcript: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 18 19 20 21 22 23 24 25 27 28 29 30
Time spent: 2.3361847400665283
Transcript: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 y 24
Time spent: 3.1649529933929443
Transcript: 123456789 10 11 12 13 14 15 y 5
Time spent: 1.548719882965088
Transcript: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 17 19 20 21 22 23 24 25 26 27 28
Time spent: 2.0460891723632812
Transcript: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15

In [67]:
df = (
    pd.DataFrame.from_dict(results, orient='index')
    .assign(**{
        '1': lambda df: df.trascripcion.str.contains("1 "),
        '2': lambda df: df.trascripcion.str.contains("2 "),
        '3': lambda df: df.trascripcion.str.contains("3 "),
        '7': lambda df: df.trascripcion.str.contains(" 7"),
        '10': lambda df: df.trascripcion.str.contains(" 7"),
        '28': lambda df: df.trascripcion.str.contains("28"),
        '29': lambda df: df.trascripcion.str.contains("29"),
        '30': lambda df: df.trascripcion.str.contains("30"),
    })
)

In [68]:
df[["1", "7", "10", "30"]].sum()

1     82
7     70
10    70
30    32
dtype: int64

In [69]:
df.time.mean()

2.080914338429769

In [101]:
df.to_csv("/tmp/gcp_trascription.csv")

In [102]:
df.head()

Unnamed: 0,trascripcion,time,1,2,3,7,10,28,29,30
sources/16-01-2021_21:17:02-806145f6-ebae-43e3-848f-3d6b26d803bd,4 sitios 9 10 11 y 19,2.46636,True,False,False,False,False,False,False,False
sources/02-02-2021_07:09:14-d866b2c0-6eca-40e1-ad93-5f4feab08421,1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 6 7,2.192698,True,True,True,True,True,False,False,False
sources/14-01-2021_00:30:27-39f1a1db-904a-4069-9a35-005847b9101c,34,1.98876,False,False,False,False,False,False,False,False
sources/03-01-2021_15:15:25-109d0932-6702-4390-9e4c-d01177e1fb5c,3 4 5 6 7 8 9 10 11 12 13 14 y 5,2.881524,True,True,True,True,True,False,False,False
sources/15-01-2021_00:56:43-7e43bff4-5cec-48e2-8697-1e72f81a8522,1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 1...,3.210093,True,True,True,True,True,False,False,False
