In [None]:
import requests
# We are using a library called 'requests' to send messages to a service on the internet.

def speech_to_text(audio_binary):
    """Send audio to the Watson Speech-to-Text HTTP API and return the transcript.

    Args:
        audio_binary: The audio data to transcribe. It is sent unchanged as
            the body of the HTTP POST request.

    Returns:
        The 'transcript' of the last alternative of the last entry in the
        response's 'results' list. If the response has no results, or the
        last result has no alternatives, the string 'null' is returned.
    """
    # Set up Watson Speech-to-Text HTTP API URL.
    # '...' is a placeholder: put the address of the Speech-to-Text service here.
    base_url = '...'
    # Adding the "/recognize" path tells the service exactly what we want it to do.
    api_url = base_url + '/speech-to-text/api/v1/recognize'

    # Set up parameters for our HTTP request.
    # "params" carries extra instructions; here, which language model to use.
    params = {
        'model': 'en-US_Multimedia',
    }

    # Send an HTTP POST request with the audio as the request body and decode
    # the JSON reply into a dictionary.
    # NOTE(review): no timeout is passed, so this call presumably waits as long
    # as the service takes to answer - confirm whether a timeout is wanted.
    response = requests.post(api_url, params=params, data=audio_binary).json()

    # Start with 'null' so there is always something to return, even when the
    # service did not recognise any speech.
    text = 'null'

    # An `if` (not a loop): we only need to look at the response once.
    results = response.get('results')
    if results:
        # Print the full response to see all the details.
        print('Speech-to-Text response:', response)

        # Read the last result and its last alternative by index, so the
        # response dictionary is left untouched.
        alternatives = results[-1].get('alternatives')
        if alternatives:
            text = alternatives[-1].get('transcript')
            # Print the final text that the service recognized.
            print('Recognised text: ', text)

    # Return on every path so callers never get an implicit None.
    return text
