# Using batch transcription in Python

This is a simple example of using batch transcription from Python with the `requests` library, when the audio data is stored in Blob storage.


## Prepare the request

Update the constants with the information from your own account:

* **REGION**: the region where your cognitive service was created (for instance, I used "australiaeast")
* **KEY**: your cognitive service key
* **MODEL_ID**: if you are using custom speech, add the model ID, otherwise, leave it blank


In [None]:
import requests


# Placeholders: replace each value with the details of your own account.
REGION = "<<your cognitive service region>>"
KEY = "<<your cognitive service key>>"
MODEL_ID = "<<your custom speech model id - if any>>"

# SAS URL of the blob container that holds the audio files.
# It must be generated for the container with the permissions 'r' and 'l'.
BLOB_CONTAINER_SAS_URL = "<<your blob SAS URL>>"

# Transcriptions endpoint of the speech-to-text v3.0 API in the chosen region.
url = "https://{}.api.cognitive.microsoft.com/speechtotext/v3.0/transcriptions".format(
    REGION
)



## Create transcription job

In [None]:
# Describe the batch transcription job as a plain dict and let `requests`
# serialise it. The previous hand-written JSON string had a trailing comma
# after the "model" object (invalid JSON) and did not escape the values
# interpolated into it.
payload = {
    "contentContainerUrl": BLOB_CONTAINER_SAS_URL,
    "properties": {
        "diarizationEnabled": True,
        "wordLevelTimestampsEnabled": False,
        "punctuationMode": "DictatedAndAutomatic",
    },
    "locale": "en-AU",
    "displayName": "Transcription using Custom model for en-AU",
}

# Only reference a custom speech model when an ID was supplied; with a blank
# MODEL_ID the "model" entry is left out instead of pointing at ".../models/".
if MODEL_ID:
    payload["model"] = {
        "self": f"https://{REGION}.api.cognitive.microsoft.com/speechtotext/v3.0/models/{MODEL_ID}"
    }

# The same headers are reused by the later cells that query the job.
headers = {
    'Ocp-Apim-Subscription-Key': KEY,
    'Content-Type': 'application/json',
}

# `json=` makes requests encode the dict as a JSON request body.
response = requests.request("POST", url, headers=headers, json=payload)

# Show the raw reply so that any error returned by the service is visible.
print(response.text)


In [None]:
# Decode the JSON body of the reply to the job-creation request.
response_json = response.json()

# 'self' is requested later to read the job status, and 'links' -> 'files'
# is requested later to list the result files.
get_run = response_json['self']
get_files = response_json['links']['files']

# Print both links, one per line.
for link in (get_run, get_files):
    print(link)

## Check results

In [None]:
import time

# Poll the job URL every 10 seconds until the status is 'Succeeded' or
# 'Failed', drawing a growing line of dots as a progress indicator.
running_status = None
wait_string = ''

while True:
    response = requests.request("GET", get_run, headers=headers)
    running_status = response.json()['status']

    # Single exit test (the original checked the terminal states twice).
    if running_status in ('Succeeded', 'Failed'):
        break

    wait_string += '.'
    # "\r" returns to the start of the line so the next print overwrites it.
    print(f'{wait_string}{running_status}', end="\r")
    time.sleep(10)

print(f'{wait_string}{running_status}')

# Print the error message when the final response carries one. Only the
# "no error details present" cases are ignored; a bare `except` would also
# have swallowed unrelated failures such as KeyboardInterrupt.
try:
    error_message = response.json()['properties']['error']['message']
except (KeyError, TypeError):
    pass
else:
    print(error_message)

## Get results

In [None]:
import json

# Request the list of files produced by the transcription job.
response = requests.get(get_files, headers=headers)
response_json = response.json()

# Keep the "values" list of the reply; the next cell iterates over it.
values = response_json["values"]

# Pretty-print the list for inspection.
print(json.dumps(values, indent=2))

In [None]:

from pandas import DataFrame

# For every result file of kind 'Transcription': download it, print each
# recognised phrase with its offset, and save the (offset, text) pairs as CSV.
for entry in values:
    content_link = entry['links']['contentUrl']

    # Skip every entry that is not a transcription.
    if entry['kind'] != 'Transcription':
        continue

    # Drop the last five characters of the name
    # (presumably the ".json" extension - verify against the service output).
    file_name = entry['name'][:-5]
    file_content = []

    print('\n**************************************************')
    print(file_name)

    response = requests.request("GET", content_link, headers=headers)
    response_json = response.json()

    for item in response_json['recognizedPhrases']:
        offset = item['offset']
        print('\n', offset)

        # One row per candidate in 'nBest', all sharing the phrase offset.
        for candidate in item['nBest']:
            text = candidate['display']
            print(text)
            file_content.append([offset, text])
        print()
    print()

    # One CSV per transcription file, written to the working directory.
    df = DataFrame(file_content, columns=['offset', 'transcription'])
    df.to_csv(file_name + ".csv")
    