In [1]:
! pip install ./python-client

Processing ./python-client
Building wheels for collected packages: swagger-client
  Building wheel for swagger-client (setup.py) ... [?25l- \ | / done
[?25h  Created wheel for swagger-client: filename=swagger_client-1.0.0-py3-none-any.whl size=221544 sha256=cb9e632d005705e1b31c88adafae6d944414dbf825838b1637623c25154745d1
  Stored in directory: /home/azureuser/.cache/pip/wheels/0c/cc/18/88ced859f6aeb08054d1bacc903bbcc0e3442c23925510bd8f
Successfully built swagger-client
Installing collected packages: swagger-client
Successfully installed swagger-client-1.0.0


#### Using Swagger Client

In [35]:
import logging
import sys
import requests
import time
import swagger_client

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
        format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p %Z")
#speech_key, service_region = " ", " "

# Your subscription key and region for the speech service
SUBSCRIPTION_KEY = ""
SERVICE_REGION = ""

NAME = "Simple transcription"
DESCRIPTION = "Simple transcription description"

LOCALE = "ar-AE"

# Provide the uri of a container with audio files for transcribing all of them
# with a single request. At least 'read' and 'list' (rl) permissions are required.
RECORDINGS_CONTAINER_URI ="SAS_URL"

def transcribe_from_single_blob(uri, properties):
    """
    Transcribe a single audio file located at `uri` using the settings specified in `properties`
    using the base model for the specified locale.
    """
    transcription_definition = swagger_client.Transcription(
        display_name=NAME,
        description=DESCRIPTION,
        locale=LOCALE,
        content_urls=[uri],
        properties=properties
    )

    return transcription_definition


def transcribe_with_custom_model(api, uri, properties):
    """
    Transcribe a single audio file located at `uri` using the settings specified in `properties`
    using the base model for the specified locale.
    """
    # Model information (ADAPTED_ACOUSTIC_ID and ADAPTED_LANGUAGE_ID) must be set above.
    if MODEL_REFERENCE is None:
        logging.error("Custom model ids must be set when using custom models")
        sys.exit()

    model = api.get_model(MODEL_REFERENCE)

    transcription_definition = swagger_client.Transcription(
        display_name=NAME,
        description=DESCRIPTION,
        locale=LOCALE,
        content_urls=[uri],
        model=model,
        properties=properties
    )

    return transcription_definition


def transcribe_from_container(uri, properties):
    """
    Transcribe all files in the container located at `uri` using the settings specified in `properties`
    using the base model for the specified locale.
    """
    transcription_definition = swagger_client.Transcription(
        display_name=NAME,
        description=DESCRIPTION,
        locale=LOCALE,
        content_container_url=uri,
        properties=properties
    )

    return transcription_definition


def _paginate(api, paginated_object):
    """
    The autogenerated client does not support pagination. This function returns a generator over
    all items of the array that the paginated object `paginated_object` is part of.
    """
    yield from paginated_object.values
    typename = type(paginated_object).__name__
    auth_settings = ["api_key"]
    while paginated_object.next_link:
        link = paginated_object.next_link[len(api.api_client.configuration.host):]
        paginated_object, status, headers = api.api_client.call_api(link, "GET",
            response_type=typename, auth_settings=auth_settings)

        if status == 200:
            yield from paginated_object.values
        else:
            raise Exception(f"could not receive paginated data: status {status}")


def delete_all_transcriptions(api):
    """
    Delete all transcriptions associated with your speech resource.
    """
    logging.info("Deleting all existing completed transcriptions.")

    # get all transcriptions for the subscription
    transcriptions = list(_paginate(api, api.get_transcriptions()))

    # Delete all pre-existing completed transcriptions.
    # If transcriptions are still running or not started, they will not be deleted.
    for transcription in transcriptions:
        transcription_id = transcription._self.split('/')[-1]
        logging.debug(f"Deleting transcription with id {transcription_id}")
        try:
            api.delete_transcription(transcription_id)
        except swagger_client.rest.ApiException as exc:
            logging.error(f"Could not delete transcription {transcription_id}: {exc}")


def transcribe():
    logging.info("Starting transcription client...")

    # configure API key authorization: subscription_key
    configuration = swagger_client.Configuration()
    configuration.api_key["Ocp-Apim-Subscription-Key"] = SUBSCRIPTION_KEY
    configuration.host = f"https://{SERVICE_REGION}.api.cognitive.microsoft.com/speechtotext/v3.1"

    # create the client object and authenticate
    client = swagger_client.ApiClient(configuration)

    # create an instance of the transcription api class
    api = swagger_client.CustomSpeechTranscriptionsApi(api_client=client)

    # Specify transcription properties by passing a dict to the properties parameter. See
    # https://learn.microsoft.com/azure/cognitive-services/speech-service/batch-transcription-create?pivots=rest-api#request-configuration-options
    # for supported parameters.
    properties = swagger_client.TranscriptionProperties()
    # properties.word_level_timestamps_enabled = True
    # properties.display_form_word_level_timestamps_enabled = True
    # properties.punctuation_mode = "DictatedAndAutomatic"
    # properties.profanity_filter_mode = "Masked"
    # properties.destination_container_url = "<SAS Uri with at least write (w) permissions for an Azure Storage blob container that results should be written to>"
    # properties.time_to_live = "PT1H"

    # uncomment the following block to enable and configure speaker separation
    properties.diarization_enabled = True
    properties.diarization = swagger_client.DiarizationProperties(
         swagger_client.DiarizationSpeakersProperties(min_count=1, max_count=5))

    #properties.language_identification = swagger_client.LanguageIdentificationProperties(["en-IN", "hi-IN"])
    #properties.language_identification = swagger_client.LanguageIdentificationProperties([])

    # Use base models for transcription. Comment this block if you are using a custom model.
    #transcription_definition = transcribe_from_single_blob(RECORDINGS_BLOB_URI, properties)

    # Uncomment this block to use custom models for transcription.
    # transcription_definition = transcribe_with_custom_model(api, RECORDINGS_BLOB_URI, properties)

    # uncomment the following block to enable and configure language identification prior to transcription
    # Uncomment this block to transcribe all files from a container.
    transcription_definition = transcribe_from_container(RECORDINGS_CONTAINER_URI, properties)

    created_transcription, status, headers = api.transcriptions_create_with_http_info(transcription=transcription_definition)

    # get the transcription Id from the location URI
    transcription_id = headers["location"].split("/")[-1]

    # Log information about the created transcription. If you should ask for support, please
    # include this information.
    logging.info(f"Created new transcription with id '{transcription_id}' in region {SERVICE_REGION}")

    logging.info("Checking status.")

    completed = False

    while not completed:
        # wait for 5 seconds before refreshing the transcription status
        time.sleep(5)

        transcription = api.transcriptions_get(transcription_id)
        logging.info(f"Transcriptions status: {transcription.status}")

        if transcription.status in ("Failed", "Succeeded"):
            completed = True

        if transcription.status == "Succeeded":
            pag_files = api.transcriptions_list_files(transcription_id)
            print(pag_files)
            for file_data in _paginate(api, pag_files):
                if file_data.kind != "Transcription":
                    continue

                audiofilename = file_data.name
                results_url = file_data.links.content_url
                results = requests.get(results_url)
                logging.info(f"Results for {audiofilename}:\n{results.content.decode('utf-8')}")
        elif transcription.status == "Failed":
            logging.info(f"Transcription failed: {transcription.properties.error.message}")

In [4]:
transcribe()

#### Using REST API 3.1

In [2]:
import json
import requests

In [37]:
endpoint="https://{service_region}.api.cognitive.microsoft.com/speechtotext/v3.1/transcriptions"
headers = {
    'Ocp-Apim-Subscription-Key': '{subscription_key}',
    'Content-Type': 'application/json',
}


In [38]:

raw_data={
  "properties": {
    "diarizationEnabled": True,
    "wordLevelTimestampsEnabled": False,
    "displayFormWordLevelTimestampsEnabled": False,
    "channels": [
      0,1
    ],
    "destinationContainerUrl":"https://adfstoragejr.blob.core.windows.net/medicaltracriptionarab?sp=racwl&st=2023-06-06T08:00:35Z&se=2023-06-30T16:00:35Z&spr=https&sv=2022-11-02&sr=c&sig=6hPPJ7e74Ouw527%2FwFM%2FrBCkcbTrHwV0nJN4cT%2BIy%2B8%3D",#"https://adfstoragejr.blob.core.windows.net/malayalamtarget?sv=2021-08-06&st=2023-04-13T08%3A55%3A11Z&se=2023-05-31T08%3A55%3A00Z&sr=c&sp=rwl&sig=HADjuUL4UvwNsLmVe712HVYJg5cBAKdsHDfnG7lZV14%3D",#https://adfstoragejr.blob.core.windows.net/malayalamtarget?sv=2021-08-06&st=2023-04-13T07%3A35%3A11Z&se=2023-05-31T07%3A35%3A00Z&sr=c&sp=rwl&sig=XEJzGgKMmVfmwVxjAtTUh0y5agKb%2BY7xhIDW4%2BORpLM%3D", #"https://adfstoragejr.blob.core.windows.net/targetbatch?sv=2021-08-06&st=2023-02-19T02%3A59%3A15Z&se=2023-03-31T02%3A59%3A00Z&sr=c&sp=racwdxltf&sig=jU3Z6mKeRRlhCbiBNHZ%2FJTgswzupqvQ%2Be8athgrFgKc%3D",
    "punctuationMode": "DictatedAndAutomatic",
    "profanityFilterMode": "Masked",
    "diarization": {
      "speakers": {
        "minCount": 1,
        "maxCount": 5
      }
    },
    #"languageIdentification": {
      #"candidateLocales": [
        #"ar-AE"
      #],     
    #},
  },


  "contentContainerUrl":"https://adfstoragejr.blob.core.windows.net/medicaltracriptionarab?sp=rl&st=2023-06-06T08:25:45Z&se=2023-06-30T16:25:45Z&spr=https&sv=2022-11-02&sr=c&sig=wXhIONyAa6Xm0I7ImGJP73QoD69IgO02EYMTyco8yos%3D",# "https://adfstoragejr.blob.core.windows.net/medicaltracriptionarab?sp=racwl&st=2023-06-06T08:06:58Z&se=2023-06-30T16:06:58Z&spr=https&sv=2022-11-02&sr=c&sig=t9aEU2zdZg4Sc7a%2FeEBsGU0QyuAyw3vpWgJUgLgiH9M%3D",#https://adfstoragejr.blob.core.windows.net/malyalamfile?sv=2021-08-06&st=2023-04-13T07%3A30%3A51Z&se=2023-05-31T07%3A30%3A00Z&sr=c&sp=rwl&sig=16sYMeBTzSbBeqdGev1jlERYGyvMEuw9ZXrDXIE4eWg%3D"# "https://adfstoragejr.blob.core.windows.net/speechbatchsource?sv=2021-08-06&st=2023-02-19T02%3A58%3A17Z&se=2023-03-31T02%3A58%3A00Z&sr=c&sp=rl&sig=tyx1y%2Ft8uL8hF9V3SPv2aNlVB6cS5%2BAscBgo%2B9C0o4Y%3D",
  "locale": "ar-AE",
  "displayName": "My Transcription",
  "description": "Test",
}
data=json.dumps(raw_data)

In [39]:
response = requests.post(endpoint, headers=headers, data=data)

In [40]:
response.status_code

201

In [3]:
response.json()

In [42]:
get_url=response.json()['links']['files']
get_url
response_results = requests.get(get_url, headers=headers)
response_results

<Response [200]>

In [5]:
file_data=response_results.json()
file_data

In [6]:
get_transcription_url=response_results.json()['values'][0]['links']['contentUrl']

print(get_transcription_url)

sa_url_arabic="SAS_TOKEN for the destination url"


In [45]:
response_arabic=requests.get(get_transcription_url+sa_url_arabic).content



In [46]:
arabic_transcripts=json.loads(response_arabic)

In [8]:
arabic_transcripts

In [303]:
#arabic_transcripts['recognizedPhrases'][0]

In [48]:
speaker=[]
#speaker2=[]
for i in  range(len(arabic_transcripts['recognizedPhrases'])):
    #print(i)
    if arabic_transcripts['recognizedPhrases'][i]['recognitionStatus']=='Success':
        if arabic_transcripts['recognizedPhrases'][i]['speaker']==1:
            for ii in range(len(arabic_transcripts['recognizedPhrases'][i]['nBest'])):
                speaker.append(("speaker 1: ", arabic_transcripts['recognizedPhrases'][i]['nBest'][ii]['display']))

        if arabic_transcripts['recognizedPhrases'][i]['speaker']==2:
            for ii in range(len(arabic_transcripts['recognizedPhrases'][i]['nBest'])):
                speaker.append(("speaker 2: ",arabic_transcripts['recognizedPhrases'][i]['nBest'][ii]['display']))

        if arabic_transcripts['recognizedPhrases'][i]['speaker']==3:
            for ii in range(len(arabic_transcripts['recognizedPhrases'][i]['nBest'])):
                speaker.append(("speaker 3: ",arabic_transcripts['recognizedPhrases'][i]['nBest'][ii]['display']))





In [49]:
speaker

[('speaker 1: ', 'هنا.'),
 ('speaker 2: ', 'خبرني.'),
 ('speaker 1: ', 'شو المشكلة؟'),
 ('speaker 3: ', 'دكتور؟ أحس بألم في بطني؟'),
 ('speaker 2: ', 'من كم يوم.'),
 ('speaker 3: ', 'تقريبا من أسبوع.'),
 ('speaker 1: ', 'تحس بالألم قبل الأكل ولا بعد الأكل؟'),
 ('speaker 3: ', 'بعد الأكل؟'),
 ('speaker 2: ', 'إنزين.'),
 ('speaker 1: ', 'وين تحس بالألم؟ فوق ولا تحت؟'),
 ('speaker 3: ', 'اهني فوق؟'),
 ('speaker 2: ', 'الظاهر عندك حموضة؟'),
 ('speaker 2: ', 'بس لازم نسويلك فحوصات الزيادة عشان نتأكد.'),
 ('speaker 2: ', 'عندك تأمين؟'),
 ('speaker 3: ', 'هيه، عندي تأمين.')]

#### Azure OpenAI

In [9]:
#Note: The openai-python library support for Azure OpenAI is in preview.
import os
import openai
openai.api_type = "azure"
openai.api_base = "https://{resourcename}.openai.azure.com/"
openai.api_version = "2022-12-01"
openai.api_key = "OPENAI_API_KEY"

response = openai.Completion.create(
  engine="text-davinci-003",
  prompt="Translate the below conversation to English and then summarize.\n\n\n('speaker 1: ', 'هنا.'),\n ('speaker 2: ', 'خبرني.'),\n ('speaker 1: ', 'شو المشكلة؟'),\n ('speaker 3: ', 'دكتور؟ أحس بألم في بطني؟'),\n ('speaker 2: ', 'من كم يوم.'),\n ('speaker 3: ', 'تقريبا من أسبوع.'),\n ('speaker 1: ', 'تحس بالألم قبل الأكل ولا بعد الأكل؟'),\n ('speaker 3: ', 'بعد الأكل؟'),\n ('speaker 2: ', 'إنزين.'),\n ('speaker 1: ', 'وين تحس بالألم؟ فوق ولا تحت؟'),\n ('speaker 3: ', 'اهني فوق؟'),\n ('speaker 2: ', 'الظاهر عندك حموضة؟'),\n ('speaker 2: ', 'بس لازم نسويلك فحوصات الزيادة عشان نتأكد.'),\n ('speaker 2: ', 'عندك تأمين؟'),\n ('speaker 3: ', 'هيه، عندي تأمين.')\n\n",
  temperature=0.42,
  max_tokens=953,
  top_p=0.16,
  frequency_penalty=0,
  presence_penalty=0,
  best_of=1,
  stop=None)

In [2]:
from pprint import pprint
pprint(response.choices[0].text)

('\n'
 'Speaker 1: "Here."\n'
 'Speaker 2: "Tell me."\n'
 'Speaker 1: "What\'s the problem?"\n'
 'Speaker 3: "Doctor? I feel pain in my stomach?"\n'
 'Speaker 2: "How long?"\n'
 'Speaker 3: "About a week."\n'
 'Speaker 1: "Do you feel the pain before or after eating?"\n'
 'Speaker 3: "After eating?"\n'
 'Speaker 2: "Okay."\n'
 'Speaker 1: "Where do you feel the pain? Above or below?"\n'
 'Speaker 3: "Above?"\n'
 'Speaker 2: "Do you have acidity?"\n'
 'Speaker 2: "We need to do some tests to make sure."\n'
 'Speaker 2: "Do you have insurance?"\n'
 'Speaker 3: "Yes, I have insurance."\n'
 '\n'
 'Speaker 1 and 2 are asking Speaker 3 questions about their stomach pain. '
 'Speaker 3 has been feeling the pain for about a week and it occurs after '
 'eating. Speaker 2 suspects Speaker 3 has acidity and suggests they do some '
 'tests to confirm. Speaker 3 confirms they have insurance.')
