## Text-to-Speech generation with Azure OpenAI TTS models

In [1]:
# Importing required packages
from openai import AzureOpenAI
import IPython
import requests
import os

In [2]:
# Azure OpenAI connection settings, looked up in the process environment.
# Any variable that is not set resolves to None.
AOAI_API_BASE = os.environ.get("AZURE_OPENAI_API_BASE")
AOAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
AOAI_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION")
AOAI_DEPLOYMENT = os.environ.get("AZURE_OPENAI_API_DEPLOY_TTS")

In [3]:
# Destination files for the generated speech, both placed in the current
# working directory (one file per example in this notebook).
TTS_OUTPUT1, TTS_OUTPUT2 = (
    os.path.join(os.getcwd(), filename)
    for filename in ("menu1.mp3", "menu2.mp3")
)

### Option 1: Python SDK v1

In [4]:
# Build the Azure OpenAI SDK client from the endpoint, deployment,
# key and API version held in the AOAI_* variables
client = AzureOpenAI(
    azure_endpoint=AOAI_API_BASE,
    azure_deployment=AOAI_DEPLOYMENT,
    api_key=AOAI_API_KEY,
    api_version=AOAI_API_VERSION,
)

In [5]:
# Ask the speech endpoint, through the SDK client, to synthesize the menu text
response = client.audio.speech.create(
    input="Menu options include vegetarian lasagna, beef bourguignon, and pan-seared salmon.",
    voice="alloy",
    model="tts-1",
)

In [6]:
# Saving TTS output to file: the audio returned by the SDK call is written
# to the first destination path (TTS_OUTPUT1)
response.write_to_file(TTS_OUTPUT1)

In [7]:
# Playing generated MP3 file: build an IPython Audio object from the file saved
# at TTS_OUTPUT1; as the cell's last expression it becomes the cell output
IPython.display.Audio(TTS_OUTPUT1)

### Option 2: REST API

In [8]:
# Preparing endpoint, headers and request payload.
# The configured base URL may or may not end with "/", so any trailing slash
# is stripped and a single separator is added explicitly; plain concatenation
# would produce a malformed URL ("...azure.comopenai/...") for a base without one.
endpoint = (
    f"{(AOAI_API_BASE or '').rstrip('/')}"
    f"/openai/deployments/{AOAI_DEPLOYMENT}/audio/speech"
    f"?api-version={AOAI_API_VERSION}"
)

# The API key is sent in the "api-key" request header
headers = {
    "Content-Type": "application/json",
    "api-key": AOAI_API_KEY,
}

# JSON body of the request: model, voice and the text to be spoken
data = {
    "model": "tts-1",
    "voice": "onyx",
    "input": "Menu options include vegetarian lasagna, beef bourguignon, and pan-seared salmon."
}

In [9]:
# Calling Azure OpenAI endpoint via REST API.
# A timeout is set so that an unresponsive endpoint raises an exception
# instead of blocking the notebook kernel indefinitely.
response = requests.post(
    url=endpoint,
    headers=headers,
    json=data,
    timeout=60,  # seconds
)

# Checking the response
if response.status_code == 200:
    # Getting the audio content (raw bytes of the response body)
    audio_content = response.content

    # Saving TTS output to file
    with open(TTS_OUTPUT2, "wb") as f:
        f.write(audio_content)

    # Report success only once the file has been written and closed
    print("Audio downloaded and saved as MP3 file")
else:
    print(f"Error: API call failed with status code {response.status_code}")
    # The status code alone rarely explains the failure; show the body too
    print(response.text)

Audio downloaded and saved as MP3 file


In [10]:
# Playing generated MP3 file: build an IPython Audio object from the file saved
# at TTS_OUTPUT2; as the cell's last expression it becomes the cell output
IPython.display.Audio(TTS_OUTPUT2)