### Зависимости

In [None]:
!pip install httpx aiofiles

### Импорты

In [None]:
import os
from datetime import datetime as dt
from uuid import uuid4

import aiofiles as aiof
import httpx
import IPython

In [None]:
# Authorization key sent as "Authorization: Basic <key>" when requesting an
# access token. Read from the environment so the secret is never stored in the
# notebook; set SBER_AUTH_KEY before starting the kernel.
AUTH_KEY = os.environ.get("SBER_AUTH_KEY", "")
# Value of the `scope` form field sent to the OAuth endpoint.
SBER_API_SCOPE = "SALUTE_SPEECH_PERS"
# Cached access token; `expires_at` is treated as a Unix timestamp in
# milliseconds by get_access_token().
TOKEN = {"access_token": "", "expires_at": 0}
# OAuth endpoint that issues access tokens.
SBER_OAUTH_URL = "https://ngw.devices.sberbank.ru:9443/api/v2/oauth"
# Base URL of the speech REST API (synthesis and recognition).
SBER_SPEACH_URL = "https://smartspeech.sber.ru/rest/v1"
# `format` query parameter of the synthesis request.
SPEECH_FILE_FORMAT = "opus"
# File extension for audio in the format above.
SPEECH_FILE_EXTENTION = "ogg"
# Content-Type header used when uploading audio for recognition.
SPEECH_CONTENT_TYPE = "audio/ogg;codecs=opus"
# `voice` query parameter of the synthesis request.
VOICE_TYPE = "May_24000"

### Функция получения `access_token`а

Обновляет `access_token`, если он не определён или срок его действия истёк.

In [None]:
async def get_access_token() -> str:
    """Return an access token, refreshing the cached one when necessary.

    The token is cached in the module-level ``TOKEN`` dict. A new token is
    requested from ``SBER_OAUTH_URL`` when nothing is cached or when the cached
    token expires within the next minute. ``TOKEN["expires_at"]`` is treated as
    a Unix timestamp in milliseconds.

    Returns:
        The access token, or an empty string if the OAuth endpoint answers with
        a non-200 status (the response body is printed in that case).
    """
    # Refresh a little before the actual expiry so that a token handed to a
    # caller does not expire while the caller's own request is in flight.
    refresh_margin_ms = 60_000
    now_ms = dt.now().timestamp() * 1000
    if TOKEN["access_token"] and TOKEN["expires_at"] - refresh_margin_ms > now_ms:
        return TOKEN["access_token"]

    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "application/json",
        # A fresh request id is generated for every token request.
        "RqUID": str(uuid4()),
        "Authorization": f"Basic {AUTH_KEY}",
    }
    # NOTE(review): verify=False disables TLS certificate verification.
    # Presumably the endpoint's CA is missing from the default trust store;
    # prefer passing that CA bundle via `verify=` instead — confirm and replace.
    async with httpx.AsyncClient(verify=False) as client:
        resp = await client.post(
            url=SBER_OAUTH_URL,
            headers=headers,
            data={"scope": SBER_API_SCOPE},
        )
    if resp.status_code != 200:
        print(resp.content)
        return ""
    token = resp.json()
    TOKEN["access_token"] = token["access_token"]
    TOKEN["expires_at"] = token["expires_at"]
    return TOKEN["access_token"]

### Функция преобразования текста в аудиофайл

принимает `text` - текст для преобразования в аудиофайл

возвращает `file_name` - имя сгенерированного аудиофайла

In [None]:
async def text_to_speech(text: str) -> str:
    """Synthesize ``text`` into an audio file via the speech REST API.

    Args:
        text: Text to convert to speech.

    Returns:
        Name of the generated audio file (created relative to the current
        working directory), or an empty string if ``text`` is blank, no access
        token could be obtained, or the API answers with a non-200 status.
        The reason is printed in each failure case.
    """
    # Blank input cannot be synthesized; bail out before any network call.
    if not text or not text.strip():
        print("Nothing to synthesize: text is empty")
        return ""
    acc_token: str = await get_access_token()
    if not acc_token:
        print("Authentication error")
        return ""
    headers = {
        "Authorization": f"Bearer {acc_token}",
        "Content-Type": "application/text",
    }
    params = {
        "format": SPEECH_FILE_FORMAT,
        "voice": VOICE_TYPE,
    }
    # NOTE(review): verify=False disables TLS certificate verification;
    # prefer supplying the proper CA bundle via `verify=`.
    async with httpx.AsyncClient(verify=False) as client:
        resp = await client.post(
            url=f"{SBER_SPEACH_URL}/text:synthesize",
            headers=headers,
            params=params,
            content=text,
        )
    if resp.status_code != 200:
        print(resp.content)
        return ""
    # Use the configured extension so it stays in sync with SPEECH_FILE_FORMAT.
    file_name = f"speech_{uuid4()}.{SPEECH_FILE_EXTENTION}"
    # Leaving the context manager closes the file, which also flushes it.
    async with aiof.open(file_name, "wb") as f:
        await f.write(resp.content)
    return file_name

### Функция для распознавания текста из аудиофайла

принимает:

`speech_file` - имя файла для распознавания

`language` - язык для распознавания (доступны русский `ru-RU`, английский `en-US` и казахский `kk-KZ`)

возвращает:

`speech_text` - распознанный текст

In [None]:
async def speech_to_text(speech_file: str, language: str) -> str:
    """Recognize speech from an audio file via the speech REST API.

    Args:
        speech_file: Name of the audio file to recognize. The file content is
            uploaded with the ``SPEECH_CONTENT_TYPE`` content type.
        language: Recognition language code, passed as the ``language`` query
            parameter (e.g. ``"ru-RU"``).

    Returns:
        The first recognized text from the response, or an empty string if
        ``speech_file`` is empty, no access token could be obtained, the API
        answers with a non-200 status, or the response contains no result.
    """
    # An empty name is what text_to_speech() returns on failure; do not try to
    # open it (that would raise) and do not waste a token request.
    if not speech_file:
        print("No audio file to recognize")
        return ""
    acc_token: str = await get_access_token()
    if not acc_token:
        print("Authentication error")
        return ""
    headers = {
        "Authorization": f"Bearer {acc_token}",
        "Content-Type": SPEECH_CONTENT_TYPE,
    }
    params = {
        "language": language,
    }
    async with aiof.open(speech_file, "rb") as f:
        audio = await f.read()
    # NOTE(review): verify=False disables TLS certificate verification;
    # prefer supplying the proper CA bundle via `verify=`.
    async with httpx.AsyncClient(verify=False) as client:
        resp = await client.post(
            url=f"{SBER_SPEACH_URL}/speech:recognize",
            headers=headers,
            params=params,
            content=audio,
        )
    if resp.status_code != 200:
        print(resp.content)
        return ""
    # The response may carry no recognized text; report it the same way as
    # other failures instead of raising on a missing key or empty list.
    results = resp.json().get("result") or []
    if not results:
        print(resp.content)
        return ""
    speech_text: str = results[0]
    return speech_text

### Примеры использования

In [None]:
# example of speech generation from text
speech_file_name: str = await text_to_speech(
    "Расскажи про самый большой памятник Ленину среди четырёх городов "
    "России, таких как Екатеринбург, Нижний Новгород, Владимир, Ярославль."
)
print(speech_file_name)
# example of speech recognition from audio file
recognized_text: str = await speech_to_text(speech_file_name, "ru-RU")
print(f"Recognized text is: {recognized_text}")
IPython.display.Audio(speech_file_name)