In [6]:
# ⬇️ Устанавливаем pydub и ffmpeg
!pip install pydub --quiet
!apt-get install -y ffmpeg

from pydub.utils import mediainfo
import os

# Folder that holds the generated MP3 files
output_dir = "output"

# Print the duration of every MP3 in the folder, in file-name order
for filename in sorted(os.listdir(output_dir)):
    if not filename.endswith(".mp3"):
        continue
    filepath = os.path.join(output_dir, filename)
    info = mediainfo(filepath)
    duration = float(info['duration'])
    # divmod yields (whole minutes, leftover seconds) as floats; truncate both for display
    minutes, seconds = (int(part) for part in divmod(duration, 60))
    print(f"{filename}: {minutes}:{seconds:02} мин ({duration:.2f} сек)")

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
01.mp3: 8:31 мин (511.51 сек)
02.mp3: 8:41 мин (521.66 сек)
03.mp3: 8:45 мин (525.41 сек)
04.mp3: 9:10 мин (550.97 сек)
05.mp3: 8:22 мин (502.99 сек)
06.mp3: 8:24 мин (504.14 сек)
07.mp3: 9:07 мин (547.68 сек)
08.mp3: 9:57 мин (597.50 сек)
09.mp3: 8:35 мин (515.66 сек)
10.mp3: 9:03 мин (543.26 сек)
11.mp3: 8:52 мин (532.25 сек)
12.mp3: 8:59 мин (539.66 сек)
13.mp3: 8:34 мин (514.32 сек)
14.mp3: 8:48 мин (528.70 сек)
15.mp3: 8:28 мин (508.73 сек)
16.mp3: 9:14 мин (554.66 сек)
17.mp3: 8:58 мин (538.03 сек)
18.mp3: 8:45 мин (525.98 сек)
19.mp3: 8:46 мин (526.15 сек)
20.mp3: 8:57 мин (537.72 сек)
21.mp3: 8:47 мин (527.76 сек)
22.mp3: 8:49 мин (529.82 сек)
23.mp3: 8:13 мин (493.99 сек)
24.mp3: 8:41 мин (521.74 сек)
25.mp3: 8:42 мин (522.38 сек)
26.mp3: 8:43 мин (523.49

In [5]:
# Build a ZIP archive from the folder named by OUTPUT_DIR.
# make_archive returns the path of the file it created, so that path is reused
# below instead of spelling the archive name a second time.
import shutil
archive_path = shutil.make_archive("audio_chunks", 'zip', OUTPUT_DIR)

# Hand the archive to the browser as a download (Colab only)
from google.colab import files
files.download(archive_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [4]:
import os
import asyncio
import nltk
import edge_tts
import nest_asyncio
nest_asyncio.apply()

# Download the NLTK 'punkt_tab' tokenizer resource
nltk.download('punkt_tab')

# Settings
INPUT_FILE = "fantom.txt"
OUTPUT_DIR = "output"
VOICE = "ru-RU-SvetlanaNeural"
MAX_LEN = 7000  # maximum chunk length in characters, spaces included

# Create the output folder
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Функция: разбиваем на чанки по предложениям
from nltk.tokenize import sent_tokenize

def split_text_into_chunks(text, max_len=MAX_LEN):
    """Split text into chunks of at most `max_len` characters on sentence boundaries.

    Sentences (as found by `sent_tokenize`) are packed greedily, joined by a
    single space. A sentence longer than `max_len` is cut into consecutive
    `max_len`-sized slices.

    Args:
        text: The text to split.
        max_len: Maximum length of one chunk in characters; must be positive.

    Returns:
        A list of non-empty, stripped chunk strings in reading order.

    Raises:
        ValueError: If `max_len` is not positive.
    """
    if max_len <= 0:
        # A zero step would crash range() and a negative one would silently drop text.
        raise ValueError("max_len must be a positive integer")

    sentences = sent_tokenize(text)
    chunks = []
    current_chunk = ""

    def flush():
        # Emit the buffered chunk, never an empty one.
        nonlocal current_chunk
        stripped = current_chunk.strip()
        if stripped:
            chunks.append(stripped)
        current_chunk = ""

    for sentence in sentences:
        if len(sentence) > max_len:
            # Oversized sentence: close the running chunk, then hard-split the sentence.
            flush()
            for i in range(0, len(sentence), max_len):
                piece = sentence[i:i + max_len].strip()
                if piece:
                    chunks.append(piece)
            continue

        if not current_chunk:
            # Start a new chunk without a leading separator, so a sentence of
            # exactly max_len characters fits and no empty chunk is emitted.
            current_chunk = sentence
        elif len(current_chunk) + 1 + len(sentence) <= max_len:
            current_chunk += " " + sentence
        else:
            flush()
            current_chunk = sentence

    flush()

    return chunks

# Основной процесс
async def process_chunks():
    """Read INPUT_FILE, split it into chunks and save one MP3 per chunk.

    Chunks come from `split_text_into_chunks`. Each one is synthesized with
    VOICE and saved into OUTPUT_DIR under its 1-based, zero-padded index.
    Chunks are processed sequentially; each saved path is printed.
    """
    # Read the source text
    with open(INPUT_FILE, "r", encoding="utf-8") as f:
        text = f.read()

    chunks = split_text_into_chunks(text)

    # Pad every index to the width of the largest one (never fewer than two
    # digits, as before) so that file names still sort in order past 99 chunks.
    width = max(2, len(str(len(chunks))))

    for i, chunk in enumerate(chunks, 1):
        communicate = edge_tts.Communicate(chunk, VOICE)
        filename = os.path.join(OUTPUT_DIR, f"{i:0{width}}.mp3")
        await communicate.save(filename)
        print(f"Сохранено: {filename}")

# Run the chunk-by-chunk synthesis
await process_chunks()

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Сохранено: output/01.mp3
Сохранено: output/02.mp3
Сохранено: output/03.mp3
Сохранено: output/04.mp3
Сохранено: output/05.mp3
Сохранено: output/06.mp3
Сохранено: output/07.mp3
Сохранено: output/08.mp3
Сохранено: output/09.mp3
Сохранено: output/10.mp3
Сохранено: output/11.mp3
Сохранено: output/12.mp3
Сохранено: output/13.mp3
Сохранено: output/14.mp3
Сохранено: output/15.mp3
Сохранено: output/16.mp3
Сохранено: output/17.mp3
Сохранено: output/18.mp3
Сохранено: output/19.mp3
Сохранено: output/20.mp3
Сохранено: output/21.mp3
Сохранено: output/22.mp3
Сохранено: output/23.mp3
Сохранено: output/24.mp3
Сохранено: output/25.mp3
Сохранено: output/26.mp3
Сохранено: output/27.mp3
Сохранено: output/28.mp3
Сохранено: output/29.mp3
Сохранено: output/30.mp3
Сохранено: output/31.mp3
Сохранено: output/32.mp3
Сохранено: output/33.mp3
Сохранено: output/34.mp3
Сохранено: output/35.mp3
Сохранено: output/36.mp3
Сохранено: output/37.mp3
Сохранено: output/38.mp3
Сохранено: output/39.mp3
Сохранено: output/40.mp3


CancelledError: 

In [1]:
pip install edge-tts nltk

Collecting edge-tts
  Downloading edge_tts-7.0.2-py3-none-any.whl.metadata (5.5 kB)
Collecting srt<4.0.0,>=3.4.1 (from edge-tts)
  Downloading srt-3.5.3.tar.gz (28 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading edge_tts-7.0.2-py3-none-any.whl (26 kB)
Building wheels for collected packages: srt
  Building wheel for srt (setup.py) ... [?25l[?25hdone
  Created wheel for srt: filename=srt-3.5.3-py3-none-any.whl size=22427 sha256=4c54be4cf15781148994577ace19a982f42f733990d7667cae5428796ebe94b4
  Stored in directory: /root/.cache/pip/wheels/1f/43/f1/23ee9119497fcb57d9f7046fbf34c6d9027c46a1fa7824cf08
Successfully built srt
Installing collected packages: srt, edge-tts
Successfully installed edge-tts-7.0.2 srt-3.5.3


In [None]:
import os
import asyncio
import edge_tts
import nest_asyncio
nest_asyncio.apply()

# Folder with the chapter text files, and the folders the results are written to
chapters_dir = "chapters_txt"
audio_dir = "chapters_mp3"
vtt_dir = "chapters_vtt"

os.makedirs(audio_dir, exist_ok=True)
os.makedirs(vtt_dir, exist_ok=True)

# Default voice (can be replaced with any of SUPPORTED_VOICES)
VOICE = "ru-RU-SvetlanaNeural"
RATE = "+0%"
VOLUME = "+0%"

# Обработка одного файла
async def convert_one(txt_path, mp3_path, vtt_path):
    """Synthesize one text file to MP3 and save the reported words as plain text.

    Args:
        txt_path: UTF-8 text file to read.
        mp3_path: Destination for the audio data, written as it is streamed.
        vtt_path: Destination for the words reported by "WordBoundary" events.
            Despite the name, the file holds only the words joined by spaces,
            with no timestamps, so it is not a valid WebVTT file.
    """
    with open(txt_path, "r", encoding="utf-8") as f:
        text = f.read()

    communicate = edge_tts.Communicate(text=text, voice=VOICE, rate=RATE, volume=VOLUME)
    words = []
    with open(mp3_path, "wb") as audio_file:
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_file.write(chunk["data"])
            elif chunk["type"] == "WordBoundary":
                words.append(chunk["text"])

    # Timing information is deliberately dropped: only the words are stored,
    # separated by single spaces.
    with open(vtt_path, "w", encoding="utf-8") as f:
        f.write(" ".join(words))

# Обработка всех файлов
async def batch_convert():
    """Convert every ``.txt`` file in `chapters_dir`, one after another, in sorted name order.

    For each file the MP3 goes to `audio_dir` and the word list to `vtt_dir`,
    both named after the text file's stem. A progress line is printed before
    each conversion.
    """
    txt_names = [entry for entry in os.listdir(chapters_dir) if entry.endswith(".txt")]
    txt_names.sort()
    for txt_name in txt_names:
        stem, _ext = os.path.splitext(txt_name)  # e.g. Глава_01
        source = os.path.join(chapters_dir, txt_name)
        audio_target = os.path.join(audio_dir, stem + ".mp3")
        words_target = os.path.join(vtt_dir, stem + ".vtt")
        print(f"🎙️ {txt_name} → 🎧 {audio_target}, 📝 {words_target}")
        await convert_one(source, audio_target, words_target)
# Запускаем
await batch_convert()

print("✅ Все главы озвучены и сохранены в", output_dir)

🎙️ 01.txt → 🎧 chapters_mp3/01.mp3, 📝 chapters_vtt/01.vtt
🎙️ 02.txt → 🎧 chapters_mp3/02.mp3, 📝 chapters_vtt/02.vtt
🎙️ 03.txt → 🎧 chapters_mp3/03.mp3, 📝 chapters_vtt/03.vtt
🎙️ 04.txt → 🎧 chapters_mp3/04.mp3, 📝 chapters_vtt/04.vtt
🎙️ 05.txt → 🎧 chapters_mp3/05.mp3, 📝 chapters_vtt/05.vtt


In [None]:
import os
import re
import shutil
from docx import Document

# Path to the .docx file
input_file = "Fantom.docx"  # replace with the path to your book
output_dir = "chapters_txt"

# Start from an empty output folder: delete it first if it already exists
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir, exist_ok=True)

# Load the document
doc = Document(input_file)

chapters = []  # one list of paragraph strings per output file
current_chapter = []  # paragraphs collected since the last chapter heading
chapter_num = 0  # counts heading matches; not read anywhere else in this cell

# Pattern that marks the start of a chapter: "Глава" followed by a number, any letter case
chapter_pattern = re.compile(r"^Глава\s+\d+", re.IGNORECASE)

# Split into chapters
for para in doc.paragraphs:
    text = para.text.strip()
    if not text:
        # Skip empty paragraphs
        continue

    if chapter_pattern.match(text):
        # A heading closes the block collected so far. Any paragraphs that
        # precede the first heading therefore become the first entry.
        if current_chapter:
            chapters.append(current_chapter)
            current_chapter = []
        chapter_num += 1
    # The heading paragraph itself is kept as the first line of its chapter
    current_chapter.append(text)

# Flush the last collected block
if current_chapter:
    chapters.append(current_chapter)

# Number of digits in the file number, e.g. 01, 02, ..., 10, 11 and so on
digits = len(str(len(chapters)))

# Save the chapters, one file each, paragraphs separated by a blank line
for i, chapter in enumerate(chapters, 1):
    filename = os.path.join(output_dir, f"{i:0{digits}}.txt")
    with open(filename, "w", encoding="utf-8") as f:
        f.write("\n\n".join(chapter))

print(f"✅ Разделено на {len(chapters)} глав. Файлы сохранены в папке '{output_dir}'")

✅ Разделено на 25 глав. Файлы сохранены в папке 'chapters_txt'


In [None]:
pip install python-docx

Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/253.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━[0m [32m163.8/253.0 kB[0m [31m4.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.2.0


In [None]:
import gradio as gr
import edge_tts
import asyncio
import os
# Voice list endpoint: https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4
# Take a voice's ShortName from that list and use it as a value below.
# Maps the label shown in the voice dropdown to the voice name passed to edge_tts.
SUPPORTED_VOICES = {
    'DmitryNeural-Руский(муж.)': 'ru-RU-DmitryNeural',
    'SvetlanaNeural-Русский(жен.)': 'ru-RU-SvetlanaNeural',
    'OstapNeural-Украинский(муж.)': 'uk-UA-OstapNeural',
    'PolinaNeural-Украинский(жен.)': 'uk-UA-PolinaNeural'
}

# Смена голоса
def changeVoice(voices):
    """Return the path of the sample clip for the selected voice.

    `voices` is a key of SUPPORTED_VOICES. The result is
    ``<current working directory>/example/<voice name>.wav``; whether the file
    exists is not checked.
    """
    short_name = SUPPORTED_VOICES[voices]
    return os.path.join(os.getcwd(), f"example/{short_name}.wav")

# Преобразование текста в речь
async def textToSpeech(text, voices, rate, volume):
    """Synthesize `text` with the selected voice and return the path of the MP3.

    Args:
        text: Text to speak.
        voices: Voice label; must be a key of SUPPORTED_VOICES.
        rate: Speech-rate change in percent (negative values slow speech down).
        volume: Volume change in percent (negative values lower it).

    Returns:
        Absolute path of ``output.mp3`` in the current working directory.

    Raises:
        KeyError: If `voices` is not a key of SUPPORTED_VOICES.
        gr.Error: If the output file does not exist after synthesis.
    """
    # Build the path once and use it both for saving and for the existence
    # check, so the two can never point at different files.
    output_file = os.path.join(os.getcwd(), "output.mp3")
    voice = SUPPORTED_VOICES[voices]
    # Signed whole-number percentages such as "+0%" or "-25%". int() keeps a
    # float value like 5.0 from being rendered as "+5.0%".
    rates = f"{int(rate):+d}%"
    volumes = f"{int(volume):+d}%"
    communicate = edge_tts.Communicate(text,
                                       voice,
                                       rate=rates,
                                       volume=volumes,
                                       proxy=None)
    await communicate.save(output_file)
    if not os.path.exists(output_file):
        raise gr.Error("Преобразование не удалось！")
    return output_file


# Сбросить результат конвертации
def clearSpeech():
    """Delete ``output.mp3`` from the current working directory if it is there.

    Returns:
        A ``(None, None)`` pair in every case.
    """
    target = os.path.join(os.getcwd(), "output.mp3")
    if not os.path.exists(target):
        return None, None
    os.remove(target)
    return None, None


# Two-column page: text input and submit button on the left; voice selection,
# sample clip, rate/volume sliders and the generated audio on the right.
with gr.Blocks(css="style.css", title="Преобразование текста в речь") as demo:
    gr.Markdown("""
    # Преобразование текста в речь через Microsoft Edge
    """)
    with gr.Row():
        with gr.Column():
            text = gr.TextArea(label="Текст", elem_classes="text-area")
            btn = gr.Button("Сгенерировать", elem_id="submit-btn")
        with gr.Column():
            # The choices are taken from SUPPORTED_VOICES, so every selectable
            # label is guaranteed to be a key that the handlers can look up.
            voices = gr.Dropdown(choices=list(SUPPORTED_VOICES),
                                 value="DmitryNeural-Руский(муж.)",
                                 label="Голос",
                                 info="Пожалуйста, выберите голос",
                                 interactive=True)

            example = gr.Audio(label="Пример голоса",
                              value="/content/rus-edge-tts-webui/example/ru-RU-DmitryNeural.wav",
                              interactive=False,
                              elem_classes="example")

            # Swap the sample clip whenever another voice is selected
            voices.change(fn=changeVoice,inputs=voices,outputs=example)
            rate = gr.Slider(-100,
                             100,
                             step=1,
                             value=0,
                             label="Увеличение / уменьшение скорости речи",
                             info="Скорость речи быстрее / медленнее",
                             interactive=True)

            volume = gr.Slider(-100,
                               100,
                               step=1,
                               value=0,
                               label="Увеличение / уменьшение громкости звука",
                               info="Увеличить / уменьшить громкость звука",
                               interactive=True)
            audio = gr.Audio(label="Результат",
                             interactive=False,
                             elem_classes="audio")
            # Generate speech from the current text and settings
            btn.click(fn=textToSpeech,
                      inputs=[text, voices, rate, volume],
                      outputs=[audio])

if __name__ == "__main__":
    demo.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://60bb20847775575d17.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7862 <> https://60bb20847775575d17.gradio.live


In [None]:
!pip install edge-tts
!pip install gradio
!pip install asyncio
!git clone https://github.com/hinaichigo-fox/rus-edge-tts-webui.git

fatal: destination path 'rus-edge-tts-webui' already exists and is not an empty directory.
