In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/ebooks-epubs/sample_01.epub
/kaggle/input/ebooks-epubs/Reverend_Insanity.epub


In [16]:
%%time
!sudo apt-get install ffmpeg

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 122 not upgraded.
CPU times: user 34.5 ms, sys: 22.1 ms, total: 56.6 ms
Wall time: 2.13 s


In [7]:
%%time
!pip uninstall pylibcugraph-cu12 -y

[0mCPU times: user 13.2 ms, sys: 18.8 ms, total: 32 ms
Wall time: 1.34 s


In [8]:
%%time
# !pip install ebooklib beautifulsoup4 kokoro>=0.9.2 soundfile torch pydub
!pip install ebooklib beautifulsoup4 kokoro>=0.9.2 soundfile torch pydub pylibraft-cu12==24.12.0 rmm-cu12==24.12.0

CPU times: user 43.6 ms, sys: 20.8 ms, total: 64.4 ms
Wall time: 3.5 s


In [9]:
!pip install tqdm



In [10]:
%%time
import os
import logging
import subprocess
from concurrent.futures import ThreadPoolExecutor
from ebooklib import epub
import ebooklib
from bs4 import BeautifulSoup
import soundfile as sf
import torch
import numpy as np
from tqdm import tqdm
import hashlib

# Placeholder for KPipeline (replace with your actual text-to-speech library)
class KPipeline:
    def __init__(self, lang_code, device):
        self.lang_code = lang_code
        self.device = device

    def __call__(self, text, voice):
        # Dummy implementation: replace with actual TTS logic
        import numpy as np
        for i in range(0, len(text), 100):  # Simulate chunked audio generation
            yield (None, None, np.random.rand(24000))  # 1-second dummy audio

class AudiobookCreator:
    def __init__(self, epub_path, output_dir="audiobooks", voice='af_heart', max_workers=4):
        """Initialize the AudiobookCreator with EPUB path and output settings."""
        self.epub_path = epub_path
        self.epub_name = os.path.splitext(os.path.basename(epub_path))[0]
        self.output_dir = os.path.join(output_dir, self.epub_name)
        self.txt_dir = os.path.join(self.output_dir, "txt")
        self.audio_dir = os.path.join(self.output_dir, "audio")
        self.voice = voice
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.pipeline = KPipeline(lang_code='a', device=self.device)  # Replace with actual TTS
        self.max_workers = max_workers
        self.chapter_names = []

        # Set up logging
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
        self.logger.addHandler(handler)

    def extract_chapters(self):
        """
        **Improvement 1: Enhanced Chapter Extraction**
        Extract chapters from EPUB, using TOC for titles or falling back to headings.
        """
        os.makedirs(self.txt_dir, exist_ok=True)
        try:
            book = epub.read_epub(self.epub_path)
        except Exception as e:
            self.logger.error(f"Failed to read EPUB file: {e}")
            return

        # Extract chapter names from TOC
        for item in book.toc:
            if isinstance(item, epub.Link):
                self.chapter_names.append(item.title)

        chapter_count = 0
        items = list(book.get_items_of_type(ebooklib.ITEM_DOCUMENT))
        for item in tqdm(items, desc="Extracting chapters"):
            chapter_count += 1
            content = item.get_content().decode('utf-8')
            soup = BeautifulSoup(content, 'html.parser')
            text = soup.get_text(separator='\n', strip=True)

            # Determine chapter title
            if chapter_count <= len(self.chapter_names):
                chapter_name = self.chapter_names[chapter_count - 1]
            else:
                # Fallback to first heading
                heading = soup.find(['h1', 'h2'])
                chapter_name = heading.get_text(strip=True) if heading else f"Chapter {chapter_count}"

            chapter_file = os.path.join(self.txt_dir, f"{chapter_name}.txt")
            try:
                with open(chapter_file, 'w', encoding='utf-8') as f:
                    f.write(text)
                self.logger.info(f"Saved chapter text: {chapter_file}")
            except Exception as e:
                self.logger.error(f"Error writing {chapter_file}: {e}")

    def text_to_audio(self):
        """
        **Improvement 2: Parallel Audio Generation**
        **Improvement 5: Robust Error Handling**
        **Improvement 6: Audio Caching**
        Convert text files to audio with parallel processing, retries, and caching.
        """
        os.makedirs(self.audio_dir, exist_ok=True)
        txt_files = [f for f in os.listdir(self.txt_dir) if f.endswith('.txt')]

        def generate_audio(txt_file):
            chapter_name = os.path.splitext(txt_file)[0]
            chapter_path = os.path.join(self.txt_dir, txt_file)
            audio_path = os.path.join(self.audio_dir, f"{chapter_name}.wav")
            cached_hash_path = audio_path + '.md5'

            # Check cache (Improvement 6)
            if os.path.exists(audio_path):
                with open(chapter_path, 'r', encoding='utf-8') as f:
                    text_hash = hashlib.md5(f.read().encode()).hexdigest()
                if os.path.exists(cached_hash_path):
                    with open(cached_hash_path, 'r') as f:
                        cached_hash = f.read()
                    if cached_hash == text_hash:
                        self.logger.info(f"Using cached audio for {chapter_name}")
                        return

            # Generate audio with retries (Improvement 5)
            for attempt in range(3):  # Retry up to 3 times
                try:
                    with open(chapter_path, "r", encoding="utf-8") as f:
                        text = f.read()
                    generator = self.pipeline(text, voice=self.voice)
                    combined_audio = []
                    for _, _, audio in generator:
                        combined_audio.append(audio)
                    combined_audio = np.concatenate(combined_audio)
                    sf.write(audio_path, combined_audio, 24000)
                    # Save hash for caching
                    with open(cached_hash_path, 'w') as f:
                        f.write(text_hash)
                    self.logger.info(f"Saved audio: {audio_path}")
                    break
                except Exception as e:
                    self.logger.error(f"Error converting {chapter_path} (attempt {attempt+1}): {e}")
                    if attempt == 2:
                        raise

        # Parallel processing (Improvement 2)
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            list(tqdm(executor.map(generate_audio, txt_files), total=len(txt_files), desc="Generating audio"))

    def create_audiobook(self):
        """
        **Improvement 3: Optimized Audio Combination**
        Combine audio files into a single M4B audiobook using ffmpeg.
        """
        audio_files = sorted([f for f in os.listdir(self.audio_dir) if f.endswith(".wav")])
        concat_file = os.path.join(self.output_dir, "concat.txt")
        with open(concat_file, 'w') as f:
            for audio_file in audio_files:
                f.write(f"file '{os.path.join(self.audio_dir, audio_file)}'\n")

        combined_path = os.path.join(self.output_dir, f"{self.epub_name}.m4b")
        cmd = [
            "ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_file,
            "-c", "copy", combined_path
        ]
        try:
            subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            self.logger.info(f"Audiobook created: {combined_path}")
        except subprocess.CalledProcessError as e:
            self.logger.error(f"FFmpeg error: {e.stderr.decode()}")

    def run(self):
        """Execute the full audiobook creation process."""
        self.extract_chapters()
        self.text_to_audio()
        self.create_audiobook()

CPU times: user 83 µs, sys: 18 µs, total: 101 µs
Wall time: 107 µs


In [14]:
%%time
import os
import logging
import subprocess
from concurrent.futures import ThreadPoolExecutor
from ebooklib import epub
import ebooklib
from bs4 import BeautifulSoup
import soundfile as sf
import torch
import numpy as np
from tqdm import tqdm
import hashlib

# Placeholder for KPipeline (replace with your actual text-to-speech library)
class KPipeline:
    def __init__(self, lang_code, device):
        self.lang_code = lang_code
        self.device = device

    def __call__(self, text, voice):
        # Dummy implementation: replace with actual TTS logic
        for i in range(0, len(text), 100):  # Simulate chunked audio generation
            yield (None, None, np.random.rand(24000))  # 1-second dummy audio

class AudiobookCreator:
    def __init__(self, epub_path, output_dir="audiobooks", voice='af_heart', max_workers=4):
        """Initialize the AudiobookCreator with EPUB path and output settings."""
        self.epub_path = epub_path
        self.epub_name = os.path.splitext(os.path.basename(epub_path))[0]
        self.output_dir = os.path.join(output_dir, self.epub_name)
        self.txt_dir = os.path.join(self.output_dir, "txt")
        self.audio_dir = os.path.join(self.output_dir, "audio")
        self.voice = voice
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.pipeline = KPipeline(lang_code='a', device=self.device)  # Replace with actual TTS
        self.max_workers = max_workers
        self.chapter_names = []

        # Set up logging
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
        self.logger.addHandler(handler)

    def extract_chapters(self):
        """
        Extract chapters from EPUB, using TOC for titles or falling back to headings.
        """
        os.makedirs(self.txt_dir, exist_ok=True)
        try:
            book = epub.read_epub(self.epub_path)
        except Exception as e:
            self.logger.error(f"Failed to read EPUB file: {e}")
            return

        # Extract chapter names from TOC
        for item in book.toc:
            if isinstance(item, epub.Link):
                self.chapter_names.append(item.title)

        chapter_count = 0
        items = list(book.get_items_of_type(ebooklib.ITEM_DOCUMENT))
        for item in tqdm(items, desc="Extracting chapters"):
            chapter_count += 1
            content = item.get_content().decode('utf-8')
            soup = BeautifulSoup(content, 'html.parser')
            text = soup.get_text(separator='\n', strip=True)

            # Determine chapter title
            if chapter_count <= len(self.chapter_names):
                chapter_name = self.chapter_names[chapter_count - 1]
            else:
                # Fallback to first heading
                heading = soup.find(['h1', 'h2'])
                chapter_name = heading.get_text(strip=True) if heading else f"Chapter {chapter_count}"

            chapter_file = os.path.join(self.txt_dir, f"{chapter_name}.txt")
            try:
                with open(chapter_file, 'w', encoding='utf-8') as f:
                    f.write(text)
                self.logger.info(f"Saved chapter text: {chapter_file}")
            except Exception as e:
                self.logger.error(f"Error writing {chapter_file}: {e}")

    def text_to_audio(self):
        """
        Convert text files to audio with parallel processing, retries, and caching.
        """
        os.makedirs(self.audio_dir, exist_ok=True)
        txt_files = [f for f in os.listdir(self.txt_dir) if f.endswith('.txt')]

        def generate_audio(txt_file):
            chapter_name = os.path.splitext(txt_file)[0]
            chapter_path = os.path.join(self.txt_dir, txt_file)
            audio_path = os.path.join(self.audio_dir, f"{chapter_name}.wav")
            cached_hash_path = audio_path + '.md5'

            # Check cache
            try:
                with open(chapter_path, 'r', encoding='utf-8') as f:
                    text = f.read()
                text_hash = hashlib.md5(text.encode()).hexdigest()
                if os.path.exists(audio_path) and os.path.exists(cached_hash_path):
                    with open(cached_hash_path, 'r') as f:
                        cached_hash = f.read().strip()
                    if cached_hash == text_hash:
                        self.logger.info(f"Using cached audio for {chapter_name}")
                        return
            except Exception as e:
                self.logger.error(f"Error checking cache for {chapter_path}: {e}")

            # Generate audio with retries
            for attempt in range(3):  # Retry up to 3 times
                try:
                    generator = self.pipeline(text, voice=self.voice)
                    combined_audio = []
                    for _, _, audio in generator:
                        combined_audio.append(audio)
                    combined_audio = np.concatenate(combined_audio)
                    sf.write(audio_path, combined_audio, 24000)
                    # Save hash for caching
                    with open(cached_hash_path, 'w') as f:
                        f.write(text_hash)
                    self.logger.info(f"Saved audio: {audio_path}")
                    break
                except Exception as e:
                    self.logger.error(f"Error converting {chapter_path} (attempt {attempt+1}): {e}")
                    if attempt == 2:
                        self.logger.error(f"Failed to convert {chapter_path} after 3 attempts")
                        raise

        # Parallel processing
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            list(tqdm(executor.map(generate_audio, txt_files), total=len(txt_files), desc="Generating audio"))

    def create_audiobook(self):
        """
        Combine audio files into a single M4B audiobook using ffmpeg.
        """
        audio_files = sorted([f for f in os.listdir(self.audio_dir) if f.endswith(".wav")])
        concat_file = os.path.join(self.output_dir, "concat.txt")
        with open(concat_file, 'w') as f:
            for audio_file in audio_files:
                f.write(f"file '{os.path.join(self.audio_dir, audio_file)}'\n")

        combined_path = os.path.join(self.output_dir, f"{self.epub_name}.m4b")
        cmd = [
            "ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_file,
            "-c", "copy", combined_path
        ]
        try:
            subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            self.logger.info(f"Audiobook created: {combined_path}")
        except subprocess.CalledProcessError as e:
            self.logger.error(f"FFmpeg error: {e.stderr.decode()}")

    def run(self):
        """Execute the full audiobook creation process."""
        self.extract_chapters()
        self.text_to_audio()
        self.create_audiobook()

CPU times: user 52 µs, sys: 12 µs, total: 64 µs
Wall time: 67.7 µs


In [17]:
%%time
# Example usage
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    creator = AudiobookCreator("/kaggle/input/ebooks-epubs/sample_01.epub")
    creator.run()

  for root_file in tree.findall('//xmlns:rootfile[@media-type]', namespaces={'xmlns': NAMESPACES['CONTAINERNS']}):
Extracting chapters:   0%|          | 0/21 [00:00<?, ?it/s]INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 1.txt
INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 1.txt
INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 1.txt
INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 1.txt
INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 2.txt
INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 2.txt
INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 2.txt
INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 2.txt
INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 3.txt
INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 3.txt
INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 3.txt
INFO: Saved chapter text: audiobooks/sample_01/txt/Chapter 3.txt
INFO: Saved chapter text: audiobooks/sample_01

CPU times: user 225 ms, sys: 47.2 ms, total: 272 ms
Wall time: 332 ms
