In [2]:
# First set the locale to UTF-8
import os
os.environ["PYTHONIOENCODING"] = "utf-8"
os.environ["LC_ALL"] = "C.UTF-8"
os.environ["LANG"] = "C.UTF-8"

# Install required packages
!pip install kokoro>=0.8.2 soundfile
!apt-get update -qq && apt-get -qq -y install espeak-ng > /dev/null 2>&1

import json
import glob
import logging
import traceback
import numpy as np
import soundfile as sf
from pathlib import Path
from kokoro import KPipeline

# Configure root logging once: timestamped records at INFO level and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-scoped logger used by the functions in this file.
logger = logging.getLogger(__name__)

def setup_directories():
    """Ensure the working folders exist and return their paths.

    The folders are created relative to the current working directory;
    folders that already exist are left untouched.

    Returns:
        A ``(storage_dir, temp_texts_dir, temp_audios_dir)`` tuple of
        ``Path`` objects.
    """
    root = Path("Storage")
    texts = root / "temp_texts"
    audios = root / "temp_audios"

    # Parent first, then the two children.
    root.mkdir(exist_ok=True)
    texts.mkdir(exist_ok=True)
    audios.mkdir(parents=True, exist_ok=True)

    return root, texts, audios

def load_book_json(filepath):
    """Read a UTF-8 encoded JSON file and return the decoded value.

    Args:
        filepath: Location of the JSON file to read.

    Returns:
        The parsed JSON value, or ``None`` when opening or parsing the file
        raised an exception (the error and its traceback are logged).
    """
    try:
        with open(filepath, mode='r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except Exception as err:
        # Best-effort loader: report the problem and let the caller skip this file.
        logger.error(f"Error loading JSON from {filepath}: {err}")
        logger.error(traceback.format_exc())
        return None
    return parsed

def _field_or_default(book_data, key, default):
    """Return ``book_data[key]`` as stripped text, or *default* if missing, null or blank."""
    value = book_data.get(key)
    # A key that is present but null would otherwise be narrated as "None".
    text = "" if value is None else str(value).strip()
    return text or default


def _build_narration(book_data, book_id, max_chars):
    """Compose the narration text for one book, keeping it within *max_chars*.

    Returns:
        A ``(title, full_content)`` tuple.
    """
    title = _field_or_default(book_data, "title", "Unknown Title")
    author = _field_or_default(book_data, "author", "Unknown Author")
    synopsis = _field_or_default(book_data, "synopsis", "No synopsis available.")

    content = f"{title} by {author}. Synopsis: {synopsis}"
    outro = "Thank you for reading. For more, follow us and create good habits."
    full_content = f"{content} {outro}"

    if len(full_content) > max_chars:
        logger.warning(f"Text for book {book_id} is very long ({len(full_content)} chars). Truncating.")
        # Shorten the body rather than the tail so the outro is still narrated
        # and the final text does not exceed the limit.
        ellipsis = "..."
        room = max(0, max_chars - len(outro) - len(ellipsis) - 1)
        full_content = f"{content[:room].rstrip()}{ellipsis} {outro}"

    return title, full_content


def generate_voice(book_data, book_id, output_dir, voice='af_heart', language='a',
                   *, speed=1, sample_rate=24000, max_chars=5000, pipeline=None):
    """Generate a narration WAV file for one book using Kokoro.

    The narrated text is "<title> by <author>. Synopsis: <synopsis>" followed
    by a fixed outro. Text is split on blank lines into paragraphs; each
    paragraph is synthesised separately and all resulting audio segments are
    concatenated into a single file named ``voice_<book_id>.wav``.

    Args:
        book_data: Mapping with optional "title", "author" and "synopsis"
            entries; missing, null or blank entries fall back to placeholders.
        book_id: Identifier used in log messages and in the output file name.
        output_dir: Folder (``str`` or ``Path``) to write the WAV file into;
            it is created if it does not exist.
        voice: Voice name forwarded to the Kokoro pipeline call.
        language: Language code passed as ``KPipeline(lang_code=...)``.
        speed: Speed value forwarded to the Kokoro pipeline call.
        sample_rate: Sample rate handed to ``soundfile.write``. The default
            is the value this function previously hard-coded.
        max_chars: Upper bound on the narrated text length; longer text has
            its body shortened while the outro is kept.
        pipeline: Optional, already constructed pipeline to reuse. When
            ``None`` a new ``KPipeline`` is created for this call.

    Returns:
        The output file path as a string, or ``None`` if no audio could be
        produced or an unexpected error occurred (errors are logged).
    """
    try:
        if pipeline is None:
            logger.info(f"Initializing Kokoro TTS model for book {book_id}...")
            pipeline = KPipeline(lang_code=language)

        title, full_content = _build_narration(book_data, book_id, max_chars)

        # Accept plain strings as well as Path objects, and make sure the
        # destination exists before any synthesis work is done.
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        output_file = output_dir / f"voice_{book_id}.wav"
        logger.info(f"Generating audio for book {book_id}: {title}")

        # The outro is always present, so this list is never empty.
        paragraphs = [p.strip() for p in full_content.split('\n\n') if p.strip()]

        audio_segments = []
        for i, para in enumerate(paragraphs, start=1):
            try:
                logger.info(f"Processing paragraph {i}/{len(paragraphs)} for book {book_id}")
                generator = pipeline(para, voice=voice, speed=speed)

                # Each yielded item is a 3-tuple; only the audio part is used here.
                for j, (_gs, _ps, audio) in enumerate(generator, start=1):
                    if audio is not None and len(audio) > 0:
                        audio_segments.append(audio)
                        logger.info(f"Generated segment {j} for paragraph {i}")
                    else:
                        logger.warning(f"Empty audio segment {j} for paragraph {i}")
            except Exception as para_e:
                # Deliberate best-effort: skip this paragraph and keep whatever
                # audio the other paragraphs produce.
                logger.error(f"Error processing paragraph {i}: {para_e}")

        if not audio_segments:
            logger.error(f"No audio segments generated for book {book_id}")
            return None

        combined_audio = np.concatenate(audio_segments)
        sf.write(str(output_file), combined_audio, sample_rate)
        logger.info(f"Audio successfully generated: {output_file}")
        return str(output_file)

    except Exception as e:
        logger.error(f"Error generating voice for book {book_id}: {e}")
        logger.error(traceback.format_exc())
        return None

def _book_id_from_path(book_file):
    """Return the part of the file name between the ``book_`` prefix and the extension."""
    stem = Path(book_file).stem
    prefix = "book_"
    return stem[len(prefix):] if stem.startswith(prefix) else stem


def _book_sort_key(book_file):
    """Sort numeric ids numerically first, then any other ids alphabetically."""
    book_id = _book_id_from_path(book_file)
    if book_id.isdecimal():
        return (0, int(book_id), book_id)
    # Non-numeric names (e.g. "book_intro.json") must not abort the whole run.
    return (1, 0, book_id)


def main():
    """Generate narration audio for every ``book_*.json`` file.

    Steps:
        1. Make sure the storage folders exist.
        2. Collect the book JSON files; if there are none, write five sample
           books so the pipeline can be exercised.
        3. For each book, in id order, load the JSON and generate its audio.

    Failures for an individual book are logged and the loop moves on to the
    next one. Any unexpected error is logged with its traceback instead of
    being raised.
    """
    try:
        _storage_dir, temp_texts_dir, temp_audios_dir = setup_directories()

        pattern = str(temp_texts_dir / "book_*.json")
        book_files = glob.glob(pattern)

        if not book_files:
            logger.warning(f"No book files found in {temp_texts_dir}")
            # Create sample book files for testing if none exist
            for i in range(1, 6):
                sample_book = {
                    "title": f"Sample Book {i}",
                    "author": "Test Author",
                    "synopsis": "This is a sample book for testing the TTS functionality."
                }
                with open(temp_texts_dir / f"book_{i}.json", 'w', encoding='utf-8') as f:
                    json.dump(sample_book, f, indent=4)
            book_files = glob.glob(pattern)
            logger.info(f"Created {len(book_files)} sample book files for testing")

        # Define voice and language to use
        voice = 'af_heart'  # Can be customized based on preferences
        language = 'a'      # 'a' for American English

        for book_file in sorted(book_files, key=_book_sort_key):
            # e.g. "1" from "book_1.json"; the full remainder of the name is
            # used so that "book_1.json" and "book_1_v2.json" get distinct ids
            # (and therefore distinct output files).
            book_id = _book_id_from_path(book_file)

            logger.info(f"Processing book {book_id}: {book_file}")

            # A falsy result (None after a load error, or an empty document)
            # is treated as a failed load and the book is skipped.
            book_data = load_book_json(book_file)
            if not book_data:
                logger.error(f"Failed to load book {book_id}")
                continue

            output_path = generate_voice(book_data, book_id, temp_audios_dir, voice, language)

            if output_path:
                logger.info(f"Book {book_id} processing complete. Audio saved to: {output_path}")
            else:
                logger.error(f"Failed to process book {book_id}")

    except Exception as e:
        logger.error(f"Unexpected error in main function: {e}")
        logger.error(traceback.format_exc())

# Entry point: run the whole pipeline only when this code is executed directly
# (its __name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    # Log a start and end marker around the run.
    logger.info("Starting book voice generation process with Kokoro TTS")
    main()
    logger.info("Book voice generation process completed")

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/2.35k [00:00<?, ?B/s]

  WeightNorm.apply(module, name, dim)


kokoro-v1_0.pth:   0%|          | 0.00/327M [00:00<?, ?B/s]

af_heart.pt:   0%|          | 0.00/523k [00:00<?, ?B/s]

