In [None]:
!pip install TTS

import os
import json
import glob
import logging
from pathlib import Path
from TTS.api import TTS

# Configure root logging (INFO level, timestamped records) and create the
# module-level logger used by every function below.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

def setup_directories():
    """Create the working directories if they don't exist.

    Both the input directory (``Storage/temp_texts``) and the output
    directory (``Storage/temp_audios``) are created, so a first run leaves
    a ready-made place to drop the book JSON files.

    Returns:
        A ``(storage_dir, temp_texts_dir, temp_audios_dir)`` tuple of
        ``Path`` objects, relative to the current working directory.
    """
    storage_dir = Path("Storage")
    temp_texts_dir = storage_dir / "temp_texts"
    temp_audios_dir = storage_dir / "temp_audios"

    # parents=True also creates ``Storage`` itself; exist_ok=True makes
    # repeated calls harmless.
    for directory in (temp_texts_dir, temp_audios_dir):
        directory.mkdir(parents=True, exist_ok=True)

    return storage_dir, temp_texts_dir, temp_audios_dir

def load_book_json(filepath):
    """Read a UTF-8 JSON file and return the decoded book data.

    Any failure (missing file, unreadable file, malformed JSON, ...) is
    logged as an error and reported to the caller as ``None`` instead of
    being raised.
    """
    try:
        with open(filepath, mode='r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except Exception as err:
        logger.error(f"Error loading JSON from {filepath}: {err}")
        return None
    return parsed

# Name of the Coqui TTS model used for narration.
_TTS_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"

# Holds the loaded model under the key "model" so it is built at most once
# per process instead of once per book.
_tts_model_cache = {}

def _get_tts_model():
    """Return the shared TTS model, loading it on first use."""
    model = _tts_model_cache.get("model")
    if model is None:
        logger.info("Initializing TTS model...")
        model = TTS(model_name=_TTS_MODEL_NAME, progress_bar=True, gpu=False)
        # Only cache after a successful load so a failed attempt is retried
        # on the next call.
        _tts_model_cache["model"] = model
    return model

def generate_voice(book_data, book_id, output_dir):
    """Generate a narration WAV file for one book.

    The narration is "<title> by <author>. Synopsis: <synopsis>" followed by
    a fixed outro. Fields that are missing, ``None`` or empty fall back to
    placeholder text so the literal word "None" is never spoken.

    Args:
        book_data: Mapping with optional "title", "author" and "synopsis"
            keys.
        book_id: Identifier used in the output file name
            (``voice_<book_id>.wav``).
        output_dir: Directory (``Path`` or string) in which the file is
            written.

    Returns:
        The path of the generated file as a string, or ``None`` if anything
        went wrong (the error is logged, not raised).
    """
    try:
        tts = _get_tts_model()

        # ``or`` (rather than a .get default) also covers keys that are
        # present but null/empty in the JSON.
        title = book_data.get("title") or "Unknown Title"
        author = book_data.get("author") or "Unknown Author"
        synopsis = book_data.get("synopsis") or "No synopsis available."

        content = f"{title} by {author}. Synopsis: {synopsis}"
        outro = "Thank you for reading. For more, follow us and create good habits."
        full_content = f"{content} {outro}"

        # Path() lets callers pass either a Path or a plain string.
        output_file = Path(output_dir) / f"voice_{book_id}.wav"
        logger.info(f"Generating audio for book {book_id}: {title}")
        tts.tts_to_file(text=full_content, file_path=str(output_file))

        logger.info(f"Audio successfully generated: {output_file}")
        return str(output_file)

    except Exception as e:
        # Deliberate best-effort boundary: one failing book must not abort
        # the whole batch, so report and signal failure with None.
        logger.error(f"Error generating voice for book {book_id}: {e}")
        return None

def main():
    """Generate an audio file for every ``book_*.json`` in the texts directory.

    Each file is loaded, narrated via ``generate_voice`` and the result is
    logged. Files that cannot be loaded or contain no data are skipped; a
    failure on one book does not stop the others.
    """
    _, temp_texts_dir, temp_audios_dir = setup_directories()

    # glob returns files in arbitrary order; sort (lexicographically) so
    # runs are reproducible.
    book_files = sorted(glob.glob(str(temp_texts_dir / "book_*.json")))

    if not book_files:
        logger.warning(f"No book files found in {temp_texts_dir}")
        return

    logger.info(f"Found {len(book_files)} book files")

    id_prefix = "book_"
    for book_file in book_files:
        # Everything between the "book_" prefix and the ".json" suffix is
        # the ID (e.g. "1" from "book_1.json"). Using the stem keeps IDs
        # containing '_' or '.' intact, so "book_1.5.json" and
        # "book_1_b.json" no longer collapse to "1" and overwrite each
        # other's audio.
        book_id = Path(book_file).stem[len(id_prefix):]

        book_data = load_book_json(book_file)
        if book_data is None:
            # load_book_json has already logged the reason.
            continue
        if not book_data:
            logger.warning(f"Book file {book_file} contains no data; skipping")
            continue

        output_path = generate_voice(book_data, book_id, temp_audios_dir)

        if output_path:
            logger.info(f"Book {book_id} processing complete. Audio saved to: {output_path}")
        else:
            logger.error(f"Failed to process book {book_id}")

# Script entry point: log the start, process every book via main(), then log
# completion. The guard keeps this from running when the module is imported.
if __name__ == "__main__":
    logger.info("Starting book voice generation process")
    main()
    # Reached whenever main() returns, including when no book files were found.
    logger.info("Book voice generation process completed")

 > tts_models/en/ljspeech/tacotron2-DDC is already downloaded.
 > vocoder_models/en/ljspeech/hifigan_v2 is already downloaded.
 > Using model: Tacotron2
 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:1024
 | > power:1.5
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:False
 | > symmetric_norm:True
 | > mel_fmin:0
 | > mel_fmax:8000.0
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:1.0
 | > stft_pad_mode:reflect
 | > max_norm:4.0
 | > clip_norm:True
 | > do_trim_silence:True
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:2.718281828459045
 | > hop_length:256
 | > win_length:1024


  return torch.load(f, map_location=map_location, **kwargs)


 > Model's reduction rate `r` is set to: 1
 > Vocoder Model: hifigan
 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:1024
 | > power:1.5
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:False
 | > symmetric_norm:True
 | > mel_fmin:0
 | > mel_fmax:8000.0
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:1.0
 | > stft_pad_mode:reflect
 | > max_norm:4.0
 | > clip_norm:True
 | > do_trim_silence:False
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:2.718281828459045
 | > hop_length:256
 | > win_length:1024
 > Generator Model: hifigan_generator
 > Discriminator Model: hifigan_discriminator
Removing weight norm...
 > Text splitted to sentences.
['Pax Technologica: A Roman Empire