In [None]:
import numpy as np
import soundfile as sf
import tensorflow as tf

from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

def generate_speech_with_wavenet(text, output_file="generated_audio.wav"):
    """
    Generates speech from the given text using a pre-trained WaveNet model
    and saves it to a WAV file.

    Args:
        text (str): The text to convert to speech.
        output_file (str, optional): The name of the WAV file to save the
            generated audio. Defaults to "generated_audio.wav".
    """
    try:
        # 1. Check for GPU availability and set memory growth
        if len(tf.config.list_physical_devices('GPU')) > 0:
            tf.config.experimental.set_memory_growth(
                tf.config.list_physical_devices('GPU')[0], True
            )

        # 2. Define the WaveNet model and configuration
        #    -  Using a specific WaveNet model from the Hugging Face
        #       TensorFlow TTS repository.
        wavenet_config = AutoConfig.from_pretrained(
            "TensorFlowTTS/tts-ljspeech-wavenet-256-16bit-ckpt.tflite"
        )
        wavenet_model = TFAutoModel.from_pretrained(
            pretrained_model_name_or_path="TensorFlowTTS/tts-ljspeech-wavenet-256-16bit-ckpt.tflite",
            config=wavenet_config,
        )

        # 3. Define the processor
        processor = AutoProcessor.from_pretrained(
            pretrained_model_name_or_path="TensorFlowTTS/tts-ljspeech-wavenet-256-16bit-ckpt.tflite"
        )

        # 4. Process the input text
        input_ids = processor.text_to_sequence(text)

        # 5. Generate the speech (audio)
        #    -  The model expects a batch of inputs, so we expand dimensions.
        audio = wavenet_model.inference(
            tf.expand_dims(input_ids, 0),
            speaker_ids=tf.expand_dims(
                tf.convert_to_tensor([0], dtype=tf.int32), 0
            ),  # Add speaker ID if needed
        )
        audio = audio[0, :, 0].numpy()  # Get the audio data and convert to numpy

        # 6. Save the generated audio to a WAV file
        #    -  Use a sample rate that is appropriate for the model.  22050 is common.
        sf.write(output_file, audio, 22050)
        print(f"Audio saved to {output_file}")

    except Exception as e:
        print(f"Error occurred: {e}")
        print(
            "Please make sure you have TensorFlow and TensorFlow TTS installed, and that the model name is correct."
        )
        print(
            "You can install the required libraries using:\n"
            "pip install tensorflow tensorflow_tts soundfile"
        )


if __name__ == "__main__":
    # Demo run: synthesize a short multi-sentence sample. No output_file is
    # passed, so the function's default file name is used.
    text_to_speak = "".join(
        [
            "The quick brown fox jumps over the lazy dog.  ",
            "This is a test of the WaveNet text-to-speech system. ",
            "It should generate clear and natural-sounding speech.",
        ]
    )
    generate_speech_with_wavenet(text=text_to_speak)

ModuleNotFoundError: No module named 'tensorflow_tts'

In [None]:
import numpy as np
import soundfile as sf
import tensorflow as tf

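# `tensorflow_tts` is the import name; the package is published on PyPI as
# TensorFlowTTS. Install it into the kernel's environment (in its own cell)
# before running the imports below:
#   %pip install TensorFlowTTS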

from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor


def generate_speech_with_wavenet(text, output_file="generated_audio.wav"):
    """
    Generates speech from the given text using a pre-trained WaveNet model
    and saves it to a WAV file.

    Args:
        text (str): The text to convert to speech.
        output_file (str, optional): The name of the WAV file to save the
            generated audio. Defaults to "generated_audio.wav".
    """
    try:
        # 1. Check for GPU availability and set memory growth
        if len(tf.config.list_physical_devices('GPU')) > 0:
            tf.config.experimental.set_memory_growth(
                tf.config.list_physical_devices('GPU')[0], True
            )

        # 2. Define the WaveNet model and configuration
        #    -  Using a specific WaveNet model from the Hugging Face
        #       TensorFlow TTS repository.
        wavenet_config = AutoConfig.from_pretrained(
            "TensorFlowTTS/tts-ljspeech-wavenet-256-16bit-ckpt.tflite"
        )
        wavenet_model = TFAutoModel.from_pretrained(
            pretrained_model_name_or_path="TensorFlowTTS/tts-ljspeech-wavenet-256-16bit-ckpt.tflite",
            config=wavenet_config,
        )

        # 3. Define the processor
        processor = AutoProcessor.from_pretrained(
            pretrained_model_name_or_path="TensorFlowTTS/tts-ljspeech-wavenet-256-16bit-ckpt.tflite"
        )

        # 4. Process the input text
        input_ids = processor.text_to_sequence(text)

        # 5. Generate the speech (audio)
        #    -  The model expects a batch of inputs, so we expand dimensions.
        audio = wavenet_model.inference(
            tf.expand_dims(input_ids, 0),
            speaker_ids=tf.expand_dims(
                tf.convert_to_tensor([0], dtype=tf.int32), 0
            ),  # Add speaker ID if needed
        )
        audio = audio[0, :, 0].numpy()  # Get the audio data and convert to numpy

        # 6. Save the generated audio to a WAV file
        #    -  Use a sample rate that is appropriate for the model.  22050 is common.
        sf.write(output_file, audio, 22050)
        print(f"Audio saved to {output_file}")

    except Exception as e:
        print(f"Error occurred: {e}")
        print(
            "Please make sure you have TensorFlow and TensorFlow TTS installed, and that the model name is correct."
        )
        print(
            "You can install the required libraries using:\n"
            "pip install tensorflow tensorflow_tts soundfile"
        )


if __name__ == "__main__":
    # Demo run: synthesize a short multi-sentence sample. No output_file is
    # passed, so the function's default file name is used.
    text_to_speak = "".join(
        [
            "The quick brown fox jumps over the lazy dog.  ",
            "This is a test of the WaveNet text-to-speech system. ",
            "It should generate clear and natural-sounding speech.",
        ]
    )
    generate_speech_with_wavenet(text=text_to_speak)

ModuleNotFoundError: No module named 'tensorflow_tts'

In [None]:
import soundfile as sf
import tensorflow as tf

from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor


def generate_speech(text, output_file="output.wav"):
    """
    Generates speech from text using a pre-trained WaveNet model.

    Any failure while loading the model, synthesizing, or writing the file is
    reported with print() instead of being raised, so the function always
    returns None.

    Args:
        text: The text to speak (string).
        output_file: The name of the output WAV file (string, optional).
    """
    # NOTE(review): this checkpoint id could not be confirmed against the
    # models published by TensorFlowTTS -- verify that it exists.
    model_id = "TensorFlowTTS/tts-ljspeech-wavenet-256-16bit-ckpt.tflite"
    # NOTE(review): 22050 Hz is assumed to match the checkpoint -- confirm.
    sample_rate = 22050

    try:
        # Load the pre-trained model and the processor from the same checkpoint
        wavenet_model = TFAutoModel.from_pretrained(model_id)
        processor = AutoProcessor.from_pretrained(model_id)

        # Turn the text into input ids and add a leading batch dimension
        input_ids = processor.text_to_sequence(text)
        batched_ids = tf.expand_dims(input_ids, 0)

        # assumes the output is laid out as (batch, samples, channels) -- TODO confirm
        audio = wavenet_model.inference(batched_ids)[0, :, 0].numpy()

        # Save the audio
        sf.write(output_file, audio, sample_rate)
        print(f"Audio saved to {output_file}")

    except Exception as e:
        # Best-effort by design: report the problem rather than propagate it.
        print(f"Error: {e}")
        # The import name is tensorflow_tts, but the pip distribution is
        # published as TensorFlowTTS.
        print(
            "Please ensure TensorFlow and TensorFlow TTS are installed:\n"
            "pip install tensorflow TensorFlowTTS soundfile"
        )


if __name__ == "__main__":
    # Demo run: speak one short sentence. No output_file is passed, so the
    # function's default file name is used.
    text_to_speak = "Hello, this is WaveNet speaking."
    generate_speech(text=text_to_speak)

ModuleNotFoundError: No module named 'tensorflow_tts'

In [None]:
import soundfile as sf
import tensorflow as tf

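# `tensorflow_tts` is the import name; the package is published on PyPI as
# TensorFlowTTS. Install it into the kernel's environment (in its own cell)
# before running the imports below:
#   %pip install TensorFlowTTS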

from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor


def generate_speech(text, output_file="output.wav"):
    """
    Generates speech from text using a pre-trained WaveNet model.

    Any failure while loading the model, synthesizing, or writing the file is
    reported with print() instead of being raised, so the function always
    returns None.

    Args:
        text: The text to speak (string).
        output_file: The name of the output WAV file (string, optional).
    """
    # NOTE(review): this checkpoint id could not be confirmed against the
    # models published by TensorFlowTTS -- verify that it exists.
    model_id = "TensorFlowTTS/tts-ljspeech-wavenet-256-16bit-ckpt.tflite"
    # NOTE(review): 22050 Hz is assumed to match the checkpoint -- confirm.
    sample_rate = 22050

    try:
        # Load the pre-trained model and the processor from the same checkpoint
        wavenet_model = TFAutoModel.from_pretrained(model_id)
        processor = AutoProcessor.from_pretrained(model_id)

        # Turn the text into input ids and add a leading batch dimension
        input_ids = processor.text_to_sequence(text)
        batched_ids = tf.expand_dims(input_ids, 0)

        # assumes the output is laid out as (batch, samples, channels) -- TODO confirm
        audio = wavenet_model.inference(batched_ids)[0, :, 0].numpy()

        # Save the audio
        sf.write(output_file, audio, sample_rate)
        print(f"Audio saved to {output_file}")

    except Exception as e:
        # Best-effort by design: report the problem rather than propagate it.
        print(f"Error: {e}")
        # The import name is tensorflow_tts, but the pip distribution is
        # published as TensorFlowTTS.
        print(
            "Please ensure TensorFlow and TensorFlow TTS are installed:\n"
            "pip install tensorflow TensorFlowTTS soundfile"
        )


if __name__ == "__main__":
    # Demo run: speak one short sentence. No output_file is passed, so the
    # function's default file name is used.
    text_to_speak = "Hello, this is WaveNet speaking."
    generate_speech(text=text_to_speak)

ModuleNotFoundError: No module named 'tensorflow_tts'