# Generate data by text to speech

In [1]:
from pydub import AudioSegment
from gtts import gTTS
import os
import time

def generate_audio_1(text_prompts, output_dir):
  """Generates audio data from text prompts and saves it with corresponding labels.

  Args:
    text_prompts: A list of text phrases or sentences. 
    output_dir: The directory where audio files and labels will be saved.
  """

  if not os.path.exists(output_dir):
    os.makedirs(output_dir)

  # Open the labels file in append mode to avoid overwriting existing labels
  with open(os.path.join(output_dir, "labels.txt"), "a") as label_file:
    for i, text in enumerate(text_prompts):
      tts = gTTS(text=text, lang='en')  # Change 'en' to your desired language
      audio_file = f"audio_{time.time()}.wav"
      # Save audio file
      tts.save(os.path.join(output_dir, audio_file))
      # Write label in the desired format
      label_file.write(f"{audio_file}: {text}\n")


# Example usage
text_prompts = ["Hello, how are you?", "I'm doing well, thanks.", "What's your name? can you speak few words"]

output_dir = "data_training"

# Clear existing labels.txt (optional)
if os.path.exists(os.path.join(output_dir, "labels.txt")):
  os.remove(os.path.join(output_dir, "labels.txt"))

generate_audio_1(text_prompts, output_dir)



# Generate Data by recording and saving

In [2]:
import speech_recognition as sr
import os
import time


def generate_audio_2(output_dir="data_training"):
    count = 0
    recognizer = sr.Recognizer()

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with sr.Microphone() as source:
        print("Listening...")
        while True:
            try:
                audio = recognizer.listen(source, phrase_time_limit=None)  # Remove timeout

                # Recognize the speech
                text = recognizer.recognize_google(audio)

                # Save the audio file and label
                audio_file = f"audio_{time.time()}.wav"
                count += 1
                print(f"{count} Recording saved: {audio_file}: {text}")
                
                with open(os.path.join(output_dir, "labels.txt"), "a") as label_file:
                    label_file.write(f"{audio_file}: {text}\n")

                with open(os.path.join(output_dir, audio_file), "wb") as audio_file:
                    audio_file.write(audio.get_wav_data())

            except sr.UnknownValueError:
                print("Could not understand audio")
            except sr.RequestError as e:
                print("Could not request results from Google Speech Recognition service; {0}".format(e))

print("Terminate this running cell to stop")
generate_audio_2()

Terminate this running cell to stop
Listening...


KeyboardInterrupt: 