## Convert audio (microphone to text)

In [None]:
$ brew install portaudio
$ pip3 install pyaudio

Reference tutorial: https://thepythoncode.com/article/using-speech-recognition-to-convert-speech-to-text-python

In [25]:
import speech_recognition as sr

# Initialize recognizer class (for recognizing the speech)
recognizer = sr.Recognizer()

# Clear any transcript left over from an earlier run of this cell, so that a
# failed recognition cannot be mistaken for a fresh result by later cells.
text = None

# Use the microphone as a source for input
with sr.Microphone() as source:
    print("Adjusting for ambient noise, please wait...")
    recognizer.adjust_for_ambient_noise(source)

    # Treat 2 seconds of silence as the end of the spoken phrase
    recognizer.pause_threshold = 2

    print(F"You can start speaking now, recognition will begin after {recognizer.pause_threshold} seconds of silence...")

    # Record audio from the microphone
    audio_data = recognizer.listen(source)

# The microphone is released at this point: recognition only needs the
# captured audio, so the device is not held open during the network request.
print("Recognizing...")

# Convert speech to text
try:
    text = recognizer.recognize_google(audio_data)
    print("You said:", text)
except sr.UnknownValueError:
    # The service answered but could not make out any words
    print("Sorry, I did not understand the audio.")
except sr.RequestError:
    # The recognition service could not be reached or rejected the request
    print("Could not request results; check your network connection.")


Adjusting for ambient noise, please wait...
You can start speaking now, recognition will begin after 2 seconds of silence...
Recognizing...
You said: okay I'm going to start to speak and you can come work what I said into text


## Convert text to audio 

In [15]:
import os
import tempfile
import time
from io import BytesIO

import pygame
from gtts import gTTS

def text_to_speech(text):
    """Speak ``text`` aloud one sentence at a time, printing each sentence as it plays.

    The text is split on ". " so that every printed line appears while its
    own audio is playing. Because the separator is consumed by the split,
    only the final sentence keeps its trailing period.

    Each sentence is synthesized with gTTS into a uniquely named temporary
    MP3 file, played through the pygame mixer, and the file is removed again
    once playback has finished (or failed).

    Args:
        text: The text to speak. ``None``, an empty string, or text that
            contains no speakable fragment results in no audio being played.

    Returns:
        None.
    """
    print("Generating speech...")

    # Nothing to say: return before touching the audio device.
    if not text:
        return

    # Split by sentence and drop empty fragments (for example the one produced
    # by text that ends in ". "), since gTTS refuses to synthesize empty text.
    parts = [part for part in text.split(". ") if part.strip()]
    if not parts:
        return

    # Initialize pygame mixer
    pygame.mixer.init()

    # Loop over each part, play it, and print it while the audio plays
    for part in parts:
        # Generate TTS audio for the current part
        tts = gTTS(text=part, lang="en")

        # mkstemp guarantees a unique name, so two parts synthesized within
        # the same second can never overwrite each other's audio file.
        file_descriptor, temp_filename = tempfile.mkstemp(prefix="temp_audio_", suffix=".mp3")
        try:
            # Write the audio data to the file
            with os.fdopen(file_descriptor, "wb") as audio_file:
                tts.write_to_fp(audio_file)

            # Load and play the audio
            pygame.mixer.music.load(temp_filename)
            pygame.mixer.music.play()

            # Print the text part to the user
            print(part)

            # Wait for the audio to finish before proceeding to the next part
            while pygame.mixer.music.get_busy():
                time.sleep(0.1)  # Sleep for a short while to avoid high CPU usage
        finally:
            # Stop playback (relevant when the cell is interrupted) and let the
            # mixer release the file so it can be deleted on every platform.
            pygame.mixer.music.stop()
            unload = getattr(pygame.mixer.music, "unload", None)  # absent on old pygame releases
            if unload is not None:
                unload()
            try:
                os.remove(temp_filename)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the
                # real error or abort the remaining sentences.
                pass

# # Example usage
# input_text = "Hello. Please speak after this tone. This is a test of text-to-speech synchronization."
# text_to_speech(input_text)


Generating speech...
Hello
Please speak after this tone
This is a test of text-to-speech synchronization.


In [26]:
# Hand the transcript produced by the speech-recognition cell to the
# text-to-speech step; that cell must have run successfully first.
input_text = text

In [27]:
# Read the recognized transcript back aloud, printing each part as it plays.
text_to_speech(input_text)

Generating speech...
okay I'm going to start to speak and you can come work what I said into text
