In [None]:
# Mic Recorder

In [24]:
import pyaudio
import wave
from google.cloud import speech
import io
import threading
import tempfile
import os

# Initialize the Speech-to-Text client
client = speech.SpeechClient()

# Function to record audio from the microphone
def record_audio(temp_file_path):
    # Setup parameters for the audio recording
    chunk = 1024  # Record in chunks of 1024 samples
    sample_format = pyaudio.paInt16  # 16 bits per sample
    channels = 1
    sample_rate = 24000  # Record at 24kHz
    
    p = pyaudio.PyAudio()  # Create an interface to PortAudio

    print("Recording... Press Enter to stop.")

    # Open a new stream for recording
    stream = p.open(format=sample_format,
                    channels=channels,
                    rate=sample_rate,
                    frames_per_buffer=chunk,
                    input=True)

    frames = []  # Initialize array to store frames

    # Function to read audio data in a loop
    def read_audio():
        while not stop_recording.is_set():
            data = stream.read(chunk)
            frames.append(data)

    # Create a threading event to stop recording
    stop_recording = threading.Event()

    # Start the audio recording thread
    recording_thread = threading.Thread(target=read_audio)
    recording_thread.start()

    # Wait for user input to stop recording
    input()
    stop_recording.set()
    recording_thread.join()

    # Stop and close the stream
    stream.stop_stream()
    stream.close()
    p.terminate()

    print("Recording complete.")

    # Save the recorded data as a WAV file
    with wave.open(temp_file_path, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(p.get_sample_size(sample_format))
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))

# Function to transcribe audio using Google Cloud Speech-to-Text
def transcribe_audio(temp_file_path):
    # Read the audio file
    with io.open(temp_file_path, "rb") as audio_file:
        content = audio_file.read()
    
    audio = speech.RecognitionAudio(content=content)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=24000,
        language_code="ceb-PH",
        model="default",
        audio_channel_count=1,
        enable_word_confidence=True,
        enable_word_time_offsets=True,
    )

    # Detects speech in the audio file
    operation = client.long_running_recognize(config=config, audio=audio)

    print("Waiting for operation to complete...")
    response = operation.result(timeout=90)

    print("Transcript")
    for result in response.results:
        print(result.alternatives[0].transcript)

# Main function
def main():
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    try:
        record_audio(temp_file_path)
        transcribe_audio(temp_file_path)
    finally:
        os.remove(temp_file_path)

if __name__ == "__main__":
    main()

Recording... Press Enter to stop.
Recording complete.
Waiting for operation to complete...
Transcript
can you somehow find a way to make it. Streaming
 I really enjoy your day working a lot
 pretty tiring streaming


In [29]:
from google.cloud import texttospeech

def synthesize_text(text):
    """Synthesizes speech from the input string of text."""


    client = texttospeech.TextToSpeechClient()

    input_text = texttospeech.SynthesisInput(text=text)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Standard-C",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    response = client.synthesize_speech(
        request={"input": input_text, "voice": voice, "audio_config": audio_config}
    )

    # The response's audio_content is binary.
    with open("output.mp3", "wb") as out:
        out.write(response.audio_content)
        print('Audio content written to file "output.mp3"')



In [30]:
synthesize_text("Hello, World!")

Audio content written to file "output.mp3"


In [2]:
import os
import pyaudio
import wave
import io
import threading
import tempfile
from google.cloud import speech
from google.cloud import texttospeech
import google.generativeai as genai
import random

# Initialize the Speech-to-Text client
speech_client = speech.SpeechClient()

# Initialize the Text-to-Speech client
tts_client = texttospeech.TextToSpeechClient()

# Initialize Gemini API
genai.configure(api_key=os.environ['GEMINI_API_KEY'])
model = genai.GenerativeModel('gemini-pro')

def record_audio(temp_file_path):
    # Setup parameters for the audio recording
    chunk = 1024
    sample_format = pyaudio.paInt16
    channels = 1
    sample_rate = 24000
    
    p = pyaudio.PyAudio()
    print("Recording... Press Enter to stop.")
    
    stream = p.open(format=sample_format,
                    channels=channels,
                    rate=sample_rate,
                    frames_per_buffer=chunk,
                    input=True)
    
    frames = []
    stop_recording = threading.Event()
    
    def read_audio():
        while not stop_recording.is_set():
            data = stream.read(chunk)
            frames.append(data)
    
    recording_thread = threading.Thread(target=read_audio)
    recording_thread.start()
    
    input()
    stop_recording.set()
    recording_thread.join()
    
    stream.stop_stream()
    stream.close()
    p.terminate()
    
    print("Recording complete.")
    
    with wave.open(temp_file_path, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(p.get_sample_size(sample_format))
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))

def transcribe_audio(temp_file_path):
    with io.open(temp_file_path, "rb") as audio_file:
        content = audio_file.read()
    
    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=24000,
        language_code="en-US",
        model="default",
        audio_channel_count=1,
        enable_word_confidence=True,
        enable_word_time_offsets=True,
    )
    
    operation = speech_client.long_running_recognize(config=config, audio=audio)
    print("Transcribing audio...")
    response = operation.result(timeout=90)
    
    transcript = ""
    for result in response.results:
        transcript += result.alternatives[0].transcript + " "
    
    return transcript.strip()

def synthesize_speech(text):
    input_text = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Standard-C",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )
    
    response = tts_client.synthesize_speech(
        input=input_text, voice=voice, audio_config=audio_config
    )
    
    with open("response.mp3", "wb") as out:
        out.write(response.audio_content)
    print('Audio response saved as "response.mp3"')

def generate_ai_response(user_input, mode):
    prompt = f"""
    You are an AI communication assistant. Based on the user's input, your task is to {mode}. 
    Respond in a way that encourages further conversation and helps the user improve their communication skills.

    User input: {user_input}

    Your response:
    """
    
    response = model.generate_content(prompt)
    return response.text

def main():
    while True:
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_file_path = temp_file.name
        
        try:
            record_audio(temp_file_path)
            user_input = transcribe_audio(temp_file_path)
            print(f"User said: {user_input}")
            
            mode = input("Choose mode (1: Debate, 2: Explain, 3: Storytelling, 4: Q&A): ")
            modes = {
                "1": "debate or defend the idea presented",
                "2": "explain the concept using metaphors and without circular reasoning",
                "3": "create a narrative or story based on the input",
                "4": "formulate relevant questions or provide answers based on the input"
            }
            
            ai_response = generate_ai_response(user_input, modes.get(mode, "have a general conversation about"))
            print(f"AI response: {ai_response}")
            
            synthesize_speech(ai_response)
            
            continue_chat = input("Continue chatting? (y/n): ").lower()
            if continue_chat != 'y':
                break
        
        finally:
            os.remove(temp_file_path)

if __name__ == "__main__":
    main()

Recording... Press Enter to stop.
Recording complete.
Transcribing audio...
User said: hello I am testing this out
AI response: Hello there! I'm glad you're here to test me out. I'm eager to engage in a thought-provoking debate or lend my support in defending an idea. Please feel free to present your stance, and I'll do my best to stimulate a lively and enriching conversation. Let's get started!
Audio response saved as "response.mp3"
Recording... Press Enter to stop.
Recording complete.
Transcribing audio...
User said: so I think abortion should be illegal
AI response: Abortion is a complex and highly personal issue. There are many different beliefs about when life begins, and what rights a fetus has. It is important to be respectful of all opinions on this topic, even if you disagree with them.

One of the main reasons why people believe abortion should be illegal is because they believe that life begins at conception. They argue that a fetus is a human being with the same rights as a

# TEST

In [2]:
from flask import Flask, request, jsonify
from google.cloud import speech, texttospeech
import google.generativeai as genai
import os
import tempfile
import base64

app = Flask(__name__)

# Initialize clients
speech_client = speech.SpeechClient()
tts_client = texttospeech.TextToSpeechClient()

# Initialize Gemini API
genai.configure(api_key="YOUR_GEMINI_API_KEY")
model = genai.GenerativeModel('gemini-pro')

def transcribe_audio(audio_content):
    audio = speech.RecognitionAudio(content=audio_content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=24000,
        language_code="en-US",
    )
    response = speech_client.recognize(config=config, audio=audio)
    return response.results[0].alternatives[0].transcript

def synthesize_text(text):
    input_text = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Standard-C",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )
    response = tts_client.synthesize_speech(
        input=input_text, voice=voice, audio_config=audio_config
    )
    return base64.b64encode(response.audio_content).decode('utf-8')

def gemini_process(user_input, mode):
    prompt = f"""
    Respond to the user's input in a conversational manner, keeping the response around 5 seconds long when spoken.
    Focus on the following mode: {mode}

    1. Debate/Defend: Provide a counterargument or supporting argument.
    2. Narrate/Storytell: Create a brief narrative or story element related to the input.
    3. Explain: Use a metaphor or analogy to explain the concept.
    4. Question Formation/Answering: Generate a relevant question or provide a concise answer.

    User input: {user_input}
    """
    response = model.generate_content(prompt)
    return response.text

@app.route('/process_audio', methods=['POST'])
def process_audio():
    audio_data = request.files['audio'].read()
    mode = request.form['mode']
    
    user_input = transcribe_audio(audio_data)
    ai_response = gemini_process(user_input, mode)
    audio_response = synthesize_text(ai_response)
    
    return jsonify({
        'user_input': user_input,
        'ai_response': ai_response,
        'audio_response': audio_response
    })

@app.route('/get_prompt', methods=['GET'])
def get_prompt():
    prompts = [
        "Discuss a recent technological advancement that excites you.",
        "Share your thoughts on the future of remote work.",
        "What's a book or movie that has greatly influenced your thinking?",
        "Describe an interesting cultural tradition from your background.",
        "If you could solve one global problem, what would it be and why?"
    ]
    return jsonify({'prompt': random.choice(prompts)})


In [3]:

if __name__ == '__main__':
    app.run(debug=True)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
