In [1]:
pip install pyaudio


Collecting pyaudio
  Using cached pyaudio-0.2.14-cp312-cp312-macosx_10_9_universal2.whl
Installing collected packages: pyaudio
Successfully installed pyaudio-0.2.14
Note: you may need to restart the kernel to use updated packages.


In [5]:
import speech_recognition as sr
import wave
import pyaudio
import requests
import json

# Gemini API info
API_KEY = "AIzaSyAUJrMKFNR2YgkE22Orzufwo-tD2xggDVk"
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

# Save audio
def save_audio(audio_data, filename="output.wav"):
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(pyaudio.PyAudio().get_sample_size(pyaudio.paInt16))
        wf.setframerate(44100)
        wf.writeframes(audio_data.get_raw_data())
    print(f"\n🎧 Audio saved as {filename}")

# Record audio
def record_audio(duration=None):
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("🎙️ Speak now...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source, phrase_time_limit=duration)
        print("🛑 Recording stopped.")
        return audio

# Convert to text
def transcribe_audio(audio):
    try:
        return sr.Recognizer().recognize_google(audio)
    except sr.UnknownValueError:
        return "[Could not understand the audio]"
    except sr.RequestError as e:
        return f"[Speech recognition failed: {e}]"

def enhance_with_gemini(prompt):
    headers = {"Content-Type": "application/json"}
    data = {
        "prompt": {
            "text": {
                "text": prompt
            }
        }
    }
    try:
        response = requests.post(f"{GEMINI_URL}?key={API_KEY}", headers=headers, json=data)
        if response.status_code == 200:
            result = response.json()
            # Extract generated text safely
            return result['candidates'][0]['content']['text']
        else:
            return f"[Gemini API error {response.status_code}: {response.text}]"
    except Exception as e:
        return f"[Gemini request failed: {str(e)}]"


# Main logic
def main():
    print("🎤 Speech-to-Text Tool")
    print("======================")
    print("1. 🔘 Fixed Speech (10 seconds)")
    print("2. 🔘 Break Speech (until pause)")
    print("0. ❌ Exit")

    choice = input("Choose an option: ")

    if choice == "1":
        audio = record_audio(duration=10)
    elif choice == "2":
        audio = record_audio()
    elif choice == "0":
        print("Goodbye!")
        return
    else:
        print("❌ Invalid option.")
        return

    save_audio(audio)

    print("\n📝 Transcribing...")
    text = transcribe_audio(audio)
    print(f"\n🗣️ You said:\n{text}")

    print("\n🤖 Enhancing with Gemini...")
    result = enhance_with_gemini(text)
    print(f"\n✨ Gemini Response:\n{result}")

if __name__ == "__main__":
    main()

🎤 Speech-to-Text Tool
1. 🔘 Fixed Speech (10 seconds)
2. 🔘 Break Speech (until pause)
0. ❌ Exit


🎙️ Speak now...
🛑 Recording stopped.

🎧 Audio saved as output.wav

📝 Transcribing...

🗣️ You said:
hello how are you

🤖 Enhancing with Gemini...

✨ Gemini Response:
[Gemini API error 400: {
  "error": {
    "code": 400,
    "message": "Invalid JSON payload received. Unknown name \"prompt\": Cannot find field.",
    "status": "INVALID_ARGUMENT",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.BadRequest",
        "fieldViolations": [
          {
            "description": "Invalid JSON payload received. Unknown name \"prompt\": Cannot find field."
          }
        ]
      }
    ]
  }
}
]
