In [2]:

import speech_recognition as sr
import wave
import pyaudio
import requests
import json
import os
from datetime import datetime

# === Gemini API Info ===
# The key is read from the GEMINI_API_KEY environment variable so the secret
# never lives in the source file or in version control. It defaults to an
# empty string so importing this module does not fail when the variable is
# unset.
# NOTE: the key that used to be hard-coded here has been exposed and should
# be revoked.
API_KEY = os.environ.get("GEMINI_API_KEY", "")
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

# === Create Folder to Save Files ===
# Created eagerly (relative to the current working directory) so the save
# helpers can write into it without checking for its existence.
SAVE_FOLDER = "saved_sessions"
os.makedirs(SAVE_FOLDER, exist_ok=True)

# === Get Unique Timestamp-Based Filename ===
def get_timestamp():
    """Return the current local time formatted as ``YYYYMMDD_HHMMSS``."""
    now = datetime.now()
    return f"{now:%Y%m%d_%H%M%S}"

# === Save audio file ===
def save_audio(audio_data, filename):
    """Write recorded audio to a mono WAV file inside SAVE_FOLDER.

    Args:
        audio_data: Object exposing ``get_raw_data()`` (raw PCM bytes) and,
            ideally, ``sample_rate`` / ``sample_width`` attributes.
        filename: File name (not a full path) to create under SAVE_FOLDER.
    """
    filepath = os.path.join(SAVE_FOLDER, filename)
    # The WAV header must describe the bytes actually written. Take the
    # format from the recording itself; the previous hard-coded values
    # (16-bit samples at 44100 Hz) remain only as a fallback for objects
    # that do not expose these attributes.
    sample_width = getattr(audio_data, "sample_width", 2)
    sample_rate = getattr(audio_data, "sample_rate", 44100)
    with wave.open(filepath, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(audio_data.get_raw_data())
    print(f"\n🎧 Audio saved as {filepath}")

# === Record audio ===
def record_audio(duration=None):
    """Capture one spoken phrase from the microphone.

    Args:
        duration: Optional upper bound, in seconds, forwarded to
            ``Recognizer.listen`` as ``phrase_time_limit``. ``None`` means
            no limit. This is a cap, not a fixed length: listening can
            still end earlier when the speaker pauses.

    Returns:
        The audio object produced by ``Recognizer.listen``.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        # Calibrate BEFORE prompting: calibration reads from the microphone
        # for a short while, so prompting first would throw away the start
        # of whatever the user says.
        recognizer.adjust_for_ambient_noise(source)
        print("🎙️ Speak now...")
        audio = recognizer.listen(source, phrase_time_limit=duration)
        print("🛑 Recording stopped.")
    return audio

# === Transcribe with Google ===
def transcribe_audio(audio):
    """Turn recorded audio into text via ``recognize_google``.

    Returns the recognized text, or a bracketed placeholder message when
    the speech cannot be understood or the recognition request fails.
    """
    recognizer = sr.Recognizer()
    try:
        transcript = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        transcript = "[Could not understand the audio]"
    except sr.RequestError as err:
        transcript = f"[Speech recognition failed: {err}]"
    return transcript

# === Enhance with Gemini ===
def enhance_with_gemini(prompt):
    """Send ``prompt`` to the Gemini endpoint and return the generated text.

    Args:
        prompt: Text placed in the single ``parts`` entry of the request.

    Returns:
        The text of the first candidate on success; otherwise a bracketed
        error string (``[Gemini API error <status>]`` for non-200 replies,
        ``[Gemini request failed: ...]`` for transport or parsing problems).
    """
    headers = {
        "Content-Type": "application/json",
        # Send the key as a header instead of a URL query parameter: the URL
        # is embedded in requests' exception messages, which this function
        # returns and the caller prints and saves to disk.
        "x-goog-api-key": API_KEY,
    }
    payload = {"contents": [{"parts": [{"text": prompt}]}]}

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.post(
            GEMINI_URL, headers=headers, json=payload, timeout=30
        )
    except requests.RequestException as e:
        return f"[Gemini request failed: {e}]"

    if response.status_code != 200:
        return f"[Gemini API error {response.status_code}]"

    try:
        result = response.json()
        return result['candidates'][0]['content']['parts'][0]['text']
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # A 200 reply without the expected structure (or with invalid JSON).
        return f"[Gemini request failed: unexpected response format ({e!r})]"

# === Save transcript and Gemini output ===
def save_text_file(text, gemini_text, timestamp):
    """Store the transcript and the Gemini reply in one UTF-8 text file.

    The file is named ``response_<timestamp>.txt`` and is created inside
    SAVE_FOLDER; its path is printed once the file has been written.
    """
    filepath = os.path.join(SAVE_FOLDER, f"response_{timestamp}.txt")
    with open(filepath, 'w', encoding='utf-8') as out:
        out.writelines([
            "🗣️ Transcription:\n",
            text + "\n\n",
            "🤖 Gemini Response:\n",
            gemini_text,
        ])
    print(f"📄 Output saved as {filepath}")

# === Main logic ===
def main():
    """Run one interactive session: record, save, transcribe, enhance, save.

    Shows the menu, reads a single choice, and returns immediately on exit
    or on an unrecognized option. Otherwise the recording, its transcript
    and the Gemini reply are saved under a shared timestamp.
    """
    menu_lines = (
        "🎤 Speech-to-Text Tool",
        "======================",
        "1. 🔘 Fixed Speech (10 seconds)",
        "2. 🔘 Break Speech (until pause)",
        "0. ❌ Exit",
    )
    for line in menu_lines:
        print(line)

    choice = input("Choose an option: ")

    if choice == "0":
        print("Goodbye!")
        return

    # Map each recording option to the keyword arguments for record_audio.
    recording_options = {"1": {"duration": 10}, "2": {}}
    record_kwargs = recording_options.get(choice)
    if record_kwargs is None:
        print("❌ Invalid option.")
        return

    audio = record_audio(**record_kwargs)

    timestamp = get_timestamp()
    save_audio(audio, f"output_{timestamp}.wav")

    print("\n📝 Transcribing...")
    text = transcribe_audio(audio)
    print(f"\n🗣️ You said:\n{text}")

    print("\n🤖 Enhancing with Gemini...")
    prompt = "\n".join([
        "You are a helpful assistant. Here's what the user said:",
        "",
        f"\"{text}\"",
        "",
        "First, correct the punctuation and grammar.",
        "Then, respond like a friendly assistant.",
    ])
    gemini_output = enhance_with_gemini(prompt)
    print(f"\n✨ Gemini Response:\n{gemini_output}")

    save_text_file(text, gemini_output, timestamp)

# Start the interactive session only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

🎤 Speech-to-Text Tool
1. 🔘 Fixed Speech (10 seconds)
2. 🔘 Break Speech (until pause)
0. ❌ Exit
🎙️ Speak now...
🛑 Recording stopped.

🎧 Audio saved as saved_sessions\output_20250713_141221.wav

📝 Transcribing...

🗣️ You said:
hey hello how are you what's up

🤖 Enhancing with Gemini...

✨ Gemini Response:
Okay, here's the corrected sentence and my response:

**Corrected:** Hey, hello! How are you? What's up?

**Response:** Hey there! I'm doing well, thanks for asking! Just here and ready to help. What can I do for you today?

📄 Output saved as saved_sessions\response_20250713_141221.txt
