In [None]:
import numpy as np
import datetime
import sounddevice as sd

In [7]:
import os
import time
from datetime import datetime
from pathlib import Path
from dotenv import load_dotenv
import numpy as np
import sounddevice as sd
from scipy.io import wavfile
import pyttsx3
import google.generativeai as genai  # Assuming this is the correct module

# Pull in variables via load_dotenv() (the error below points users at a
# .env file), then read the Gemini key from the process environment.
load_dotenv()
api_key = os.environ.get("GEMINI_API_KEY")

# Fail fast: nothing below can work without a non-empty key.
if api_key is None or api_key == "":
    raise ValueError("Please set GEMINI_API_KEY in the .env file.")

# Hand the key to the Gemini client library once, at import time.
genai.configure(api_key=api_key)

class Speechtotext:
    """Record microphone audio, have Gemini transcribe/answer it, and speak the reply.

    The workflow is: ``record`` writes a WAV file, ``transcribe`` uploads it
    through ``genai`` and asks the model for a transcript plus response, and
    ``speak`` reads text aloud through ``pyttsx3``.
    """

    # Prompt sent to the model together with the uploaded audio file.
    _PROMPT = "Transcript the audio file and give response to this audio file in form transcript: line break response dont include any special symbol and answer in natural language"
    # Seconds to sleep between polls while the upload is still "PROCESSING".
    _POLL_SECONDS = 2
    # Give up once the accumulated polling time exceeds this many seconds.
    _MAX_WAIT_SECONDS = 60

    def __init__(self, audio_dir="recordings", transcript_dir="transcripts"):
        """Create the model handle and make sure the output directories exist.

        Args:
            audio_dir: Directory where recorded WAV files are written.
            transcript_dir: Directory where model responses are saved as
                text files. Defaults to the previously hard-coded
                ``"transcripts"``.
        """
        self.model = genai.GenerativeModel("gemini-2.0-flash-exp")
        self.channels = 1  # passed to sd.rec as the channel count
        self.sample_rate = 44100  # frames per second for recording and WAV output
        self.dtype = np.int16  # sample dtype requested from sd.rec
        self.transcript_dir = Path(transcript_dir)
        # parents=True so nested paths such as "out/transcripts" also work.
        self.transcript_dir.mkdir(parents=True, exist_ok=True)
        self.audio_dir = Path(audio_dir)
        self.audio_dir.mkdir(parents=True, exist_ok=True)
        print(f"Audio files will be saved to: {self.audio_dir.absolute()}")

    @staticmethod
    def _timestamp():
        """Return the current local time formatted for use in file names."""
        return datetime.now().strftime('%Y%m%d_%H%M%S')

    def record(self, duration=10):
        """Record audio for ``duration`` seconds and save it as a WAV file.

        Args:
            duration: Recording length in seconds; must be greater than zero.

        Returns:
            The path of the written WAV file, as a string.

        Raises:
            ValueError: If ``duration`` is zero or negative.
        """
        # Validate before touching the audio device: a non-positive duration
        # would request zero (or a negative number of) frames.
        if duration <= 0:
            raise ValueError("duration must be a positive number of seconds")

        filepath = self.audio_dir / f"recording_{self._timestamp()}.wav"
        # Report the duration actually requested rather than a fixed "10".
        print(f"Recording for {duration} seconds")
        print("Speak now!")
        audio_data = sd.rec(
            int(duration * self.sample_rate),
            samplerate=self.sample_rate,
            channels=self.channels,
            dtype=self.dtype
        )
        # Block until the recording started by sd.rec has finished.
        sd.wait()
        wavfile.write(filepath, self.sample_rate, audio_data)
        print(f"Recording saved: {filepath}")
        return str(filepath)

    def transcribe(self, filepath):
        """Upload a WAV file, ask the model to transcribe and answer it.

        The model's reply is printed, saved to a timestamped text file in
        ``self.transcript_dir``, and returned. The uploaded file is deleted
        again (best effort) whether or not the request succeeds.

        Args:
            filepath: Path of the WAV file to upload.

        Returns:
            The response text, or ``None`` if processing timed out, the
            service reported failure, or any exception occurred (the
            exception is printed, not raised).
        """
        uploaded = None
        try:
            print(f"Transcribing {filepath}")
            uploaded = genai.upload_file(
                path=filepath,
                mime_type="audio/wav"
            )
            print("Uploaded file")

            waited = 0
            while uploaded.state.name == "PROCESSING":
                print("Processing audio file...")
                time.sleep(self._POLL_SECONDS)
                waited += self._POLL_SECONDS

                if waited > self._MAX_WAIT_SECONDS:
                    print("Processing timeout exceeded")
                    return None
                # Re-fetch the file object to observe its updated state.
                uploaded = genai.get_file(uploaded.name)

            if uploaded.state.name == "FAILED":
                print("Audio processing failed")
                return None

            response = self.model.generate_content([self._PROMPT, uploaded])
            text = response.text
            print(text)
            self._save_transcript(text)
            return text
        except Exception as e:
            # Deliberate boundary: callers get None instead of an exception.
            print(e)
            return None
        finally:
            # Remove the remote copy on every exit path, not only on
            # timeout/failure, so successful runs do not leave uploads behind.
            if uploaded is not None:
                self._delete_upload(uploaded)

    def _save_transcript(self, text):
        """Write ``text`` to a timestamped file in ``self.transcript_dir``."""
        target = self.transcript_dir / f"transcript_{self._timestamp()}.txt"
        # Explicit UTF-8: the platform default encoding can reject
        # non-ASCII characters in the model's reply.
        with open(target, "w", encoding="utf-8") as f:
            f.write(text)
        return target

    @staticmethod
    def _delete_upload(uploaded):
        """Delete an uploaded file, reporting (not raising) any failure."""
        try:
            genai.delete_file(uploaded.name)
        except Exception as e:
            print(f"Could not delete uploaded file: {e}")

    def record_and_transcribe(self, duration=10):
        """Record ``duration`` seconds of audio and return ``transcribe``'s result.

        Args:
            duration: Recording length in seconds, forwarded to ``record``.

        Returns:
            The response text, or ``None`` if transcription failed.
        """
        filepath = self.record(duration)
        return self.transcribe(filepath)

    def speak(self, text):
        """Read ``text`` aloud using a freshly created ``TextToSpeech``."""
        tts = self.TextToSpeech()
        tts.speak(text)

    class TextToSpeech:
        """Thin wrapper around a ``pyttsx3`` engine with fixed rate and volume."""

        def __init__(self):
            """Initialise the engine and set its 'rate' and 'volume' properties."""
            self.engine = pyttsx3.init()
            self.engine.setProperty('rate', 170)
            self.engine.setProperty('volume', 1.0)

        def speak(self, text):
            """Queue ``text`` and block until the engine has finished speaking."""
            self.engine.say(text)
            self.engine.runAndWait()

def main():
    """Run one record -> transcribe -> speak round trip.

    Nothing is spoken when transcription yields no text.
    """
    assistant = Speechtotext()
    reply = assistant.record_and_transcribe()
    if not reply:
        return
    assistant.speak(reply)


# Script entry point: only run when executed directly, not on import.
if __name__ == "__main__":
    main()


Audio files will be saved to: d:\prompteng\recordings
Recording for 10 seconds
Speak now!
Recording saved: recordings\recording_20250714_213636.wav
Transcribing recordings\recording_20250714_213636.wav
Uploaded file
What is capital of India
The capital of India is New Delhi

