In [None]:
import os
import time
import numpy as np
import sounddevice as sd
import scipy.io.wavfile as wavfile
import google.generativeai as genai
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv

class AudioTranscriber:
    """Record microphone audio to WAV files and transcribe them with Gemini.

    Recordings are captured with ``sounddevice`` and written with
    ``scipy.io.wavfile`` into ``audio_dir``. Transcription first uploads the
    file through the Gemini file API; if that yields nothing, the audio is
    sent inline with the request instead.
    """

    # Prompt sent alongside the audio by both transcription strategies.
    _TRANSCRIBE_PROMPT = (
        "Please transcribe the speech in this audio file. Provide only the transcribed text without any additional commentary."
    )

    def __init__(self, api_key, audio_dir="recordings"):
        """Configure Gemini and make sure the recording directory exists.

        Args:
            api_key: Gemini API key passed to ``genai.configure``.
            audio_dir: Directory (created if missing, including parents)
                where recordings and transcripts are stored.
        """
        # Configuring Gemini
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-2.0-flash-exp')

        # Audio settings: mono, 16-bit PCM at 44.1 kHz
        self.sample_rate = 44100
        self.channels = 1
        self.dtype = np.int16

        # Creating audio directory; parents=True so nested paths such as
        # "data/recordings" work on first run.
        self.audio_dir = Path(audio_dir)
        self.audio_dir.mkdir(parents=True, exist_ok=True)
        print(f"Audio files will be saved to: {self.audio_dir.absolute()}")

    def record_audio(self, duration=60, filename=None):
        """Record from the default input device and save a WAV file.

        Args:
            duration: Recording length in seconds.
            filename: File name inside ``audio_dir``. When ``None`` a
                timestamped ``recording_YYYYmmdd_HHMMSS.wav`` name is used.

        Returns:
            The path of the saved recording as a string.
        """
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"recording_{timestamp}.wav"

        # Full path for saving
        filepath = self.audio_dir / filename

        print(f"Recording for {duration} seconds...")
        print("Speak now!")

        # Start the recording; the frame count is seconds * samples/second.
        audio_data = sd.rec(
            int(duration * self.sample_rate),
            samplerate=self.sample_rate,
            channels=self.channels,
            dtype=self.dtype,
        )

        # Wait for recording to complete before touching the buffer
        sd.wait()

        # Save to file
        wavfile.write(filepath, self.sample_rate, audio_data)

        print(f"Recording saved: {filepath}")
        return str(filepath)

    def transcribe_audio(self, audio_file_path):
        """Transcribe a WAV file by uploading it through the Gemini file API.

        The uploaded file is polled every 2 seconds while its state is
        ``PROCESSING`` (giving up after 60 seconds) and is always deleted
        from the service afterwards, whatever the outcome.

        Args:
            audio_file_path: Path of the WAV file to transcribe.

        Returns:
            The stripped transcription text, or ``None`` on timeout,
            processing failure, or any error raised along the way.
        """
        uploaded = None  # stays None until the upload call has succeeded
        try:
            print(f"Transcribing: {audio_file_path}")

            # Method 1: Upload file with explicit path
            uploaded = genai.upload_file(
                path=audio_file_path,
                mime_type='audio/wav',
            )
            print(f"Uploaded file: {uploaded.name}")

            # Wait for processing with timeout
            max_wait_time = 60  # 60 seconds timeout
            wait_time = 0
            while uploaded.state.name == "PROCESSING":
                print("Processing audio file...")
                time.sleep(2)
                wait_time += 2

                if wait_time > max_wait_time:
                    print("Processing timeout exceeded")
                    return None

                uploaded = genai.get_file(uploaded.name)

            if uploaded.state.name == "FAILED":
                print("Audio processing failed")
                return None

            print("Audio processing completed successfully")

            response = self.model.generate_content([
                self._TRANSCRIBE_PROMPT,
                uploaded,
            ])
            return response.text.strip()

        except Exception as e:
            # Best-effort API boundary: report and let the caller fall back.
            print(f"Transcription error: {e}")
            return None
        finally:
            # Single cleanup point for every exit path. Done here so that a
            # failing delete can never discard a transcription we already have.
            if uploaded is not None:
                self._delete_uploaded(uploaded.name)

    def transcribe_audio_alternative(self, audio_file_path):
        """Transcribe a WAV file by sending its bytes inline with the prompt.

        Used as a fallback when the upload-based method returns nothing. No
        remote file is created, so there is nothing to clean up.

        NOTE(review): the Gemini API caps the size of inline requests
        (documented as roughly 20 MB) - confirm for long recordings.

        Args:
            audio_file_path: Path of the WAV file to transcribe.

        Returns:
            The stripped transcription text, or ``None`` if reading the file
            or the API call fails.
        """
        try:
            print(f"Transcribing (inline): {audio_file_path}")
            audio_bytes = Path(audio_file_path).read_bytes()
            response = self.model.generate_content([
                self._TRANSCRIBE_PROMPT,
                {"mime_type": "audio/wav", "data": audio_bytes},
            ])
            return response.text.strip()
        except Exception as e:
            print(f"Alternative transcription error: {e}")
            return None

    @staticmethod
    def _delete_uploaded(file_name):
        """Delete an uploaded Gemini file, reporting (not raising) failures."""
        try:
            genai.delete_file(file_name)
        except Exception as e:
            print(f"Could not delete uploaded file {file_name}: {e}")

    @staticmethod
    def _transcript_path(audio_path):
        """Return the transcript path that sits next to ``audio_path``.

        Only the final extension is dropped, so names without a ``.wav``
        suffix (or with a different case) never map back onto the audio file
        itself, and ``.wav`` inside directory names is left alone.
        """
        source = Path(audio_path)
        return str(source.with_name(f"{source.stem}_transcript.txt"))

    def record_and_transcribe(self, duration=5, filename=None, save_transcript=True):
        """Record audio, save it, and transcribe it.

        Args:
            duration: Recording length in seconds.
            filename: Optional file name for the recording.
            save_transcript: When true, write the transcription to a
                ``*_transcript.txt`` file next to the recording.

        Returns:
            A ``(audio_path, transcription)`` tuple; ``transcription`` is
            ``None`` when both transcription methods fail.
        """
        # Record and save audio
        audio_path = self.record_audio(duration, filename)

        # Try primary transcription method
        transcription = self.transcribe_audio(audio_path)

        # If primary method fails (None or empty text), try alternative
        if not transcription:
            print("Primary transcription failed, trying alternative method...")
            transcription = self.transcribe_audio_alternative(audio_path)

        if not transcription:
            print("Both transcription methods failed")
            return audio_path, None

        print("\nTranscription:")
        print("-" * 60)
        print(transcription)
        print("-" * 60)

        # Save transcription to file
        if save_transcript:
            transcript_path = self._transcript_path(audio_path)
            with open(transcript_path, 'w', encoding='utf-8') as f:
                f.write(transcription)
            print(f"Transcript saved: {transcript_path}")

        return audio_path, transcription

def main():
    """Run one record-and-transcribe cycle using the key from the environment.

    Reads ``GEMINI_API_KEY`` (after loading a ``.env`` file) and, if it is
    set, records into the ``recordings`` directory with the transcriber's
    default settings. Prints a hint and returns early when the key is missing.
    """
    # Pull variables from a local .env file into the process environment.
    load_dotenv()

    api_key = os.getenv('GEMINI_API_KEY')
    recordings_dir = "recordings"

    # Guard clause: nothing can be transcribed without credentials.
    if not api_key:
        print("Please set your GEMINI_API_KEY in the .env file!")
        return

    AudioTranscriber(api_key, recordings_dir).record_and_transcribe()
    
    
# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Audio files will be saved to: d:\prompteng\recordings
Recording for 5 seconds...
Speak now!
Recording saved: recordings\recording_20250714_211755.wav
Transcribing: recordings\recording_20250714_211755.wav
Uploaded file: files/by3ckfu4gizs
Audio processing completed successfully

Transcription:
------------------------------------------------------------
What is capital of India?
------------------------------------------------------------
Transcript saved: recordings\recording_20250714_211755_transcript.txt
