In [3]:
pip install SpeechRecognition pyaudio

Collecting SpeechRecognition
  Downloading speechrecognition-3.14.2-py3-none-any.whl.metadata (30 kB)
Collecting pyaudio
  Downloading PyAudio-0.2.14-cp313-cp313-win_amd64.whl.metadata (2.7 kB)
Collecting standard-aifc (from SpeechRecognition)
  Downloading standard_aifc-3.13.0-py3-none-any.whl.metadata (969 bytes)
Collecting audioop-lts (from SpeechRecognition)
  Downloading audioop_lts-0.2.1-cp313-abi3-win_amd64.whl.metadata (1.7 kB)
Collecting standard-chunk (from standard-aifc->SpeechRecognition)
  Downloading standard_chunk-3.13.0-py3-none-any.whl.metadata (860 bytes)
Downloading speechrecognition-3.14.2-py3-none-any.whl (32.9 MB)
   ---------------------------------------- 0.0/32.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/32.9 MB ? eta -:--:--
   ---------------------------------------- 0.3/32.9 MB ? eta -:--:--
   - -------------------------------------- 1.0/32.9 MB 2.7 MB/s eta 0:00:12
   -- ------------------------------------- 2.4/32.9 MB 4.2 MB/s eta 


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import speech_recognition as sr
import os

In [5]:
def Get_Audio_Input(recognizer, input_method, audio_file=None):
    """Capture audio from the microphone or load it from an audio file.

    Args:
        recognizer: Object exposing ``adjust_for_ambient_noise``, ``listen`` and
            ``record`` (an ``sr.Recognizer`` in this notebook).
        input_method: ``"mic"`` to record from the default microphone, or
            ``"file"`` to read ``audio_file``.
        audio_file: Path to the audio file; required when ``input_method`` is
            ``"file"`` and ignored otherwise.

    Returns:
        The value returned by ``recognizer.listen`` / ``recognizer.record``, or
        ``None`` when capture failed. Failures are printed, never raised.
    """
    try:
        if input_method == "mic":
            with sr.Microphone() as source:
                print("\n\033[36mSpeak now (wait for the listening prompt)...\033[0m")
                # Calibrate for background noise before listening.
                recognizer.adjust_for_ambient_noise(source, duration=1)
                print("\033[33mListening...\033[0m")
                return recognizer.listen(source, timeout=5, phrase_time_limit=10)

        if input_method == "file":
            # isfile (not exists): a directory path must be rejected here with a
            # clear message instead of failing later inside the audio loader.
            if not audio_file or not os.path.isfile(audio_file):
                raise ValueError("Invalid audio file path")
            with sr.AudioFile(audio_file) as source:
                return recognizer.record(source)

        # Previously an unknown method fell through and returned None silently.
        raise ValueError(f"Unsupported input method: {input_method!r}")
    except Exception as e:
        # Best-effort by design: report the problem and let the caller retry.
        print(f"\033[91mError during audio capture: {e}\033[0m")
        return None

In [9]:
def Speech_To_Text():
    """Run an interactive menu loop that transcribes speech to text.

    Each iteration asks the user to pick the microphone, an audio file, or exit.
    The captured audio is passed to ``recognizer.recognize_google`` and the
    resulting text is printed; the user may then save it to
    ``transcription.txt`` in the current working directory (the file is
    overwritten on every save).

    Returns:
        None. The loop ends only when the user chooses option 3.
    """
    recognizer = sr.Recognizer()
    while True:
        print("\n\033[1m=== SPEECH TO TEXT CONVERTER ===\033[0m")
        print("1. 🎤 Use microphone")
        print("2. 📂 Use audio file")
        print("3. ❌ Exit")

        choice = input("\nChoose input method (1/2/3): ").strip()

        if choice == "3":
            print("Exiting...")
            break

        if choice not in ("1", "2"):
            print("\033[91mInvalid choice. Please try again.\033[0m")
            continue

        input_method = "mic" if choice == "1" else "file"
        audio_file = None

        if input_method == "file":
            raw_path = input("Enter full path to audio file (WAV/FLAC/AIFF): ")
            # Pasted paths often arrive wrapped in quotes (e.g. Windows
            # "Copy as path"); remove them so the path resolves.
            audio_file = raw_path.strip().strip("\"'").strip()
            if not audio_file:
                print("\033[91mNo file path provided\033[0m")
                continue

        audio = Get_Audio_Input(recognizer, input_method, audio_file)
        # None is the failure sentinel; the error was already printed.
        if audio is None:
            continue

        try:
            text = recognizer.recognize_google(audio)
            print("\n\033[92mTranscription Result:\033[0m")
            print("\033[1m" + text + "\033[0m")

            # Option to save to file
            save_choice = input("\nSave to text file? (y/n): ").strip().lower()
            if save_choice == "y":
                # Explicit UTF-8: the platform default encoding (e.g. cp1252 on
                # Windows) cannot represent every transcribed character.
                with open("transcription.txt", "w", encoding="utf-8") as f:
                    f.write(text)
                print("Saved to transcription.txt")

        except sr.UnknownValueError:
            print("\033[91mCould not understand audio\033[0m")
        except sr.RequestError:
            print("\033[91mAPI unavailable. Check internet connection\033[0m")
        except Exception as e:
            print(f"\033[91mError: {e}\033[0m")

In [11]:
if __name__ == "__main__":
    # Probe the microphone up front so the user learns early that option 1 will
    # not work; file input remains usable, so the menu is started regardless.
    try:
        sr.Microphone()
    except Exception as exc:
        # Not a bare `except:` -- KeyboardInterrupt/SystemExit must propagate.
        # The real reason is shown because a missing PyAudio is only one of the
        # possible causes (an absent input device is another).
        print(f"\033[91mMicrophone input unavailable: {exc}")
        print("If PyAudio is not installed, install it with:")
        print("pip install pyaudio\033[0m")

    Speech_To_Text()


[1m=== SPEECH TO TEXT CONVERTER ===[0m
1. 🎤 Use microphone
2. 📂 Use audio file
3. ❌ Exit



Choose input method (1/2/3):  3


Exiting...
