In [None]:
# Pin the release this notebook was executed against so that a fresh kernel
# reproduces the same environment; %pip targets the running kernel's Python.
%pip install SpeechRecognition==3.14.3


Collecting SpeechRecognition
  Downloading speechrecognition-3.14.3-py3-none-any.whl.metadata (30 kB)
Downloading speechrecognition-3.14.3-py3-none-any.whl (32.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.14.3


In [None]:
# =============================================
# 🎙️ CodTech AI Internship - Task 2
# 📌 Enhanced Speech-to-Text Transcriber
# 🖥️ Developed using Python & SpeechRecognition
# =============================================

import speech_recognition as sr
import os
import wave
import contextlib

def get_audio_duration(file_path):
    """
    Returns the duration (in seconds) of a WAV audio file.

    Args:
        file_path: Path to the WAV file to inspect.

    Returns:
        The duration rounded to two decimal places, or None when the file
        cannot be parsed as WAV audio (invalid header, empty or truncated
        file, or a non-positive frame rate).

    Raises:
        OSError: If the file cannot be opened at all (e.g. it does not exist).
    """
    try:
        # Wave_read objects are context managers, so the handle is closed on exit.
        with wave.open(file_path, "rb") as audio_file:
            frames = audio_file.getnframes()
            rate = audio_file.getframerate()
    except (wave.Error, EOFError):
        # wave.Error: not a valid RIFF/WAVE file.
        # EOFError: the file is empty or ends inside the header.
        return None

    # A malformed header can declare a frame rate of 0; avoid dividing by it.
    if rate <= 0:
        return None

    return round(frames / float(rate), 2)

def transcribe_audio(file_path, output_path="transcription_output.txt"):
    """
    Transcribes spoken words from a WAV file using Google's Web Speech API.

    Prints the transcript and its word count, then asks the user whether the
    text should be saved to disk.

    Args:
        file_path: Path of the audio file handed to ``sr.AudioFile``.
        output_path: File the transcript is written to when the user agrees
            to save it. Defaults to "transcription_output.txt" in the current
            working directory.

    Returns:
        The recognized text, or None if any step failed (the failure is
        reported on stdout rather than raised).
    """
    recognizer = sr.Recognizer()

    try:
        with sr.AudioFile(file_path) as source:
            print("🔊 Processing audio... please wait.")
            audio = recognizer.record(source)

        print("🧠 Transcribing using Google Speech API...")
        text = recognizer.recognize_google(audio)

        print("\n✅ Transcription Successful!")
        print("=" * 50)
        print(text)
        print("=" * 50)
        print(f"📊 Word Count: {len(text.split())} words")

        # Strip before comparing so answers such as "y " or " Yes" are honoured.
        answer = input("\n💾 Do you want to save this transcription as a .txt file? (y/n): ").strip().lower()
        if answer in ("y", "yes"):
            with open(output_path, "w", encoding="utf-8") as f:
                f.write(text)
            print(f"📁 Saved as '{output_path}'.")

        return text

    except sr.UnknownValueError:
        print("⚠️ Could not understand the audio. Try a clearer recording.")
    except sr.RequestError as exc:
        # RequestError also covers API-side failures (e.g. an invalid key),
        # so surface the underlying reason next to the connectivity hint.
        print(f"❌ Could not reach Google API. Check your internet connection. Details: {exc}")
    except Exception as e:
        print(f"🚫 An unexpected error occurred: {str(e)}")

    return None

def main():
    """
    Entry point for the Speech-to-Text script.

    Prompts for a WAV file path, validates it (exists, has a .wav extension,
    is readable as WAV audio), prints its duration and then hands it to
    ``transcribe_audio``. Validation failures are reported on stdout and end
    the run early; nothing is returned.
    """
    # Clips longer than this only trigger a warning; they are still transcribed.
    long_audio_seconds = 60

    print("=" * 55)
    print("🎙️  WELCOME TO SPEECH-TO-TEXT CONVERTER - CodTech AI Internship")
    print("=" * 55)

    file_path = input("📁 Enter full path to your WAV audio file: ").strip()

    # Paths copied from a file manager or shell often arrive wrapped in quotes.
    if len(file_path) >= 2 and file_path[0] == file_path[-1] and file_path[0] in "\"'":
        file_path = file_path[1:-1].strip()
    file_path = os.path.expanduser(file_path)

    # Try the path exactly as typed first; only fall back to the
    # backslash -> slash rewrite when that fails, so POSIX file names that
    # legitimately contain a backslash are not corrupted.
    if not os.path.isfile(file_path):
        file_path = file_path.replace("\\", "/")

    # Check if file exists
    if not os.path.isfile(file_path):
        print("❌ Error: File not found. Check the file path.")
        return

    # Validate WAV file
    if not file_path.lower().endswith(".wav"):
        print("❌ Error: Please provide a .wav audio file only.")
        return

    # Duration check
    duration = get_audio_duration(file_path)
    if duration is None:
        print("❌ Error: Cannot read audio file (may be corrupted).")
        return

    print(f"⏱️ Audio Duration: {duration} seconds")
    if duration > long_audio_seconds:
        print("⚠️ Warning: File is longer than 60 seconds. This may affect performance.")

    # Transcribe
    transcribe_audio(file_path)

# Start the interactive flow only when this code is executed directly
# (as a script or as a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


🎙️  WELCOME TO SPEECH-TO-TEXT CONVERTER - CodTech AI Internship
📁 Enter full path to your WAV audio file: /content/voice-sample.wav
⏱️ Audio Duration: 26.3 seconds
🔊 Processing audio... please wait.
🧠 Transcribing using Google Speech API...

✅ Transcription Successful!
hi there this is a sample voice recording created for speech synthesis testing the quick brown fox jumps over the lazy dog just a fun way to include every letter of the alphabet numbers like 1 2 3 are spoken clearly let's see how well this voice captures tone timing and natural Rhythm this audio is provided by sample files.com
📊 Word Count: 60 words

💾 Do you want to save this transcription as a .txt file? (y/n): y
📁 Saved as 'transcription_output.txt'.
