In [1]:
# Audio to Text Transcription with Groq
# This notebook transcribes a single audio file using Groq's API.

import os
import subprocess
from pathlib import Path
from groq import Groq
from dotenv import load_dotenv

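# Install required packages (run once, in a separate cell BEFORE the imports above;
# %pip installs into the environment of the running kernel):
# %pip install python-dotenv groq pydub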

# --- Configuration ---
# Create a .env file in the same directory with your API key and output folder:
# GROQ_API_KEY="your_groq_api_key_here"
# OUTPUT_FOLDER="transcriptions"
# Keeping the key in .env (and out of this notebook) avoids committing it by accident.

# Load the variables from the .env file so os.getenv() below can see them.
load_dotenv()

# os.getenv returns None when the variable is not set; the entry point at the
# bottom of this cell checks for that before anything is processed.
# NOTE(review): a set-but-wrong key is only detected when the API rejects the
# request (HTTP 401 "Invalid API Key"), i.e. after the audio has been converted.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
OUTPUT_FOLDER = os.getenv("OUTPUT_FOLDER", "transcriptions")  # Falls back to 'transcriptions' when OUTPUT_FOLDER is not set

# --- SET YOUR INPUT FILE HERE ---
# Raw string (r"...") so the Windows backslashes are not treated as escapes.
INPUT_FILE = r"c:\Users\Sakib\Downloads\WhatsApp Audio 2025-10-07 at 10.53.41.mpeg"  # <--- REPLACE WITH THE PATH TO YOUR AUDIO FILE

def check_ffmpeg():
    """Report whether the ``ffmpeg`` executable can be launched.

    Runs ``ffmpeg -version`` with its output captured, then prints either a
    confirmation or per-platform installation hints.

    Returns:
        bool: True when the command ran and exited with status 0; False when
        it exited with a non-zero status or could not be started at all.
    """
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError (ffmpeg not on PATH) as well as
        # PermissionError and other launch failures, so this check always
        # answers with a bool instead of leaking an exception to the caller.
        print("❌ FFmpeg not found. Please install FFmpeg:")
        print("- Windows: Download from https://ffmpeg.org/download.html")
        print("- macOS: brew install ffmpeg")
        print("- Linux: sudo apt install ffmpeg")
        return False

    print("✓ FFmpeg is available")
    return True

def convert_to_mp3(input_file, output_file):
    """Convert an audio file to MP3 by invoking the ``ffmpeg`` command line.

    The audio is encoded with ``libmp3lame`` at 192 kbit/s and an existing
    output file is overwritten (``-y``).

    Args:
        input_file: Path (``str`` or ``pathlib.Path``) of the source file.
        output_file: Path (``str`` or ``pathlib.Path``) of the MP3 to write.

    Returns:
        bool: True when ffmpeg exited with status 0; False when it exited
        with an error or could not be started. The reason is printed.
    """
    # Accept plain strings too: the messages below rely on Path.name.
    input_file = Path(input_file)
    output_file = Path(output_file)

    cmd = [
        'ffmpeg',
        '-i', str(input_file),
        '-codec:a', 'libmp3lame',
        '-b:a', '192k',
        '-y',  # Overwrite output file if it exists
        str(output_file)
    ]

    try:
        # errors='replace' keeps undecodable bytes in ffmpeg's output from
        # raising UnicodeDecodeError while the captured text is decoded.
        subprocess.run(cmd, capture_output=True, text=True, errors='replace', check=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ Error converting {input_file.name}: {e.stderr}")
        return False
    except OSError as e:
        # ffmpeg could not be launched at all (missing, not executable, ...).
        print(f"❌ Error converting {input_file.name}: {e}")
        return False

    print(f"✓ Converted: {input_file.name} -> {output_file.name}")
    return True

def transcribe_audio(audio_file_path, groq_client):
    """Upload an audio file to the Groq transcription endpoint.

    The file is read fully into memory and sent, together with its file
    name, to ``groq_client.audio.transcriptions.create`` using the
    ``whisper-large-v3`` model and ``response_format="text"``.

    Args:
        audio_file_path: ``pathlib.Path`` of the audio file to send.
        groq_client: Client object exposing ``audio.transcriptions.create``.

    Returns:
        Whatever ``create`` returns, or None if reading the file or the API
        call raised; the error is printed rather than propagated.
    """
    try:
        with open(audio_file_path, "rb") as audio_stream:
            # (file name, raw bytes) pair passed as the ``file`` argument.
            upload = (audio_file_path.name, audio_stream.read())
            result = groq_client.audio.transcriptions.create(
                file=upload,
                model="whisper-large-v3",
                response_format="text",
            )
    except Exception as err:
        print(f"❌ Error transcribing {audio_file_path.name}: {str(err)}")
        return None
    else:
        return result

def process_audio_file(input_file_path, output_folder, groq_api_key):
    """Convert one audio file to MP3, transcribe it, and save the transcript.

    Steps, each of which prints its own progress or error message:
      1. Verify that FFmpeg can be run (``check_ffmpeg``).
      2. Create the Groq client from ``groq_api_key``.
      3. Place an MP3 version of the input in ``output_folder`` (copying it
         when it already is an MP3, converting it otherwise).
      4. Transcribe the MP3 (``transcribe_audio``) and write the result to
         ``<stem>_transcription.txt`` in ``output_folder``.

    Args:
        input_file_path: Path of the audio file to process.
        output_folder: Folder for the MP3 and the transcript; created,
            including missing parent folders, if it does not exist.
        groq_api_key: API key passed to the Groq client.

    Returns:
        The transcription returned by ``transcribe_audio``, or None when any
        step failed.
    """

    # Check if FFmpeg is available
    if not check_ffmpeg():
        return

    # Initialize Groq client
    try:
        groq_client = Groq(api_key=groq_api_key)
        print("✓ Groq client initialized")
    except Exception as e:
        print(f"❌ Error initializing Groq client: {str(e)}")
        return

    input_path = Path(input_file_path)
    # is_file() rather than exists(): a directory is not a usable input.
    if not input_path.is_file():
        print(f"❌ Input file not found: '{input_file_path}'")
        return

    # Create output folder if it doesn't exist; parents=True so a nested
    # OUTPUT_FOLDER such as "out/transcriptions" works as well.
    output_path = Path(output_folder)
    output_path.mkdir(parents=True, exist_ok=True)

    print("-" * 50)
    print(f"Processing: {input_path.name}")

    # Define the path for the converted MP3 file
    mp3_file = output_path / f"{input_path.stem}.mp3"

    # Check if file is already MP3 and copy it to the output folder
    if input_path.suffix.lower() == '.mp3':
        import shutil
        if mp3_file.exists() and mp3_file.samefile(input_path):
            # The input already lives in the output folder; copying a file
            # onto itself would raise shutil.SameFileError.
            print(f"✓ Already MP3 in output folder: {mp3_file.name}")
        else:
            shutil.copy(input_path, mp3_file)
            print(f"✓ Already MP3. Copied to output folder: {mp3_file.name}")
    else:
        # Convert to MP3 if necessary
        if not convert_to_mp3(input_path, mp3_file):
            return

    # Transcribe the MP3 file
    print(f"🎤 Transcribing: {mp3_file.name}")
    transcription = transcribe_audio(mp3_file, groq_client)

    # transcribe_audio signals failure with None; an empty transcript is
    # still a successful result and is saved like any other.
    if transcription is not None:
        print(f"✓ Transcription completed for: {input_path.name}")

        # Save transcription to text file
        txt_file = output_path / f"{input_path.stem}_transcription.txt"
        with open(txt_file, 'w', encoding='utf-8') as f:
            f.write(transcription)
        print(f"✓ Saved transcription to: {txt_file}")

        print("-" * 50)
        print("\n" + "="*50)
        print("TRANSCRIPTION RESULT:")
        print("="*50)
        print(transcription)
    else:
        print("Transcription failed.")

    return transcription

# Entry point: validate the configuration, then process the configured file.
if __name__ == "__main__":
    # A value counts as configured when it is non-empty and is not the
    # placeholder text from the setup instructions.
    if GROQ_API_KEY and GROQ_API_KEY != "your_groq_api_key_here":
        if INPUT_FILE and INPUT_FILE != "path/to/your/audio_file.wav":
            # Both settings look usable: run the full pipeline.
            process_audio_file(INPUT_FILE, OUTPUT_FOLDER, GROQ_API_KEY)
        else:
            print("⚠️ Please set the path to your audio file in the INPUT_FILE variable.")
    else:
        print("⚠️ Please set your Groq API key in the .env file (GROQ_API_KEY='your_key')")


✓ FFmpeg is available
✓ Groq client initialized
--------------------------------------------------
Processing: WhatsApp Audio 2025-10-07 at 10.53.41.mpeg
✓ Converted: WhatsApp Audio 2025-10-07 at 10.53.41.mpeg -> WhatsApp Audio 2025-10-07 at 10.53.41.mp3
🎤 Transcribing: WhatsApp Audio 2025-10-07 at 10.53.41.mp3
❌ Error transcribing WhatsApp Audio 2025-10-07 at 10.53.41.mp3: Error code: 401 - {'error': {'message': 'Invalid API Key', 'type': 'invalid_request_error', 'code': 'invalid_api_key'}}
Transcription failed.
