In [None]:
# !pip install openai-whisper transformers edge-tts

In [None]:
import whisper
from transformers import pipeline
import edge_tts
import asyncio

In [None]:
# Step 1: Voice-to-Text Conversion
def audio_to_text(audio_file, model_name="base", language='en'):
    """
    Converts audio input to text using the Whisper model.

    Parameters:
    audio_file (str): Path to the audio file.
    model_name (str): Name of the Whisper checkpoint to load. Defaults to
        "base", the checkpoint this function used before it was configurable.
    language (str): Language code handed to the transcriber. Defaults to 'en'.

    Returns:
    str: Transcribed text from the audio.
    """
    # The checkpoint is loaded on every call; a caller transcribing many
    # files in a row pays that loading cost each time.
    model = whisper.load_model(model_name)
    result = model.transcribe(audio_file, language=language)
    return result['text']

# Step 2: Text Input into LLM
def get_llm_response(input_text):
    """
    Generates a response from a Large Language Model (LLM) based on input text.

    Parameters:
    input_text (str): Input text for the LLM.

    Returns:
    str: The 'generated_text' field of the first (and only) sequence the
        text-generation pipeline returns.
        NOTE(review): with the pipeline's default settings this text is
        expected to start with the prompt itself - confirm against the
        installed transformers version before speaking it back to the user.
    """
    llm = pipeline("text-generation", model="gpt2")  # Load the LLM model
    # Budget only the continuation. A `max_length` limit also counts the
    # prompt's tokens, so a transcription of ~50 tokens or more would leave
    # no room for any reply at all.
    response = llm(input_text, max_new_tokens=50, num_return_sequences=1)
    return response[0]['generated_text']

# Step 3: Text-to-Speech Conversion

# NOTE(review): 'en-US-JessaNeural' appears to have been retired and renamed
# to 'en-US-AriaNeural'; confirm with `edge-tts --list-voices`.
_LEGACY_VOICES = {'en-US-JessaNeural': 'en-US-AriaNeural'}


def _with_sign(adjustment):
    """Prefix an unsigned adjustment such as '0%' with '+'; leave anything else untouched."""
    if isinstance(adjustment, str) and not adjustment.startswith(('+', '-')):
        return '+' + adjustment
    return adjustment


def _normalize_pitch(pitch):
    """Return the pitch with an explicit sign, mapping a zero-percent offset to zero hertz."""
    signed = _with_sign(pitch)
    # Zero is zero in any unit, so the legacy '0%' default can be expressed in
    # the hertz form edge-tts expects. Non-zero percentages are passed through
    # unchanged so that edge-tts reports them itself.
    if signed in ('+0%', '-0%'):
        return '+0Hz'
    return signed


async def text_to_speech(text, output_file, pitch='0%', voice='en-US-JessaNeural', speed='0%'):
    """
    Converts text to speech and saves it to an audio file.

    Parameters:
    text (str): Text to convert to speech.
    output_file (str): Path to save the output audio file.
    pitch (str): Pitch adjustment for the speech, e.g. '+5Hz' or '-10Hz'.
        An unsigned value gets a leading '+', and a zero-percent value
        (including the default) is sent as '+0Hz'.
    voice (str): Voice type for the speech. Names listed in _LEGACY_VOICES
        are replaced by their current equivalent.
    speed (str): Speed adjustment for the speech, e.g. '+20%' or '-10%'.
        An unsigned value gets a leading '+'.
    """
    # edge-tts expects explicitly signed rate/pitch strings; the unsigned
    # defaults in this signature are kept for callers and normalized here.
    communicate = edge_tts.Communicate(
        text,
        voice=_LEGACY_VOICES.get(voice, voice),
        rate=_with_sign(speed),
        pitch=_normalize_pitch(pitch),
    )
    await communicate.save(output_file)

In [None]:
async def main(audio_file, output_audio_file="output_audio.mp3"):
    """
    Runs the full pipeline: transcribe audio, query the LLM, speak the reply.

    Progress and intermediate results are printed at each step.

    Parameters:
    audio_file (str): Path to the input audio file.
    output_audio_file (str): Path where the synthesized reply is saved.
        Defaults to "output_audio.mp3", the path used before this was
        configurable.
    """
    # Step 1: Convert audio to text
    print("Converting audio to text...")
    text_output = audio_to_text(audio_file)
    print("Transcribed Text:", text_output)

    # Step 2: Get response from LLM
    print("Generating response from LLM...")
    llm_response = get_llm_response(text_output)
    print("LLM Response:", llm_response)

    # Step 3: Convert response to speech
    print("Converting response to speech...")
    await text_to_speech(llm_response, output_audio_file)
    print(f"Audio response saved to {output_audio_file}")

In [None]:
# Path of the recording to process; replace with your own audio file.
audio_file_path = "input.wav"  # e.g., "audio.wav"

# Run the main function. A top-level `await` is only accepted by an
# async-aware REPL such as IPython/Jupyter; in a plain Python script use
# asyncio.run(main(audio_file_path)) instead.
await main(audio_file_path)