In [17]:
!pip install gradio openai-whisper torch ffmpeg zyphra


Collecting ffmpeg
  Downloading ffmpeg-1.4.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting zyphra
  Downloading zyphra-0.1.4-py3-none-any.whl.metadata (7.4 kB)
Collecting numpy<3.0,>=1.0 (from gradio)
  Downloading numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.9/60.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Downloading zyphra-0.1.4-py3-none-any.whl (7.7 kB)
Downloading numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.5/19.5 MB[0m [31m95.1 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: ffmpeg
  Building wheel for ffmpeg (setup.py) ... [?25l[?25hdone
  Created wheel for ffmpeg: filename=ffmpeg-1.4-py3-none-any.whl size=6082 sha256=9104bcb630982103c4e2a8c362fa63e2498f59f1313fc4877e20612b6f0b61ab
  Stored in directory:

In [None]:


# ==================================
# 2) IMPORT LIBRARIES
# ==================================
import os
import tempfile

import gradio as gr
import whisper
from zyphra import ZyphraClient  # Assumes the Zyphra package provides this client

# ==================================
# 3) LOAD WHISPER MODEL
# ==================================
# Name of the Whisper checkpoint to load; change this one constant to switch models.
WHISPER_MODEL_NAME = "base"

# Loaded once at module level so every request handled by the app reuses the same model.
model = whisper.load_model(WHISPER_MODEL_NAME)

# ==================================
# 4) DEFINE PROCESSING FUNCTION
# ==================================
def process_media(media_file):
    """
    Translate uploaded media to English text and synthesize that text as speech.

    Steps:
      - Transcribes and translates the uploaded audio/video into English using Whisper.
      - Uses ZyphraClient (synchronous) to convert the English text to speech.
      - Writes the returned audio bytes to a temporary ``.wav`` file.

    Args:
        media_file: Path of the uploaded file as a string, or an upload object
            exposing that path as ``.name`` (presumably what older Gradio
            versions pass for ``gr.File`` - confirm against the installed
            version). ``None`` means nothing was uploaded.

    Returns:
        A ``(audio_path, text)`` tuple. On success ``audio_path`` is the path of
        the temporary audio file and ``text`` is the English transcription. On
        any failure ``audio_path`` is ``None`` and ``text`` is a message
        starting with ``"Error: "``; exceptions are reported, not propagated.

    Environment:
        ZYPHRA_API_KEY: API key passed to ``ZyphraClient``. It is read from the
            environment so the secret never lives in the notebook source.
    """
    # Nothing uploaded: answer directly instead of failing inside Whisper.
    if media_file is None:
        return None, "Error: no media file was provided."

    try:
        # Fail fast on a missing key, before the (slow) transcription runs.
        api_key = os.environ.get("ZYPHRA_API_KEY")
        if not api_key:
            raise RuntimeError(
                "Set the ZYPHRA_API_KEY environment variable before using the TTS step."
            )

        # Accept both a plain path and an upload wrapper that carries the path in `.name`.
        if isinstance(media_file, str):
            media_path = media_file
        else:
            media_path = getattr(media_file, "name", media_file)

        # Transcribe and translate the media into English
        result = model.transcribe(media_path, task="translate")
        english_transcription = result["text"]

        # Do not send an empty prompt to the TTS service.
        if not english_transcription.strip():
            return None, "Error: no speech was detected in the uploaded media."

        # ==================================
        # Zyphra TTS API CALL using ZyphraClient
        # ==================================
        with ZyphraClient(api_key=api_key) as client:
            # Get audio bytes for the given text; adjust speaking_rate if desired.
            # WAV is requested explicitly so the bytes match the ".wav" suffix used
            # below (the Zyphra docs describe WebM as the default output format -
            # confirm against the installed client version).
            audio_data = client.audio.speech.create(
                text=english_transcription,
                speaking_rate=15,
                mime_type="audio/wav",
            )

        # delete=False keeps the file on disk after the handle is closed, so the
        # returned path is still valid when Gradio reads it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio_data)

        return temp_audio.name, english_transcription

    except Exception as e:
        # Best-effort UI handler: log the problem and show it in the subtitles box.
        print("Error during processing:", e)
        return None, f"Error: {str(e)}"

# ==================================
# 5) BUILD GRADIO INTERFACE
# ==================================
# Input widget: a single upload restricted to audio and video files.
media_upload = gr.File(label="Upload Audio or Video", file_types=["audio", "video"])

# Output widgets, listed in the same order as the values returned by process_media.
result_widgets = [
    gr.Audio(type="filepath", label="Synthesized English Audio"),
    gr.Textbox(label="English Subtitles"),
]

# Text shown under the title of the app.
app_description = (
    "Upload an audio or video file in any language. The file is transcribed and translated into "
    "English using Whisper, then converted to speech via the Zyphra TTS service using ZyphraClient."
)

interface = gr.Interface(
    fn=process_media,
    inputs=media_upload,
    outputs=result_widgets,
    title="Multilingual Media to English TTS Pipeline (Zyphra)",
    description=app_description,
)

# ==================================
# 6) LAUNCH THE APP
# ==================================
# debug=True keeps this cell running so errors and logs appear in the notebook output.
interface.launch(debug=True)


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://9da5e93e59442e1e14.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
