In [None]:
!pip install gradio
!pip install groq

Collecting gradio
  Downloading gradio-5.23.3-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [None]:
import io
import os

import gradio as gr
import requests
import torch
from PIL import Image
from transformers import pipeline, GPTNeoForCausalLM, GPT2Tokenizer

# --- Whisper speech-recognition setup ---
# Checkpoint to load, and the batch size that `translate` forwards to the
# pipeline on every call.
MODEL_NAME = "openai/whisper-large-v3"
BATCH_SIZE = 8

# Prefer CUDA device index 0 when a GPU is visible; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Build the ASR pipeline once, at cell execution time, with 30-second chunking.
pipe = pipeline("automatic-speech-recognition", model=MODEL_NAME, chunk_length_s=30, device=device)

# Define translation function
def translate(inputs, task):
    """Run the Whisper pipeline on an audio input and return the produced text.

    Args:
        inputs: Audio handed to ``pipe``. ``None`` means nothing was submitted.
        task: Value forwarded to the model as the ``task`` generation kwarg.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is not None:
        generation_options = {"task": task}
        transcription = pipe(
            inputs,
            batch_size=BATCH_SIZE,
            generate_kwargs=generation_options,
            return_timestamps=True,
        )
        return transcription["text"]

    # Nothing was uploaded or recorded: surface a readable message in the UI.
    raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

# API details for image generation.
# The Hugging Face access token is read from the HF_TOKEN environment variable
# instead of being hardcoded, so the credential never lands in the notebook.
# NOTE(review): the token that used to be written on this line must be treated
# as leaked and revoked in the Hugging Face account settings.
API_URL = "https://api-inference.huggingface.co/models/XLabs-AI/flux-RealismLora"
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Without a token the Authorization header is omitted and requests go out unauthenticated.
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# Define image generation function
def generate_image(prompt, timeout=120):
    """Generate an image from a text prompt via the hosted inference endpoint.

    Args:
        prompt: Text sent to the API as the ``inputs`` field of the JSON payload.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled endpoint cannot block the UI handler indefinitely.

    Returns:
        A PIL image opened from the bytes of the response body.

    Raises:
        gr.Error: If ``prompt`` is empty or contains only whitespace.
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If the request exceeds ``timeout`` seconds.
    """
    # Reject an empty prompt up front instead of spending an API call on it.
    if not prompt or not prompt.strip():
        raise gr.Error("No prompt provided! Please produce or enter some text before generating an image.")

    payload = {"inputs": prompt}
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()  # Raise an exception for bad status codes
    return Image.open(io.BytesIO(response.content))

# Load the GPT-Neo tokenizer and causal-LM weights used by generate_text.
# Both are fetched from the same checkpoint, so its identifier is written once.
_GPT_NEO_CHECKPOINT = "EleutherAI/gpt-neo-1.3B"
text_tokenizer = GPT2Tokenizer.from_pretrained(_GPT_NEO_CHECKPOINT)
text_model = GPTNeoForCausalLM.from_pretrained(_GPT_NEO_CHECKPOINT)

# Define text generation function
def generate_text(prompt, temperature=0.9, max_length=100):
    """Sample a continuation of ``prompt`` from the GPT-Neo model.

    Args:
        prompt: Text to tokenize and feed to the model.
        temperature: Sampling temperature forwarded to ``generate``.
        max_length: Forwarded as ``generate``'s ``max_length``.
            NOTE(review): per the transformers docs this limit counts the
            prompt tokens as well as the generated ones - confirm that a
            prompt longer than this is handled the way callers expect.

    Returns:
        The first generated sequence decoded to a string, with special tokens
        skipped.
    """
    # Put the input tensors on whatever device the model currently lives on, so
    # generation keeps working if the model is moved to a GPU.
    inputs = text_tokenizer(prompt, return_tensors="pt").to(text_model.device)
    gen_tokens = text_model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        do_sample=True,
        temperature=temperature,
        max_length=max_length,
        # The end-of-sequence token is reused as the padding token.
        pad_token_id=text_tokenizer.eos_token_id,
    )
    return text_tokenizer.decode(gen_tokens[0], skip_special_tokens=True)

# Gradio app with multiple functionalities in tabs
def _build_translation_panel(heading, source, audio_label):
    """Add the translate-then-generate-image controls to the open Gradio context.

    Must be called inside an active ``gr.Tab`` / ``gr.Blocks`` context so the
    created components attach to that container.

    Args:
        heading: Markdown text shown at the top of the panel.
        source: Audio source passed to ``gr.Audio`` (``"microphone"`` or ``"upload"``).
        audio_label: Label displayed on the audio input component.
    """
    gr.Markdown(heading)
    audio_input = gr.Audio(sources=source, type="filepath", label=audio_label)
    task_choice = gr.Radio(["translate"], label="Task", value="translate")
    translated_text = gr.Textbox(label="Translated Text")
    gr.Button("Submit").click(translate, inputs=[audio_input, task_choice], outputs=translated_text)

    # The translated text doubles as the prompt for image generation.
    gr.Markdown("Image Generation")
    generated_image = gr.Image(label="Generated Image")
    gr.Button("Generate").click(generate_image, inputs=translated_text, outputs=generated_image)


with gr.Blocks() as demo:
    with gr.Tab("Microphone Translation"):
        _build_translation_panel("Audio Translation", "microphone", "Microphone Input")
        # Heading only: no text-generation controls are attached to it yet.
        gr.Markdown("Text Generation")

    with gr.Tab("File Upload Translation"):
        _build_translation_panel("File Translation", "upload", "Upload Audio File")

# Launch the app
demo.launch()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/3.90k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/283k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.48M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/494k [00:00<?, ?B/s]

normalizer.json:   0%|          | 0.00/52.7k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/34.6k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.07k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/340 [00:00<?, ?B/s]

Device set to use cuda:0


config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.31G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://005c26a92c3137974b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


