In [None]:
import os
import tempfile
import warnings

import gradio as gr
import numpy as np
import soundfile as sf
import torch
from transformers import pipeline
warnings.filterwarnings("ignore")

# Initial configuration: run on the GPU in half precision when CUDA is
# available, otherwise fall back to the CPU in single precision.
if torch.cuda.is_available():
    DEVICE, DTYPE = "cuda", torch.float16
else:
    DEVICE, DTYPE = "cpu", torch.float32

# Build the speech-recognition pipeline once, when this cell runs, so every
# transcription request reuses the same loaded model.
model = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-large-v3",
    device=DEVICE,
    torch_dtype=DTYPE,
)

def transcribe_audio(audio):
    """
    Transcribe audio data to text.

    Args:
        audio: ``None`` when no audio was provided, otherwise a
            ``(sample_rate, data)`` pair. The pair is unpacked and written
            to a temporary WAV file with ``sf.write``.

    Returns:
        The ``"text"`` entry of the pipeline result, or ``""`` when ``audio``
        is ``None`` or any step fails (the error is printed, not raised).
    """
    if audio is None:
        return ""

    temp_path = None
    try:
        sample_rate, data = audio
        # Use a unique file per call: a fixed name in the working directory
        # would let overlapping requests overwrite each other's audio.
        fd, temp_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)  # only the path is needed; sf.write reopens it itself
        sf.write(temp_path, data, sample_rate)
        result = model(temp_path)
        return result["text"]
    except Exception as e:
        print(f"Error during transcription: {e}")
        return ""
    finally:
        # Runs on success and on failure, so a failed write or transcription
        # does not leave the temporary WAV behind.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                # Cleanup is best-effort; it must not mask the result above.
                pass

def create_interface(example_files=("sample_audio.wav",)):
    """
    Build the Gradio Blocks UI for audio transcription.

    Args:
        example_files: Paths of audio files to offer as examples. Add your
            own example files here. Entries that are not existing files are
            skipped, and the examples section is omitted when none remain.

    Returns:
        The constructed ``gr.Blocks`` app; it is not launched here.
    """
    # Keep only examples that are present on disk so a missing placeholder
    # file is not registered as an example.
    available_examples = [[path] for path in example_files if os.path.isfile(path)]

    with gr.Blocks() as demo:
        gr.Markdown("# Audio Transcription")
        with gr.Row():
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="numpy",
            )
            text_output = gr.Textbox(
                label="Transcription",
                placeholder="Transcription will appear here...",
                lines=5,
            )
        submit_btn = gr.Button("Transcribe")
        submit_btn.click(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=[text_output],
        )
        if available_examples:
            # `outputs` is only used together with `fn`, so pass both.
            gr.Examples(
                examples=available_examples,
                inputs=[audio_input],
                outputs=[text_output],
                fn=transcribe_audio,
            )
        gr.Markdown("""
        ## Instructions
        1. Click the microphone button to record audio or upload an audio file
        2. Click 'Transcribe' to convert the audio to text and the transcription will appear in the text box
        
        """)

    return demo




Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [11]:
# Build the UI, then serve it on localhost while also requesting a public
# share link.
demo = create_interface()
demo.launch(
    server_name="localhost",
    share=True,
)

* Running on local URL:  http://localhost:7863

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


