<a href="https://colab.research.google.com/github/Merly2004/Traductor/blob/ramfa/TraductorIdiomas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# --- Instalación de librerías ---
!pip install tensorflow tensorflow-datasets librosa gtts transformers tensorflowjs gradio soundfile langdetect



In [2]:
# --- Importaciones ---
import os
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import librosa
from gtts import gTTS
from transformers import pipeline
from langdetect import detect, LangDetectException
import gradio as gr

In [3]:
# --- Carpeta de salida ---
EXPORT_FOLDER = "/content/carpeta_salida"
TFJS_FOLDER = os.path.join(EXPORT_FOLDER, "tfjs_model")
os.makedirs(TFJS_FOLDER, exist_ok=True)

In [4]:
dataset_name = "speech_commands"
(ds_train, ds_test), ds_info = tfds.load(dataset_name, split=["train", "test"],
                                         shuffle_files=True, with_info=True, as_supervised=True)



Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/speech_commands/0.0.3...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/speech_commands/incomplete.0VWU2Z_0.0.3/speech_commands-train.tfrecord*...…

Generating validation examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/speech_commands/incomplete.0VWU2Z_0.0.3/speech_commands-validation.tfrecor…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/speech_commands/incomplete.0VWU2Z_0.0.3/speech_commands-test.tfrecord*...:…



Dataset speech_commands downloaded and prepared to /root/tensorflow_datasets/speech_commands/0.0.3. Subsequent calls will reuse this data.


In [5]:
def extract_features(audio, sr=16000, max_len=40):
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20).T
    if len(mfcc) < max_len:
        pad_width = max_len - len(mfcc)
        mfcc = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:max_len, :]
    return mfcc

In [6]:
X_train, y_train = [], []
for audio, label in tfds.as_numpy(ds_train.take(500)):  # demo: solo 500 ejemplos
    audio_np = audio.astype(np.float32)
    features = extract_features(audio_np)
    X_train.append(features)
    y_train.append(label)
X_train, y_train = np.array(X_train), np.array(y_train)
print(f"Preparación completada. X_train: {X_train.shape}, y_train: {y_train.shape}")

Preparación completada. X_train: (500, 40, 20), y_train: (500,)


In [7]:
# --- Modelo CNN ---
model_cnn = tf.keras.Sequential([
    tf.keras.layers.Conv1D(32, 3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Conv1D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(ds_info.features["label"].num_classes, activation='softmax')
])
model_cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_cnn.fit(X_train, y_train, epochs=3, batch_size=32)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/3
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 94ms/step - accuracy: 0.3451 - loss: 28.6738
Epoch 2/3
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3253 - loss: 7.0798 
Epoch 3/3
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4736 - loss: 3.4869 


<keras.src.callbacks.history.History at 0x79a4070089b0>

In [8]:
keras_model_path = os.path.join(EXPORT_FOLDER, "audio_model.h5")
model_cnn.save(keras_model_path)



In [9]:
import tensorflowjs as tfjs
tfjs.converters.save_keras_model(model_cnn, TFJS_FOLDER)
print("Modelo TF.js exportado en:", TFJS_FOLDER)



failed to lookup keras version from the file,
    this is likely a weight only file
Modelo TF.js exportado en: /content/carpeta_salida/tfjs_model


In [10]:
# --- Whisper ASR ---
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large-v2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/6.17G [00:00<?, ?B/s]

generation_config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

normalizer.json: 0.00B [00:00, ?B/s]

added_tokens.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

preprocessor_config.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


In [11]:
# --- Modelos de traducción disponibles ---
translation_models = {
    "en": "Helsinki-NLP/opus-mt-en-es",   # Inglés → Español
    "fr": "Helsinki-NLP/opus-mt-fr-es",   # Francés → Español
    "de": "Helsinki-NLP/opus-mt-de-es",   # Alemán → Español
    "it": "Helsinki-NLP/opus-mt-it-es",   # Italiano → Español
    "pt": "Helsinki-NLP/opus-mt-pt-es",   # Portugués → Español
    "nl": "Helsinki-NLP/opus-mt-nl-es",   # Neerlandés → Español
    "sv": "Helsinki-NLP/opus-mt-sv-es",   # Sueco → Español
    "no": "Helsinki-NLP/opus-mt-no-es",   # Noruego → Español
    "da": "Helsinki-NLP/opus-mt-da-es",   # Danés → Español
    "fi": "Helsinki-NLP/opus-mt-fi-es",   # Finés → Español
    "ru": "Helsinki-NLP/opus-mt-ru-es",   # Ruso → Español
    "zh": "Helsinki-NLP/opus-mt-zh-es",   # Chino → Español
    "ja": "Helsinki-NLP/opus-mt-ja-es",   # Japonés → Español
    "ar": "Helsinki-NLP/opus-mt-ar-es",   # Árabe → Español
}
FALLBACK_MODEL = "Helsinki-NLP/opus-mt-mul-es"
translator_cache = {} #Guarda

In [12]:
def get_translator_for_lang(lang):
    model_name = translation_models.get(lang, FALLBACK_MODEL)# busca traduccion
    if model_name not in translator_cache:
        translator_cache[model_name] = pipeline("translation", model=model_name) # descarga y guarda
    return translator_cache[model_name] # devuelve

In [13]:
# --- Funciones ---
def transcribe_audio(file_path): # convertir voz/audio en texto.
    result = asr(file_path)
    return result.get("text", "").strip()

def detect_lang_from_text(text): # detectar automáticamente el idioma
    try:
        return detect(text)
    except LangDetectException:
        return None

def translate_text(text, lang): # traducción automática.
    translator = get_translator_for_lang(lang)
    return translator(text)[0].get("translation_text", "")

def text_to_speech(text, filename="output_es.mp3"): # Usa gTTS (Google Text-to-Speech) para convertir un texto en audio
    tts = gTTS(text=text, lang='es')
    tts.save(filename)
    return filename

In [14]:
# --- Gradio ---
def translate_fn(audio_path):
    # Transcribir
    txt = transcribe_audio(audio_path)
    if not txt:
        return "No se pudo transcribir el audio.", None

    # Detectar idioma
    lang = detect_lang_from_text(txt) or "desconocido"

    # Traducir
    tr = translate_text(txt, lang)

    # Pasar traducción a voz
    audio_out = text_to_speech(tr, "traduccion_es.mp3")

    return f"🌍 Idioma detectado: {lang}\n\n🎤 Texto original:\n{txt}\n\n🇪🇸 Traducción al Español:\n{tr}", audio_out


with gr.Blocks(css="""
    #titulo {text-align: center; font-size: 28px; font-weight: bold; color: #2E86C1; margin-bottom: 15px;}
    #sub {text-align: center; font-size: 16px; color: #5D6D7E; margin-bottom: 20px;}
    #resultado {font-size: 15px; background: #F8F9F9; padding: 15px; border-radius: 10px;}
    .gradio-container {max-width: 900px; margin: auto;}
""") as demo:

    # Título y subtítulo
    gr.HTML("<div id='titulo'>🎧 Traductor de Audio a Español con Voz</div>")
    gr.HTML("<div id='sub'>Sube un audio en cualquier idioma 🌍 y obtén su transcripción + traducción 🎤</div>")

    with gr.Row():
        with gr.Column(scale=1):
            audio_input = gr.Audio(type="filepath", label="🎵 Sube tu audio")
            btn = gr.Button("🔄 Traducir", variant="primary")

        with gr.Column(scale=2):
            result_text = gr.Textbox(label="📄 Resultado", elem_id="resultado", lines=10)
            result_audio = gr.Audio(label="🔊 Traducción en voz", type="filepath")

    btn.click(fn=translate_fn, inputs=audio_input, outputs=[result_text, result_audio]) # boton manda a translate_fn


demo.launch(share=True) # da un link público


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a6a7d7cad2ded12f73.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


