<a href="https://colab.research.google.com/github/SebastianJoa/Conversor/blob/main/Traductor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Instalación de librerías ---
!pip install tensorflow tensorflow-datasets librosa gtts transformers tensorflowjs



In [None]:
# --- Importaciones ---
import os
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import librosa
from gtts import gTTS
from transformers import pipeline

In [None]:
# --- Carpeta de salida ---
EXPORT_FOLDER = "/content/carpeta_salida"
TFJS_FOLDER = os.path.join(EXPORT_FOLDER, "tfjs_model")
os.makedirs(TFJS_FOLDER, exist_ok=True)

In [None]:
# --- Cargar el dataset de ejemplo (Speech Commands) ---
dataset_name = "speech_commands"
(ds_train, ds_test), ds_info = tfds.load(dataset_name, split=["train", "test"], shuffle_files=True, with_info=True, as_supervised=True)

In [None]:
# --- Función para extraer MFCCs ---
def extract_features(audio, sr=16000, max_len=40):
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20).T
    # Asegurar que todas las secuencias tengan el mismo largo
    if len(mfcc) < max_len:
        pad_width = max_len - len(mfcc)
        mfcc = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:max_len, :]
    return mfcc

In [None]:
# --- Preparar arrays (versión corregida) ---
X_train, y_train = [], []
for audio, label in tfds.as_numpy(ds_train.take(500)):  # tomar solo 500 ejemplos para demo
    # Convertir el audio a numpy array y asegurar que es float32
    audio_np = audio.astype(np.float32)

    # Extraer características MFCC
    features = extract_features(audio_np)

    X_train.append(features)
    y_train.append(label)
X_train = np.array(X_train)
y_train = np.array(y_train)
print(f"Preparación completada. Dimensiones:")
print(f"X_train: {X_train.shape}")
print(f"y_train: {y_train.shape}")

Preparación completada. Dimensiones:
X_train: (500, 40, 20)
y_train: (500,)


In [None]:
# --- Definir y compilar el modelo CNN para audio ---
model_cnn = tf.keras.Sequential([
    tf.keras.layers.Conv1D(32, 3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Conv1D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(ds_info.features["label"].num_classes, activation='softmax')
])
model_cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_cnn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# --- Entrenamiento del modelo ---
model_cnn.fit(X_train, y_train, epochs=5, batch_size=32)

Epoch 1/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 28ms/step - accuracy: 0.3535 - loss: 38.1408
Epoch 2/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.4218 - loss: 14.9537
Epoch 3/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.4817 - loss: 6.9756
Epoch 4/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5100 - loss: 3.9242
Epoch 5/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5685 - loss: 3.7450


<keras.src.callbacks.history.History at 0x7de96b167350>

In [None]:
# --- Guardar el modelo Keras ---
keras_model_path = os.path.join(EXPORT_FOLDER, "audio_model.h5")
model_cnn.save(keras_model_path)



In [None]:
# --- Exportar a TensorFlow.js ---
import tensorflowjs as tfjs
tfjs.converters.save_keras_model(model_cnn, TFJS_FOLDER)
print("Modelo TF.js exportado en:", TFJS_FOLDER)



failed to lookup keras version from the file,
    this is likely a weight only file
Modelo TF.js exportado en: /content/carpeta_salida/tfjs_model


In [None]:
# --- Inicializar modelos de reconocimiento de voz y traducción ---
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large-v2")
translator_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")

Device set to use cpu
Device set to use cpu


In [None]:
# --- Funciones para transcripción, traducción y texto a voz ---
def transcribe_audio(file_path):
    result = asr(file_path)
    return result["text"]
def translate_text(text):
    return translator_pipeline(text)[0]['translation_text']
def text_to_speech(text, filename):
    tts = gTTS(text=text, lang='es')
    tts.save(filename)
    return filename
print("Colab: Preparación completa. Modelos guardados y listos para exportar.")

Colab: Preparación completa. Modelos guardados y listos para exportar.


In [16]:
import tensorflow as tf
import numpy as np
import librosa
import os
from sklearn.model_selection import train_test_split

# --- Configuración ---
SAMPLE_RATE = 16000
DURATION = 1  # 1 segundo
SAMPLES_PER_FILE = SAMPLE_RATE * DURATION

# --- Function to generate dummy audio data ---
def generate_dummy_audio(num_samples, sample_rate):
    """Generates a dummy audio signal with a simple sine wave."""
    t = np.linspace(0., DURATION, num_samples)
    amplitude = np.i0(10 * np.sin(2 * np.pi * 5 * t))  # Example: Modified Bessel function
    return amplitude.astype(np.float32)

# --- Simulation dataset with dummy data ---
num_examples = 100  # Number of dummy examples
num_classes = 5  # Number of dummy classes
X, y = [], []

for i in range(num_examples):
    # Generate dummy audio data
    dummy_audio = generate_dummy_audio(SAMPLES_PER_FILE, SAMPLE_RATE)
    X.append(dummy_audio)
    # Assign a random class label
    y.append(np.random.randint(0, num_classes))

X = np.array(X)
y = np.array(y)

# --- Prepare data ---
X = np.expand_dims(X, -1)  # [N, 16000, 1]
y = tf.keras.utils.to_categorical(y, num_classes=num_classes)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# --- Modelo CNN ---
model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(16, 9, activation="relu", input_shape=(SAMPLES_PER_FILE, 1)),
    tf.keras.layers.MaxPooling1D(4),
    tf.keras.layers.Conv1D(32, 9, activation="relu"),
    tf.keras.layers.MaxPooling1D(4),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(num_classes, activation="softmax")
])

model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.fit(X_train, y_train, epochs=10, validation_data=(X_test,y_test))

# --- Guardar en formato TensorFlow.js ---
model.save("audio_model.h5")
!mkdir -p Conversor
!pip install tensorflowjs --quiet
!tensorflowjs_converter --input_format keras audio_model.h5 Conversor/

Epoch 1/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 457ms/step - accuracy: 0.2258 - loss: 1532.0175 - val_accuracy: 0.2000 - val_loss: 2071.4971
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 281ms/step - accuracy: 0.2164 - loss: 1897.4124 - val_accuracy: 0.2000 - val_loss: 1106.0602
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 312ms/step - accuracy: 0.1977 - loss: 931.4408 - val_accuracy: 0.2500 - val_loss: 314.3582
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 492ms/step - accuracy: 0.1445 - loss: 301.1959 - val_accuracy: 0.1000 - val_loss: 117.2299
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 418ms/step - accuracy: 0.2039 - loss: 87.2047 - val_accuracy: 0.2500 - val_loss: 39.7545
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 322ms/step - accuracy: 0.1914 - loss: 49.7109 - val_accuracy: 0.2500 - val_loss: 21.4813
Epoch 7/10
[1m3



2025-08-20 17:20:48.453852: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755710448.735465   21261 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755710448.814056   21261 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755710449.225539   21261 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1755710449.225648   21261 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1755710449.225658   21261 computation_placer.cc:177] computation placer alr