In [1]:
import os
import glob
import torch
import torchaudio
from tqdm import tqdm
from transformers import AutoProcessor, SeamlessM4Tv2ForSpeechToSpeech

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# All working folders are resolved against the current working directory
_cwd = os.getcwd()
base_dir = os.path.dirname(_cwd)
uploads_dir = os.path.join(_cwd, "uploads")
data_dir = os.path.join(_cwd, "data")
translations_dir = os.path.join(uploads_dir, "translations")

# Make sure the data and translations folders exist
# (creating translations_dir also creates its parent, uploads_dir)
for _required_dir in (data_dir, translations_dir):
    os.makedirs(_required_dir, exist_ok=True)

print(f"Looking for MP3 files in: {uploads_dir}")

  from .autonotebook import tqdm as notebook_tqdm


Looking for MP3 files in: c:\Users\Adria\dev\blindwikiapp\notebooks\uploads


In [2]:
# Load the processor and the speech-to-speech model from the same checkpoint
model_name = "facebook/seamless-m4t-v2-large"
processor = AutoProcessor.from_pretrained(model_name)
model = SeamlessM4Tv2ForSpeechToSpeech.from_pretrained(model_name)
# Place the model weights on the selected device
model = model.to(device)

Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.56it/s]


In [3]:
def translate_audio(file_path, target_lang="eng"):
    """
    Translate a speech recording into spoken audio in another language.

    The recording is loaded with torchaudio, mixed down to mono, resampled to
    16 kHz, passed through the module-level ``processor`` and ``model``
    (SeamlessM4T v2 speech-to-speech), and the generated waveform is written
    into ``translations_dir``.

    Args:
        file_path (str): Path to the input audio file.
        target_lang (str): Target language code forwarded to
            ``model.generate`` as ``tgt_lang`` (default: 'eng').

    Returns:
        str | None: Path of the translated audio file, or None when any step
        fails. Errors are printed instead of raised so that a batch run can
        continue with the next file.
    """
    input_sample_rate = 16000  # rate the recording is resampled to before processing

    try:
        # torchaudio returns a (channels, frames) tensor plus the file's sample rate
        waveform, sample_rate = torchaudio.load(file_path)

        # The model documentation asks for mono input; average the channels
        # instead of silently handing a multi-channel tensor to the processor.
        if waveform.dim() > 1 and waveform.size(0) > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Resample to 16 kHz when the file uses another rate
        if sample_rate != input_sample_rate:
            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=input_sample_rate)
            waveform = resampler(waveform)

        # Feature extraction happens on CPU/NumPy data, so give the processor a
        # 1-D CPU array; only the resulting model inputs are moved to the device.
        mono_audio = waveform.reshape(-1).cpu().numpy()
        inputs = processor(
            audios=mono_audio,
            sampling_rate=input_sample_rate,
            return_tensors="pt"
        ).to(device)

        # Generate the translated speech without tracking gradients
        with torch.no_grad():
            outputs = model.generate(**inputs, tgt_lang=target_lang, speaker_id=0)

        # First element of the output is the waveform; shape it as (1, frames)
        # float32 on the CPU, which is what torchaudio.save expects.
        translated_waveform = outputs[0].detach().cpu().float().reshape(1, -1)

        # Prefer the rate declared by the model config; fall back to 16 kHz
        output_sample_rate = getattr(model.config, "sampling_rate", input_sample_rate)

        # Build "<stem>_translated_<lang><ext>" from the real extension so that
        # ".MP3" or non-mp3 inputs still get the suffix and a ".mp3" occurring
        # in the middle of the name is left alone.
        stem, extension = os.path.splitext(os.path.basename(file_path))
        translated_file_name = f"{stem}_translated_{target_lang}{extension.lower() or '.mp3'}"
        translated_file_path = os.path.join(translations_dir, translated_file_name)

        os.makedirs(translations_dir, exist_ok=True)
        torchaudio.save(translated_file_path, translated_waveform, sample_rate=output_sample_rate)

        return translated_file_path

    except Exception as e:
        # Best-effort batch behaviour: report the failure and signal it with None
        print(f"Error procesando {file_path}: {str(e)}")
        return None

In [4]:

# Collect every MP3 file that sits directly inside the uploads directory
mp3_files = glob.glob(os.path.join(uploads_dir, "*.mp3"))
print(f"Se encontraron {len(mp3_files)} archivos MP3 para procesar")

# Translate the files one by one, reporting the outcome of each
for file_path in tqdm(mp3_files):
    file_name = os.path.basename(file_path)
    print(f"Procesando: {file_name}")

    translated_file_path = translate_audio(file_path)

    # translate_audio returns None on failure, otherwise the output path
    if not translated_file_path:
        print("Traducción fallida.")
    else:
        print(f"Audio traducido guardado en: {translated_file_path}")
    print("-" * 80)

Se encontraron 8 archivos MP3 para procesar


  0%|          | 0/8 [00:00<?, ?it/s]

Procesando: acoruna_barco_m67780_a84524_audio_converted.mp3


 12%|█▎        | 1/8 [00:11<01:17, 11.08s/it]

Audio traducido guardado en: c:\Users\Adria\dev\blindwikiapp\notebooks\uploads\translations\acoruna_barco_m67780_a84524_audio_converted_translated_eng.mp3
--------------------------------------------------------------------------------
Procesando: barcelona_alexdobano_m68284_a85028_audio_converted.mp3


 25%|██▌       | 2/8 [00:37<02:01, 20.20s/it]

Audio traducido guardado en: c:\Users\Adria\dev\blindwikiapp\notebooks\uploads\translations\barcelona_alexdobano_m68284_a85028_audio_converted_translated_eng.mp3
--------------------------------------------------------------------------------
Procesando: barcelona_JuanNuez_m70566_a87310_audio_converted.mp3


 38%|███▊      | 3/8 [00:45<01:11, 14.34s/it]

Audio traducido guardado en: c:\Users\Adria\dev\blindwikiapp\notebooks\uploads\translations\barcelona_JuanNuez_m70566_a87310_audio_converted_translated_eng.mp3
--------------------------------------------------------------------------------
Procesando: barcelona_Ovvero_m68255_a84999_audio_converted.mp3


 50%|█████     | 4/8 [01:00<00:59, 14.79s/it]

Audio traducido guardado en: c:\Users\Adria\dev\blindwikiapp\notebooks\uploads\translations\barcelona_Ovvero_m68255_a84999_audio_converted_translated_eng.mp3
--------------------------------------------------------------------------------
Procesando: berlin_Dirk_m67384_a84128_audio_converted.mp3


 62%|██████▎   | 5/8 [01:28<00:58, 19.60s/it]

Audio traducido guardado en: c:\Users\Adria\dev\blindwikiapp\notebooks\uploads\translations\berlin_Dirk_m67384_a84128_audio_converted_translated_eng.mp3
--------------------------------------------------------------------------------
Procesando: cuenca_Amalia_m69879_a86623_audio_converted.mp3


 75%|███████▌  | 6/8 [02:02<00:49, 24.60s/it]

Audio traducido guardado en: c:\Users\Adria\dev\blindwikiapp\notebooks\uploads\translations\cuenca_Amalia_m69879_a86623_audio_converted_translated_eng.mp3
--------------------------------------------------------------------------------
Procesando: elbarcelonC3A8s_Martagosa_m71623_a88367_audio_converted.mp3


 88%|████████▊ | 7/8 [02:31<00:25, 25.82s/it]

Audio traducido guardado en: c:\Users\Adria\dev\blindwikiapp\notebooks\uploads\translations\elbarcelonC3A8s_Martagosa_m71623_a88367_audio_converted_translated_eng.mp3
--------------------------------------------------------------------------------
Procesando: elbarcelonC3A8s_Neussg_m71656_a88400_audio_converted.mp3


100%|██████████| 8/8 [05:19<00:00, 39.97s/it]

Audio traducido guardado en: c:\Users\Adria\dev\blindwikiapp\notebooks\uploads\translations\elbarcelonC3A8s_Neussg_m71656_a88400_audio_converted_translated_eng.mp3
--------------------------------------------------------------------------------



