In [131]:
# ===========================================
# Script completo para extraer piano con Demucs
# ===========================================

import os
import subprocess
from pydub import AudioSegment

# ==========================
# 1. Configuraci√≥n de rutas
# ==========================

# Ruta al archivo MP3 de entrada
input_mp3 = r"D:\transcripcion_taller\audio_input\queen.mp3"

# Carpeta de salida de Demucs
output_folder = r"D:\transcripcion_taller\demucs_output"

# Ruta a ffmpeg
ffmpeg_path = r"C:\ffmpeg\bin\ffmpeg.exe"  # Ajusta seg√∫n tu instalaci√≥n

# Archivo WAV temporal (para evitar problemas de MP3 en pydub)
wav_temp = os.path.join(os.path.dirname(input_mp3), "queen_temp.wav")

# Archivo final de piano
piano_mp3 = os.path.join(os.path.dirname(input_mp3), "piano.mp3")

# ==========================
# 2. Convertir MP3 -> WAV
# ==========================
print("Convirtiendo MP3 a WAV...")
subprocess.run([ffmpeg_path, "-i", input_mp3, wav_temp], check=True)

# ==========================
# 3. Ejecutar HTDemucs solo piano
# ==========================
print("Extrayendo piano con HTDemucs...")

# Nombre base del archivo sin extensi√≥n
song_name = os.path.splitext(os.path.basename(input_mp3))[0]

# Comando Demucs
demucs_cmd = [
    r"C:\Users\USUARIO\anaconda3\Scripts\demucs.exe",
    "-n", "htdemucs",
    "--stems", "piano",
    "-o", output_folder,
    wav_temp
]

# Ejecutar Demucs
subprocess.run(demucs_cmd, check=True)

# ==========================
# 4. Buscar piano.wav generado
# ==========================
piano_wav = os.path.join(output_folder, "htdemucs", song_name, "piano.wav")

if not os.path.exists(piano_wav):
    raise FileNotFoundError("No se encontr√≥ piano.wav generado por Demucs.")

# ==========================
# 5. Convertir WAV -> MP3 final
# ==========================
print("Convirtiendo piano a MP3...")
AudioSegment.converter = ffmpeg_path  # forzar ffmpeg
audio = AudioSegment.from_wav(piano_wav)
audio.export(piano_mp3, format="mp3")

print("‚úî Piano extra√≠do correctamente:", piano_mp3)

# ==========================
# 6. Limpiar archivo temporal si quer√©s
# ==========================
# os.remove(wav_temp)


Convirtiendo MP3 a WAV...
Extrayendo piano con HTDemucs...


FileNotFoundError: [WinError 2] El sistema no puede encontrar el archivo especificado

In [29]:
import os

input_file = r"D:\transcripcion_taller\audio_input\queen.mp3"

# Verificamos si el archivo existe
print("Existe el archivo?", os.path.exists(input_file))
print("Archivos en la carpeta:", os.listdir(r"D:\transcripcion_taller"))


Existe el archivo? True
Archivos en la carpeta: ['.git', '.gitignore', '.ipynb_checkpoints', 'audio_input', 'demucs_output', 'taller.ipynb', 'Untitled.ipynb']


### DEMUCS_6S CON GUITAR POR ALGUNA RAZ√ìN EXTRAE PIANO

In [37]:
from pydub import AudioSegment
import numpy as np
import torch
from demucs.apply import apply_model
from demucs.pretrained import get_model

# ------------------------------
# 1Ô∏è‚É£ Cargar modelo HTDemucs 6 stems
# ------------------------------
model_name = 'htdemucs_6s'  # 6 stems incluye piano
model = get_model(model_name)
model.eval()

# ------------------------------
# 2Ô∏è‚É£ Cargar WAV con pydub
# ------------------------------
AudioSegment.converter = r"C:\ffmpeg\bin\ffmpeg.exe"  # Ajusta seg√∫n tu instalaci√≥n
wav_path = r"D:\transcripcion_taller\audio_input\faded_corto.wav"
audio_seg = AudioSegment.from_wav(wav_path)

# ------------------------------
# 3Ô∏è‚É£ Convertir a numpy float32
# ------------------------------
samples = np.array(audio_seg.get_array_of_samples()).astype(np.float32)
samples /= 32768.0  # normalizar a [-1,1]

# Stereo
if audio_seg.channels == 2:
    samples = samples.reshape((-1, 2)).T  # [channels, tiempo]
else:
    samples = samples[np.newaxis, :]      # [1, tiempo]

# ------------------------------
# 4Ô∏è‚É£ Convertir a tensor y agregar batch
# ------------------------------
waveform = torch.from_numpy(samples)
if waveform.ndim == 2:
    waveform = waveform.unsqueeze(0)  # [1, channels, samples]

# ------------------------------
# 5Ô∏è‚É£ Aplicar modelo
# ------------------------------
with torch.no_grad():
    sources = apply_model(model, waveform, device='cpu')

# ------------------------------
# 6Ô∏è‚É£ Extraer stem de guitarra (cambiar √≠ndice seg√∫n necesites)
# ------------------------------
# HTDemucs_6s stem indices: [0:drums, 1:bass, 2:other, 3:vocals, 4:guitar, 5:piano]
stem_index = 4  # 4=guitar, 5=piano, 3=vocals
instrument_tensor = sources[:, stem_index, :, :]  # [batch, channels, samples]
instrument_tensor = instrument_tensor[0]      # tomar primer batch
channels = instrument_tensor.shape[0]

# ------------------------------
# 7Ô∏è‚É£ Convertir a numpy y guardar MP3
# ------------------------------
samples_np = instrument_tensor.numpy()

# ‚úÖ CR√çTICO: Convertir float32 [-1,1] a int16 [-32768, 32767]
samples_np = np.clip(samples_np, -1.0, 1.0)  # Asegurar rango v√°lido
samples_np = (samples_np * 32767.0).astype(np.int16)

if channels == 2:
    samples_np = samples_np.T  # pydub espera [samples, channels]

# Usar m√©todo flatten para asegurar formato correcto
audio_bytes = samples_np.flatten().tobytes()

audio_out = AudioSegment(
    audio_bytes,
    frame_rate=audio_seg.frame_rate,
    sample_width=2,  # ‚úÖ int16 = 2 bytes
    channels=channels
)
output_file = r"D:\transcripcion_taller\demucs_output\faded_guitar.mp3"  # Cambiar seg√∫n stem_index
audio_out.export(output_file, format="mp3", bitrate="320k")
print(f"‚úî Instrumento extra√≠do: {output_file}")
print(f"Rango de valores: min={instrument_tensor.min():.4f}, max={instrument_tensor.max():.4f}")


‚úî Instrumento extra√≠do: D:\transcripcion_taller\demucs_output\faded_guitar.mp3
Rango de valores: min=-0.2479, max=0.3247


In [16]:
# üîç DIAGN√ìSTICO: Verificar qu√© hay en el tensor de piano
print("Shape de sources:", sources.shape)
print("√çndices de stems en htdemucs_6s:")
print("  0: drums, 1: bass, 2: other, 3: vocals, 4: guitar, 5: piano")
print("\nRango de valores por stem:")
for i in range(sources.shape[1]):
    stem = sources[0, i, :, :]
    print(f"  Stem {i}: min={stem.min():.4f}, max={stem.max():.4f}, mean={stem.abs().mean():.4f}")

Shape de sources: torch.Size([1, 6, 2, 656384])
√çndices de stems en htdemucs_6s:
  0: drums, 1: bass, 2: other, 3: vocals, 4: guitar, 5: piano

Rango de valores por stem:
  Stem 0: min=-0.0003, max=0.0003, mean=0.0000
  Stem 1: min=-0.1039, max=0.1003, mean=0.0023
  Stem 2: min=-0.0001, max=0.0001, mean=0.0000
  Stem 3: min=-0.1903, max=0.1607, mean=0.0220
  Stem 4: min=-0.2513, max=0.3030, mean=0.0214
  Stem 5: min=-0.0014, max=0.0014, mean=0.0000


### DEMUCS PARA OTHERS, SUENA BIEN CON DONT STOP ME NOW

In [35]:
from pydub import AudioSegment
import numpy as np
import torch
from demucs.apply import apply_model
from demucs.pretrained import get_model

# Cargar modelo htdemucs est√°ndar (4 stems: drums, bass, other, vocals)
model = get_model('htdemucs')
model.eval()

AudioSegment.converter = r"C:\ffmpeg\bin\ffmpeg.exe"
audio_seg = AudioSegment.from_wav(wav_path)

# Convertir a tensor
samples = np.array(audio_seg.get_array_of_samples()).astype(np.float32)
samples /= 32768.0
if audio_seg.channels == 2:
    samples = samples.reshape((-1, 2)).T
else:
    samples = samples[np.newaxis, :]

if waveform.ndim == 2:
    waveform = waveform.unsqueeze(0)

# Aplicar modelo
with torch.no_grad():
    sources = apply_model(model, waveform, device='cpu')

# Extraer "other" (contiene piano + guitarra + sintetizadores)
# htdemucs stems: [0:drums, 1:bass, 2:other, 3:vocals]
other_tensor = sources[:, 2, :, :]
other_tensor = other_tensor[0]
channels = other_tensor.shape[0]

# Convertir y guardar
samples_np = other_tensor.numpy()
samples_np = np.clip(samples_np, -1.0, 1.0)
samples_np = (samples_np * 32767.0).astype(np.int16)

if channels == 2:
    samples_np = samples_np.T

audio_bytes = samples_np.flatten().tobytes()
audio_out = AudioSegment(audio_bytes, frame_rate=audio_seg.frame_rate, sample_width=2, channels=channels)
audio_out.export(r"D:\transcripcion_taller\demucs_output\faded_piano_other.mp3", format="mp3", bitrate="320k")
print("‚úî Piano+instrumentos extra√≠do (htdemucs est√°ndar)")
print(f"Rango: min={other_tensor.min():.4f}, max={other_tensor.max():.4f}")

‚úî Piano+instrumentos extra√≠do (htdemucs est√°ndar)
Rango: min=-0.7459, max=0.6340
