In [1]:
import vozyaudio as vz
import numpy as np
import matplotlib.pyplot as plt
import subprocess
import os
import shutil

from vozyaudio import lee_audio, envolvente, track_pitch, espectro
from scipy.signal import resample, correlate, find_peaks

In [2]:
def normalizar(v):
    """Min-max scale ``v`` so its values span approximately [0, 1].

    A small constant (1e-9) is added to the range so that a constant input
    yields zeros instead of a division by zero.
    """
    minimo = np.min(v)
    rango = np.max(v) - minimo
    return (v - minimo) / (rango + 1e-9)

def autocorrelacion(x_frame):
    """Return the normalised autocorrelation of ``x_frame`` for lags >= 0.

    The mean is removed first, the full correlation is computed, and only
    the second half (starting at the centre sample, i.e. lag 0) is kept.
    The result is divided by the largest absolute correlation value (plus
    1e-9 to avoid dividing by zero on a constant input).
    """
    centrada = x_frame - np.mean(x_frame)
    completa = correlate(centrada, centrada, mode='full')
    escala = np.max(np.abs(completa) + 1e-9)
    lag_cero = len(completa) // 2
    return completa[lag_cero:] / escala

def detectar_ritmo(x_frame, fs, fmin=1.5, fmax=8):
    """Estimate the dominant repetition period of ``x_frame`` in seconds.

    The autocorrelation of the frame is searched for its maximum between
    the lags ``fs / fmax`` and ``fs / fmin`` (so ``fmin``/``fmax`` act as
    frequency bounds in the same unit as ``fs``).

    Returns
    -------
    tuple
        ``(periodo_seg, corr)``: the period in seconds and the
        autocorrelation array returned by ``autocorrelacion``. When the
        frame is too short to contain the requested lag window, the period
        is the fixed fallback ``0.5``.
    """
    corr = autocorrelacion(x_frame)
    lag_corto = int(fs / fmax)
    # The upper lag can never exceed the last available correlation index.
    lag_largo = min(int(fs / fmin), len(corr) - 1)
    if lag_corto >= lag_largo:
        # Empty search window: fall back to the default period.
        return 0.5, corr
    ventana = corr[lag_corto:lag_largo]
    mejor_lag = np.argmax(ventana) + lag_corto
    return mejor_lag / fs, corr

In [None]:
# === CONFIGURATION ===
AUDIO_PATH = 'audios/music.wav'  # input audio file; also passed to generarVideo.bat
FPS = 25  # video frame rate; determines the frame count and the audio samples per frame
FRAME_FOLDER = 'fotogramas'  # folder where the rendered PNG frames are written
VIDEO_PATH = 'output.mp4'  # not referenced in the visible cells; presumably used by generarVideo.bat (verify)
N_BARRAS = 60  # Number of spectrum bars



In [4]:
# === 1. Load audio ===
# Assumes lee_audio returns (sample rate, samples) (confirm in vozyaudio).
fs, x = lee_audio(AUDIO_PATH)
x = x.astype(np.float32)
samples_per_frame = int(fs / FPS)  # audio samples covered by one video frame
dur = len(x) / fs                  # number of samples divided by fs
n_frames = int(FPS * dur)          # total number of video frames to render

# === 2. Descriptors ===
env = envolvente(x, fs=fs)
# nan_to_num replaces NaN entries of the pitch track with 0.
pitch = np.nan_to_num(track_pitch(x, fs))


In [5]:
# 1. Autocorrelation of the envelope
# tr=0.1 was chosen by the author as "more stable"; presumably a smoothing
# time constant (confirm in vozyaudio.envolvente).
env_smooth = envolvente(x, fs=fs, tr=0.1)
corr_env = autocorrelacion(env_smooth)

# 2. Estimate the global tempo
# NOTE(review): the lags below are expressed in samples at rate fs, which
# assumes env_smooth has the same sampling rate as x (confirm).
min_lag = int(fs / 5)    # max 5 Hz = 300 BPM
max_lag = int(fs / 1.5)  # min 1.5 Hz = 90 BPM
lag_beat = min_lag + np.argmax(corr_env[min_lag:max_lag])
periodo_muestras = lag_beat

# 3. Find the envelope peaks, keeping them at least 80 % of a beat period apart
peaks, _ = find_peaks(env_smooth, distance=periodo_muestras * 0.8)

# Convert peak positions (samples) to times (seconds) and then to frame indices
beat_times = peaks / fs
beat_frames = (beat_times * FPS).astype(int)


In [6]:
# Scale both descriptors with normalizar (approximately to [0, 1]).
env = normalizar(env)
pitch = normalizar(pitch)

# Resample each descriptor to one value per video frame.
# NOTE(review): the query positions run up to len(...), one past the last
# valid index; np.interp clamps such points to the last descriptor value.
pos_env = np.linspace(0, len(env), n_frames)
env_frame = np.interp(pos_env, np.arange(len(env)), env)

pos_pitch = np.linspace(0, len(pitch), n_frames)
pitch_frame = np.interp(pos_pitch, np.arange(len(pitch)), pitch)


In [9]:
def es_beat(frame_index, beat_frames, tolerancia=2):
    """Return True when ``frame_index`` lies within ``tolerancia`` frames of any beat.

    ``beat_frames`` is any iterable of beat frame indices; an empty iterable
    yields False.
    """
    for marca in beat_frames:
        if abs(frame_index - marca) <= tolerancia:
            return True
    return False

def dibujar_flash(ax):
    """Draw the beat flash: one large cyan marker at the point (0.5, 0.5) of ``ax``."""
    estilo = dict(s=1500, c='cyan', alpha=0.9, edgecolors='none', marker='o')
    ax.scatter(0.5, 0.5, **estilo)

def obtener_frame_audio(x, i, samples_per_frame):
    """Return the slice of ``x`` that belongs to video frame ``i``.

    The slice starts at ``i * samples_per_frame`` and spans
    ``samples_per_frame`` samples, truncated at the end of ``x``.
    """
    inicio = i * samples_per_frame
    fin = inicio + samples_per_frame
    if fin > len(x):
        fin = len(x)
    return x[inicio:fin]

def dibujar_circulo_ritmico(ax, t_actual, periodo):
    """Draw a pulsing circle at (0.5, 0.5) whose size and colour follow a sine.

    The oscillation level is ``0.5 * (1 + sin(2*pi*t_actual/periodo))``, so
    it stays in [0, 1]. It drives the red channel, the inverse of the blue
    channel, and the marker size (20 to 320).
    """
    fase = 2 * np.pi * t_actual / periodo
    nivel = 0.5 * (1 + np.sin(fase))
    ax.scatter(
        0.5,
        0.5,
        s=300 * nivel + 20,
        c=[(nivel, 0.2, 1 - nivel)],
        alpha=0.3,
    )

def dibujar_barras(ax, X_resampled, N_BARRAS):
    """Draw ``N_BARRAS`` spectrum bars side by side across the x range [0, 1].

    Each bar takes its height from ``X_resampled[j]``, is left-aligned in a
    slot of width ``1 / N_BARRAS`` and fills 80 % of that slot. The green
    channel of its colour is proportional to its height.
    """
    ancho = 1 / N_BARRAS
    for indice in range(N_BARRAS):
        altura = X_resampled[indice]
        ax.bar(
            indice * ancho,
            altura,
            width=ancho * 0.8,
            color=(0.1, 0.8 * altura, 1.0),
            align='edge',
        )

def dibujar_particula(ax, pitch, env):
    """Draw the pitch/envelope particle on ``ax``.

    ``pitch`` sets the vertical position and the blue channel; ``env`` sets
    the green channel and the marker size (``100 + 300 * env``). Both are
    presumably already scaled to [0, 1], since they are used directly as
    colour components (verify against the caller).
    """
    ax.scatter(
        0.5,
        pitch,
        s=100 + env * 300,
        c=[(1.0, env, pitch)],
        alpha=0.8,
    )

def finalizar_figura(fig, ax, path):
    """Fix the axes to the unit square, hide them, save ``fig`` and close it.

    Parameters
    ----------
    fig : figure to save; it is always closed before returning, even when
        saving fails, so figures do not accumulate in long render loops.
    ax : axes belonging to ``fig``; limited to [0, 1] on both axes and
        switched off.
    path : destination passed to ``fig.savefig``.

    Raises
    ------
    Whatever ``fig.tight_layout`` or ``fig.savefig`` raise (for example when
    the destination folder does not exist); the figure is still closed.
    """
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.axis('off')
    try:
        # Operate on the figure that was passed in rather than on pyplot's
        # "current figure", which may be a different one.
        fig.tight_layout()
        fig.savefig(path)
    finally:
        plt.close(fig)


In [11]:
def generar_frames(x, fs, pitch_frame, env_frame, beat_frames, n_frames, samples_per_frame, FPS, N_BARRAS, FRAME_FOLDER):
    """Render one PNG image per video frame into ``FRAME_FOLDER``.

    For every frame index ``i`` the figure contains, in drawing order: a
    flash when ``i`` is close to a beat, a pulsing rhythm circle, the
    spectrum bars of the frame's audio slice, and a particle driven by
    ``pitch_frame[i]`` and ``env_frame[i]``.

    Parameters
    ----------
    x : audio samples.
    fs : sampling rate of ``x``.
    pitch_frame, env_frame : per-frame descriptors, indexed by frame number.
    beat_frames : iterable of frame indices where a beat occurs.
    n_frames : number of frames to render.
    samples_per_frame : number of audio samples that belong to one frame.
    FPS : video frame rate, used to convert a frame index to seconds.
    N_BARRAS : number of spectrum bars.
    FRAME_FOLDER : output folder; created if it does not exist.

    The files are named ``frame_0000.png``, ``frame_0001.png``, ... and
    progress is printed on a single console line.
    """
    print("Generando frames...")

    # Create the output folder so savefig does not fail on a fresh run.
    os.makedirs(FRAME_FOLDER, exist_ok=True)

    for i in range(n_frames):
        fig, ax = plt.subplots(figsize=(8, 6))
        try:
            # NOTE(review): finalizar_figura switches the axes off, which may
            # also hide this black background; confirm it actually renders.
            ax.set_facecolor((0, 0, 0))

            if es_beat(i, beat_frames):
                dibujar_flash(ax)

            x_frame = obtener_frame_audio(x, i, samples_per_frame)
            t_actual = i / FPS
            # NOTE(review): with its default fmax=8, detectar_ritmo needs a
            # frame longer than fs/8 samples. A frame of fs/FPS samples with
            # FPS > 8 is shorter, so the 0.5 s fallback is always returned
            # and the circle pulses at a fixed rate. Consider analysing a
            # longer window if a signal-driven pulse is intended.
            periodo, _ = detectar_ritmo(x_frame, fs)
            dibujar_circulo_ritmico(ax, t_actual, periodo)

            X, _ = espectro(x_frame, modo=1, fs=fs)
            X_resampled = normalizar(resample(X, N_BARRAS))
            dibujar_barras(ax, X_resampled, N_BARRAS)

            dibujar_particula(ax, pitch_frame[i], env_frame[i])

            ruta = os.path.join(FRAME_FOLDER, f"frame_{i:04d}.png")
            finalizar_figura(fig, ax, ruta)
        finally:
            # Closing an already closed figure is harmless; this guarantees
            # that no figure stays open when a drawing step raises.
            plt.close(fig)

        # Report progress after the frame is written so the last update
        # reads 100 %.
        porcentaje = (i + 1) / n_frames * 100
        print(f"\rCompletado {porcentaje:.2f} %", end="", flush=True)
    print("\nFrames generados.")


In [None]:
# === 3. Generate frames ===
# Use the generar_frames function instead of repeating its body here: it
# performs the same drawing steps and also creates FRAME_FOLDER first, so
# saving the frames no longer fails when the folder does not exist yet.
generar_frames(
    x,
    fs,
    pitch_frame,
    env_frame,
    beat_frames,
    n_frames,
    samples_per_frame,
    FPS,
    N_BARRAS,
    FRAME_FOLDER,
)


In [None]:
# Generate the video
# generarVideo.bat is an external script that receives the audio path as its
# only argument; what it does with the frames is not visible from this notebook.
try:
    subprocess.run(['generarVideo.bat', AUDIO_PATH], check=True)
except subprocess.CalledProcessError as e:
    # The script ran but finished with a non-zero exit status.
    print("Error al ejecutar generarVideo.bat:", e)
except OSError as e:
    # The script could not be launched at all (e.g. file not found or not
    # executable on this platform); report it like any other failure.
    print("Error al ejecutar generarVideo.bat:", e)
finally:
    # The rendered frames are intentionally kept on disk. To delete them
    # after encoding, call shutil.rmtree(FRAME_FOLDER) here.
    print('Procesado terminado.')