# Audio Preprocessor

In [None]:
from pathlib import Path
import sys

PROJECT_ROOT = Path().resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

import numpy as np
import pandas as pd
import librosa
from scipy.io import wavfile

from Code.audio import AudioPreproc, AudioPreprocConfig

# Experiment bookkeeping: `run` picks the numbered output folder.
# NOTE(review): `dataset` is assigned but not referenced again in this cell.
run = 1
dataset = 4

# Class labels; each one names a sub-folder under the input directory.
labels = ["contar", "proporcion", "salir"]

# Input recordings live under Database/data/audio; results of this run go to
# Database/tmp/audioPreprocTryNN (created on demand).
input_dir_base = PROJECT_ROOT.joinpath("Database", "data", "audio")
output_dir_base = PROJECT_ROOT.joinpath("Database", "tmp", f"audioPreprocTry{run:02d}")
output_dir_base.mkdir(parents=True, exist_ok=True)

# Preprocessing parameters. Their exact meaning is defined by
# AudioPreprocConfig, which is not visible from this notebook.
config = AudioPreprocConfig(
    target_sr=16000,
    t_sec=1.2,
    frame_ms=25.0,
    hop_ms=10.0,
    highpass_hz=40.0,
    hp_order=2,
    preemph_a=0.97,
    vad_thresh_db=-35.0,
    vad_win_ms=20.0,
    vad_min_ms=120.0,
    vad_expand_ms=60.0,
    norm_mode="rms",
    rms_target_dbfs=-20.0,
    peak_ref=0.98,
    max_gain_db=18.0,
    gate_dbfs=-60.0,
    pad_mode="edge",
)
pre = AudioPreproc(config)

# Parallel accumulators: one entry per processed file, filled in lockstep.
y_proc, sr_proc, file_names, class_names = [], [], [], []

# Preprocess every *.wav found under <input_dir_base>/<label>/ and write the
# result, under the same file name, to <output_dir_base>/<label>/.
for cls in labels:
    input_dir = input_dir_base / cls
    output_dir = output_dir_base / cls
    output_dir.mkdir(parents=True, exist_ok=True)

    # sorted() keeps the processing order (and the table rows) reproducible.
    for audio_path in sorted(input_dir.glob("*.wav")):
        y, sr = pre.preprocesar_desde_path(audio_path)

        print(y.shape)

        # Save the processed WAV as 16-bit PCM (float -> int16).
        # Round to the nearest integer before casting: astype(int16) on its
        # own truncates toward zero, which biases every sample toward silence
        # and doubles the width of the quantisation step around zero.
        # NOTE(review): assumes y is a float signal scaled to about [-1, 1].
        y_int16 = np.clip(np.rint(y * 32767.0), -32768, 32767).astype(np.int16)
        out_path = output_dir / audio_path.name
        wavfile.write(out_path, sr, y_int16)

        y_proc.append(y)
        sr_proc.append(sr)
        file_names.append(audio_path.name)
        class_names.append(cls)

# Per-file summary statistics: duration, sample extremes and RMS energy of
# each processed signal, one row per file.
filas = [
    {
        "Clase": cls,
        "Archivo": fname,
        "Duración (s)": len(y) / sr,
        "Mín": float(np.min(y)),
        "Máx": float(np.max(y)),
        "Energía RMS": float(np.sqrt(np.mean(y**2))),
    }
    for cls, fname, y, sr in zip(class_names, file_names, y_proc, sr_proc)
]

df = pd.DataFrame(filas)
df.to_csv(output_dir_base.joinpath("estadisticos.csv"), index=False)
df  # last expression of the cell: renders the table in the notebook


# Audio Features

In [None]:
from pathlib import Path
import sys

PROJECT_ROOT = Path().resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

import numpy as np
import pandas as pd
import librosa
from scipy.io import wavfile

from Code.audio import AudioPreproc, AudioPreprocConfig, AudioFeat, AudioFeatConfig

# `run` picks the numbered output folder for this experiment.
run = 2

# Class labels; each one names a sub-folder under the input directory.
labels = ["contar", "proporcion", "salir"]

# Input recordings live under Database/data/audio; results of this run go to
# Database/tmp/audioFeatTryNN (created on demand).
input_dir_base = PROJECT_ROOT.joinpath("Database", "data", "audio")
output_dir_base = PROJECT_ROOT.joinpath("Database", "tmp", f"audioFeatTry{run:02d}")
output_dir_base.mkdir(parents=True, exist_ok=True)

# Preprocessing stage. Parameter meanings are defined by AudioPreprocConfig,
# which is not visible from this notebook.
pre_config = AudioPreprocConfig(
    target_sr=16000,
    t_sec=1.2,
    frame_ms=25.0,
    hop_ms=10.0,
    highpass_hz=40.0,
    hp_order=2,
    preemph_a=0.97,
    vad_thresh_db=-35.0,
    vad_win_ms=20.0,
    vad_min_ms=120.0,
    vad_expand_ms=60.0,
    norm_mode="rms",
    rms_target_dbfs=-20.0,
    peak_ref=0.98,
    max_gain_db=18.0,
    gate_dbfs=-60.0,
    pad_mode="edge",
)
pre = AudioPreproc(pre_config)

# Reuse the preprocessor's framing for the feature extractor. The *_ms fields
# are scaled by 1e-3 (milliseconds -> seconds, going by their names).
winn = pre.cfg.frame_ms * 1e-3
hopp = pre.cfg.hop_ms * 1e-3

# Feature-extraction stage.
feat_config = AudioFeatConfig(
    win=winn,
    hop=hopp,
    n_mfcc_no_c0=13,
    delta_order=1,
    add_rms=True,
    add_zcr=True,
    stats=("mean", "std", "p10", "p90"),
)
feat = AudioFeat(cfg=feat_config)

# Names used later as column headers for the entries of each feature vector.
feature_names = feat.nombres_features()

# Parallel accumulators: one entry per processed file, filled in lockstep.
y_proc, sr_proc, file_names, class_names = [], [], [], []
y_features = []

# Preprocess every *.wav under <input_dir_base>/<label>/ and extract its
# feature vector.
for cls in labels:
    input_dir = input_dir_base / cls
    # NOTE(review): nothing in this cell writes into the per-class folder; it
    # is still created so the on-disk layout stays as before.
    output_dir = output_dir_base / cls
    output_dir.mkdir(parents=True, exist_ok=True)

    # sorted() keeps the processing order (and the table rows) reproducible.
    for audio_path in sorted(input_dir.glob("*.wav")):
        y, sr = pre.preprocesar_desde_path(audio_path)
        # Use the rate returned together with the signal instead of the
        # configured target rate: `sr` is what the rest of the notebook treats
        # as the rate of `y` (e.g. for durations), so the features stay
        # consistent with it even if the two values ever differ.
        y_feat = feat.extraer_caracteristicas(y, sr)

        y_proc.append(y)
        sr_proc.append(sr)
        y_features.append(y_feat)

        file_names.append(audio_path.name)
        class_names.append(cls)


# Basic per-file statistics (optional): duration, sample extremes and RMS
# energy of each processed signal, one row per file.
stats_rows = [
    {
        "Clase": cls,
        "Archivo": fname,
        "Duración (s)": len(y) / sr,
        "Mín": float(np.min(y)),
        "Máx": float(np.max(y)),
        "Energía RMS": float(np.sqrt(np.mean(y**2))),
    }
    for cls, fname, y, sr in zip(class_names, file_names, y_proc, sr_proc)
]
df_stats = pd.DataFrame(stats_rows)
df_stats.to_csv(output_dir_base.joinpath("estadisticos.csv"), index=False)

# Full feature table: class and file name followed by one column per feature.
feat_rows = [
    {
        "Clase": cls,
        "Archivo": fname,
        **{name: float(val) for name, val in zip(feature_names, vec)},
    }
    for cls, fname, vec in zip(class_names, file_names, y_features)
]

df_feat = pd.DataFrame(feat_rows)
df_feat.to_csv(output_dir_base.joinpath("features.csv"), index=False)

df_feat  # last expression of the cell: renders the feature table


# Audio Standardizer

In [None]:
from pathlib import Path
import sys

PROJECT_ROOT = Path().resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

import numpy as np
import pandas as pd
import librosa
from scipy.io import wavfile

from Code.audio import AudioPreproc, AudioPreprocConfig, AudioFeat, AudioFeatConfig
from Code.audio.Standardizer import Standardizer

# `run` picks the numbered output folder for this experiment.
run = 3

# Class labels; each one names a sub-folder under the input directory.
labels = ["contar", "proporcion", "salir"]

# Input recordings live under Database/data/audio; results of this run go to
# Database/tmp/audioSTDTryNN (created on demand).
input_dir_base = PROJECT_ROOT.joinpath("Database", "data", "audio")
output_dir_base = PROJECT_ROOT.joinpath("Database", "tmp", f"audioSTDTry{run:02d}")
output_dir_base.mkdir(parents=True, exist_ok=True)

# Preprocessing stage. Parameter meanings are defined by AudioPreprocConfig,
# which is not visible from this notebook.
pre_config = AudioPreprocConfig(
    target_sr=16000,
    t_sec=1.2,
    frame_ms=25.0,
    hop_ms=10.0,
    highpass_hz=40.0,
    hp_order=2,
    preemph_a=0.97,
    vad_thresh_db=-35.0,
    vad_win_ms=20.0,
    vad_min_ms=120.0,
    vad_expand_ms=60.0,
    norm_mode="rms",
    rms_target_dbfs=-20.0,
    peak_ref=0.98,
    max_gain_db=18.0,
    gate_dbfs=-60.0,
    pad_mode="edge",
)
pre = AudioPreproc(pre_config)

# Reuse the preprocessor's framing for the feature extractor. The *_ms fields
# are scaled by 1e-3 (milliseconds -> seconds, going by their names).
winn = pre.cfg.frame_ms * 1e-3
hopp = pre.cfg.hop_ms * 1e-3

# Feature-extraction stage.
feat_config = AudioFeatConfig(
    win=winn,
    hop=hopp,
    n_mfcc_no_c0=13,
    delta_order=1,
    add_rms=True,
    add_zcr=True,
    stats=("mean", "std", "p10", "p90"),
)
feat = AudioFeat(cfg=feat_config)

# Names used later as column headers for the entries of each feature vector.
feature_names = feat.nombres_features()

# Parallel accumulators: one entry per processed file, filled in lockstep.
y_proc, sr_proc, file_names, class_names = [], [], [], []
y_features = []

# Preprocess every *.wav under <input_dir_base>/<label>/ and extract its
# feature vector.
for cls in labels:
    input_dir = input_dir_base / cls
    # NOTE(review): nothing in this cell writes into the per-class folder; it
    # is still created so the on-disk layout stays as before.
    output_dir = output_dir_base / cls
    output_dir.mkdir(parents=True, exist_ok=True)

    # sorted() keeps the processing order (and the table rows) reproducible.
    for audio_path in sorted(input_dir.glob("*.wav")):
        y, sr = pre.preprocesar_desde_path(audio_path)
        # Use the rate returned together with the signal instead of the
        # configured target rate: `sr` is what the rest of the notebook treats
        # as the rate of `y` (e.g. for durations), so the features stay
        # consistent with it even if the two values ever differ.
        y_feat = feat.extraer_caracteristicas(y, sr)

        y_proc.append(y)
        sr_proc.append(sr)
        y_features.append(y_feat)

        file_names.append(audio_path.name)
        class_names.append(cls)


# Basic per-file statistics (optional): duration, sample extremes and RMS
# energy of each processed signal, one row per file.
stats_rows = []
for cls, fname, y, sr in zip(class_names, file_names, y_proc, sr_proc):
    stats_rows.append({
        "Clase": cls,
        "Archivo": fname,
        "Duración (s)": len(y) / sr,
        "Mín": float(np.min(y)),
        "Máx": float(np.max(y)),
        "Energía RMS": float(np.sqrt(np.mean(y**2))),
    })
df_stats = pd.DataFrame(stats_rows)
df_stats.to_csv(output_dir_base / "estadisticos.csv", index=False)

# Full feature table: class and file name followed by one column per feature.
feat_rows = []
for cls, fname, vec in zip(class_names, file_names, y_features):
    row = {"Clase": cls, "Archivo": fname}
    row.update({name: float(val) for name, val in zip(feature_names, vec)})
    feat_rows.append(row)

df_feat = pd.DataFrame(feat_rows)
df_feat.to_csv(output_dir_base / "features.csv", index=False)

# A bare `df_feat` expression is only rendered when it is the last statement
# of a notebook cell. More code follows in this cell, so show the table
# explicitly (display is already used further down in the same cell).
display(df_feat)

# Covariance and dimensionality reduction (PCA) on the standardized features.

X = np.stack(y_features).astype(np.float32)  # (N, D)
print(f"Dimensión de matriz de features X: {X.shape}")

# A covariance estimate needs at least two observations; fail with a clear
# message instead of propagating NaNs into the eigen-decomposition.
if X.shape[0] < 2:
    raise ValueError(
        f"Se necesitan al menos 2 muestras para estimar la covarianza (N={X.shape[0]})"
    )

stats = Standardizer().calculate_statistics(X)
X_std = stats.transform(X)

print(f"Dimensión de matriz X_std: {X_std.shape}")

# rowvar=False: rows are observations, columns are features.
cov = np.cov(X_std, rowvar=False)
eigvals, eigvecs = np.linalg.eigh(cov)

# eigh returns eigenvalues in ascending order; re-sort them (and the matching
# eigenvector columns) from largest to smallest.
idx = np.argsort(eigvals)[::-1]
eigvals = eigvals[idx]
eigvecs = eigvecs[:, idx]

# With fewer samples than features the covariance is rank-deficient and the
# numerically-zero eigenvalues can come back slightly negative. Clip them so
# the explained-variance ratios are non-negative and the cumulative curve is
# monotonic, which np.searchsorted requires.
eigvals = np.clip(eigvals, 0.0, None)

total_var = eigvals.sum()
if not np.isfinite(total_var) or total_var <= 0.0:
    raise ValueError(
        "La varianza total de las features es nula o no finita; no se puede calcular la varianza explicada"
    )

explained = eigvals / total_var
cum_explained = np.cumsum(explained)
# Smallest number of components whose cumulative ratio reaches 0.95.
k_95 = min(int(np.searchsorted(cum_explained, 0.95) + 1), len(eigvals))

# Report the ratios as percentages from here on.
explained *= 100.0
cum_explained *= 100.0

# Projection onto the k components that cover ~95% of the variance.
X_proj = X_std @ eigvecs[:, :k_95]

cov_df = pd.DataFrame(cov, index=feature_names, columns=feature_names)
var_df = pd.DataFrame({
    'eigenvalue': eigvals,
    'Indice': idx,  # position of each eigenvalue in eigh's original output
    'Porcentaje': explained,
    'Porcentaje Acumulado': cum_explained,
})
loadings_df = pd.DataFrame(
    eigvecs[:, :k_95],
    index=feature_names,
    columns=[f'PC{i+1}' for i in range(k_95)],
)

display(var_df.head(20))
display(loadings_df.head(20))

# Save the results as CSV files in the same output folder.
cov_df.to_csv(output_dir_base / 'covariance.csv')
var_df.assign(pc=np.arange(1, len(eigvals)+1)).to_csv(output_dir_base / 'variance_explained.csv', index=False)
loadings_df.to_csv(output_dir_base / 'loadings.csv')

# One row per file: file name, class and its coordinates on each component.
proj_rows = [
    {
        'Archivo': fname,
        'Clase': cls,
        **{f'PC{i+1}': float(val) for i, val in enumerate(vec)},
    }
    for fname, cls, vec in zip(file_names, class_names, X_proj)
]

pd.DataFrame(proj_rows).to_csv(output_dir_base / 'projections.csv', index=False)


# Audio Complete Test

In [112]:
from pathlib import Path
import sys

PROJECT_ROOT = Path().resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

import numpy as np
import pandas as pd
import librosa
from scipy.io import wavfile
import sounddevice as sd
import soundfile as sf

from Code.audio import AudioPreproc, AudioPreprocConfig, AudioFeat, AudioFeatConfig
from Code.audio.Standardizer import Standardizer
from Code.audio import AudioOrchestrator


def infer_label_from_name(p: Path) -> str:
    """Return the command class encoded as the prefix of a file name.

    The file stem is lower-cased and compared against the known class
    prefixes, so 'Contar001.wav' maps to 'contar'.

    Args:
        p: Path of the audio file; only its name is inspected.

    Returns:
        One of "contar", "proporcion" or "salir".

    Raises:
        ValueError: If the stem starts with none of the known prefixes.
    """
    stem = p.stem.lower()
    for label in ("contar", "proporcion", "salir"):
        if stem.startswith(label):
            return label
    raise ValueError(f"No sé qué clase es '{p.name}'")

def grabar_audio_segundos(segundos: float = 2.0) -> tuple[np.ndarray, int]:
    """Record mono audio from the default input device.

    Args:
        segundos: Length of the recording in seconds.

    Returns:
        A ``(audio, sr_rec)`` tuple: the recorded float32 buffer with
        singleton dimensions squeezed out, and the sample rate used.
    """
    device_info = sd.query_devices(kind='input')
    # Fall back to 16 kHz when the device reports no (or a zero) default rate.
    sr_rec = int(device_info['default_samplerate'] or 16000)
    print(f"Grabando {segundos}s a {sr_rec} Hz (input device default)…")

    n_frames = int(segundos * sr_rec)
    recording = sd.rec(n_frames, samplerate=sr_rec, channels=1, dtype="float32")
    # Wait for the recording to finish before handing the buffer back.
    sd.wait()
    return recording.squeeze(), sr_rec

# `run` picks the numbered scratch folder; `model_number` picks the file name
# under which the trained model is stored.
run = 2
model_number = 1

input_dir = PROJECT_ROOT.joinpath("Database", "data", "audio")
output_dir = PROJECT_ROOT.joinpath("Database", "tmp", "audio", f"audioTry{run:02d}")
output_dir.mkdir(parents=True, exist_ok=True)

model_path = PROJECT_ROOT.joinpath(
    "Database", "models", "audio", f"modelo_audio_{model_number}.npz"
)

orch = AudioOrchestrator()

# Training set: every *.wav below the input folder (searched recursively),
# labelled with the lower-cased name of the folder that contains it.
paths = sorted(input_dir.rglob("*.wav"))
labels = [wav_path.parent.name.lower() for wav_path in paths]

# Train, persist the model and show the summary returned by the training call.
R_dic = orch.entrenar(paths=paths, labels=labels)
orch.guardar_modelo(model_path)

display(R_dic)

# Record a clip from the microphone and classify it.
y, sr = grabar_audio_segundos()
resultado = orch.predecir_comando((y, sr), devolver_distancia=True)

print("Comando predicho:", resultado["label"])
print("Distancia mínima:", resultado["distancia_min"])
print("\n")

# --- Leave-one-out check over the stored training set -----------------------
# Each stored sample is classified with itself excluded (exclude_idx=i) and
# the prediction is compared with its stored label.
Xproj = orch._X_store_proj
# Force an ndarray so the element-wise `labs == c` mask below works even if
# the orchestrator keeps its labels in a plain list.
labs = np.asarray(orch._y_store)
preds = []
for i, (x, lab) in enumerate(zip(Xproj, labs)):
    pred = orch.knn.predecir(x, exclude_idx=i)
    preds.append(pred)
    if pred != lab:
        print("Falla en train idx", i, "real", lab, "pred", pred)

# Guard against an empty store instead of dividing by zero.
n_total = len(labs)
acc_loo = sum(p == l for p, l in zip(preds, labs)) / n_total if n_total else float("nan")
print("\nAcc LOO:", acc_loo)

# --- Distances from the recorded clip to every stored sample ----------------
# Same steps as the original one-liner, spelled out: preprocess -> features ->
# standardize -> project onto the retained components -> distances.
pre_out = orch._preprocesar_audio((y, sr))
q_feat = orch.feat.extraer_caracteristicas(*pre_out)
q_proj = orch.stats.transform_one(q_feat) @ orch._eigvecs[:, :orch._k_used]
d = np.asarray(orch.knn.distancias(q_proj))

print("dist min:", d.min(), "dist medias por clase:")
# np.unique yields the classes in sorted order, so the report is reproducible
# across runs (iterating a set of strings is not).
for c in np.unique(labs):
    mask = labs == c
    print(c, d[mask].mean())



{'N': 33, 'D_raw': 168, 'D_proj': 22, 'var_retenida': 0.9548174186745153}

Grabando 2.0s a 44100 Hz (input device default)…
Comando predicho: proporcion
Distancia mínima: 0.4893041253089905


Falla en train idx 2 real contar pred proporcion
Falla en train idx 4 real contar pred proporcion
Falla en train idx 5 real contar pred proporcion
Falla en train idx 8 real contar pred proporcion
Falla en train idx 11 real proporcion pred salir
Falla en train idx 18 real proporcion pred contar
Falla en train idx 28 real salir pred contar
Falla en train idx 29 real salir pred contar

Acc LOO: 0.7575757575757576
dist min: 0.48930413 dist medias por clase:
proporcion 0.9324585
contar 1.0401521
salir 1.0597081
