# Audio Preprocessor

In [None]:
# %pdb on

from pathlib import Path
import sys

PROJECT_ROOT = Path().resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
	sys.path.insert(0, str(PROJECT_ROOT))

import numpy as np
import pandas as pd
import librosa
from scipy.io import wavfile

from Code.audio import AudioPreproc, AudioPreprocConfig

run = 1
dataset = 4

input_dir_base = PROJECT_ROOT / "Database" / "data" / "audio"
output_dir_base = PROJECT_ROOT / "Database" / "tmp" / f"audioPreprocTry{run:02d}"
output_dir_base.mkdir(parents=True, exist_ok=True)

labels = ["contar", "proporcion", "salir"]

config = AudioPreprocConfig(
	target_sr=16e3,
	T_sec=1.2,
	frame_ms=25.,
	hop_ms=10.,
	corte_pasaalto=80.,
	orden_pasaalto=4,
	coeficiente_pre_enfasis=0.97,
	norm_mode="RMS",
	rms_target_dbfs=-20.0,
	peak_ref=0.98,
	max_gain_db=18.,
	gate_dbfs=-60.,
	pad_mode="edge",
)
pre = AudioPreproc(config)

y_proc, sr_proc, file_names, class_names = [], [], [], []

for cls in labels:
	input_dir = input_dir_base / cls
	output_dir = output_dir_base / cls
	output_dir.mkdir(parents=True, exist_ok=True)

	for p in sorted(input_dir.glob("*.wav")):
		y, sr = pre.procesar(p)

		y_proc.append(y)
		sr_proc.append(sr)
		file_names.append(p.name)
		class_names.append(cls)

# Tabla de estadísticos por archivo
filas = []
for cls, fname, y, sr in zip(class_names, file_names, y_proc, sr_proc):
	filas.append({
		"Clase": cls,
		"Archivo": fname,
		"Duración (s)": len(y) / sr,
		"Mín": float(np.min(y)),
		"Máx": float(np.max(y)),
		"Energía RMS": float(np.sqrt(np.mean(y**2))),
		"shape": y.shape,
	})

df = pd.DataFrame(filas)
df.to_csv(output_dir_base / "estadisticos.csv", index=False)
display(df)  # muestra la tabla en el notebook


# Audio Features

In [None]:
from pathlib import Path
import sys

PROJECT_ROOT = Path().resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
	sys.path.insert(0, str(PROJECT_ROOT))

import numpy as np
import pandas as pd
import librosa
from scipy.io import wavfile

from Code.audio import AudioPreproc, AudioPreprocConfig, AudioFeat, AudioFeatConfig

run = 2

input_dir_base = PROJECT_ROOT / "Database" / "data" / "audio"
output_dir_base = PROJECT_ROOT / "Database" / "tmp" / f"audioFeatTry{run:02d}"
output_dir_base.mkdir(parents=True, exist_ok=True)

config = AudioPreprocConfig(
	target_sr=16e3,
	T_sec=1.2,
	frame_ms=25.,
	hop_ms=10.,
	corte_pasaalto=80.,
	orden_pasaalto=4,
	coeficiente_pre_enfasis=0.97,
	norm_mode="RMS",
	rms_target_dbfs=-20.0,
	peak_ref=0.98,
	max_gain_db=18.,
	gate_dbfs=-60.,
	pad_mode="edge",
)

pre = AudioPreproc(config)


feat_config = AudioFeatConfig(
	sr_target=16000.0,
	win_ms=25.0,
	hop_ms = 10.0,
	N_MFCC = 13,
	delta_order = 1,
	RMS = True,
	ZCR = True,
)

feat = AudioFeat(config=feat_config)

feature_names = feat.nombres_features()

y_proc, sr_proc, file_names, class_names = [], [], [], []
y_features = []

for cls in labels:
	input_dir = input_dir_base / cls
	output_dir = output_dir_base / cls
	output_dir.mkdir(parents=True, exist_ok=True)

	for p in sorted(input_dir.glob("*.wav")):
		y, sr = pre.procesar(p)

		y_feat = feat.extraer_caracteristicas(y, sr)

		y_proc.append(y)
		sr_proc.append(sr)
		y_features.append(y_feat)


		file_names.append(p.name)
		class_names.append(cls)

# Tabla de estadísticos por archivo
filas = []
for cls, fname, y, sr in zip(class_names, file_names, y_proc, sr_proc):
	filas.append({
		"Clase": cls,
		"Archivo": fname,
		"Duración (s)": len(y) / sr,
		"Mín": float(np.min(y)),
		"Máx": float(np.max(y)),
		"Energía RMS": float(np.sqrt(np.mean(y**2))),
		"shape": y.shape,
	})

df = pd.DataFrame(filas)
df.to_csv(output_dir_base / "estadisticos.csv", index=False)
display(df)  # muestra la tabla en el notebook


# Tabla de features completas por archivo
feat_rows = []
for cls, fname, vec in zip(class_names, file_names, y_features):
	row = {"Clase": cls, "Archivo": fname}
	row.update({name: float(val) for name, val in zip(feature_names, vec)})
	feat_rows.append(row)

df_feat = pd.DataFrame(feat_rows)
df_feat.to_csv(output_dir_base / "features.csv", index=False)

df_feat  # muestra la tabla de features en el notebook


# Audio Standarizer

In [4]:
from pathlib import Path
import sys

PROJECT_ROOT = Path().resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
	sys.path.insert(0, str(PROJECT_ROOT))

import numpy as np
import pandas as pd
import librosa
from scipy.io import wavfile

from Code.audio import AudioPreproc, AudioPreprocConfig, AudioFeat, AudioFeatConfig
from Code.audio.Standardizer import Standardizer

run = 3

input_dir_base = PROJECT_ROOT / "Database" / "data" / "audio"
output_dir_base = PROJECT_ROOT / "Database" / "tmp" / "audio" / f"audioSTDTry{run:02d}"
output_dir_base.mkdir(parents=True, exist_ok=True)

labels = ["contar", "proporcion", "salir"]

config = AudioPreprocConfig(
	target_sr=16e3,
	T_sec=1.2,
	frame_ms=25.,
	hop_ms=10.,
	corte_pasaalto=80.,
	orden_pasaalto=4,
	coeficiente_pre_enfasis=0.97,
	norm_mode="RMS",
	rms_target_dbfs=-20.0,
	peak_ref=0.98,
	max_gain_db=18.,
	gate_dbfs=-60.,
	pad_mode="edge",
)

pre = AudioPreproc(config)


feat_config = AudioFeatConfig(
	sr_target=16000.0,
	win_ms=25.0,
	hop_ms = 10.0,
	N_MFCC = 13,
	delta_order = 1,
	RMS = True,
	ZCR = True,
)

feat = AudioFeat(config=feat_config)

feature_names = feat.nombres_features()

y_proc, sr_proc, file_names, class_names = [], [], [], []
y_features = []

for cls in labels:
	input_dir = input_dir_base / cls
	output_dir = output_dir_base / cls
	output_dir.mkdir(parents=True, exist_ok=True)

	for p in sorted(input_dir.glob("*.wav")):
		y, sr = pre.procesar(p)

		y_feat = feat.extraer_caracteristicas(y, sr)

		y_proc.append(y)
		sr_proc.append(sr)
		y_features.append(y_feat)


		file_names.append(p.name)
		class_names.append(cls)

# Tabla de estadísticos por archivo
filas = []
for cls, fname, y, sr in zip(class_names, file_names, y_proc, sr_proc):
	filas.append({
		"Clase": cls,
		"Archivo": fname,
		"Duración (s)": len(y) / sr,
		"Mín": float(np.min(y)),
		"Máx": float(np.max(y)),
		"Energía RMS": float(np.sqrt(np.mean(y**2))),
		"shape": y.shape,
	})

df = pd.DataFrame(filas)
df.to_csv(output_dir_base / "estadisticos.csv", index=False)
display(df)  # muestra la tabla en el notebook


# Tabla de features completas por archivo
feat_rows = []
for cls, fname, vec in zip(class_names, file_names, y_features):
	row = {"Clase": cls, "Archivo": fname}
	row.update({name: float(val) for name, val in zip(feature_names, vec)})
	feat_rows.append(row)

df_feat = pd.DataFrame(feat_rows)
df_feat.to_csv(output_dir_base / "features.csv", index=False)

df_feat  # muestra la tabla de features en el notebook

# Covarianza y reducción de dimensión sobre las features

X = np.stack(y_features).astype(np.float32)  # (N, D)
print(f"Dimensión de matriz de features X: {X.shape}")

stats = Standardizer().calculate_statistics(X)
X_std = stats.transform(X)

print(f"Dimensión de matriz X_std: {X_std.shape}")

cov = np.cov(X_std, rowvar=False)
eigvals, eigvecs = np.linalg.eigh(cov)
idx = np.argsort(eigvals)[::-1]
eigvals = eigvals[idx]
eigvecs = eigvecs[:, idx]

explained = eigvals / eigvals.sum()
cum_explained = np.cumsum(explained)
k_95 = int(np.searchsorted(cum_explained, 0.95) + 1)

explained *= 100.0
cum_explained *= 100.0

# Proyección en las k componentes que cubren ~95% de la varianza
X_proj = X_std @ eigvecs[:, :k_95]

cov_df = pd.DataFrame(cov, index=feature_names, columns=feature_names)
var_df = pd.DataFrame({
	'eigenvalue': eigvals,
	'Indice': idx,
	'Porcentaje': explained,
	'Porcentaje Acumulado': cum_explained,
})
loadings_df = pd.DataFrame(
	eigvecs[:, :k_95],
	index=feature_names,
	columns=[f'PC{i+1}' for i in range(k_95)],
)

display(var_df.head(20))
display(loadings_df.head(20))

# Guardar resultados en CSV en la misma carpeta de salida
cov_df.to_csv(output_dir_base / 'covariance.csv')
var_df.assign(pc=np.arange(1, len(eigvals)+1)).to_csv(output_dir_base / 'variance_explained.csv', index=False)
loadings_df.to_csv(output_dir_base / 'loadings.csv')
proj_rows = []

for fname, cls, vec in zip(file_names, class_names, X_proj):
	row = {'Archivo': fname, 'Clase': cls}
	row.update({f'PC{i+1}': float(val) for i, val in enumerate(vec)})
	proj_rows.append(row)

pd.DataFrame(proj_rows).to_csv(output_dir_base / 'projections.csv', index=False)

pc_names = [f'PC{i+1}' for i in range(len(eigvals))]
top_feat_idx = np.argmax(np.abs(eigvecs), axis=0)          # por columna (PC)
top_feat_name = [feature_names[i] for i in top_feat_idx]
top_loading = [eigvecs[top_feat_idx[i], i] for i in range(len(eigvals))]

var_df_dominant = pd.DataFrame({
    'PC': pc_names,
    'eigenvalue': eigvals,
    'Indice': idx,                      # índice original antes de ordenar, si lo quieres
    'Porcentaje': explained,
    'Porcentaje Acumulado': cum_explained,
    'Feature_dom': top_feat_name,       # estadístico con mayor peso
    'Loading_dom': top_loading,         # su carga (para ver signo/magnitud)
})
display(var_df_dominant)
var_df_dominant.to_csv(output_dir_base / 'variance_explained_with_dominant_features.csv', index=False)

Unnamed: 0,Clase,Archivo,Duración (s),Mín,Máx,Energía RMS,shape
0,contar,Contar01.wav,1.2,-0.801005,1.0,0.099148,"(19200,)"
1,contar,Contar02.wav,1.2,-1.0,1.0,0.095919,"(19200,)"
2,contar,Contar03.wav,1.2,-0.869037,1.0,0.066664,"(19200,)"
3,contar,Contar04.wav,1.2,-0.870248,1.0,0.084461,"(19200,)"
4,contar,Contar05.wav,1.2,-0.441218,0.681352,0.074722,"(19200,)"
5,contar,Contar06.wav,1.2,-0.662514,0.61216,0.074167,"(19200,)"
6,contar,Contar07.wav,1.2,-1.0,1.0,0.107732,"(19200,)"
7,contar,Contar08.wav,1.2,-1.0,1.0,0.097454,"(19200,)"
8,contar,Contar09.wav,1.2,-1.0,1.0,0.09118,"(19200,)"
9,contar,Contar10.wav,1.2,-0.770015,0.901862,0.081734,"(19200,)"


Dimensión de matriz de features X: (33, 112)
Dimensión de matriz X_std: (33, 112)


Unnamed: 0,eigenvalue,Indice,Porcentaje,Porcentaje Acumulado
0,31.656568,111,27.408284,27.408284
1,12.55394,110,10.869212,38.277496
2,10.783134,109,9.336047,47.613543
3,8.221437,108,7.118128,54.731671
4,6.50262,107,5.629974,60.361645
5,5.609836,106,4.857001,65.218646
6,5.0396,105,4.36329,69.581936
7,4.005898,104,3.46831,73.050246
8,3.885357,103,3.363946,76.414191
9,3.459142,102,2.994928,79.40912


Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,PC11,PC12,PC13,PC14,PC15,PC16,PC17,PC18,PC19,PC20
MFCC 1_mean,-0.037832,0.047853,-0.086152,0.256518,-0.116924,-0.097264,0.04278,-0.156261,0.049922,-0.034484,0.030643,-0.046044,0.073893,-0.046998,0.056097,-0.113221,0.001486,0.024901,-0.017777,0.030131
MFCC 2_mean,0.113476,0.176217,-0.038307,-0.001579,0.016433,-0.075578,0.074283,-0.028048,-0.108352,0.001402,-0.070845,-0.006981,0.093933,0.036541,-0.018658,0.13072,0.021133,0.005803,-0.018899,-0.004083
MFCC 3_mean,0.108365,0.179134,-0.038075,-0.065709,-0.073007,-0.059671,0.039588,-0.000212,-0.049155,0.121997,-0.043735,-0.03471,-0.062154,0.010067,-0.015498,0.062802,-0.014335,0.112008,-0.053117,-0.103025
MFCC 4_mean,0.156442,0.05376,0.023731,-0.017964,0.093649,0.064062,-0.071265,-0.080109,0.014146,0.003885,-0.039434,-0.054757,-0.018969,0.008846,0.046121,0.082211,0.034216,-0.025684,-0.109903,-0.072478
MFCC 5_mean,0.11894,-0.110558,0.0149,0.013045,0.007475,0.17817,0.063964,-0.049978,-0.009125,0.163489,0.094513,0.019173,-0.042236,0.021472,0.036715,0.095342,-0.078575,-0.041263,-0.003685,-0.01002
MFCC 6_mean,0.163449,0.018337,-0.069738,0.038442,-0.009008,-0.056321,0.026106,-0.005189,0.076733,-0.08531,-0.03875,0.010992,-0.000483,0.022068,-0.010868,0.074371,-0.095339,-0.030819,0.036331,-0.012391
MFCC 7_mean,0.115864,0.141848,-0.058061,0.042882,-0.059674,0.030493,0.13474,0.016946,-0.017881,-0.076732,-0.010229,-0.096319,0.00174,-0.027111,0.056742,0.143752,-0.040428,-0.130169,-0.16992,-0.018835
MFCC 8_mean,0.16181,-0.035644,-0.034611,-0.012085,-0.028591,-0.068007,0.013501,-0.048588,-0.014128,-0.013351,-0.08483,-0.122607,-0.079884,-0.005765,0.085815,0.054012,0.012935,-0.089455,-0.012697,0.015277
MFCC 9_mean,0.102347,0.127515,-0.05832,0.101287,0.09978,0.000384,-0.072975,0.008964,0.144293,-0.181202,-0.006581,-0.074374,0.067885,0.014667,-0.08861,-0.06504,0.105332,-0.080562,0.054373,0.003957
MFCC 10_mean,0.152694,0.025035,-0.001827,-0.064521,0.007491,-0.154227,0.001515,-0.066899,-0.010945,-0.055767,-0.015788,-0.07976,0.087635,-0.077653,0.034105,0.049767,0.07302,0.082776,0.030354,-0.023745


Unnamed: 0,PC,eigenvalue,Indice,Porcentaje,Porcentaje Acumulado,Feature_dom,Loading_dom
0,PC1,3.165657e+01,111,2.740828e+01,27.408284,MFCC 6_p90,0.167560
1,PC2,1.255394e+01,110,1.086921e+01,38.277496,MFCC 2_p10,0.220929
2,PC3,1.078313e+01,109,9.336047e+00,47.613543,Δ 2_p90,0.224637
3,PC4,8.221437e+00,108,7.118128e+00,54.731671,MFCC 1_mean,0.256518
4,PC5,6.502620e+00,107,5.629974e+00,60.361645,Δ 10_p90,-0.241090
...,...,...,...,...,...,...,...
107,PC108,-2.527783e-15,4,-2.188557e-15,100.000000,MFCC 4_p90,0.356926
108,PC109,-3.435120e-15,3,-2.974130e-15,100.000000,MFCC 5_mean,0.299517
109,PC110,-3.583688e-15,2,-3.102760e-15,100.000000,MFCC 4_mean,0.454460
110,PC111,-3.822459e-15,1,-3.309488e-15,100.000000,MFCC 4_mean,-0.352880


# Audio Complete Test

In [None]:
from pathlib import Path
import sys

PROJECT_ROOT = Path().resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
	sys.path.insert(0, str(PROJECT_ROOT))

import numpy as np
import pandas as pd
import librosa
from scipy.io import wavfile
import sounddevice as sd
import soundfile as sf

from Code.audio import AudioPreproc, AudioPreprocConfig, AudioFeat, AudioFeatConfig
from Code.audio.Standardizer import Standardizer
from Code.audio import AudioOrchestrator


def infer_label_from_name(p: Path) -> str:
	name = p.stem.lower()  # 'contar001' -> 'contar001'
	if name.startswith("contar"):
		return "contar"
	elif name.startswith("proporcion"):
		return "proporcion"
	elif name.startswith("salir"):
		return "salir"
	else:
		raise ValueError(f"No sé qué clase es '{p.name}'")

def grabar_audio_segundos(segundos: float = 2.0) -> tuple[np.ndarray, int]:
	dev = sd.query_devices(kind='input')
	sr_rec = int(dev['default_samplerate'] or 16000)
	print(f"Grabando {segundos}s a {sr_rec} Hz (input device default)…")
	audio = sd.rec(int(segundos * sr_rec), samplerate=sr_rec, channels=1, dtype="float32")
	sd.wait()
	return audio.squeeze(), sr_rec

run = 2

input_dir = PROJECT_ROOT / "Database" / "data" / "audio"
output_dir = PROJECT_ROOT / "Database" / "tmp" / "audio" / f"audioTry{run:02d}"
output_dir.mkdir(parents=True, exist_ok=True)

model_number = 1
model_path = PROJECT_ROOT / "Database" / "models" / "audio" / f"modelo_audio_{model_number}.npz"

orch = AudioOrchestrator()

paths = sorted(input_dir.rglob("*.wav"))
labels = [p.parent.name.lower() for p in paths]

R_dic = orch.entrenar(
	paths=paths,
	labels=labels
)
orch.guardar_modelo()

display(R_dic)

# y, sr = grabar_audio_segundos()

# resultado = orch.predecir_comando((y, sr), devolver_distancia=True)

# print("Comando predicho:", resultado["label"])
# print("Distancia mínima:", resultado["distancia_min"])
# print("\n")

# d = orch.knn.distancias(orch.stats.transform_one(orch.feat.extraer_caracteristicas(*orch._preprocesar_audio((y,sr)))) @ orch._eigvecs[:, :orch._k_used])
# print("dist min:", d.min(), "dist medias por clase:")
# for c in set(labs):
# 	mask = labs == c
#	print(c, d[mask].mean())

# Xproj = orch._X_store_proj
# labs = orch._y_store
# preds = []
# for i, (x, lab) in enumerate(zip(Xproj, labs)):
# 	pred = orch.knn.predecir(x, exclude_idx=i)
# 	preds.append(pred)
# 	if pred != lab:
# 		print("Falla en train idx", i, "real", lab, "pred", pred)
# print("\nAcc LOO:", sum(p==l for p,l in zip(preds,labs)) / len(labs))


