In [None]:
from pathlib import Path
import sys

# The project root is taken to be two directory levels above the current
# working directory. It is put at the front of the import path (once) so that
# project packages can be imported from this notebook.
PROJECT_ROOT = Path().resolve().parents[1]
if not any(entry == str(PROJECT_ROOT) for entry in sys.path):
    sys.path.insert(0, str(PROJECT_ROOT))

import numpy as np
import pandas as pd
import librosa
from scipy.io import wavfile

from Code.audio import AudioPreproc, AudioPreprocConfig, AudioFeat, AudioFeatConfig

# Index of this experiment run; it is embedded (zero-padded to two digits) in
# the name of the output folder so that different runs do not overwrite each
# other.
run = 2

# Base folder the audio is read from, and the per-run folder results go to.
input_dir_base = PROJECT_ROOT.joinpath("Database", "data", "audio")
output_dir_base = PROJECT_ROOT.joinpath("Database", "tmp", f"audioFeatTry{run:02d}")
output_dir_base.mkdir(parents=True, exist_ok=True)

# Class labels; each one is also used as a sub-folder name under both base
# directories.
labels = ["contar", "proporcion", "salir"]

# Preprocessing settings. The groupings below follow the parameter names; the
# exact meaning of each field is defined by AudioPreprocConfig (not shown
# here) -- confirm there before changing values.
pre_config = AudioPreprocConfig(
    # output sample rate / clip length
    target_sr=16000,
    t_sec=1.2,
    # analysis framing
    frame_ms=25.0,
    hop_ms=10.0,
    # high-pass filter and pre-emphasis
    highpass_hz=40.0,
    hp_order=2,
    preemph_a=0.97,
    # voice-activity detection
    vad_thresh_db=-35.0,
    vad_win_ms=20.0,
    vad_min_ms=120.0,
    vad_expand_ms=60.0,
    # level normalization
    norm_mode="rms",
    rms_target_dbfs=-20.0,
    peak_ref=0.98,
    max_gain_db=18.0,
    # gating and padding
    gate_dbfs=-60.0,
    pad_mode="edge",
)
pre = AudioPreproc(pre_config)

# Frame and hop lengths scaled from the "_ms" config fields by 1e-3
# (milliseconds -> seconds), read back from the preprocessor's own config.
winn = pre.cfg.frame_ms * 1e-3
hopp = pre.cfg.hop_ms * 1e-3


# Feature-extraction settings. The window and hop reuse the values derived
# from the preprocessing config (`winn`, `hopp`) so both stages share the same
# framing. The remaining fields are interpreted by AudioFeatConfig (not shown
# here); judging by their names they select 13 MFCCs without c0, first-order
# deltas, RMS and ZCR tracks, and the summary statistics to compute --
# TODO confirm against the class definition.
feat_config = AudioFeatConfig(
    win=winn,
    hop=hopp,
    n_mfcc_no_c0=13,
    delta_order=1,
    add_rms=True,
    add_zcr=True,
    stats=("mean", "std", "p10", "p90"),
)
feat = AudioFeat(cfg=feat_config)
# Feature names as reported by the extractor; they are paired positionally
# with each extracted feature vector when the feature table is built.
feature_names = feat.nombres_features()

# Parallel per-file accumulators: index i of every list refers to the same
# recording (preprocessed signal, its sample rate, file name, class label and
# extracted feature vector).
y_proc, sr_proc, file_names, class_names = [], [], [], []
y_features = []

for cls in labels:
    input_dir = input_dir_base / cls
    output_dir = output_dir_base / cls
    # The per-class output folder is created here; nothing in this cell
    # writes into it.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sorted so that rows are produced in a deterministic, name-ordered
    # sequence regardless of filesystem listing order.
    for audio_path in sorted(input_dir.glob("*.wav")):
        y, sr = pre.preprocesar_desde_path(audio_path)
        # Extract features at the sample rate that was actually returned
        # together with the signal, rather than the configured target rate,
        # so the two can never disagree.
        y_feat = feat.extraer_caracteristicas(y, sr)

        y_proc.append(y)
        sr_proc.append(sr)
        y_features.append(y_feat)

        file_names.append(audio_path.name)
        class_names.append(cls)


# Table of basic per-file statistics of the preprocessed signals (optional):
# duration in seconds (sample count / sample rate), minimum, maximum and RMS.
stats_rows = [
    {
        "Clase": cls,
        "Archivo": fname,
        "Duración (s)": len(y) / sr,
        "Mín": float(np.min(y)),
        "Máx": float(np.max(y)),
        "Energía RMS": float(np.sqrt(np.mean(y**2))),
    }
    for cls, fname, y, sr in zip(class_names, file_names, y_proc, sr_proc)
]
df_stats = pd.DataFrame(stats_rows)
df_stats.to_csv(output_dir_base / "estadisticos.csv", index=False)

# Full feature table: one row per file, with the class, the file name and one
# column per named feature.
feat_rows = []
for cls, fname, vec in zip(class_names, file_names, y_features):
    # zip() stops at the shorter input, so a feature vector whose length does
    # not match the list of names would silently yield an incomplete row.
    # Fail loudly instead of writing a misleading CSV.
    if len(vec) != len(feature_names):
        raise ValueError(
            f"{fname}: feature vector has {len(vec)} values but "
            f"{len(feature_names)} feature names were reported"
        )
    row = {"Clase": cls, "Archivo": fname}
    row.update({name: float(val) for name, val in zip(feature_names, vec)})
    feat_rows.append(row)

df_feat = pd.DataFrame(feat_rows)
df_feat.to_csv(output_dir_base / "features.csv", index=False)

df_feat  # display the feature table in the notebook


0.025

0.01

Unnamed: 0,Clase,Archivo,mfcc_1_mean,mfcc_2_mean,mfcc_3_mean,mfcc_4_mean,mfcc_5_mean,mfcc_6_mean,mfcc_7_mean,mfcc_8_mean,...,mfcc_delta_6_p90,mfcc_delta_7_p90,mfcc_delta_8_p90,mfcc_delta_9_p90,mfcc_delta_10_p90,mfcc_delta_11_p90,mfcc_delta_12_p90,mfcc_delta_13_p90,rms_p90,zcr_p90
0,contar,Contar01.wav,54.498886,21.869501,17.312647,9.49595,13.744377,3.434649,9.365744,-5.754476,...,2.893245,3.193537,2.358351,2.301831,1.687209,1.971892,1.358149,1.002515,0.138748,0.283
1,contar,Contar02.wav,30.392019,23.195271,30.310471,19.16194,22.919569,12.335075,17.970024,8.741826,...,1.76827,2.389211,1.904187,1.55761,1.268306,0.391798,1.707798,0.9711722,0.06921,0.475
2,contar,Contar03.wav,35.582451,13.707606,22.758963,16.666489,20.801437,13.844005,19.103773,13.819697,...,2.707908,2.484224,2.362537,1.517648,0.826867,0.408519,1.303719,-2.1079910000000003e-17,0.041069,0.2095
3,contar,Contar04.wav,31.761587,-2.743854,12.506169,4.31029,11.226822,2.167912,15.783747,0.59101,...,2.174295,2.080817,3.117031,1.723384,1.596543,0.898758,1.60549,0.4908921,0.126695,0.316
4,contar,Contar05.wav,19.545143,-34.993675,-12.579431,-9.18632,2.570884,-10.606009,-4.96263,-4.660137,...,3.374838,2.530781,2.278178,2.045021,3.828023,2.577837,2.19962,2.81198,0.148973,0.261
5,contar,Contar06.wav,22.392853,-31.942286,-16.082924,-11.37149,-0.80742,-10.502243,-2.623168,-5.166539,...,3.77732,2.512693,2.670895,1.893148,3.443759,2.131326,2.088318,2.113427,0.147906,0.25
6,contar,Contar07.wav,35.489223,5.561762,16.757273,-5.064148,4.095685,9.746733,12.976592,-1.948522,...,3.242632,3.163615,3.201839,3.666613,2.108126,1.417414,2.154414,2.027819,0.130642,0.31
7,contar,Contar08.wav,56.487003,19.887295,29.493164,4.340764,13.373436,12.099775,18.822849,-0.452632,...,2.975575,3.047519,2.962452,2.089929,1.99145,1.247115,2.050513,1.586956,0.115363,0.274
8,contar,Contar09.wav,49.368023,11.798235,25.131523,-0.847351,19.832119,7.042241,10.231749,6.743614,...,3.38281,2.199265,2.659111,2.047243,2.455315,1.779006,2.001793,1.255298,0.126419,0.2675
9,contar,Contar10.wav,26.20009,-11.340007,5.913662,-6.16398,16.870203,-0.119988,7.28855,-5.453796,...,1.984976,2.529088,3.060664,2.318474,2.545816,1.904681,1.626855,2.048427,0.145433,0.32
