## Celda 1 - Checklist de los archivos `.npy`

In [1]:
import numpy as np
from pathlib import Path

# Folder that holds the labelled MFCC feature arrays (machine-specific path).
BASE = Path(r"C:\Users\leona\Documents\Thesis_Project_UACH\Temp\Dataset\features_mfcc_labeled")


def _load_split(split):
    """Return ``(features, labels)`` for one split.

    Features are opened as a read-only memory map; labels are loaded normally.
    """
    features = np.load(BASE / f"X_{split}.npy", mmap_mode="r")
    labels = np.load(BASE / f"y_{split}.npy")
    return features, labels


X_train, y_train = _load_split("train")
X_val, y_val = _load_split("val")
X_test, y_test = _load_split("test")

# Shape / dtype checklist for the arrays that feed the model.
for tag, arr in (
    ("X_train:", X_train),
    ("y_train:", y_train),
    ("X_val:  ", X_val),
    ("X_test: ", X_test),
):
    print(tag, arr.shape, arr.dtype)

# Per-class sample counts of the training labels.
print("\nDistribución y_train:")
unique, counts = np.unique(y_train, return_counts=True)
print(dict(zip(unique, counts)))

X_train: (52551, 3, 32, 201) float16
y_train: (52551,) int64
X_val:   (11253, 3, 32, 201) float16
X_test:  (11308, 3, 32, 201) float16

Distribución y_train:
{0: 7368, 1: 6535, 2: 10668, 3: 27980}


## Celda 2 - CNN baseline (TensorFlow/Keras)

### Celda 2.1 - Dataset por batches (para no cargar todo a RAM)

In [2]:
import tensorflow as tf

BATCH = 64  # samples per batch for every split

def make_ds(X, y, training=False):
    """Build a batched ``tf.data`` pipeline that reads samples from ``X`` lazily.

    ``tf.data.Dataset.from_tensor_slices((X, y))`` converts the whole array into
    an in-memory tensor, which defeats opening ``X`` with ``mmap_mode="r"``.
    Here the dataset iterates over sample *indices* instead and only the rows
    of the current batch are read from ``X`` / ``y``.

    Because only integers are shuffled, the shuffle buffer can cover the whole
    split (a full permutation every epoch) at negligible memory cost.

    Args:
        X: array-like indexable by an integer index array (e.g. a NumPy memmap),
            first axis = samples.
        y: NumPy array of labels aligned with ``X`` on the first axis.
        training: when True, indices are reshuffled at every epoch; otherwise
            samples are yielded in storage order.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, labels)`` batches of up to
        ``BATCH`` samples, with the dtypes of ``X`` and ``y`` left unchanged.
    """
    n_samples = len(X)
    x_dtype, y_dtype = tf.as_dtype(X.dtype), tf.as_dtype(y.dtype)
    x_tail, y_tail = tuple(X.shape[1:]), tuple(y.shape[1:])

    def _gather(idx):
        # Plain NumPy: pull just the requested rows out of the arrays.
        return np.asarray(X[idx]), np.asarray(y[idx])

    def _load(idx):
        xb, yb = tf.numpy_function(_gather, [idx], (x_dtype, y_dtype))
        # numpy_function drops static shapes; restore them so Keras can build.
        xb.set_shape((None,) + x_tail)
        yb.set_shape((None,) + y_tail)
        return xb, yb

    ds = tf.data.Dataset.range(n_samples)
    if training:
        ds = ds.shuffle(n_samples, reshuffle_each_iteration=True)
    ds = ds.batch(BATCH)
    ds = ds.map(_load, num_parallel_calls=tf.data.AUTOTUNE)
    return ds.prefetch(tf.data.AUTOTUNE)

# Only the training split asks for shuffling; validation and test are built
# with the default training=False.
train_ds, val_ds, test_ds = (
    make_ds(X_train, y_train, training=True),
    make_ds(X_val, y_val),
    make_ds(X_test, y_test),
)

### Celda 2.2 - CNN pequeña (baseline)

In [3]:
from tensorflow.keras import layers, models

num_classes = 4
input_shape = (3, 32, 201)  # per-sample layout is (channels, height, width)

# Keras defaults to channels_last, so every layout-aware layer is told explicitly.
_LAYOUT = "channels_first"

model = models.Sequential()
model.add(layers.Input(shape=input_shape))

# Three conv stages (16 -> 32 -> 64 filters). The first two end in 2x2 max
# pooling; the last one ends in global average pooling instead.
for n_filters, is_last in ((16, False), (32, False), (64, True)):
    model.add(layers.Conv2D(n_filters, (3,3), padding="same", activation="relu", data_format=_LAYOUT))
    model.add(layers.BatchNormalization(axis=1))  # axis 1 = channels in this layout
    if is_last:
        model.add(layers.GlobalAveragePooling2D(data_format=_LAYOUT))
    else:
        model.add(layers.MaxPool2D((2,2), data_format=_LAYOUT))

# Classifier head: one hidden layer with dropout, then a softmax over the classes.
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(num_classes, activation="softmax"))

# Sparse loss: the labels are integer class ids, not one-hot vectors.
adam = tf.keras.optimizers.Adam(1e-3)
model.compile(optimizer=adam, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 16, 32, 201)       448       
                                                                 
 batch_normalization (BatchN  (None, 16, 32, 201)      64        
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 16, 100)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 16, 100)       4640      
                                                                 
 batch_normalization_1 (Batc  (None, 32, 16, 100)      128       
 hNormalization)                                                 
                                                        

### Celda 2.3 - Entrenar

In [4]:
# No `monitor` is passed, so each callback watches its library-default quantity.
early_stop = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(patience=2, factor=0.5)
callbacks = [early_stop, lr_on_plateau]

# Train for at most 30 epochs, validating on val_ds after each one.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    callbacks=callbacks,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30


### Celda 2.4 - Evaluar + matriz de confusión

In [5]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# test_ds was built with training=False, and the predictions are compared
# positionally with y_test below.
test_probs = model.predict(test_ds)
y_pred = test_probs.argmax(axis=1)  # most probable class per sample

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, digits=4))

[[1319   83  103   86]
 [ 120  921  261  111]
 [  61   79 1821  338]
 [ 100  216  288 5401]]
              precision    recall  f1-score   support

           0     0.8244    0.8290    0.8267      1591
           1     0.7090    0.6518    0.6792      1413
           2     0.7364    0.7921    0.7632      2299
           3     0.9099    0.8994    0.9046      6005

    accuracy                         0.8368     11308
   macro avg     0.7949    0.7931    0.7934     11308
weighted avg     0.8375    0.8368    0.8367     11308



## Celda 3 - Red Neuronal

### Celda 3.1 - Cargar ```.npy``` con Memmap

In [1]:
from pathlib import Path
import os, random
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report, confusion_matrix

SEED = 123

# NOTE(review): hash randomization is configured when the interpreter starts,
# so setting PYTHONHASHSEED here presumably only reaches child processes, not
# this kernel — confirm.
os.environ["PYTHONHASHSEED"] = str(SEED)

# Seed every RNG used below: Python's `random`, NumPy's global generator, TensorFlow.
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(SEED)

# Folder that holds the labelled MFCC feature arrays (machine-specific path).
BASE = Path(r"C:\Users\leona\Documents\Thesis_Project_UACH\Temp\Dataset\features_mfcc_labeled")


def _open_split(name):
    """Load one split: features as a read-only memmap, labels as a regular array."""
    return (
        np.load(BASE / f"X_{name}.npy", mmap_mode="r"),
        np.load(BASE / f"y_{name}.npy"),
    )


(X_train, y_train), (X_val, y_val), (X_test, y_test) = map(
    _open_split, ("train", "val", "test")
)

# Shape / dtype summary of what was loaded.
for label, arr in (
    ("X_train:", X_train),
    ("y_train:", y_train),
    ("X_val:  ", X_val),
    ("X_test: ", X_test),
):
    print(label, arr.shape, arr.dtype)

# Class check: how many training samples each label has.
u, c = np.unique(y_train, return_counts=True)
print("Distribución train:", dict(zip(u, c)))

X_train: (52551, 3, 32, 201) float16
y_train: (52551,) int64
X_val:   (11253, 3, 32, 201) float16
X_test:  (11308, 3, 32, 201) float16
Distribución train: {0: 7368, 1: 6535, 2: 10668, 3: 27980}


### Celda 3.2 - ```tf.data``` (con cast a float32 dentro del pipeline)

In [2]:
BATCH = 64  # samples per batch for every split

def make_ds(X, y, training=False):
    """Build a batched ``tf.data`` pipeline that reads from ``X`` lazily and casts to float32.

    ``tf.data.Dataset.from_tensor_slices((X, y))`` converts the whole array into
    an in-memory tensor, which defeats opening ``X`` with ``mmap_mode="r"``.
    Here the dataset iterates over sample *indices* instead and only the rows
    of the current batch are read from ``X`` / ``y``.

    Because only integers are shuffled, the shuffle buffer can cover the whole
    split (a full permutation every epoch) at negligible memory cost.

    Args:
        X: array-like indexable by an integer index array (e.g. a NumPy memmap),
            first axis = samples.
        y: NumPy array of labels aligned with ``X`` on the first axis.
        training: when True, indices are reshuffled at every epoch; otherwise
            samples are yielded in storage order.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, labels)`` batches of up to
        ``BATCH`` samples; features are cast to ``tf.float32``, labels keep the
        dtype of ``y``.
    """
    n_samples = len(X)
    x_dtype, y_dtype = tf.as_dtype(X.dtype), tf.as_dtype(y.dtype)
    x_tail, y_tail = tuple(X.shape[1:]), tuple(y.shape[1:])

    def _gather(idx):
        # Plain NumPy: pull just the requested rows out of the arrays.
        return np.asarray(X[idx]), np.asarray(y[idx])

    def _load(idx):
        xb, yb = tf.numpy_function(_gather, [idx], (x_dtype, y_dtype))
        # numpy_function drops static shapes; restore them so Keras can build.
        xb.set_shape((None,) + x_tail)
        yb.set_shape((None,) + y_tail)
        # Cast per batch, inside the pipeline, so storage can stay in its compact dtype.
        return tf.cast(xb, tf.float32), yb

    ds = tf.data.Dataset.range(n_samples)
    if training:
        ds = ds.shuffle(n_samples, reshuffle_each_iteration=True)
    ds = ds.batch(BATCH)
    ds = ds.map(_load, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds

# Only the training split asks for shuffling; validation and test are built
# with the default training=False.
train_ds, val_ds, test_ds = (
    make_ds(X_train, y_train, training=True),
    make_ds(X_val, y_val),
    make_ds(X_test, y_test),
)

### Celda 3.3 - Class weights (para el desbalance)

Esto ayuda a que el modelo no “adivine” siempre la clase 3 (la clase predominante).

In [3]:
num_classes = 4
counts = np.bincount(y_train, minlength=num_classes)  # samples per class id
total = counts.sum()

# Inverse-frequency weighting: weight_k = total / (num_classes * count_k).
# Classes with no samples are left out, which also avoids dividing by zero.
class_weight = {}
for cls_id in range(num_classes):
    n_k = counts[cls_id]
    if n_k > 0:
        class_weight[cls_id] = float(total / (num_classes * n_k))

print("counts:", counts)
print("class_weight:", class_weight)

counts: [ 7368  6535 10668 27980]
class_weight: {0: 1.7830822475570032, 1: 2.0103672532517214, 2: 1.2315101237345332, 3: 0.4695407433881344}


### Celda 3.4 - Modelo CNN baseline (channels_first)

In [4]:
input_shape = (3, 32, 201)  # per-sample layout is (channels, height, width)

# Small CNN in channels_first layout: three conv stages (16 -> 32 -> 64 filters),
# each followed by batch normalisation over the channel axis (axis=1). The first
# two stages end in 2x2 max pooling, the third in global average pooling, and a
# dense head produces the class probabilities.
model = models.Sequential([
    layers.Input(shape=input_shape),

    layers.Conv2D(16, (3,3), padding="same", activation="relu", data_format="channels_first"),
    layers.BatchNormalization(axis=1),
    layers.MaxPool2D((2,2), data_format="channels_first"),

    layers.Conv2D(32, (3,3), padding="same", activation="relu", data_format="channels_first"),
    layers.BatchNormalization(axis=1),
    layers.MaxPool2D((2,2), data_format="channels_first"),

    layers.Conv2D(64, (3,3), padding="same", activation="relu", data_format="channels_first"),
    layers.BatchNormalization(axis=1),
    layers.GlobalAveragePooling2D(data_format="channels_first"),

    layers.Dense(64, activation="relu"),
    layers.Dropout(0.3),
    # Output width follows num_classes (the value the class weights are built
    # from) rather than a separate literal, so the two cannot drift apart.
    layers.Dense(num_classes, activation="softmax")
])

# Sparse loss: the labels are integer class ids, not one-hot vectors.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 16, 32, 201)       448       
                                                                 
 batch_normalization (BatchN  (None, 16, 32, 201)      64        
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 16, 100)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 16, 100)       4640      
                                                                 
 batch_normalization_1 (Batc  (None, 32, 16, 100)      128       
 hNormalization)                                                 
                                                        

### Celda 3.5 - Entrenar (con callbacks)

In [5]:
# All three callbacks watch the same quantity and treat higher as better.
_watch = dict(monitor="val_accuracy", mode="max")

callbacks = [
    # Stop after 10 epochs without improvement and roll back to the best weights.
    tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True, **_watch),
    # Halve the learning rate after 3 epochs without improvement.
    tf.keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5, **_watch),
    # Keep only the best model on disk, next to the feature arrays.
    tf.keras.callbacks.ModelCheckpoint(
        filepath=str(BASE / "cnn_mfcc_best.keras"),
        save_best_only=True,
        **_watch,
    ),
]

# Train for at most 40 epochs, applying the per-class weights to the loss.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=40,
    class_weight=class_weight,
    callbacks=callbacks,
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40


### Celda 3.6 - Evaluación en Test (matriz + macro-F1)

In [6]:
# Class probabilities for the test split; the argmax over axis 1 is the predicted label.
probs = model.predict(test_ds)
y_pred = probs.argmax(axis=1)

cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)

print("Confusion matrix:\n", cm)
print("\nClassification report:\n", report)

Confusion matrix:
 [[1197   78  138  178]
 [  53 1085  200   75]
 [  31  125 1931  212]
 [  40  101  365 5499]]

Classification report:
               precision    recall  f1-score   support

           0     0.9061    0.7524    0.8221      1591
           1     0.7811    0.7679    0.7744      1413
           2     0.7331    0.8399    0.7829      2299
           3     0.9220    0.9157    0.9189      6005

    accuracy                         0.8589     11308
   macro avg     0.8356    0.8190    0.8246     11308
weighted avg     0.8638    0.8589    0.8596     11308

