# Instalación de librerías necesarias

In [None]:
!pip install comet-ml keras-tuner

# Importación de librerías

In [None]:
import os
from pathlib import Path
import comet_ml

from comet_ml import Experiment

import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

import librosa # Comentarios

import sklearn
from sklearn.model_selection import train_test_split

from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display

# Set the seed value for experiment reproducibility.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)


# List the GPUs visible to TensorFlow. This call only queries the devices;
# it does not change any device configuration.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# Funciones auxiliares (métricas)

In [None]:
class F1Macro(tf.keras.metrics.Metric):
    """Streaming macro-averaged F1 score for multi-label outputs.

    Per-label true-positive, false-positive and false-negative counts are
    accumulated across batches; `result()` computes one F1 value per label
    from those counts and returns their unweighted mean.

    Args:
        num_labels: Number of labels (length of the per-label counters).
        threshold: Predictions strictly greater than this value count as
            positive.
        name: Metric name reported by Keras.
        **kwargs: Forwarded to `tf.keras.metrics.Metric`.
    """

    def __init__(self, num_labels=80, threshold=0.5, name="f1_macro", **kwargs):
        super().__init__(name=name, **kwargs)
        # Kept so that get_config() can reproduce the constructor arguments.
        self.num_labels = num_labels
        self.th = threshold
        self.tp = self.add_weight(name="tp", shape=(num_labels,), initializer="zeros")
        self.fp = self.add_weight(name="fp", shape=(num_labels,), initializer="zeros")
        self.fn = self.add_weight(name="fn", shape=(num_labels,), initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate the counts of one batch.

        `sample_weight` is accepted for API compatibility but is not used.
        The reduction runs over axis 0, so inputs are assumed to be shaped
        (batch, num_labels) - TODO confirm against callers.
        """
        # Align dtypes so the products below are well defined.
        y_true = tf.cast(y_true, tf.float32)

        # Only the prediction is binarised; y_true is used as given
        # (assumed to already be 0/1).
        y_pred = tf.cast(y_pred > self.th, tf.float32)

        self.tp.assign_add(tf.reduce_sum(y_true * y_pred, axis=0))
        self.fp.assign_add(tf.reduce_sum((1 - y_true) * y_pred, axis=0))
        self.fn.assign_add(tf.reduce_sum(y_true * (1 - y_pred), axis=0))

    def result(self):
        """Return the mean over labels of the per-label F1 score."""
        # divide_no_nan yields 0 where a denominator is 0, so labels with no
        # positives/predictions contribute an F1 of 0 instead of NaN.
        precision = tf.math.divide_no_nan(self.tp, self.tp + self.fp)
        recall = tf.math.divide_no_nan(self.tp, self.tp + self.fn)
        f1 = tf.math.divide_no_nan(2 * precision * recall, precision + recall)
        return tf.reduce_mean(f1)

    def reset_state(self):
        """Zero all accumulated counters."""
        for v in (self.tp, self.fp, self.fn):
            v.assign(tf.zeros_like(v))

    def get_config(self):
        """Return the constructor arguments so the metric can be re-created."""
        config = super().get_config()
        config.update({"num_labels": self.num_labels, "threshold": self.th})
        return config


In [None]:
# Core calculation of label precisions for one test sample.

def _one_sample_positive_class_precisions(scores, truth):
  """Compute the retrieval precision at each true class of one sample.

  Classes are ranked by descending score. For every class marked true, the
  precision is the number of true classes ranked at or above it divided by
  its 1-based rank.

  Args:
    scores: np.array of (num_classes,) with the classifier scores.
    truth: np.array of (num_classes,) marking the true classes (values > 0).

  Returns:
    A pair (positive_indices, precisions): the indices of the true classes
    and the precision for each of them, in the same order. Both are empty
    when the sample has no true class.
  """
  positive_indices = np.flatnonzero(truth > 0)
  # Nothing to rank when the sample carries no label.
  if positive_indices.size == 0:
    return positive_indices, np.zeros(0)

  class_count = scores.shape[0]
  # Class indices from best to worst score.
  descending_order = np.argsort(scores)[::-1]
  # rank_of_class[c] is the 0-based position of class c in that ordering.
  rank_of_class = np.zeros(class_count, dtype=np.int32)
  rank_of_class[descending_order] = np.arange(class_count)

  positive_ranks = rank_of_class[positive_indices]
  # Flag the ranked positions that hold a true class.
  hit_at_rank = np.zeros(class_count, dtype=np.bool_)
  hit_at_rank[positive_ranks] = True
  # Number of hits within every prefix of the ranked list.
  hits_up_to_rank = np.cumsum(hit_at_rank)

  precisions = hits_up_to_rank[positive_ranks] / (1 + positive_ranks.astype(float))
  return positive_indices, precisions

# All-in-one calculation of per-class lwlrap.

def calculate_per_class_lwlrap(truth, scores):
  """Compute per-class label-weighted label-ranking average precision.

  Arguments:
    truth: np.array of (num_samples, num_classes); entries > 0 mark the
      classes present in each sample.
    scores: np.array of (num_samples, num_classes) with the real-valued
      classifier score of each class for each sample.

  Returns:
    per_class_lwlrap: np.array of (num_classes,) with the lwlrap of each
      class (0 for classes that never appear in `truth`).
    weight_per_class: np.array of (num_classes,) with each class's share of
      all true labels. The overall lwlrap is
      np.sum(per_class_lwlrap * weight_per_class).
  """
  assert truth.shape == scores.shape
  sample_count, class_count = scores.shape

  # One precision per (sample, class); only the true classes of each sample
  # get a non-default value.
  precision_table = np.zeros((sample_count, class_count))
  for row, (sample_scores, sample_truth) in enumerate(zip(scores, truth)):
    hit_classes, hit_precisions = _one_sample_positive_class_precisions(
        sample_scores, sample_truth)
    precision_table[row, hit_classes] = hit_precisions

  label_counts = np.sum(truth > 0, axis=0)
  total_labels = float(np.sum(label_counts))
  class_weights = label_counts / total_labels

  # Column-wise mean over the labels actually present; the maximum() guards
  # classes with zero labels against division by zero.
  summed_precisions = np.sum(precision_table, axis=0)
  class_lwlrap = summed_precisions / np.maximum(1, label_counts)
  return class_lwlrap, class_weights


# Calculate the overall lwlrap using sklearn.metrics function.

def calculate_overall_lwlrap_sklearn(truth, scores):
  """Calculate the overall lwlrap with sklearn's label-ranking AP score.

  Args:
    truth: np.array of (num_samples, num_classes); entries > 0 mark the
      classes present in each sample.
    scores: np.array of (num_samples, num_classes) with classifier scores.

  Returns:
    The scalar label-weighted label-ranking average precision.
  """
  # Import the submodule explicitly: a bare `import sklearn` does not by
  # itself guarantee that `sklearn.metrics` has been loaded.
  from sklearn.metrics import label_ranking_average_precision_score

  # Each sample is weighted by its number of true labels. Samples without
  # labels are dropped because sklearn does not weight them correctly.
  labels_per_sample = np.sum(truth > 0, axis=1)
  labelled_rows = np.flatnonzero(labels_per_sample > 0)
  overall_lwlrap = label_ranking_average_precision_score(
      truth[labelled_rows, :] > 0,
      scores[labelled_rows, :],
      sample_weight=labels_per_sample[labelled_rows])
  return overall_lwlrap

In [None]:
# Accumulator object version.

class lwlrap_accumulator(object):
  """Accumulate batches of test samples into per-class and overall lwlrap."""

  def __init__(self):
    # The per-class arrays are created lazily by the first accumulated
    # batch, once the number of classes is known.
    self.num_classes = 0
    self.total_num_samples = 0

  def accumulate_samples(self, batch_truth, batch_scores):
    """Fold one batch of samples into the running statistics.

    Args:
      batch_truth: np.array of (num_samples, num_classes); entries > 0 mark
        the classes present in each sample of this batch.
      batch_scores: np.array of (num_samples, num_classes) with the
        classifier's real-valued score for each class of each sample.
    """
    assert batch_scores.shape == batch_truth.shape
    num_samples, num_classes = batch_truth.shape
    if not self.num_classes:
      # First batch: fix the class count and allocate the accumulators.
      self.num_classes = num_classes
      self._per_class_cumulative_precision = np.zeros(self.num_classes)
      self._per_class_cumulative_count = np.zeros(self.num_classes,
                                                  dtype=np.int32)
    assert num_classes == self.num_classes
    for sample_truth, sample_scores in zip(batch_truth, batch_scores):
      hit_classes, hit_precisions = _one_sample_positive_class_precisions(
          sample_scores, sample_truth)
      self._per_class_cumulative_precision[hit_classes] += hit_precisions
      self._per_class_cumulative_count[hit_classes] += 1
    self.total_num_samples += num_samples

  def per_class_lwlrap(self):
    """Return a vector of the per-class lwlraps for the accumulated samples."""
    # Classes never seen keep a denominator of 1, giving an lwlrap of 0.
    safe_counts = np.maximum(1, self._per_class_cumulative_count)
    return self._per_class_cumulative_precision / safe_counts

  def per_class_weight(self):
    """Return each class's share of all accumulated true labels."""
    counts = self._per_class_cumulative_count
    return counts / float(np.sum(counts))

  def overall_lwlrap(self):
    """Return the scalar overall lwlrap for the accumulated samples."""
    weighted = self.per_class_lwlrap() * self.per_class_weight()
    return np.sum(weighted)

In [None]:
# Función para extraer labels de datasets batcheados
def extract_labels_from_dataset(dataset):
    """Collect every label batch of a batched dataset into one array.

    Iterates `dataset` as (data, labels) pairs, converts each label batch
    with `.numpy()` and concatenates them along axis 0.
    """
    label_batches = [labels.numpy() for _, labels in dataset]
    return np.concatenate(label_batches, axis=0)

In [None]:
# Calcular LWLRAP usando accumulator (más eficiente)
def calculate_lwlrap_accumulator(labels, predictions, batch_size=32):
    """Compute the overall LWLRAP by feeding slices to an lwlrap_accumulator.

    Args:
        labels: Array of (num_samples, num_classes) ground-truth labels.
        predictions: Array of the same shape with the model scores.
        batch_size: Number of rows passed to the accumulator per step.

    Returns:
        The scalar overall LWLRAP over all rows.
    """
    accumulator = lwlrap_accumulator()
    total_rows = labels.shape[0]

    for first_row in range(0, total_rows, batch_size):
        # The last slice may be shorter than batch_size.
        last_row = min(first_row + batch_size, total_rows)
        accumulator.accumulate_samples(
            labels[first_row:last_row],
            predictions[first_row:last_row],
        )

    return accumulator.overall_lwlrap()

def _predict_with_labels(model, dataset):
    """Run `model` over `dataset` once and return aligned (predictions, labels).

    Predictions and labels are taken from the same batch in a single pass, so
    they stay row-aligned even if the dataset yields a different order each
    time it is iterated (e.g. because of shuffling).
    """
    prediction_batches, label_batches = [], []
    for features, labels in dataset:
        prediction_batches.append(np.asarray(model.predict_on_batch(features)))
        label_batches.append(labels.numpy())
    return (np.concatenate(prediction_batches, axis=0),
            np.concatenate(label_batches, axis=0))


def evaluate_model_lwlrap(model, train_dataset, val_dataset, model_name="modelo",
                          class_names=None):
    """Evaluate a model with LWLRAP on the training and validation datasets.

    Args:
        model: Model exposing `predict_on_batch`.
        train_dataset: Batched dataset of (features, labels) pairs.
        val_dataset: Batched dataset of (features, labels) pairs.
        model_name: Name shown in the report header.
        class_names: Optional sequence of class names, one per label column,
            used in the per-class report. Defaults to the module-level
            `all_labels`.

    Returns:
        Dict with the train/validation LWLRAP, the per-class LWLRAP and
        class weights of the validation set, and the predictions and labels
        of both datasets.
    """
    print(f"=== EVALUANDO {model_name.upper()} ===")

    # Predictions and labels are gathered in one pass per dataset; iterating
    # the dataset twice (once to predict, once to read labels) can pair
    # predictions with the wrong labels when the iteration order changes.
    print("Realizando predicciones...")
    pred_train, labels_train = _predict_with_labels(model, train_dataset)
    pred_val, labels_val = _predict_with_labels(model, val_dataset)

    # Sanity check on dimensions.
    print(f"Train: {labels_train.shape} labels, {pred_train.shape} predictions")
    print(f"Val: {labels_val.shape} labels, {pred_val.shape} predictions")

    # Overall LWLRAP.
    print("\n--- ENTRENAMIENTO ---")
    train_lwlrap = calculate_lwlrap_accumulator(labels_train, pred_train)
    print(f"LWLRAP Train: {train_lwlrap:.4f}")

    print("\n--- VALIDACIÓN ---")
    val_lwlrap = calculate_lwlrap_accumulator(labels_val, pred_val)
    print(f"LWLRAP Validation: {val_lwlrap:.4f}")

    # Per-class analysis on the validation set.
    print("\n--- ANÁLISIS POR CLASE ---")
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(labels_val, pred_val)

    if class_names is None:
        # Fall back to the vocabulary loaded at module level.
        class_names = all_labels

    # Rank classes from best to worst LWLRAP.
    class_performance = list(zip(class_names, per_class_lwlrap, weight_per_class))
    class_performance.sort(key=lambda x: x[1], reverse=True)

    print("Top 5 clases (mejor LWLRAP):")
    for i, (label, lwlrap, weight) in enumerate(class_performance[:5]):
        print(f"  {i+1}. {label}: {lwlrap:.4f} (peso: {weight:.4f})")

    # The list is sorted best-first, so within this group the last entry is
    # the worst class.
    print("\nBottom 5 clases (peor LWLRAP):")
    for i, (label, lwlrap, weight) in enumerate(class_performance[-5:]):
        print(f"  {i+1}. {label}: {lwlrap:.4f} (peso: {weight:.4f})")

    return {
        'train_lwlrap': train_lwlrap,
        'val_lwlrap': val_lwlrap,
        'per_class_lwlrap': per_class_lwlrap,
        'weight_per_class': weight_per_class,
        'train_predictions': pred_train,
        'val_predictions': pred_val,
        'train_labels': labels_train,
        'val_labels': labels_val
    }



=== LWLRAP CON ACCUMULATOR ===
Train LWLRAP (accumulator): 0.0631
Validation LWLRAP (accumulator): 0.0963
=== EVALUANDO MOBILENET-CURATED ===
Realizando predicciones...
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 177ms/step
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 211ms/step
Extrayendo labels...
Train: (3479, 80) labels, (3479, 80) predictions
Val: (745, 80) labels, (745, 80) predictions

--- ENTRENAMIENTO ---
LWLRAP Train: 0.0653

--- VALIDACIÓN ---
LWLRAP Validation: 0.0963

--- ANÁLISIS POR CLASE ---
Top 5 clases (mejor LWLRAP):
  1. Writing: 0.7381 (peso: 0.0105)
  2. Church_bell: 0.6339 (peso: 0.0210)
  3. Sink_(filling_or_washing): 0.5312 (peso: 0.0093)
  4. Water_tap_and_faucet: 0.4444 (peso: 0.0105)
  5. Computer_keyboard: 0.4444 (peso: 0.0187)

Bottom 5 clases (peor LWLRAP):
  1. Hi-hat: 0.0162 (peso: 0.0105)
  2. Accordion: 0.0159 (peso: 0.0082)
  3. Fart: 0.0156 (peso: 0.0082)
  4. Finger_snapping: 0.0152 (peso: 0.0117)
  5. Bu

# Link a tfrecords:
https://drive.google.com/drive/folders/1WAlJPGbOhqPaX79pPYzeOekEzAilskGO?usp=sharing

In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Lectura de datos
(CSV y tfrecords)

In [None]:
# Dataset root on Google Drive (Drive must already be mounted).
#base_dir = Path('/content/drive/MyDrive/taa-2025-freesound-audio-tagging')
base_dir = Path('/content/drive/MyDrive/Facultad/TAA/Proyecto2/taa-2025-freesound-audio-tagging') # Facundo's Drive layout

# Load the CSVs. Both training tables are shuffled with a fixed random_state
# so the row order is reproducible.
train_curated_df = pd.read_csv(base_dir / 'train_curated.csv')
train_curated_df = train_curated_df.sample(frac=1, random_state=42).reset_index(drop=True)

train_noisy_df   = pd.read_csv(base_dir / 'train_noisy.csv')
train_noisy_df   = train_noisy_df.sample(frac=1, random_state=42).reset_index(drop=True)

test_df          = pd.read_csv(base_dir / 'sample_submission_v24.csv')
vocab_df         = pd.read_csv(base_dir / 'vocabulary.csv', header=None)

# Build the label vocabulary from column 1 of vocabulary.csv (presumably the
# class names - verify against the file) and its name -> index mapping.
all_labels = vocab_df[1].tolist()
label_to_index = {label: i for i, label in enumerate(all_labels)}

In [None]:
# Row counts of each CSV, used later as dataset sizes when splitting.
# NOTE(review): assumes the tfrecords hold one example per CSV row - confirm.
size_train_curated = len(train_curated_df)
size_train_noisy = len(train_noisy_df)
size_test = len(test_df)

# Funciones para lectura de tfrecords


In [None]:
def parse_tfrecord_fn(example, num_classes):
  """Parse one serialized training example into (waveform, label).

  Args:
    example: Serialized tf.train.Example with an 'audio' bytes feature
      (WAV-encoded) and a 'label' int64 feature of length `num_classes`.
    num_classes: Length of the label vector.

  Returns:
    A pair (audio, label): the decoded waveform with its last axis squeezed
    out, and the int64 label vector.
  """
  schema = {
      'audio': tf.io.FixedLenFeature([], tf.string, default_value=''),
      'label': tf.io.FixedLenFeature([num_classes], tf.int64),
  }
  record = tf.io.parse_single_example(example, schema)

  # Decode the WAV bytes; the sample rate returned by decode_wav is unused.
  waveform, _unused_rate = tf.audio.decode_wav(record['audio'])
  # Drop the trailing channel axis.
  waveform = tf.squeeze(waveform, axis=-1)
  return waveform, record['label']

def get_tfrecord_dataset(tfrecord_path, num_classes):
  """Open a TFRecord file as a dataset of (waveform, label) pairs."""
  def _decode(serialized):
    return parse_tfrecord_fn(serialized, num_classes)

  records = tf.data.TFRecordDataset(str(tfrecord_path))
  return records.map(_decode)

def parse_tfrecord_test_fn(example):
  """Parse one serialized test example (audio only) into a waveform.

  Args:
    example: Serialized tf.train.Example with an 'audio' bytes feature
      (WAV-encoded).

  Returns:
    The decoded waveform with its last axis squeezed out.
  """
  schema = {
      'audio': tf.io.FixedLenFeature([], tf.string, default_value=''),
  }
  record = tf.io.parse_single_example(example, schema)

  # Decode the WAV bytes; the sample rate returned by decode_wav is unused.
  waveform, _unused_rate = tf.audio.decode_wav(record['audio'])
  return tf.squeeze(waveform, axis=-1)

def get_tfrecord_test_dataset(tfrecord_path):
  """Open a TFRecord file of unlabeled examples as a dataset of waveforms."""
  records = tf.data.TFRecordDataset(str(tfrecord_path))
  return records.map(parse_tfrecord_test_fn)

# Creación de datasets de audio


In [None]:
# Raw-audio datasets read from the tfrecords under base_dir. Each split is
# opened twice: from the "*_resampled" file and from the plain file (the
# preprocessing cell treats them as 16 kHz and 44.1 kHz respectively).
# Training datasets yield (waveform, label); test datasets yield waveform only.
curated_train_resampled_ds = get_tfrecord_dataset(base_dir / 'train_curated_resampled.tfrecord', len(all_labels))
curated_train_ds = get_tfrecord_dataset(base_dir / 'train_curated.tfrecord', len(all_labels))
noisy_train_resampled_ds = get_tfrecord_dataset(base_dir / 'train_noisy_resampled.tfrecord', len(all_labels))
noisy_train_ds = get_tfrecord_dataset(base_dir / 'train_noisy.tfrecord', len(all_labels))
test_resampled_ds = get_tfrecord_test_dataset(base_dir / 'test_resampled.tfrecord')
test_ds = get_tfrecord_test_dataset(base_dir / 'test.tfrecord')

# Funciones de preprocesado de datasets

In [None]:
def get_spectrogram_mfccs(waveform, sample_rate):
    """Turn a 1-D waveform into a (frames, 13, 1) MFCC tensor.

    The waveform is zero-padded or truncated to 25 seconds, converted to a
    magnitude STFT, projected onto an 80-band mel filter bank (0-4000 Hz),
    log-compressed, and reduced to its first 13 MFCCs.

    Args:
        waveform: 1-D audio tensor.
        sample_rate: Sampling rate of `waveform` in Hz.

    Returns:
        The MFCC tensor with a trailing channel axis of size 1.
    """
    # Fixed duration of 25 s (chosen to cover the 95th percentile of the
    # noisy set, per the original author's note).
    target_length = sample_rate * 25
    current_length = tf.shape(waveform)[0]

    def _pad_to_target():
        return tf.pad(waveform, [[0, target_length - current_length]])

    def _truncate_to_target():
        return waveform[:target_length]

    fixed_length = tf.cond(current_length < target_length,
                           _pad_to_target,
                           _truncate_to_target)

    # Magnitude STFT.
    stft = tf.signal.stft(
        fixed_length,
        frame_length=1024,
        frame_step=512,
        fft_length=1024,
        window_fn=tf.signal.hann_window,
    )
    magnitude = tf.abs(stft)

    # Mel filter bank matching the number of STFT bins.
    mel_filter_bank = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=80,
        num_spectrogram_bins=stft.shape[-1],
        sample_rate=sample_rate,
        lower_edge_hertz=0,
        upper_edge_hertz=4000,
    )

    # Apply the filter bank and restore the static shape lost by tensordot.
    mel = tf.tensordot(magnitude, mel_filter_bank, 1)
    mel.set_shape(magnitude.shape[:-1].concatenate(mel_filter_bank.shape[-1:]))

    # Log-magnitude mel spectrogram; the offset avoids log(0).
    log_mel = tf.math.log(mel + 1e-6)

    # Keep the first 13 MFCCs and add the channel axis.
    coefficients = tf.signal.mfccs_from_log_mel_spectrograms(log_mel)[..., :13]
    return tf.expand_dims(coefficients, axis=-1)

def preprocess_audio(audio, label, sample_rate=16000):
    """Map a labeled waveform to (MFCC features, label); the label is passed through."""
    return get_spectrogram_mfccs(audio, sample_rate), label

def preprocess_audio_test(audio, sample_rate=16000):
    """Map an unlabeled waveform to its MFCC features."""
    return get_spectrogram_mfccs(audio, sample_rate)


# Preprocesado / Obtención de Espectrogramas

In [None]:
# Compute MFCC features for every audio dataset. The "*_resampled" datasets
# are processed with sample_rate=16000 and the others with sample_rate=44100.
spect_curated_train_resampled_ds = curated_train_resampled_ds.map(lambda audio, label: preprocess_audio(audio, label, sample_rate=16000), num_parallel_calls=tf.data.AUTOTUNE)
spect_curated_train_ds = curated_train_ds.map(lambda audio, label: preprocess_audio(audio, label, sample_rate=44100), num_parallel_calls=tf.data.AUTOTUNE)
spect_noisy_train_resampled_ds = noisy_train_resampled_ds.map(lambda audio, label: preprocess_audio(audio, label, sample_rate=16000), num_parallel_calls=tf.data.AUTOTUNE)
spect_noisy_train_ds = noisy_train_ds.map(lambda audio, label: preprocess_audio(audio, label, sample_rate=44100), num_parallel_calls=tf.data.AUTOTUNE)
spect_test_resampled_ds = test_resampled_ds.map(lambda audio: preprocess_audio_test(audio, sample_rate=16000), num_parallel_calls=tf.data.AUTOTUNE)
spect_test_ds = test_ds.map(lambda audio: preprocess_audio_test(audio, sample_rate=44100), num_parallel_calls=tf.data.AUTOTUNE)

# Funciones auxiliares para Split y Mix de datasets

In [None]:
def split_dataset(dataset, size, train_frac=0.8):
    """Split a dataset into a leading training part and the remaining validation part.

    Args:
        dataset: Object exposing `take(n)` and `skip(n)`.
        size: Total number of elements in `dataset`.
        train_frac: Fraction of `size` assigned to training (truncated to an int).

    Returns:
        (train_ds, val_ds): the first `int(size * train_frac)` elements and
        everything after them.
    """
    boundary = int(size * train_frac)
    return dataset.take(boundary), dataset.skip(boundary)

def split_dataset_3way(dataset, size, train_frac=0.7, val_frac=0.15):
    """Split a dataset into consecutive train, validation and dev parts.

    Args:
        dataset: Object exposing `take(n)` and `skip(n)`.
        size: Total number of elements in `dataset`.
        train_frac: Fraction of `size` for training (truncated to an int).
        val_frac: Fraction of `size` for validation (truncated to an int).

    Returns:
        (train_ds, val_ds, dev_ds); the dev part holds whatever remains after
        the first two.
    """
    n_train = int(size * train_frac)
    n_val = int(size * val_frac)

    after_train = dataset.skip(n_train)
    return (
        dataset.take(n_train),
        after_train.take(n_val),
        dataset.skip(n_train + n_val),
    )

def mix_datasets(ds_curated, ds_noisy, proportion_curated=0.7, proportion_noisy=0.3,
                 batch_size=32, shuffle_buffer=1000, seed=42):
  """Sample from the curated and noisy datasets and return a batched mix.

  Args:
    ds_curated: Unbatched dataset of curated examples.
    ds_noisy: Unbatched dataset of noisy examples.
    proportion_curated: Sampling weight of `ds_curated`.
    proportion_noisy: Sampling weight of `ds_noisy`.
    batch_size: Batch size of the returned dataset.
    shuffle_buffer: Buffer size of the shuffle applied after mixing.
    seed: Seed for the sampling between the two datasets.

  Returns:
    The mixed dataset, shuffled, batched, cached and prefetched.
  """
  # Weighted sampling between the two sources.
  mixed_dataset = tf.data.Dataset.sample_from_datasets(
      [ds_curated, ds_noisy],
      weights=[proportion_curated, proportion_noisy],
      seed=seed
  )

  # Shuffle, batch, cache and prefetch, in that order.
  # NOTE(review): cache() comes after shuffle(), so the shuffled order is
  # presumably frozen once the cache is filled - confirm this is intended.
  batch_ds = (mixed_dataset
              .shuffle(shuffle_buffer)
              .batch(batch_size)
              .cache()
              .prefetch(tf.data.AUTOTUNE))
  return batch_ds


# Split de entrenamiento y validación. Batcheado y mezcla de datos.

In [None]:
batch_size = 32
# Train/validation split (80/20) of each feature dataset. Despite the
# "batch_" prefix, these datasets are still unbatched at this point.
batch_noisy_train_ds, batch_noisy_val_ds = split_dataset(spect_noisy_train_ds, size=size_train_noisy, train_frac=0.8)
batch_noisy_resampled_train_ds, batch_noisy_resampled_val_ds = split_dataset(spect_noisy_train_resampled_ds, size=size_train_noisy, train_frac=0.8)
batch_curated_train_ds, batch_curated_val_ds = split_dataset(spect_curated_train_ds, size=size_train_curated, train_frac=0.8)
batch_curated_resampled_train_ds, batch_curated_resampled_val_ds = split_dataset(spect_curated_resampled_ds if False else spect_curated_train_resampled_ds, size=size_train_curated, train_frac=0.8)

# Mixed curated/noisy datasets. The arguments are passed by keyword because
# mix_datasets expects (ds_curated, ds_noisy); passing them positionally as
# (noisy, curated) applied the curated weight to the noisy data and vice versa.
mixed_train_ds = mix_datasets(ds_curated=batch_curated_train_ds, ds_noisy=batch_noisy_train_ds)
mixed_val_ds = mix_datasets(ds_curated=batch_curated_val_ds, ds_noisy=batch_noisy_val_ds)

mixed_train_resampled_ds = mix_datasets(ds_curated=batch_curated_resampled_train_ds, ds_noisy=batch_noisy_resampled_train_ds)
mixed_val_resampled_ds = mix_datasets(ds_curated=batch_curated_resampled_val_ds, ds_noisy=batch_noisy_resampled_val_ds)

# Batching. Dataset.cache and Dataset.prefetch are added to reduce read
# latency while the model trains. Only the training splits are shuffled.
batch_noisy_train_ds = batch_noisy_train_ds.shuffle(1000).batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)
batch_noisy_val_ds = batch_noisy_val_ds.batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)

batch_noisy_resampled_train_ds = batch_noisy_resampled_train_ds.shuffle(1000).batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)
batch_noisy_resampled_val_ds = batch_noisy_resampled_val_ds.batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)

batch_curated_train_ds = batch_curated_train_ds.shuffle(1000).batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)
batch_curated_val_ds = batch_curated_val_ds.batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)

# NOTE(review): this one caches before batching, unlike the lines above.
batch_curated_resampled_train_ds = batch_curated_resampled_train_ds.shuffle(1000).cache().batch(batch_size).prefetch(tf.data.AUTOTUNE)
batch_curated_resampled_val_ds = batch_curated_resampled_val_ds.batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)




# Funciones de aumentado de datos

In [None]:
def mixup_individual(spectrogram, label, alpha=0.4):
    """Apply Mixup between one example and another drawn from a dataset.

    The partner example is read from the function attribute
    `mixup_individual.dataset`, which must be set beforehand to a dataset of
    (spectrogram, label) pairs.

    Args:
        spectrogram: Feature tensor of the example.
        label: Label tensor of the example.
        alpha: Concentration of the Beta(alpha, alpha) distribution from
            which the mixing coefficient is drawn.

    Returns:
        (mixed_spectrogram, mixed_label), both float32.

    Raises:
        AttributeError: If `mixup_individual.dataset` has not been set.
    """
    dataset_global = getattr(mixup_individual, "dataset", None)
    if dataset_global is None:
        raise AttributeError(
            "mixup_individual.dataset must be set to a dataset of "
            "(spectrogram, label) pairs before calling mixup_individual"
        )

    # Draw one partner example.
    # NOTE(review): shuffle(1000).take(1) presumably only ever picks from the
    # first 1000 elements of the dataset - confirm this is acceptable.
    other_spectrogram, other_label = next(iter(dataset_global.shuffle(1000).take(1)))

    # Cast everything to float32 so the blend is well defined.
    spectrogram = tf.cast(spectrogram, tf.float32)
    label = tf.cast(label, tf.float32)
    other_spectrogram = tf.cast(other_spectrogram, tf.float32)
    other_label = tf.cast(other_label, tf.float32)

    # Mixup draws lambda from Beta(alpha, alpha). A Beta sample is obtained
    # from two independent Gamma(alpha) samples as g1 / (g1 + g2); this is
    # always in [0, 1], so no clipping is needed. The previous code used a
    # single clipped Gamma sample, which is not a Beta distribution.
    gamma_first = tf.random.gamma(shape=[], alpha=alpha)
    gamma_second = tf.random.gamma(shape=[], alpha=alpha)
    # divide_no_nan guards the (rare) case where both samples underflow to 0.
    lam = tf.math.divide_no_nan(gamma_first, gamma_first + gamma_second)

    # Blend features and labels with the same coefficient.
    mixed_spectrogram = lam * spectrogram + (1.0 - lam) * other_spectrogram
    mixed_label = lam * label + (1.0 - lam) * other_label

    return mixed_spectrogram, mixed_label

def spec_augment(spectrogram, input_shape, freq_mask_param=15, time_mask_param=35, num_masks=2):
    """
    Apply SpecAugment-style masking: zero out random bands along axis 0 and axis 1.

    Args:
        spectrogram: Tensor compatible with shape (input_shape, 13, 1).
        input_shape: Expected size of axis 0 (an integer, not a shape tuple).
        freq_mask_param: Upper bound used when sampling the width of a mask
            along axis 0.
        time_mask_param: Upper bound used when sampling the width of a mask
            along axis 1.
        num_masks: Number of masking attempts per axis.

    Returns:
        The masked spectrogram.

    NOTE(review): `freq_max` is read from axis 0 and `time_max` from axis 1.
    For MFCC tensors laid out as (frames, 13, 1), axis 0 would be time and
    axis 1 the coefficient axis, so the "frequency"/"time" naming looks
    swapped - confirm against the feature extraction code.
    NOTE(review): a mask is only applied when both its width and its start
    offset are > 0, so masks starting at index 0 are never applied - confirm
    this is intended.
    """
    # Pin the static shape to (input_shape, 13, 1).
    spectrogram = tf.ensure_shape(spectrogram, [input_shape, 13, 1])

    # Sizes of axis 0 and axis 1.
    freq_max, time_max = tf.shape(spectrogram)[0], tf.shape(spectrogram)[1]

    # Cast to int32 to avoid dtype mismatches in the arithmetic below.
    freq_max = tf.cast(freq_max, tf.int32)
    time_max = tf.cast(time_max, tf.int32)

    # Masking along axis 0 ("frequency" masking in the original naming)
    for _ in range(num_masks):
        # Only mask when the axis has more than one element
        if tf.greater(freq_max, 1):
            # Cap the width bound at freq_max - 1
            f_param = tf.minimum(freq_mask_param, freq_max - 1)
            f_param = tf.maximum(f_param, 1)  # Keep the bound at least 1

            f = tf.random.uniform([], 0, f_param, dtype=tf.int32)
            f = tf.minimum(f, freq_max - 1)  # Keep f < freq_max

            # Sample the start offset so the mask fits inside the axis
            max_f0 = tf.maximum(freq_max - f, 1)
            f0 = tf.random.uniform([], 0, max_f0, dtype=tf.int32)
            f0 = tf.minimum(f0, freq_max - f)  # Keep f0 + f <= freq_max

            # Build the mask (ones / zeros / ones along axis 0) only when both
            # the width and the start offset are positive
            if tf.greater(f, 0) and tf.greater(f0, 0):
                mask = tf.concat([
                    tf.ones([f0, time_max, tf.shape(spectrogram)[2]], dtype=spectrogram.dtype),
                    tf.zeros([f, time_max, tf.shape(spectrogram)[2]], dtype=spectrogram.dtype),
                    tf.ones([freq_max - f0 - f, time_max, tf.shape(spectrogram)[2]], dtype=spectrogram.dtype)
                ], axis=0)
                spectrogram = spectrogram * mask

    # Masking along axis 1 ("time" masking in the original naming)
    for _ in range(num_masks):
        # Only mask when the axis has more than one element
        if tf.greater(time_max, 1):
            # Cap the width bound at time_max - 1
            t_param = tf.minimum(time_mask_param, time_max - 1)
            t_param = tf.maximum(t_param, 1)  # Keep the bound at least 1

            t = tf.random.uniform([], 0, t_param, dtype=tf.int32)
            t = tf.minimum(t, time_max - 1)  # Keep t < time_max

            # Sample the start offset so the mask fits inside the axis
            max_t0 = tf.maximum(time_max - t, 1)
            t0 = tf.random.uniform([], 0, max_t0, dtype=tf.int32)
            t0 = tf.minimum(t0, time_max - t)  # Keep t0 + t <= time_max

            # Build the mask (ones / zeros / ones along axis 1) only when both
            # the width and the start offset are positive
            if tf.greater(t, 0) and tf.greater(t0, 0):
                mask = tf.concat([
                    tf.ones([freq_max, t0, tf.shape(spectrogram)[2]], dtype=spectrogram.dtype),
                    tf.zeros([freq_max, t, tf.shape(spectrogram)[2]], dtype=spectrogram.dtype),
                    tf.ones([freq_max, time_max - t0 - t, tf.shape(spectrogram)[2]], dtype=spectrogram.dtype)
                ], axis=1)
                spectrogram = spectrogram * mask

    return spectrogram

def brightness_contrast_augmentation(spectrogram, input_shape, brightness_range=0.1, contrast_range=0.1):
    """
    Randomly shift ("brightness") and rescale around the mean ("contrast") a spectrogram.

    Args:
        spectrogram: Tensor compatible with shape (input_shape, 13, 1).
        input_shape: Expected size of axis 0 (an integer, not a shape tuple).
        brightness_range: The additive offset is drawn uniformly from
            [-brightness_range, brightness_range).
        contrast_range: The contrast factor is drawn uniformly from
            [1 - contrast_range, 1 + contrast_range).

    Returns:
        The adjusted spectrogram, clipped to [-10, 10].
    """
    # Pin the static shape to (input_shape, 13, 1).
    checked = tf.ensure_shape(spectrogram, [input_shape, 13, 1])

    # Brightness: add one random offset to every value.
    offset = tf.random.uniform([], minval=-brightness_range, maxval=brightness_range)
    shifted = checked + offset

    # Contrast: scale the deviation from the mean by a random factor.
    gain = tf.random.uniform([], minval=1 - contrast_range, maxval=1 + contrast_range)
    center = tf.reduce_mean(shifted)
    rescaled = (shifted - center) * gain + center

    # Clip to a wide range to avoid extreme values.
    return tf.clip_by_value(rescaled, clip_value_min=-10.0, clip_value_max=10.0)



In [None]:
# Helper para asegurar dtype y shape
def safe_spec_augment(x, y):
    """Apply spec_augment (axis-0 size 2152) to x and return it as float32 with y unchanged.

    The two prints report the shapes of the input and of the augmented tensor.
    """
    augmented = spec_augment(x, input_shape=2152)
    print("Before ensure_shape:", x.shape)
    print("After ensure_shape:", augmented.shape)
    # Re-assert the input's shape on the result, then fix the dtype.
    augmented = tf.cast(tf.ensure_shape(augmented, x.shape), tf.float32)
    return augmented, y

def safe_brightness_augment(x, y):
    """Apply brightness_contrast_augmentation (axis-0 size 2152) to x; return float32 with y unchanged.

    The two prints report the shapes of the input and of the augmented tensor.
    """
    augmented = brightness_contrast_augmentation(x, input_shape=2152)
    print("Before ensure_shape:", x.shape)
    print("After ensure_shape:", augmented.shape)
    # Re-assert the input's shape on the result, then fix the dtype.
    augmented = tf.cast(tf.ensure_shape(augmented, x.shape), tf.float32)
    return augmented, y


# Aumentado de datos

In [None]:
# Unbatch y aplicar augmentations seguras
# Unbatch the mixed training set and apply each augmentation per example.
spec_augmented_ds = mixed_train_ds.unbatch().map(
    safe_spec_augment, num_parallel_calls=tf.data.AUTOTUNE)

bright_augmented_ds = mixed_train_ds.unbatch().map(
    safe_brightness_augment, num_parallel_calls=tf.data.AUTOTUNE)

# Concatenate the two augmented copies.
augmented_ds = spec_augmented_ds.concatenate(bright_augmented_ds)

# Also append the original (non-augmented) examples, with their static shape
# pinned to (2152, 13, 1) so it matches the augmented copies.
original_ds = mixed_train_ds.unbatch().map(lambda x, y: (tf.ensure_shape(x, (2152, 13, 1)), y))

augmented_ds = augmented_ds.concatenate(original_ds)

# Shuffle, batch, cache and prefetch. The result holds three versions of
# every training example (spec-augmented, brightness-augmented, original).
augmented_ds = (augmented_ds
                .shuffle(1000)
                .batch(batch_size)
                .cache()
                .prefetch(tf.data.AUTOTUNE))


Before ensure_shape: (None, 13, 1)
After ensure_shape: (2152, 13, 1)
Before ensure_shape: (None, 13, 1)
After ensure_shape: (2152, 13, 1)


Cuento pasos por época para el entrenamiento

In [None]:
def count_samples_in_batched_dataset(dataset):
    """
    Count the samples of a dataset by summing the leading dimension of each element.

    For tuple elements only the first component is inspected. The whole
    dataset is iterated once.
    NOTE(review): for an unbatched dataset the leading dimension is the
    first axis of a single sample, not 1 - confirm before using it that way.
    """
    def _leading_dim(element):
        tensor = element[0] if isinstance(element, tuple) else element
        return int(tf.shape(tensor)[0].numpy())

    return sum(_leading_dim(element) for element in dataset)

# Revisión de shape
print("Especificaciones augmented_ds:")
print(augmented_ds.element_spec)


# Contar muestras correctamente
n_train = count_samples_in_batched_dataset(augmented_ds)
print(f"Steps per epoch (augmented): {n_train//32}")

Element specification of augmented_ds:
(TensorSpec(shape=(None, 2152, 13, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None, 80), dtype=tf.int64, name=None))
Number of training samples (augmented): 59484


# Definición de modelo

In [None]:
import tensorflow as tf

def build_efficientnetv2_b2_scratch(input_shape=(224, 224, 3), num_classes=80, dropout_rate=0.3, lr=1e-3):
    """Build and compile an EfficientNetV2-B2 multi-label classifier trained from scratch.

    The input is resized to 256x256. Single-channel inputs are replicated to
    three channels; three-channel inputs are passed through unchanged.

    Args:
        input_shape: Shape of one input example, (height, width, channels)
            with channels equal to 1 or 3.
        num_classes: Number of sigmoid outputs (labels).
        dropout_rate: Dropout rate applied before the output layer.
        lr: Learning rate of the Adam optimizer.

    Returns:
        The compiled `tf.keras.Model`.

    Raises:
        ValueError: If the channel dimension of `input_shape` is not 1 or 3.
    """
    channels = input_shape[-1]
    if channels not in (1, 3):
        raise ValueError(
            f"input_shape must have 1 or 3 channels, got {channels}"
        )

    inputs = tf.keras.layers.Input(shape=input_shape)
    x = tf.keras.layers.Resizing(256, 256)(inputs)
    if channels == 1:
        # The backbone expects 3 channels: replicate the single channel.
        x = tf.keras.layers.Concatenate(axis=-1)([x, x, x])

    # NOTE(review): include_preprocessing is left at its default; confirm the
    # built-in preprocessing is appropriate for these (non-image) features.
    base_model = tf.keras.applications.EfficientNetV2B2(
        include_top=False,
        weights=None,  # trained from scratch, no pretrained weights
        input_shape=(256, 256, 3),
    )

    # Do not hard-code training=True: that would keep the backbone's
    # BatchNormalization and dropout layers in training mode during
    # validation and prediction. Keras sets the mode itself.
    x = base_model(x)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dropout(dropout_rate)(x)
    outputs = tf.keras.layers.Dense(num_classes, activation='sigmoid')(x)

    model = tf.keras.Model(inputs, outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.AUC(name='auc', multi_label=True),
                 # Size the per-label counters to match the output layer.
                 F1Macro(num_labels=num_classes, threshold=0.25)
                 ])

    return model

# Entrenamiento y evaluación de modelo

Cálculo de la cantidad de pasos por época en los datasets base

In [None]:
# Haciendo cuentas....
# batch_curated_resampled_train
# Number of full batches of 32 in the 80/20 splits of each base dataset.
# Curated splits:
steps_per_epoch = int(size_train_curated*0.8) // 32
print(steps_per_epoch)
validation_steps = int(size_train_curated*0.2) // 32
print(validation_steps)
# Noisy splits:
steps_per_epoch_noisy = int(size_train_noisy*0.8) // 32
print(steps_per_epoch_noisy)
validation_steps_noisy = int(size_train_noisy*0.2) // 32
print(validation_steps_noisy)

# Mixed (curated + noisy) splits; values are hard-coded.
# The prints below now show the mixed values; they previously printed the
# noisy variables a second time.
steps_per_epoch_mixed = 619
print(steps_per_epoch_mixed)
validation_steps_mixed = 154
print(validation_steps_mixed)



124
31
495
123
495
123


In [None]:
num_labels = len(all_labels)
# Feature shapes produced by the preprocessing:
# 16khz = (780, 13, 1)
# 44.1khz = (2152, 13, 1)
input_shape = (2152, 13, 1)

# Size the output layer from the vocabulary instead of relying on the
# builder's default number of classes.
model = build_efficientnetv2_b2_scratch(input_shape, num_classes=num_labels, dropout_rate=0.2, lr=0.001)

EPOCHS = 25
# steps_per_epoch / validation_steps are intentionally not passed: both
# datasets are finite and not repeated, so Keras iterates each of them fully
# every epoch. Passing step counts for a non-repeated dataset makes Keras keep
# one iterator across epochs, which runs out of data after the first epoch.
history = model.fit(
      augmented_ds,
      validation_data=mixed_val_ds,
      epochs=EPOCHS,
      callbacks=[
          tf.keras.callbacks.EarlyStopping(verbose=1, patience=10, restore_best_weights=True),
          # AUC is better when higher. In the default 'auto' mode the callback
          # only infers "maximize" for monitors containing 'acc', so the
          # direction must be given explicitly.
          tf.keras.callbacks.ReduceLROnPlateau(monitor='val_auc', mode='max', factor=0.5, patience=3, verbose=1),
      ]
)

**Evaluación con lwlrap**

In [None]:
# Evaluate LWLRAP on the curated-resampled train and validation splits.
# NOTE(review): this cell scores `model_curated_resampled`, not the `model`
# fitted in the training cell -- confirm this is the intended model.
# The dev split (batch_curated_resampled_dev_ds) is currently not evaluated.
_train_split = batch_curated_resampled_train_ds
_val_split = batch_curated_resampled_val_ds

# Model scores for each split.
print("Realizando predicciones en conjunto de entrenamiento...")
pred_train = model_curated_resampled.predict(_train_split)

print("Realizando predicciones en conjunto de validación...")
pred_val = model_curated_resampled.predict(_val_split)

# Ground-truth labels, read in a separate pass over the same datasets.
# NOTE(review): presumably both passes visit the examples in the same order
# (no per-iteration reshuffling) -- confirm, otherwise scores and labels
# would not line up.
print("Extrayendo labels de entrenamiento...")
labels_train = extract_labels_from_dataset(_train_split)

print("Extrayendo labels de validación...")
labels_val = extract_labels_from_dataset(_val_split)

# Sanity check: predictions and labels should have matching shapes.
print(f"Predicciones train: {pred_train.shape}")
print(f"Labels train: {labels_train.shape}")
print(f"Predicciones val: {pred_val.shape}")
print(f"Labels val: {labels_val.shape}")

# Training split: overall LWLRAP as the weighted sum of the per-class values,
# cross-checked against the sklearn-based helper.
print("\n=== LWLRAP CONJUNTO DE ENTRENAMIENTO ===")
per_class_lwlrap_train, weight_per_class_train = calculate_per_class_lwlrap(
    labels_train, pred_train
)
_weighted_train = per_class_lwlrap_train * weight_per_class_train
train_lwlrap = np.sum(_weighted_train)
print(f"LWLRAP (método per-class): {train_lwlrap:.4f}")

train_lwlrap_sklearn = calculate_overall_lwlrap_sklearn(labels_train, pred_train)
print(f"LWLRAP (sklearn): {train_lwlrap_sklearn:.4f}")

# Validation split: same two computations.
print("\n=== LWLRAP CONJUNTO DE VALIDACIÓN ===")
per_class_lwlrap_val, weight_per_class_val = calculate_per_class_lwlrap(
    labels_val, pred_val
)
_weighted_val = per_class_lwlrap_val * weight_per_class_val
val_lwlrap = np.sum(_weighted_val)
print(f"LWLRAP (método per-class): {val_lwlrap:.4f}")

val_lwlrap_sklearn = calculate_overall_lwlrap_sklearn(labels_val, pred_val)
print(f"LWLRAP (sklearn): {val_lwlrap_sklearn:.4f}")

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : EfficientnetV2_b2 base d_out=0.2
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/facundo2588/proyecto-2/0211bf87a6ba423c85905f8f827f8b9d
[1;38;5;39mCOMET INFO:[0m   Others:
[1;38;5;39mCOMET INFO:[0m     Name             : EfficientnetV2_b2 base d_out=0.2
[1;38;5;39mCOMET INFO:[0m     notebook_url     : https://colab.research.google.com/notebook#fileId=1w-KT824iDPD7UAI_FyU7Z-aHhxcmXAzA
[1;38;5;39mCOMET INFO:[0m     trainable_params : 8882094
[1;38;5;39mCOMET INFO:[0m   Parameters:
[1;38;5;39mCOMET INFO:[0m     Adam_

Epoch 1/25
[1m1857/1857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m502s[0m 205ms/step - auc: 0.6360 - f1_macro: 0.0172 - loss: 0.0862 - val_auc: 0.7883 - val_f1_macro: 0.0336 - val_loss: 0.0744 - learning_rate: 0.0010
Epoch 2/25
[1m1857/1857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 33ms/step - auc: 0.5148 - f1_macro: 0.0292 - loss: 0.0577 - val_auc: 0.8042 - val_f1_macro: 0.0401 - val_loss: 0.0712 - learning_rate: 0.0010
Epoch 3/25
[1m1857/1857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m368s[0m 198ms/step - auc: 0.8097 - f1_macro: 0.0629 - loss: 0.0657 - val_auc: 0.6691 - val_f1_macro: 0.0367 - val_loss: 0.1049 - learning_rate: 0.0010
Epoch 4/25
[1m1857/1857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - auc: 0.5215 - f1_macro: 0.0975 - loss: 0.0533 - val_auc: 0.6833 - val_f1_macro: 0.0409 - val_loss: 0.1005 - learning_rate: 0.0010
Epoch 5/25
[1m1857/1857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m369s[0m 198ms/step - auc: 0.8448 - f1

# Función para guardado de csv final

In [None]:
# === FUNCIÓN PARA CREAR SUBMISSION ===
def save_submission(test_df, predictions, output_path):
    """Fill the class columns of ``test_df`` with ``predictions`` and save a CSV.

    Every column of ``test_df`` other than ``'fname'`` is treated as a class
    column; column ``i`` of ``predictions`` is written into the ``i``-th class
    column. ``test_df`` itself is not modified.

    Args:
        test_df: DataFrame with an ``'fname'`` column plus one column per class.
        predictions: 2-D array-like of shape (rows of ``test_df``, number of
            class columns). Anything ``np.asarray`` accepts is allowed.
        output_path: Destination passed to ``DataFrame.to_csv``.

    Returns:
        The filled copy of ``test_df`` that was written to ``output_path``.

    Raises:
        ValueError: If ``predictions`` is not 2-D, or its number of rows or
            columns does not match ``test_df``. All checks run before the
            file is written.
    """
    # Accept lists / tensors as well as ndarrays; the code below relies on
    # ndarray slicing and reductions.
    predictions = np.asarray(predictions)

    # Report the dimensions being matched.
    print(f"Test DF shape: {test_df.shape}")
    print(f"Predictions shape: {predictions.shape}")

    if predictions.ndim != 2:
        raise ValueError(
            f"Mismatch: predictions must be 2-D, got shape {predictions.shape}"
        )

    # Work on a copy so the caller's frame is left untouched.
    submission_df = test_df.copy()

    # Class columns are all columns except 'fname'.
    class_columns = [col for col in submission_df.columns if col != 'fname']

    # The number of classes must agree.
    if len(class_columns) != predictions.shape[1]:
        raise ValueError(f"Mismatch: {len(class_columns)} classes in test_df vs {predictions.shape[1]} in predictions")

    # The number of rows must agree too; fail with a clear message instead of
    # a pandas length error in the middle of the column assignment.
    if len(submission_df) != predictions.shape[0]:
        raise ValueError(
            f"Mismatch: {len(submission_df)} rows in test_df vs "
            f"{predictions.shape[0]} in predictions"
        )

    # Assign column by column so each class column takes the prediction dtype.
    for i, class_col in enumerate(class_columns):
        submission_df[class_col] = predictions[:, i]

    # Write the file.
    submission_df.to_csv(output_path, index=False)
    print(f"Archivo guardado en: {output_path}")

    # Summary statistics. max()/min() raise on an empty array, so skip them
    # when there is nothing to summarise rather than crash after saving.
    print(f"Archivos procesados: {len(submission_df)}")
    if predictions.size:
        print(f"Predicciones promedio: {predictions.mean():.4f}")
        print(f"Predicciones máximas: {predictions.max():.4f}")
        print(f"Predicciones mínimas: {predictions.min():.4f}")
    else:
        print("Sin predicciones: no hay estadísticas que mostrar")

    # Show the top-5 classes for the first (up to) three files.
    print("\n=== PRIMEROS 3 EJEMPLOS ===")
    for fname, sample_preds in zip(submission_df['fname'].head(3), predictions[:3]):
        top_classes = sample_preds.argsort()[-5:][::-1]  # highest score first

        print(f"Archivo: {fname}")
        print(f"Top 5 predicciones: {sample_preds[top_classes]}")
        print(f"Clases correspondientes: {[class_columns[idx] for idx in top_classes]}")
        print("-" * 40)

    return submission_df

# Predicciones test y creación de csv

In [None]:
# === Build the final submission CSV from the test-set predictions ===
print("=== CREANDO SUBMISSION FINAL ===")

# Score the test spectrograms in batches of 32.
_test_batches = spect_test_ds.batch(32)
prediction = model.predict(_test_batches)

# Fill the test frame with the scores and write it under base_dir.
output_path = str(base_dir / 'submission_final.csv')
submission_df = save_submission(
    test_df,
    prediction,
    output_path,
)

# Final sanity check of what was written.
print("\n=== VERIFICACIÓN FINAL ===")
print(f"Forma del archivo final: {submission_df.shape}")
print(f"Columnas: {list(submission_df.columns)[:5]}...")  # first 5 columns only
print(f"Archivo guardado en: {output_path}")