In [2]:
import os
import pandas as pd
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, Subset
from sklearn.model_selection import train_test_split
import torch
from torch.optim import Adam
import torch.nn as nn
from tqdm import tqdm
from sklearn.metrics import f1_score, roc_auc_score, hamming_loss, accuracy_score
import warnings
from sklearn.exceptions import UndefinedMetricWarning

In [3]:
class AudioDataset(Dataset):
    def __init__(self, csv_path, img_dir, transform=None):
        self.data = pd.read_csv(csv_path)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img_path = os.path.join(self.img_dir, row["filename"])
        img = Image.open(img_path).convert("RGB")

        if self.transform:
            img = self.transform(img)

        # Convertir etiquetas a flotante
        labels = torch.tensor(row.iloc[1:].astype(float).values, dtype=torch.float32)

        return img, labels

In [4]:
def mostrar_predicciones(model, dataloader, device="cuda"):
    model.eval()  # Cambiar a modo de evaluación
    inputs, labels = next(iter(dataloader))  # Obtener el primer batch
    inputs, labels = inputs.to(device), labels.to(device)

    with torch.no_grad():
        outputs = model(inputs)
        preds = (outputs >= 0.5).float()  # Aplicar umbral para obtener predicciones binarias

    # Mostrar las primeras 5 predicciones
    for i in range(min(5, len(inputs))):  # Asegurarse de no exceder el batch
        print(f"Predicción: {preds[i].cpu().numpy()}, Real: {labels[i].cpu().numpy()}")

In [5]:
class EarlyStopping:
    def __init__(self, patience=3, delta=0.001):
        self.patience = patience
        self.delta = delta
        self.best_loss = None
        self.counter = 0

    def __call__(self, val_loss):
        if self.best_loss is None or val_loss < self.best_loss - self.delta:
            self.best_loss = val_loss
            self.counter = 0
            return False  # No detener
        else:
            self.counter += 1
            if self.counter >= self.patience:
                return True  # Detener
        return False

In [6]:
class BasicBlock(nn.Module):
    def __init__(self, in_chan, out_chan):
        super(BasicBlock, self).__init__()

        projection = not (in_chan == out_chan)
        stride = 2 if projection else 1

        self.conv1 = nn.Conv2d(in_chan, out_chan, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_chan)

        self.conv2 = nn.Conv2d(out_chan, out_chan, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_chan)

        if projection:
            self.proj = nn.Sequential(
                nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=2, bias=False),
                nn.BatchNorm2d(out_chan)
            )
        else:
            self.proj = nn.Identity()

        self.ReLU = nn.ReLU(inplace=True)

    def forward(self, x):
        z = self.conv1(x)
        z = self.bn1(z)
        z = self.ReLU(z)

        z = self.conv2(z)
        z = self.bn2(z)
        z = self.ReLU(z)

        # Projection
        x = self.proj(x)
        return x + z

In [7]:
class ResNet34(nn.Module):
    def __init__(self, feature_dim=512):
        super(ResNet34, self).__init__()

        self.layer0 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer1 = nn.Sequential(
            BasicBlock(64, 64),
            BasicBlock(64, 64),
            BasicBlock(64, 64),
        )
        self.layer2 = nn.Sequential(
            BasicBlock(64, 128),
            BasicBlock(128, 128),
            BasicBlock(128, 128),
            BasicBlock(128, 128),
        )
        self.layer3 = nn.Sequential(
            BasicBlock(128, 256),
            BasicBlock(256, 256),
           
        )
        self.layer4 = nn.Sequential(
            BasicBlock(256, 512),
            BasicBlock(512, 512),
            
        )

        self.AvgPool = nn.AdaptiveAvgPool2d((1, 1))
        self.feature_dim = feature_dim

    def forward(self, x):
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.AvgPool(x)
        x = torch.flatten(x, 1)  # [n_batch, feature_dim]
        return x

In [8]:
class CNN(nn.Module):
    def __init__(self, n_classes):
        super(CNN, self).__init__()
        self.resnet = ResNet34(feature_dim=512)  # ResNet ajustada previamente
        self.fc = nn.Linear(512, n_classes)  # Clasificación final

    def forward(self, x):
        # x: [batch_size, channels, height, width]
        features = self.resnet(x)  # [batch_size, feature_dim]
        output = self.fc(features)  # [batch_size, n_classes]
        return output

In [9]:

def compute_metrics(y_true, y_pred, threshold=0.5):
   
    # Convertir tensores a numpy
    if isinstance(y_true, torch.Tensor):
        y_true = y_true.cpu().numpy()
    if isinstance(y_pred, torch.Tensor):
        y_pred = y_pred.cpu().numpy()
    
    # Aplicar umbral a las predicciones
    y_pred_binary = (y_pred >= threshold).astype(int)

    metrics = {}

    # Cálculo del F1-Score
    metrics["F1-Score (micro)"] = f1_score(y_true, y_pred_binary, average="micro", zero_division=1)
    metrics["F1-Score (macro)"] = f1_score(y_true, y_pred_binary, average="macro", zero_division=1)

    # Cálculo del ROC-AUC, omitiendo clases sin valores positivos
    try:
        metrics["ROC-AUC (macro)"] = roc_auc_score(y_true, y_pred, average="macro", multi_class="ovo")
    except ValueError as e:
        warnings.warn(str(e), UndefinedMetricWarning)
        metrics["ROC-AUC (macro)"] = float("nan")

    # Hamming Loss y Exact Match Ratio
    metrics["Hamming Loss"] = hamming_loss(y_true, y_pred_binary)
    metrics["Exact Match Ratio"] = accuracy_score(y_true, y_pred_binary)

    return metrics

In [10]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, patience=3, device='cuda'):
    model.to(device)
    best_loss = float('inf')  # Para rastrear el mejor val_loss
    patience_counter = 0  # Contador para Early Stopping

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        print("-" * 10)

        for phase in ["train", "val"]:
            if phase == "train":
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            all_labels = []
            all_preds = []

            for inputs, labels in tqdm(dataloaders[phase], desc=f"{phase} Epoch {epoch+1}"):
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == "train"):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    if phase == "train":
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)

                # Guardar predicciones y etiquetas para métricas
                all_labels.append(labels)
                all_preds.append(outputs)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            print(f"{phase} Loss: {epoch_loss:.4f}")

            # Calcular métricas en la fase de validación
            if phase == "val":
                all_labels = torch.cat(all_labels, dim=0)
                all_preds = torch.cat(all_preds, dim=0)
                metrics = compute_metrics(all_labels, all_preds)
                for metric_name, metric_value in metrics.items():
                    print(f"{metric_name}: {metric_value:.4f}")

                # Early Stopping: Verificar si el val_loss mejora
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    patience_counter = 0  # Reiniciar contador si mejora
                    print("Validation loss improved. Saving model...")
                    torch.save(model.state_dict(), "best_model.pth")  # Guardar mejor modelo
                else:
                    patience_counter += 1
                    print(f"No improvement. Patience counter: {patience_counter}/{patience}")

                # Detener el entrenamiento si la paciencia se agota
                if patience_counter >= patience:
                    print("Early stopping triggered. Stopping training.")
                    return model

        # Mostrar predicciones al final de cada época
        print("Mostrando predicciones:")
        mostrar_predicciones(model, dataloaders["val"], device)

    return model

In [11]:
def compute_class_weights(data_csv, n_classes):
    data = pd.read_csv(data_csv)
    class_counts = data.iloc[:, 1:].sum().values  # Conteo de ejemplos por clase
    total_samples = len(data)

    # Reemplazar 0 por 1 para evitar división por cero
    class_counts[class_counts == 0] = 1
    class_weights = total_samples / (n_classes * class_counts)

    return torch.tensor(class_weights, dtype=torch.float)

In [12]:
# Transformaciones para las imágenes
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalización básica
])

In [13]:
# Crear dataset
csv_path = "./SoundAnimals_Train/train_spectograms.csv"
img_dir = "./SoundAnimals_Train/train_redimensionadas"

In [14]:
data = pd.read_csv(csv_path)

# Asegurarte de que las etiquetas sean numéricas
print(data.dtypes)  # Revisa que las columnas sean float o int
print(data.isna().sum())

filename    object
SPHSUR       int64
BOABIS       int64
SCIPER       int64
DENNAH       int64
LEPLAT       int64
RHIICT       int64
BOALEP       int64
BOAFAB       int64
PHYCUV       int64
DENMIN       int64
ELABIC       int64
BOAPRA       int64
DENCRU       int64
BOALUN       int64
BOAALB       int64
PHYMAR       int64
PITAZU       int64
PHYSAU       int64
LEPFUS       int64
DENNAN       int64
PHYALB       int64
LEPLAB       int64
SCIFUS       int64
BOARAN       int64
SCIFUV       int64
AMEPIC       int64
LEPPOD       int64
ADEDIP       int64
ELAMAT       int64
PHYNAT       int64
LEPELE       int64
RHISCI       int64
SCINAS       int64
LEPNOT       int64
ADEMAR       int64
BOAALM       int64
PHYDIS       int64
RHIORN       int64
LEPFLA       int64
SCIRIZ       int64
DENELE       int64
SCIALT       int64
dtype: object
filename    0
SPHSUR      0
BOABIS      0
SCIPER      0
DENNAH      0
LEPLAT      0
RHIICT      0
BOALEP      0
BOAFAB      0
PHYCUV      0
DENMIN      0
ELABIC      0
B

In [15]:

dataset = AudioDataset(csv_path, img_dir, transform=transform)

In [16]:
# División 70%-30% (entrenamiento y validación)
train_indices, val_indices = train_test_split(
    range(len(dataset)),
    test_size=0.3,
    random_state=42,
    stratify=dataset.data.iloc[:, 1:].sum(axis=1) > 0  # Estratificación basada en etiquetas
)
train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)



In [17]:
print(f"Número de ejemplos en el dataset: {len(dataset)}")
print(f"Ejemplos en entrenamiento: {len(train_dataset)}")
print(f"Ejemplos en validación: {len(val_dataset)}")

Número de ejemplos en el dataset: 62191
Ejemplos en entrenamiento: 43533
Ejemplos en validación: 18658


In [18]:
# Crear DataLoaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=0)
dataloaders = {"train": train_loader, "val": val_loader}

In [19]:
n_classes = 42  # Número de clases
class_weights = compute_class_weights(csv_path, n_classes)

In [20]:
model = CNN(n_classes=n_classes)

In [21]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
criterion = nn.BCEWithLogitsLoss(pos_weight=class_weights)

In [22]:
# Entrenar el modelo
device = "cuda" if torch.cuda.is_available() else "cpu"

trained_model = train_model(
    model=model,
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
    patience=3,  # Early stopping después de 3 épocas sin mejora
    device=device
)

Epoch 1/10
----------


train Epoch 1: 100%|██████████| 5442/5442 [19:53<00:00,  4.56it/s]


train Loss: 0.0834


val Epoch 1: 100%|██████████| 2333/2333 [03:20<00:00, 11.61it/s]


val Loss: 0.0812




F1-Score (micro): 0.0847
F1-Score (macro): 0.1029
ROC-AUC (macro): nan
Hamming Loss: 0.0346
Exact Match Ratio: 0.3643
Validation loss improved. Saving model...
Mostrando predicciones:
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0.]
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0

train Epoch 2: 100%|██████████| 5442/5442 [24:00<00:00,  3.78it/s]


train Loss: 0.0696


val Epoch 2: 100%|██████████| 2333/2333 [04:05<00:00,  9.49it/s]


val Loss: 0.0650




F1-Score (micro): 0.1491
F1-Score (macro): 0.2023
ROC-AUC (macro): nan
Hamming Loss: 0.0340
Exact Match Ratio: 0.3678
Validation loss improved. Saving model...
Mostrando predicciones:
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0.]
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0

train Epoch 3: 100%|██████████| 5442/5442 [20:30<00:00,  4.42it/s]  


train Loss: 0.0571


val Epoch 3: 100%|██████████| 2333/2333 [03:27<00:00, 11.23it/s]


val Loss: 0.0510




F1-Score (micro): 0.4315
F1-Score (macro): 0.3521
ROC-AUC (macro): nan
Hamming Loss: 0.0272
Exact Match Ratio: 0.4092
Validation loss improved. Saving model...
Mostrando predicciones:
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0.]
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0

train Epoch 4: 100%|██████████| 5442/5442 [25:52<00:00,  3.50it/s]    


train Loss: 0.0516


val Epoch 4: 100%|██████████| 2333/2333 [03:19<00:00, 11.70it/s]


val Loss: 0.0533




F1-Score (micro): 0.3974
F1-Score (macro): 0.4017
ROC-AUC (macro): nan
Hamming Loss: 0.0298
Exact Match Ratio: 0.3926
No improvement. Patience counter: 1/3
Mostrando predicciones:
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0.]
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Predicción: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], Real: [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

train Epoch 5:   5%|▍         | 267/5442 [01:00<19:31,  4.42it/s]


KeyboardInterrupt: 