In [18]:
import torch
print(torch.__version__)        # should show +cu128
print(torch.version.cuda)       # should match runtime CUDA version
print(torch.cuda.is_available())  # True if GPU detected

2.9.1+cu128
12.8
True


In [None]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

# ----------------- Config -----------------
CSV_FILE = "esc50.csv"
IMG_DIR = "dataset_jpg"
IMG_SIZE = 224
BATCH_SIZE = 16
NUM_EPOCHS = 5
DEVICE = torch.device("cuda")# if torch.cuda.is_available() else "cpu"

# ----------------- Leer CSV y crear labels -----------------
df = pd.read_csv(CSV_FILE)
categories = sorted(df['category'].unique())
cat2idx = {cat: i for i, cat in enumerate(categories)}
df['label'] = df['category'].map(cat2idx)

# ----------------- Split train/test -----------------
train_df = df[df["fold"] != 5].reset_index(drop=True)
test_df  = df[df["fold"] == 5].reset_index(drop=True)

# ----------------- Dataset -----------------
class SpectrogramDataset(Dataset):
    def __init__(self, df, img_dir, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row['filename'].replace(".wav", ".jpg"))
        img = Image.open(img_path).convert('RGB')
        label = row['label']

        if self.transform:
            img = self.transform(img)

        return img, label

# ----------------- Transformaciones -----------------
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
])

# ----------------- DataLoaders -----------------
train_dataset = SpectrogramDataset(train_df, IMG_DIR, transform=transform)
test_dataset  = SpectrogramDataset(test_df, IMG_DIR, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader  = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)


In [None]:
# ----------------- Modelo -----------------
model = models.resnet50(pretrained=True)
num_classes = len(categories)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# ----------------- Train / Test loops -----------------
def train_loop(dataloader, model, loss_fn, optimizer):
    model.train()
    running_loss = 0
    correct = 0
    total = 0

    for batch_idx, (X, y) in enumerate(dataloader):
        X, y = X.to(DEVICE), y.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(X)
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * X.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == y).sum().item()
        total += y.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

def test_loop(dataloader, model, loss_fn):
    model.eval()
    running_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(DEVICE), y.to(DEVICE)
            outputs = model(X)
            loss = loss_fn(outputs, y)
            running_loss += loss.item() * X.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == y).sum().item()
            total += y.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

# ----------------- Entrenamiento -----------------
for epoch in range(NUM_EPOCHS):
    train_loss, train_acc = train_loop(train_loader, model, criterion, optimizer)
    test_loss, test_acc = test_loop(test_loader, model, criterion)

    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
          f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
          f"Test Loss: {test_loss:.4f} Acc: {test_acc:.4f}")

print(f"\nðŸŽ¯ Final Test Accuracy: {test_acc:.4f} ({test_acc*100:.2f}%)")




Epoch [1/5] Train Loss: 2.7969 Acc: 0.3156 | Test Loss: 1.9100 Acc: 0.5100
Epoch [2/5] Train Loss: 1.2741 Acc: 0.6969 | Test Loss: 1.2825 Acc: 0.6375
