In [9]:
import torch
print(torch.__version__)        # should show +cu128
print(torch.version.cuda)       # should match runtime CUDA version
print(torch.cuda.is_available())  # True if GPU detected

2.9.1+cu128
12.8
True


In [None]:
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# --- Configuraci√≥n ---
CSV_FILE = "esc50.csv"
IMG_DIR = "dataset_jpg"
IMG_SIZE = 224  # ResNet est√°ndar
BATCH_SIZE = 16

# --- Leer CSV ---
df = pd.read_csv(CSV_FILE)

# Mapear cada categor√≠a a un √≠ndice
categories = sorted(df['category'].unique())
cat2idx = {cat: i for i, cat in enumerate(categories)}
df['label'] = df['category'].map(cat2idx)

# --- Dataset personalizado ---
class SpectrogramDataset(Dataset):
    def __init__(self, df, img_dir, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row['filename'].replace(".wav", ".jpg"))
        img = Image.open(img_path).convert('RGB')  # 3 canales
        label = row['label']

        if self.transform:
            img = self.transform(img)

        return img, label

# --- Transformaciones ---
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    
])

dataset = SpectrogramDataset(df, IMG_DIR, transform=transform)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)


In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

device = torch.device("cuda")#if torch.cuda.is_available() else "cpu"
print("Device:", device)


final_acc = 0.0 
# Cargar ResNet50 preentrenado
model = models.resnet50(pretrained=True)

# Cambiar la capa final seg√∫n n√∫mero de clases
num_classes = len(categories)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for imgs, labels in dataloader:
        imgs = imgs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * imgs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    final_acc = epoch_acc 
    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")
print(f"\nüéØ Accuracy final despu√©s de {num_epochs} epochs: {final_acc:.4f} ({final_acc*100:.2f}%)")


Device: cuda




Epoch [1/5] Loss: 2.7736 Acc: 0.3595
Epoch [2/5] Loss: 1.1354 Acc: 0.7465
Epoch [3/5] Loss: 0.5036 Acc: 0.9000
Epoch [4/5] Loss: 0.2462 Acc: 0.9575
Epoch [5/5] Loss: 0.1207 Acc: 0.9815

üéØ Accuracy final despu√©s de 5 epochs: 0.9815 (98.15%)
