In [None]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision.datasets import CIFAR100
from torchvision.transforms.v2 import CutMix, MixUp
import torchvision.transforms as transforms
from torchvision.models import resnet18
from tqdm import tqdm
import numpy as np

In [None]:
class CIFAR100N(Dataset):
    """CIFAR-100 training images paired with labels read from a noise file.

    Images (and any transform applied to them) come from torchvision's
    ``CIFAR100`` training split. The label returned for index ``idx`` is
    ``noise_data['noisy_label'][idx]`` from ``noise_file``; the label stored
    in the torchvision dataset is discarded.

    Args:
        root: Directory handed to ``CIFAR100`` (data is downloaded there if missing).
        transform: Optional transform forwarded to ``CIFAR100``.
        noise_file: Path of a ``torch.save``-d mapping with a ``'noisy_label'`` entry.

    Raises:
        ValueError: If the number of labels differs from the number of images.
    """

    def __init__(self, root, transform=None, noise_file='./drive/MyDrive/data/CIFAR-100_human.pt'):
        self.cifar100 = CIFAR100(root=root, train=True, download=True, transform=transform)
        # The loading mode is spelled out instead of relying on torch.load's
        # default, which changed to weights_only=True in newer PyTorch releases
        # and can then reject non-tensor payloads (the label file presumably
        # stores NumPy arrays -- TODO confirm).
        # SECURITY: weights_only=False unpickles arbitrary objects; only point
        # noise_file at a file obtained from a trusted source.
        noise_data = torch.load(noise_file, weights_only=False)
        self.labels = noise_data['noisy_label']

        # Labels are matched to images purely by position, so a length mismatch
        # would silently pair images with the wrong labels.
        if len(self.labels) != len(self.cifar100):
            raise ValueError(
                f"noise file provides {len(self.labels)} labels "
                f"but the dataset has {len(self.cifar100)} images"
            )

    def __len__(self):
        """Number of samples, taken from the wrapped CIFAR-100 dataset."""
        return len(self.cifar100)

    def __getitem__(self, idx):
        """Return ``(image, label)`` where ``label`` comes from the noise file."""
        image, _ = self.cifar100[idx]
        label = self.labels[idx]
        return image, label


In [None]:
def _index_ordered_loader(loader):
    """Return a loader that yields ``loader.dataset`` in dataset-index order."""
    sequential = isinstance(loader.sampler, torch.utils.data.SequentialSampler)
    if sequential and loader.batch_size is not None:
        # Already visits samples in index order; reuse it (and its workers).
        return loader
    return DataLoader(
        loader.dataset,
        batch_size=loader.batch_size or 1,
        shuffle=False,
        num_workers=loader.num_workers,
        collate_fn=loader.collate_fn,
        pin_memory=loader.pin_memory,
    )


@torch.no_grad()
def divide_mix(model, loader, threshold=0.8, device="cuda"):
    """Split the dataset into clean and noisy samples based on model confidence.

    A sample is "clean" when the largest softmax probability the model assigns
    to it is strictly greater than ``threshold``; otherwise it is "noisy".
    The labels supplied by the loader are not consulted.

    The returned indices are positions in ``loader.dataset``. To guarantee
    that, samples are scored in dataset order: if ``loader`` shuffles (or uses
    any non-sequential sampler), an unshuffled loader over the same dataset is
    used instead, since batch position says nothing about dataset index when
    iteration order is random.

    Args:
        model: Network mapping an image batch to per-class logits.
        loader: ``DataLoader`` whose dataset should be split.
        threshold: Confidence above which a sample counts as clean.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        ``(clean_indices, noisy_indices, confidences)`` where the first two
        are lists of ``(dataset_index, predicted_class)`` tuples and
        ``confidences`` holds the max softmax probability per scored sample,
        in dataset order.
    """
    model.eval()
    clean_indices = []
    noisy_indices = []
    confidences = []

    offset = 0  # dataset index of the first sample in the current batch
    for images, _labels in _index_ordered_loader(loader):
        images = images.to(device)
        probs = nn.functional.softmax(model(images), dim=1)
        max_probs, preds = probs.max(dim=1)

        # One host transfer per tensor instead of several .item() calls per sample.
        is_clean = (max_probs > threshold).tolist()
        batch_preds = preds.tolist()
        batch_conf = max_probs.tolist()

        for j, (clean, pred) in enumerate(zip(is_clean, batch_preds)):
            bucket = clean_indices if clean else noisy_indices
            bucket.append((offset + j, pred))

        confidences.extend(batch_conf)
        offset += len(batch_conf)

    print(f"DivideMix Debug -> Clean: {len(clean_indices)}, Noisy: {len(noisy_indices)}")

    return clean_indices, noisy_indices, confidences

In [None]:
def _train_on_indices(model, loader, sample_ids, loss_scale, optimizer, criterion, device, augmentation):
    """Run one optimisation pass over ``loader.dataset[sample_ids]``; return (loss_sum, correct, seen)."""
    subset_loader = DataLoader(
        Subset(loader.dataset, sample_ids),
        batch_size=loader.batch_size,
        shuffle=True,
        num_workers=loader.num_workers
    )

    loss_sum = 0.0
    correct = 0
    seen = 0
    for images, labels in subset_loader:
        images, labels = images.to(device), labels.to(device)
        images, labels = augmentation(images, labels)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        if loss_scale != 1.0:
            loss = loss_scale * loss
        loss.backward()
        optimizer.step()

        # The (scaled) batch-mean loss is weighted by batch size so the caller
        # can turn the sum into a per-sample average.
        loss_sum += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        # The augmentation may return soft (2-D) labels; score against their argmax.
        hard_labels = labels.argmax(dim=1) if labels.ndim > 1 else labels
        correct += predicted.eq(hard_labels).sum().item()
        seen += labels.size(0)

    return loss_sum, correct, seen


def train_epoch_with_dividemix(model, loader, optimizer, criterion, device, augmentation, clean_indices, noisy_indices):
    """Train for one epoch, first on the clean split and then on the noisy split.

    Both splits are drawn from ``loader.dataset`` and reuse ``loader``'s batch
    size and worker count. The clean split is trained with the plain loss; the
    noisy split's loss is multiplied by 0.5 before back-propagation.

    Args:
        model: Network to train (switched to train mode here).
        loader: ``DataLoader`` providing the dataset, batch size and worker count.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.
        augmentation: Callable ``(images, labels) -> (images, labels)`` applied to each batch.
        clean_indices: Sequence of ``(dataset_index, _)`` pairs; only the index is used.
        noisy_indices: Sequence of ``(dataset_index, _)`` pairs; only the index is used.

    Returns:
        ``(mean_loss, accuracy_percent)`` over every sample processed. The
        noisy split contributes its already-halved loss to the mean. When
        both splits are empty nothing is trained and ``(0.0, 0.0)`` is
        returned.
    """
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0

    if len(clean_indices) == 0:
        print("No clean samples identified, skipping clean sample training.")
    if len(noisy_indices) == 0:
        print("No noisy samples identified, skipping noisy sample training.")

    for split, loss_scale in ((clean_indices, 1.0), (noisy_indices, 0.5)):
        if len(split) == 0:
            continue
        split_loss, split_correct, split_seen = _train_on_indices(
            model, loader, [idx for idx, _ in split], loss_scale,
            optimizer, criterion, device, augmentation
        )
        total_loss += split_loss
        correct += split_correct
        total += split_seen

    if total == 0:
        # Nothing was processed; avoid dividing by zero.
        return 0.0, 0.0

    return total_loss / total, 100.0 * correct / total

In [None]:
@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Score ``model`` on every batch of ``loader`` without tracking gradients.

    Args:
        model: Network mapping an input batch to per-class scores.
        loader: Iterable of ``(inputs, targets)`` batches.
        criterion: Loss called as ``criterion(outputs, targets)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy_percent)``: the batch losses averaged with
        batch-size weights, and the percentage of samples whose top-scoring
        class equals the target (or the target's argmax for 2-D targets).
    """
    model.eval()

    loss_sum = 0.0
    n_hits = 0
    n_seen = 0

    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        scores = model(inputs)
        batch_loss = criterion(scores, targets)
        loss_sum += batch_loss.item() * inputs.size(0)

        # Targets with more than one dimension are reduced to class indices.
        if targets.ndim > 1:
            hard_targets = targets.argmax(dim=1)
        else:
            hard_targets = targets

        top_class = scores.max(1)[1]
        n_hits += top_class.eq(hard_targets).sum().item()
        n_seen += targets.size(0)

    mean_loss = loss_sum / n_seen
    accuracy = 100.0 * n_hits / n_seen
    return mean_loss, accuracy


In [None]:
if __name__ == '__main__':
    import time  # imported once here rather than on every epoch iteration

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    num_epochs = 100
    best_acc = 0.0

    # Per-channel mean / std shared by the training and evaluation pipelines.
    normalize = transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))

    # Training pipeline: upscale to the 224-pixel size used below, then apply
    # random crop / flip augmentation.
    train_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.RandomCrop(224, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])
    # Evaluation pipeline: same resize and normalisation but no random
    # augmentation, so validation metrics are deterministic for a given model.
    eval_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        normalize
    ])

    train_dataset = CIFAR100N(root='./drive/MyDrive/data', transform=train_transform, noise_file='./drive/MyDrive/data/CIFAR-100_human.pt')
    test_dataset = CIFAR100(root='./drive/MyDrive/data', train=False, download=True, transform=eval_transform)

    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, pin_memory=True, num_workers=4,
                              persistent_workers=True, prefetch_factor=2)
    # Unshuffled view of the training set for the clean/noisy split, so that a
    # sample's position in the iteration equals its dataset index.
    split_loader = DataLoader(train_dataset, batch_size=128, shuffle=False, pin_memory=True, num_workers=4,
                              persistent_workers=True, prefetch_factor=2)
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, pin_memory=True, num_workers=4,
                             persistent_workers=True, prefetch_factor=2)

    # Start from pretrained weights and freeze every existing parameter.
    model = resnet18(weights="IMAGENET1K_V1")
    for param in model.parameters():
        param.requires_grad = False

    # Replace the classifier head with a fresh 100-way linear layer.
    model.fc = nn.Linear(model.fc.in_features, 100)

    # From here on only the last layer is trained.
    for param in model.fc.parameters():
        param.requires_grad = True

    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    # Hand the optimizer only the parameters that can actually receive gradients.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(trainable_params, lr=0.01, momentum=0.9, weight_decay=5e-4)
    # Tie the cosine schedule length to the number of epochs actually run.
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    cutmix = CutMix(num_classes=100, alpha=1.0)
    mixup = MixUp(num_classes=100, alpha=1.0)

    for epoch in range(num_epochs):
        start_time = time.time()
        print(f"Epoch {epoch + 1}/{num_epochs}")

        clean_indices, noisy_indices, confidences = divide_mix(model, split_loader, threshold=0.8, device=device)
        print(f"Clean samples: {len(clean_indices)}, Noisy samples: {len(noisy_indices)}")

        # One augmentation is picked per epoch with equal probability.
        augmentation = cutmix if np.random.rand() < 0.5 else mixup
        train_loss, train_acc = train_epoch_with_dividemix(
            model, train_loader, optimizer, criterion, device, augmentation, clean_indices, noisy_indices
        )

        val_loss, val_acc = evaluate(model, test_loader, criterion, device)
        scheduler.step()

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")
        print(f"Time taken: {time.time() - start_time:.2f}s")

        # Keep the weights of the best-scoring epoch so far.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), 'best_resnet_dividemix.pth')

    print(f"Best Validation Accuracy: {best_acc:.2f}%")

Files already downloaded and verified


  noise_data = torch.load(noise_file)


Files already downloaded and verified


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 200MB/s]


Epoch 1/100
DivideMix Debug -> Clean: 0, Noisy: 50000
Clean samples: 0, Noisy samples: 50000
No clean samples identified, skipping clean sample training.
Train Loss: 2.0050, Train Acc: 16.08%, Val Loss: 2.9781, Val Acc: 32.09%
Time taken: 72.58s
Epoch 2/100
DivideMix Debug -> Clean: 226, Noisy: 49774
Clean samples: 226, Noisy samples: 49774
Train Loss: 1.7835, Train Acc: 26.68%, Val Loss: 2.5690, Val Acc: 40.71%
Time taken: 70.30s
Epoch 3/100
DivideMix Debug -> Clean: 758, Noisy: 49242
Clean samples: 758, Noisy samples: 49242
Train Loss: 1.8311, Train Acc: 23.13%, Val Loss: 2.4440, Val Acc: 43.08%
Time taken: 70.07s
Epoch 4/100
DivideMix Debug -> Clean: 1290, Noisy: 48710
Clean samples: 1290, Noisy samples: 48710
Train Loss: 1.7429, Train Acc: 30.71%, Val Loss: 2.3536, Val Acc: 43.91%
Time taken: 70.03s
Epoch 5/100
DivideMix Debug -> Clean: 1483, Noisy: 48517
Clean samples: 1483, Noisy samples: 48517
Train Loss: 1.7302, Train Acc: 31.89%, Val Loss: 2.2666, Val Acc: 46.82%
Time taken: 7