In [1]:
import torch
import torchvision

# Preprocessing applied to every image: convert to a tensor, then normalize
# with three per-channel mean / std constants.
channel_mean = (0.4914, 0.4822, 0.4465)
channel_std = (0.2023, 0.1994, 0.2010)
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(channel_mean, channel_std),
])

# Train and test splits of CIFAR-10 share one root directory and the same
# preprocessing; both are downloaded on demand.
dataset_root = "./datasets/cifar10"
tr_set = torchvision.datasets.CIFAR10(
    root = dataset_root, train = True, download = True, transform = transform
)
te_set = torchvision.datasets.CIFAR10(
    root = dataset_root, train = False, download = True, transform = transform
)

# Only the training loader shuffles; the test loader keeps a fixed order.
loader_batch_size = 128
tr_data = torch.utils.data.DataLoader(tr_set, batch_size = loader_batch_size, shuffle = True)
te_data = torch.utils.data.DataLoader(te_set, batch_size = loader_batch_size, shuffle = False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./datasets/cifar10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./datasets/cifar10/cifar-10-python.tar.gz to ./datasets/cifar10
Files already downloaded and verified


In [2]:
# Train five ResNet-50 models with identical hyper-parameters, score each one
# on te_data, and save its weights so a later cell can combine them.

# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so the cell still runs (slowly) on machines without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
result = {}  # checkpoint path -> accuracy on te_data; read by a later cell
for index in range(5):
    print("{0} - model learn".format(index + 1))

    # Nothing in the loop body depends on `index` except the checkpoint name,
    # so the runs differ only through randomness (e.g. the shuffled batch
    # order). Seeding per run keeps them distinct yet repeatable.
    torch.manual_seed(index)

    # NOTE(review): newer torchvision releases prefer a `weights=` argument
    # over the `pretrained` flag; kept as-is so older installs keep working.
    model = torchvision.models.resnet50(pretrained = True).to(device)
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = 1e-4)
    for epoch in range(10):
        for x, y in tr_data:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            logits = model(x)
            loss = criterion(logits, y)
            loss.backward()
            optimizer.step()

    # Count correct top-1 predictions over every batch of te_data, then
    # divide by the dataset size to get the accuracy.
    score = 0
    model.eval()
    with torch.no_grad():
        for x, y in te_data:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            pred = logits.argmax(dim = 1)
            # .item() already returns a Python number; no explicit CPU copy needed.
            score += (pred == y).sum().item()
    score = score / len(te_data.dataset)

    print("score : {0:.4f}".format(score))
    path = "model{0}.pth".format(index + 1)
    result[path] = score
    torch.save(model.state_dict(), path)
print("finish")

1 - model learn
score : 0.8291
2 - model learn
score : 0.8337
3 - model learn
score : 0.8308
4 - model learn
score : 0.8355
5 - model learn
score : 0.8317
finish


In [3]:
import os
import numpy as np
import model_soup

# Recap the accuracy recorded for each individually trained checkpoint.
print("[Original Performance]")
for ckpt_path in result:
    ckpt_score = result[ckpt_path]
    print("[{0}] score:{1:.4f}".format(os.path.basename(ckpt_path), ckpt_score))

def metric(y_true, y_pred):
    """Return the fraction of entries in y_true matched by the arg-max of y_pred.

    The arg-max is taken over the last axis of y_pred, the matches are counted
    and divided by len(y_true), and the result is moved to the CPU and
    returned as a plain Python number.
    """
    predicted = y_pred.argmax(axis = -1)
    n_correct = (y_true == predicted).sum()
    accuracy = n_correct / len(y_true)
    return accuracy.to("cpu").item()

# Build the greedy soup from the saved checkpoints, then measure its accuracy
# over te_data.
# NOTE(review): te_data is handed to greedy_soup and is also the data scored
# below. If greedy_soup uses it to pick ingredients, the reported score is
# optimistic — confirm, and consider a separate validation split.
print("\n[Greedy Soup Performance]")
checkpoint_paths = list(result.keys())
greedy_model = model_soup.torch.greedy_soup(
    model,
    checkpoint_paths,
    te_data,
    metric = metric,
    compare = np.greater_equal,
    device = device,
)
greedy_model.eval()
n_correct = 0
with torch.no_grad():
    for images, labels in te_data:
        images, labels = images.to(device), labels.to(device)
        predicted = greedy_model(images).argmax(dim = 1)
        n_correct += (predicted == labels).sum().to("cpu").item()
score = n_correct / len(te_data.dataset)
print("score : {0:.4f}".format(score))


# Build the uniform soup from every saved checkpoint, then measure its
# accuracy over te_data.
print("\n[Uniform Soup Performance]")
uniform_model = model_soup.torch.uniform_soup(
    model,
    list(result.keys()),
    device = device,
)
uniform_model.eval()
hits = 0
with torch.no_grad():
    for batch_x, batch_y in te_data:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        top1 = uniform_model(batch_x).argmax(dim = 1)
        hits += (top1 == batch_y).sum().to("cpu").item()
score = hits / len(te_data.dataset)
print("score : {0:.4f}".format(score))

[Original Performance]
[model1.pth] score:0.8291
[model2.pth] score:0.8337
[model3.pth] score:0.8308
[model4.pth] score:0.8355
[model5.pth] score:0.8317

[Greedy Soup Performance]
[model1.pth] step: 79 - time: 3.49s - metric: 0.8291
[model2.pth] step: 79 - time: 3.43s - metric: 0.8517
[model3.pth] step: 79 - time: 3.46s - metric: 0.8534
[model4.pth] step: 79 - time: 3.43s - metric: 0.8546
[model5.pth] step: 79 - time: 3.46s - metric: 0.8533
model soup best score : 0.8546
score : 0.8546

[Uniform Soup Performance]
score : 0.8567
