In [1]:
import torch
import torchvision

# Preprocessing applied to every image: convert to a tensor, then normalize
# the three channels with the fixed mean / std tuples below.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean = (0.4914, 0.4822, 0.4465), std = (0.2023, 0.1994, 0.2010)),
])

# Both splits share the same root directory, download flag and preprocessing;
# only the `train` flag differs.
cifar_kwargs = dict(root = "./datasets/cifar10", download = True, transform = transform)
tr_set = torchvision.datasets.CIFAR10(train = True, **cifar_kwargs)
te_set = torchvision.datasets.CIFAR10(train = False, **cifar_kwargs)

batch_size = 128

# The training loader shuffles its samples; the test loader keeps dataset order.
tr_data = torch.utils.data.DataLoader(dataset = tr_set, batch_size = batch_size, shuffle = True)
te_data = torch.utils.data.DataLoader(dataset = te_set, batch_size = batch_size, shuffle = False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./datasets/cifar10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./datasets/cifar10/cifar-10-python.tar.gz to ./datasets/cifar10
Files already downloaded and verified


In [2]:
# Device every model and batch in this notebook is moved to.
# Prefer the first CUDA GPU, but fall back to the CPU so the cells below do not
# fail on `.to(device)` when no CUDA device is available.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

def evaluate(model, data):
    """Return the top-1 accuracy of `model` over every batch yielded by `data`.

    Args:
        model: module called as `model(x)`; its output is arg-maxed over dim 1.
        data: iterable of `(x, y)` batches that also exposes `.dataset`, whose
            length is used as the denominator.

    Returns:
        Number of predictions equal to their label divided by `len(data.dataset)`.

    Note:
        The model is switched to eval mode and is left in that mode.
        Batches are moved to the module-level `device` before the forward pass.
    """
    model.eval()
    n_correct = 0
    with torch.no_grad():
        for inputs, labels in data:
            inputs = inputs.to(device)
            labels = labels.to(device)
            predicted = model(inputs).argmax(dim = 1)
            hits = (predicted == labels).sum()
            n_correct += hits.to("cpu").item()
    return n_correct / len(data.dataset)

def _fine_tune(loader, n_epochs):
    """Fine-tune a pretrained ResNet-50 on `loader` for `n_epochs` passes and return it.

    Uses cross-entropy loss and Adam with lr 1e-4; the network and every batch
    are moved to the module-level `device`.
    """
    net = torchvision.models.resnet50(pretrained = True).to(device)
    net.train()
    loss_fn = torch.nn.CrossEntropyLoss()
    opt = torch.optim.Adam(net.parameters(), lr = 1e-4)
    for _ in range(n_epochs):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            opt.zero_grad()
            batch_loss = loss_fn(net(inputs), labels)
            batch_loss.backward()
            opt.step()
    return net

# Maps each checkpoint path to the accuracy `evaluate` reported for it on te_data.
result = {}
for index in range(5):
    print("{0} - model learn".format(index + 1))

    # Each run starts again from the pretrained weights.
    model = _fine_tune(tr_data, 10)

    path = "model{0}.pth".format(index + 1)
    score = evaluate(model, te_data)
    print("score : {0:.4f}".format(score))
    result[path] = score
    # Only the weights are stored; `model` (the last one trained) and `result`
    # are read again by the soup cell further down.
    torch.save(model.state_dict(), path)
print("finish")

1 - model learn


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

score : 0.8247
2 - model learn
score : 0.8302
3 - model learn
score : 0.8376
4 - model learn
score : 0.8265
5 - model learn
score : 0.8274
finish


In [3]:
import os
import numpy as np
import model_soup

def metric(y_true, y_pred):
    """Accuracy callback handed to the greedy soup routines.

    Takes the arg-max of `y_pred` along its last axis, compares it element-wise
    with `y_true`, and returns the fraction of matches (matches / len(y_true))
    as a Python scalar.
    """
    predicted = y_pred.argmax(axis = -1)
    n_match = (y_true == predicted).sum()
    accuracy = n_match / len(y_true)
    return accuracy.to("cpu").item()

def _report(soup):
    """Evaluate `soup` on te_data, print the score line, and return the score."""
    accuracy = evaluate(soup, te_data)
    print("score : {0:.4f}".format(accuracy))
    return accuracy

# Keyword arguments shared by both greedy-soup calls.
# NOTE(review): te_data is passed to greedy_soup and is also the set the final
# score is reported on; if greedy_soup selects ingredients with it, the reported
# numbers are optimistic -- consider a separate validation split. TODO confirm.
greedy_kwargs = dict(metric = metric, device = device, compare = np.greater_equal)

print("[Original Performance]")
for ckpt_path, ckpt_score in result.items():
    print("[{0}] score:{1:.4f}".format(os.path.basename(ckpt_path), ckpt_score))

# original paper style
print("\n[Greedy Soup (uniform weight update) Performance]")
greedy_model = model_soup.torch.greedy_soup(model, list(result), te_data, update_greedy = False, **greedy_kwargs)
score = _report(greedy_model)

print("\n[Greedy Soup (greedy weight update) Performance]")
greedy_model = model_soup.torch.greedy_soup(model, list(result), te_data, update_greedy = True, **greedy_kwargs)
score = _report(greedy_model)

print("\n[Uniform Soup Performance]")
uniform_model = model_soup.torch.uniform_soup(model, list(result), device = device)
score = _report(uniform_model)

[Original Performance]
[model1.pth] score:0.8247
[model2.pth] score:0.8302
[model3.pth] score:0.8376
[model4.pth] score:0.8265
[model5.pth] score:0.8274

[Greedy Soup (uniform weight update) Performance]
[model1.pth] step: 79 - time: 3.75s - metric: 0.8247
[model2.pth] step: 79 - time: 3.48s - metric: 0.8519
[model3.pth] step: 79 - time: 3.50s - metric: 0.8570
[model4.pth] step: 79 - time: 3.76s - metric: 0.8569
[model5.pth] step: 79 - time: 3.98s - metric: 0.8532
model soup best score : 0.8570
score : 0.8570

[Greedy Soup (greedy weight update) Performance]
[model1.pth] step: 79 - time: 3.35s - metric: 0.8247
[model2.pth] step: 79 - time: 3.53s - metric: 0.8519
[model3.pth] step: 79 - time: 3.78s - metric: 0.8576
[model4.pth] step: 79 - time: 3.48s - metric: 0.8569
[model5.pth] step: 79 - time: 3.53s - metric: 0.8524
model soup best score : 0.8576
score : 0.8576

[Uniform Soup Performance]
score : 0.8546
