In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torch import optim
import torch.nn.init as init
from torch.autograd import Variable

import numpy as np

from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torchvision import models

from torchvision import utils
import matplotlib.pyplot as plt

import random
import math
from torchvision import transforms
import torchvision

#### Test Dataset

In [9]:
# Per-channel statistics handed to Normalize.
# NOTE(review): presumably the CIFAR-100 training-set mean/std in RGB order — confirm.
_NORM_MEAN = (0.5071, 0.4867, 0.4408)
_NORM_STD = (0.2675, 0.2565, 0.2761)

# Test-time preprocessing: tensor conversion followed by normalisation (no augmentation).
transformtest = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(_NORM_MEAN, _NORM_STD)]
)

# CIFAR-100 test split (train=False); downloaded into ./data when it is not already there.
test_data = torchvision.datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transformtest,
)

# Fixed order (shuffle=False) so repeated evaluation runs see the batches in the same sequence.
test_loader = DataLoader(dataset=test_data, batch_size=128, shuffle=False)

Files already downloaded and verified


### WideResNet

In [None]:
class BasicBlock(nn.Module):
    """Residual block: conv3x3 -> BN -> ReLU -> conv3x3 -> BN, plus a skip path, then ReLU.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: stride of the first 3x3 convolution (and of the projection
            on the skip path, when one is needed).

    The skip path is an empty ``nn.Sequential`` (identity) unless the stride
    is not 1 or the channel count changes, in which case it is a strided
    1x1 convolution followed by batch normalisation.
    """

    def __init__(self, in_planes, out_planes, stride=1):
        super().__init__()

        # Main path; only the first convolution carries the stride.
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_planes)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

        # Skip path: project only when the main path changes the tensor shape.
        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            skip_layers = [
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            ]
        else:
            skip_layers = []
        self.shortcut = nn.Sequential(*skip_layers)

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = self.conv1(x)
        hidden = torch.relu(self.bn1(hidden))
        hidden = self.bn2(self.conv2(hidden))
        hidden += self.shortcut(x)
        return torch.relu(hidden)

class WideResNet(nn.Module):
    """Three-stage wide residual network with a linear classifier head.

    Args:
        block: callable ``block(in_planes, out_planes, stride)`` returning an
            ``nn.Module`` (e.g. ``BasicBlock``).
        num_blocks: sequence of three ints, the number of blocks per stage.
        widen_factor: multiplier applied to the base widths 16 / 32 / 64.
        num_classes: size of the output layer.
    """

    def __init__(self, block, num_blocks, widen_factor=10, num_classes=100):
        super().__init__()
        stage_widths = (16 * widen_factor, 32 * widen_factor, 64 * widen_factor)

        # Running channel count; _make_layer reads and updates it while stacking blocks.
        self.in_planes = stage_widths[0]

        # Stem: a single 3x3 convolution that keeps the spatial size.
        self.conv1 = nn.Conv2d(3, self.in_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_planes)

        # Stage 1 keeps the resolution; stages 2 and 3 start with a stride-2 block.
        self.layer1 = self._make_layer(block, stage_widths[0], num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, stage_widths[1], num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, stage_widths[2], num_blocks[2], stride=2)

        self.linear = nn.Linear(stage_widths[2], num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack blocks for one stage; only the first block uses ``stride``."""
        per_block_strides = [stride]
        per_block_strides.extend([1] * (num_blocks - 1))

        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            # Every following block takes the stage width as its input width.
            self.in_planes = planes
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        features = torch.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        # Global average pooling over the two spatial axes.
        pooled = features.mean(dim=(2, 3))
        return self.linear(pooled)

### PyramidNet

In [8]:
class ShakeDropFunction(torch.autograd.Function):
    """Autograd function implementing the ShakeDrop perturbation.

    Forward, training: a single Bernoulli gate is drawn per call with keep
    probability ``1 - p_drop``. With the gate at 1 the input is returned
    unchanged; with the gate at 0 every sample is multiplied by its own
    scalar drawn uniformly from ``alpha_range``.
    Forward, evaluation: the input is scaled by ``1 - p_drop``.

    Backward mirrors the forward decision: identity when the gate was 1, an
    independent per-sample scalar from ``U(0, 1)`` when it was 0, and the
    constant ``1 - p_drop`` after an evaluation-mode forward.

    All random tensors are created on the device of the tensor they scale,
    so the function works on CPU as well as on CUDA.
    """

    @staticmethod
    def _per_sample_scale(ref, low, high):
        """Draw one ``U(low, high)`` scalar per sample, broadcast to ``ref``'s shape."""
        scale = torch.empty(ref.size(0), device=ref.device, dtype=ref.dtype).uniform_(low, high)
        # Shape (N, 1, ..., 1) so that each sample gets a single factor.
        return scale.view(ref.size(0), *([1] * (ref.dim() - 1))).expand_as(ref)

    @staticmethod
    def forward(ctx, x, training=True, p_drop=0.5, alpha_range=[-1, 1]):
        # Plain Python state is enough for backward; no tensors need saving.
        ctx.is_training = bool(training)
        ctx.keep_prob = 1 - p_drop
        ctx.gate_open = True

        if not training:
            return (1 - p_drop) * x

        # One gate for the whole batch, drawn on x's device.
        gate = torch.empty(1, device=x.device).bernoulli_(1 - p_drop)
        ctx.gate_open = gate.item() != 0
        if ctx.gate_open:
            return x
        return ShakeDropFunction._per_sample_scale(x, *alpha_range) * x

    @staticmethod
    def backward(ctx, grad_output):
        if not ctx.is_training:
            # Evaluation forward was a constant scaling, so is its gradient.
            grad_input = ctx.keep_prob * grad_output
        elif ctx.gate_open:
            grad_input = grad_output
        else:
            # Fresh noise, independent of the alpha used in forward.
            grad_input = ShakeDropFunction._per_sample_scale(grad_output, 0, 1) * grad_output
        # No gradients for training, p_drop and alpha_range.
        return grad_input, None, None, None

class ShakeDrop(nn.Module):
    """``nn.Module`` wrapper that applies ``ShakeDropFunction`` to its input.

    Args:
        p_drop: value forwarded to ``ShakeDropFunction`` as ``p_drop``.
        alpha_range: two-element range forwarded as ``alpha_range``.

    The module's own ``training`` flag selects the train / eval behaviour of
    the function, so ``model.train()`` / ``model.eval()`` control it.
    """

    def __init__(self, p_drop=0.5, alpha_range=[-1, 1]):
        super().__init__()
        self.p_drop, self.alpha_range = p_drop, alpha_range

    def forward(self, x):
        is_training = self.training
        return ShakeDropFunction.apply(x, is_training, self.p_drop, self.alpha_range)


class ShakeBasicBlock(nn.Module):
    """Pre-activation residual block with ShakeDrop on the branch and a zero-padded shortcut.

    Args:
        in_ch: number of input channels.
        out_ch: number of output channels (expected to be >= ``in_ch``).
        stride: stride of the first convolution; a stride of 2 also enables
            2x2 average pooling on the shortcut.
        p_shakedrop: drop probability handed to ``ShakeDrop``.

    The shortcut carries ``in_ch`` channels; the missing ``out_ch - in_ch``
    channels are filled with zeros before the residual addition.
    """

    def __init__(self, in_ch, out_ch, stride=1, p_shakedrop=1.0):
        super(ShakeBasicBlock, self).__init__()
        self.downsampled = stride == 2
        self.branch = self._make_branch(in_ch, out_ch, stride=stride)
        # The shortcut has no weights: identity, or average pooling when downsampling.
        self.shortcut = None if not self.downsampled else nn.AvgPool2d(2)
        self.shake_drop = ShakeDrop(p_shakedrop)

    def forward(self, x):
        """Return ``shake_drop(branch(x)) + zero_padded_shortcut(x)``."""
        h = self.branch(x)
        h = self.shake_drop(h)
        h0 = x if not self.downsampled else self.shortcut(x)

        extra_ch = h.size(1) - h0.size(1)
        if extra_ch > 0:
            # Allocate the padding with the shortcut's device and dtype, so the
            # block runs on CPU and under reduced precision instead of forcing
            # a float32 CUDA tensor.
            pad_zero = torch.zeros(
                h0.size(0), extra_ch, h0.size(2), h0.size(3),
                dtype=h0.dtype, device=h0.device,
            )
            h0 = torch.cat([h0, pad_zero], dim=1)
        return h + h0

    def _make_branch(self, in_ch, out_ch, stride=1):
        """Build the residual branch: BN -> conv3x3(stride) -> BN -> ReLU -> conv3x3 -> BN."""
        return nn.Sequential(
            nn.BatchNorm2d(in_ch),
            nn.Conv2d(in_ch, out_ch, 3, padding=1, stride=stride, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1, stride=1, bias=False),
            nn.BatchNorm2d(out_ch))

class ShakePyramidNet(nn.Module):
    """Pyramid-shaped residual network built from ``ShakeBasicBlock`` units.

    Args:
        depth: network depth; each of the three stages gets
            ``(depth - 2) // 6`` units.
        alpha: total number of channels added on top of the initial 16,
            spread evenly over all units (rounded up per unit).
        label: number of output classes.

    The ShakeDrop probability grows linearly with the unit index and reaches
    0.5 at the last unit.
    """

    def __init__(self, depth=110, alpha=270, label=100):
        super().__init__()
        in_ch = 16
        n_units = (depth - 2) // 6
        total_units = 3 * n_units

        # Channel schedule: entry i is the input width of unit i, entry i + 1 its output width.
        in_chs = [in_ch]
        for i in range(total_units):
            in_chs.append(in_ch + math.ceil((alpha / total_units) * (i + 1)))
        block = ShakeBasicBlock

        # u_idx is the running unit index consumed by _make_layer.
        self.in_chs, self.u_idx = in_chs, 0
        self.ps_shakedrop = [1 - (1.0 - (0.5 / total_units) * (i + 1)) for i in range(total_units)]

        self.c_in = nn.Conv2d(3, in_chs[0], 3, padding=1)
        self.bn_in = nn.BatchNorm2d(in_chs[0])
        # Only stages 2 and 3 begin with a stride-2 unit.
        self.layer1 = self._make_layer(n_units, block, 1)
        self.layer2 = self._make_layer(n_units, block, 2)
        self.layer3 = self._make_layer(n_units, block, 2)
        self.bn_out = nn.BatchNorm2d(in_chs[-1])
        self.fc_out = nn.Linear(in_chs[-1], label)

        # Weight initialisation, visiting modules in self.modules() order.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                # Normal init with std sqrt(2 / (kernel_h * kernel_w * out_channels)).
                fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Linear):
                module.bias.data.zero_()

    def forward(self, x):
        """Return class logits of shape ``(batch, label)``."""
        h = self.bn_in(self.c_in(x))
        for stage in (self.layer1, self.layer2, self.layer3):
            h = stage(h)
        h = F.relu(self.bn_out(h))
        # Fixed 8x8 pooling window — assumes an 8x8 final feature map
        # (i.e. 32x32 inputs); TODO confirm for other input sizes.
        h = F.avg_pool2d(h, 8)
        h = h.view(h.size(0), -1)
        return self.fc_out(h)

    def _make_layer(self, n_units, block, stride=1):
        """Build one stage of ``n_units`` blocks; only the first block uses ``stride``."""
        units = []
        unit_stride = stride
        for _ in range(int(n_units)):
            idx = self.u_idx
            units.append(block(self.in_chs[idx], self.in_chs[idx + 1], unit_stride, self.ps_shakedrop[idx]))
            self.u_idx = idx + 1
            unit_stride = 1
        return nn.Sequential(*units)

### Accuracy functions

In [None]:
def accuracy_topk(output, target, topk=(1,)):
    """Compute the precision@k (in percent) for each ``k`` in ``topk``.

    Args:
        output: score tensor of shape ``(batch, classes)``.
        target: tensor of ``batch`` ground-truth class indices.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        A list with one single-element tensor per ``k``, in the order of ``topk``.
    """
    largest_k = max(topk)
    n_samples = target.size(0)

    # Indices of the highest-scoring classes, transposed to (largest_k, batch)
    # so that row r holds every sample's rank-r prediction.
    top_indices = output.topk(largest_k, dim=1, largest=True, sorted=True)[1].t()
    hits = top_indices.eq(target.view(1, -1).expand_as(top_indices))

    percent_per_hit = 100.0 / n_samples
    return [
        hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(percent_per_hit)
        for k in topk
    ]


# CIFAR-100 superclass id -> the five fine-label ids that belong to it.
superclass_mapping = {
    0: [4, 30, 55, 72, 95],   # aquatic mammals
    1: [1, 32, 67, 73, 91],   # fish
    2: [54, 62, 70, 82, 92],  # flowers
    3: [9, 10, 16, 28, 61],   # food containers
    4: [0, 51, 53, 57, 83],   # fruit and vegetables
    5: [22, 39, 40, 86, 87],  # household electrical devices
    6: [5, 20, 25, 84, 94],   # household furniture
    7: [6, 7, 14, 18, 24],    # insects
    8: [3, 42, 43, 88, 97],   # large carnivores
    9: [12, 17, 37, 68, 76],  # large man-made outdoor things
    10: [23, 33, 49, 60, 71], # large natural outdoor scenes
    11: [15, 19, 21, 31, 38], # large omnivores and herbivores
    12: [34, 63, 64, 66, 75], # medium-sized mammals
    13: [26, 45, 77, 79, 99], # non-insect invertebrates
    14: [2, 11, 35, 46, 98],  # people
    15: [27, 29, 44, 78, 93], # reptiles
    16: [36, 50, 65, 74, 80], # small mammals
    17: [47, 52, 56, 59, 96], # trees
    18: [8, 13, 48, 58, 90],  # vehicles 1
    19: [41, 69, 81, 85, 89], # vehicles 2
}

# Inverse table built once at import: position = fine label, value = superclass id.
_FINE_TO_SUPERCLASS = {
    fine: super_class
    for super_class, classes in superclass_mapping.items()
    for fine in classes
}
_SUPERCLASS_OF_FINE = [
    _FINE_TO_SUPERCLASS.get(fine, -1) for fine in range(max(_FINE_TO_SUPERCLASS) + 1)
]


def get_superclass(label):
    """Return the superclass id that contains fine label ``label``, or ``None`` if there is none."""
    for super_class, classes in superclass_mapping.items():
        if label in classes:
            return super_class
    return None


def super_class_accuracy(output, target):
    """Percentage of samples whose predicted class falls in the target's superclass.

    Args:
        output: score tensor of shape ``(batch, classes)``; the prediction is
            the arg-max along dimension 1.
        target: integer tensor of ``batch`` fine labels.

    Returns:
        Accuracy in percent as a Python float; ``0.0`` for an empty batch.

    Raises:
        IndexError: if a predicted or target label lies beyond the largest
            fine label in ``superclass_mapping`` (on CPU tensors).
    """
    total = target.size(0)
    if total == 0:
        # Nothing to score; avoids dividing by zero below.
        return 0.0

    _, predicted = torch.max(output, 1)

    # One tensor lookup instead of a Python loop with an .item() call per sample.
    lookup = torch.tensor(_SUPERCLASS_OF_FINE, dtype=torch.long, device=output.device)
    pred_superclass = lookup[predicted]
    target_superclass = lookup[target.to(output.device)]

    correct = (pred_superclass == target_superclass).sum().item()

    accuracy = 100.0 * (correct / total)
    return accuracy

## Ensemble (soft voting: softmax over the weighted sum of both models' logits)

In [None]:
# Use the GPU when one is available; everything below follows this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)


# Placeholder checkpoint locations — replace "your_path" before running.
model1_path = "your_path/model1_best_model.pth"
model2_path = "your_path/model2_best_model.pth"

model1 = WideResNet(BasicBlock, [4, 4, 4], widen_factor=10).to(device)
model2 = ShakePyramidNet().to(device)


# map_location remaps the stored tensors onto `device`, so checkpoints saved
# from a CUDA run can still be loaded on a CPU-only machine.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
model1.load_state_dict(torch.load(model1_path, map_location=device))
model2.load_state_dict(torch.load(model2_path, map_location=device))


# Inference mode: fixes batch-norm statistics and switches ShakeDrop to its
# deterministic evaluation branch.
model1.eval()
model2.eval()

def evaluate_ensemble(model1, model2, dataloader, criterion, device):
    """Evaluate a fixed-weight two-model ensemble on ``dataloader``.

    For every batch the two models' outputs are combined as
    ``(0.4 * outputs1 + 0.6 * outputs2) / 2`` and turned into probabilities
    with a softmax. Accuracies are computed from those probabilities; the
    loss is computed from the combined logits, because ``criterion`` is
    expected to apply its own log-softmax (as ``nn.CrossEntropyLoss`` does).

    The models are used as given; the caller is responsible for putting them
    in evaluation mode. Gradients are disabled for the whole pass.

    Args:
        model1, model2: callables mapping an input batch to class logits.
        dataloader: iterable of ``(inputs, targets)`` batches.
        criterion: loss taking ``(logits, targets)``.
        device: device the batches are moved to.

    Returns:
        ``(avg_loss, top1_accuracy, top5_accuracy, superclass_accuracy)``,
        sample-weighted over all batches, accuracies in percent. All four
        values are NaN when the dataloader yields no samples.
    """
    total_loss = 0.0
    correct_top1 = 0
    correct_top5 = 0
    correct_superclass = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            batch_size = inputs.size(0)

            outputs1 = model1(inputs)
            outputs2 = model2(inputs)

            ensemble_logits = (0.4 * outputs1 + 0.6 * outputs2) / 2
            outputs_ensemble = F.softmax(ensemble_logits, dim=1)

            # Feed logits, not probabilities: the criterion normalises them
            # itself, so passing softmax output would apply softmax twice.
            loss = criterion(ensemble_logits, targets)
            total_loss += loss.item() * batch_size

            # Top-1 / Top-5: per-batch percentages, weighted by batch size.
            top1_acc, top5_acc = accuracy_topk(outputs_ensemble, targets, topk=(1, 5))
            correct_top1 += top1_acc.item() * batch_size
            correct_top5 += top5_acc.item() * batch_size

            # Superclass accuracy, weighted the same way.
            superclass_acc = super_class_accuracy(outputs_ensemble, targets)
            correct_superclass += superclass_acc * batch_size

            total += targets.size(0)

    if total == 0:
        # No samples: the metrics are undefined rather than a division error.
        nan = float('nan')
        return nan, nan, nan, nan

    avg_loss = total_loss / total
    top1_accuracy = correct_top1 / total
    top5_accuracy = correct_top5 / total
    superclass_accuracy = correct_superclass / total

    return avg_loss, top1_accuracy, top5_accuracy, superclass_accuracy


# Run the two-model ensemble over the test loader, then report the three accuracies (in percent).
ensemble_metrics = evaluate_ensemble(model1, model2, test_loader, criterion, device)
test_loss, test_top1_acc, test_top5_acc, test_superclass_acc = ensemble_metrics

print(f'Test Top-1 Accuracy: {test_top1_acc:.2f}%')
print(f'Test Top-5 Accuracy: {test_top5_acc:.2f}%')
print(f'Test Super-Class Accuracy: {test_superclass_acc:.2f}%')