In [None]:
import pandas as pd
import glob
import re

# Summarise a sweep: one row per run directory, holding the temperature
# setting parsed from the directory name and the best accuracy in its log.
# NOTE(review): without recursive=True, glob treats '**' like '*', so this
# only finds progress.csv exactly one directory below the sweep root.
# Pass recursive=True if deeper nesting is intended.
results = glob.glob('test_run/vgg16aw_ucb_loss/**/progress.csv')
tems = []
accs = []

for result in results:
    # Parent directory of progress.csv; split on either path separator so the
    # lookup also works when glob returns backslash-separated paths.
    t = re.split(r'[\\/]', result)[-2]
    match = re.search(r't0=\S+t1=\S+t2=\S+t3=\S+t4=\S{5}', t)
    if match is None:
        # Directory name does not follow the t0=..t4=.. scheme; report it
        # instead of crashing on `None.group()`.
        print(f'skipping {result}: no temperature setting in directory name')
        continue

    f = pd.read_csv(result)
    tem = match.group()
    tems.append(tem)
    # Series.max() ignores NaN rows and yields NaN for an empty log, whereas
    # the builtin max() is order-dependent with NaN and raises on empty input.
    accs.append(f['accuracy'].max())

df = pd.DataFrame({'temperature': tems, 'accuracy': accs})

# Best run first.
df = df.sort_values(by=['accuracy'], ascending=False)
df


In [1]:
import numpy as np
import torch
import torch.nn as nn
from models.awpooling import *
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms

from models.vggaw import VGG11AW, VGG11AWT

# Training images are read from a machine-specific absolute path.
TRAIN_DIR = '/home/larry/Datasets/tiny-imagenet-200/train'
BATCH_SIZE = 128

# Per-channel normalisation constants
# (presumably the usual ImageNet statistics -- TODO confirm).
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Convert each image to a tensor, then normalise it.
preprocess = transforms.Compose([transforms.ToTensor(), normalize])

ds = ImageFolder(root=TRAIN_DIR, transform=preprocess)
loader = torch.utils.data.DataLoader(
    ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

In [2]:
class Net(nn.Module):
    """Small convolutional classifier built around ``AWPool2d_`` pooling.

    ``forward`` only uses ``conv1`` -> ``aw1`` -> global average pooling ->
    ``classifier``. ``conv2`` and ``aw2`` are constructed, and are therefore
    part of ``parameters()`` and ``state_dict()``, but ``forward`` does not
    call them. The classifier's first layer takes 64 features, matching the
    64 channels produced by ``conv1``.

    Args:
        num_class: number of outputs of the final linear layer.
    """

    def __init__(self, num_class=200):
        super().__init__()
        # Stage 1: two 3x3 convolutions (3 -> 64 -> 64), each followed by
        # ReLU and then BatchNorm.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
        )
        self.aw1 = AWPool2d_()

        # Stage 2 (64 -> 128 -> 128): built but not used by forward().
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
        )
        self.aw2 = AWPool2d_()

        self.globalavg = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Linear(64, 256),
            nn.ReLU(),
            # nn.Dropout(),
            nn.Linear(256, 256),
            nn.ReLU(),
            # nn.Dropout(),
            nn.Linear(256, num_class),
        )

    def forward(self, x):
        """Return class scores for a batch of images ``x``."""
        x = self.conv1(x)
        x = self.aw1(x)
        x = self.globalavg(x)
        # Collapse everything after the batch dimension before the MLP head.
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self) -> None:
        """Re-initialise conv, batch-norm and linear layers in place.

        Conv weights get Kaiming-normal init (fan_out, ReLU) with zero bias,
        batch-norm gets weight 1 / bias 0, and linear layers get
        N(0, 0.01) weights with zero bias. ``__init__`` does not call this
        method; it must be invoked explicitly.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def set_t(self, t):
        """Set the ``t`` attribute of ``aw1``.

        When ``aw1.t`` is an ``nn.Parameter`` and ``t`` is a plain number or
        tensor, the value is written in place, so the parameter object (and
        any optimizer reference to it) is kept. Direct assignment of such a
        value onto a registered parameter is rejected by ``nn.Module`` with a
        ``TypeError``. A ``nn.Parameter`` or ``None`` is assigned as before.

        Args:
            t: new value; number, tensor, ``nn.Parameter`` or ``None``.
        """
        current = getattr(self.aw1, 't', None)
        in_place = (
            isinstance(current, nn.Parameter)
            and t is not None
            and not isinstance(t, nn.Parameter)
        )
        if in_place:
            # no_grad: in-place writes to a leaf that requires grad are
            # otherwise refused by autograd.
            with torch.no_grad():
                current.copy_(
                    torch.as_tensor(t, dtype=current.dtype, device=current.device)
                )
        else:
            self.aw1.t = t


In [11]:
# Step size used to perturb aw1.t in the finite-difference cells.
h = 0.002

In [3]:
# Build the network with its default arguments and keep aw1's starting
# temperature as a plain Python number, so it can be restored later.
model = Net()
origin_t = model.aw1.t.detach().item()

In [4]:
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

# Take one (inputs, targets) batch from the loader and move both to the device.
x, y = (part.to(device) for part in next(iter(loader)))

model.to(device)
# Temperature of aw1 before any perturbation, as a plain Python number.
origin_t = model.aw1.t.item()

In [5]:
# Autograd gradient of the loss with respect to aw1.t.
# Clear any stored gradients first: backward() accumulates into .grad, so
# re-running this cell would otherwise add to the previous value and make the
# printed gradient depend on how often the cell was executed.
model.zero_grad()
logits = model(x)
loss = criterion(logits, y)
loss.backward()
print(model.aw1.t.grad)

tensor([0.0037], device='cuda:0')


In [None]:
# Stop autograd from tracking aw1.t (presumably so the later cells can
# overwrite it in place), then record the loss at the unperturbed temperature
# as the reference point for the finite differences.
model.aw1.t.requires_grad_(False)
logits = model(x)
base = criterion(logits, y)

In [12]:
# Finite-difference estimates of d(loss)/d(aw1.t) around origin_t with step h.
# `base` is the loss at origin_t computed in an earlier cell.
# NOTE(review): the recorded losses differ only around the 6th-7th significant
# digit, close to float32 resolution, so the quotients below are dominated by
# rounding noise. Consider a larger h or a float64 copy of the model.
def _loss_at(t_value):
    """Overwrite aw1.t in place with t_value, echo it, return the loss on (x, y)."""
    model.aw1.t.copy_(torch.tensor(t_value))
    print(model.aw1.t.item())
    return criterion(model(x), y)


# no_grad: only loss values are needed, so no autograd graph is built, and the
# in-place copy_ is allowed whatever aw1.t.requires_grad currently is.
with torch.no_grad():
    try:
        forward = _loss_at(origin_t + h)
        backward = _loss_at(origin_t - h)
    finally:
        # Always restore the original temperature, even if a forward pass fails.
        model.aw1.t.copy_(torch.tensor(origin_t))

print(f'base loss: {base.item()}\nforward loss: {forward.item()}\nbackward loss: {backward.item()}\n'\
      f'forward difference: {(forward - base) / h}\n'\
      f'backward difference: {(base - backward) / h}\n'\
      f'central difference: {(forward - backward) / (2 * h)}')


1.0019999742507935
0.9980000257492065
base loss: 5.295560836791992
forward loss: 5.295570373535156
backward loss: 5.29556131362915
forward difference: 0.004768371116369963
backward difference: -0.00023841856454964727
central difference: 0.0022649762686342


In [13]:
def compute_numerical(model, func, x, h=1e-4):
    """Print and return finite-difference estimates of the derivative of ``func`` at ``x``.

    Args:
        model: unused; kept so existing calls keep working.
        func: callable evaluated at ``x``, ``x + h`` and ``x - h``.
        x: evaluation point; must support ``+`` and ``-`` with ``h``.
        h: step size.

    Returns:
        Tuple ``(forward_difference, backward_difference, central_difference)``.
        The same three values are printed.
    """
    center = func(x)
    forward = func(x + h)
    backward = func(x - h)

    forward_diff = (forward - center) / h
    backward_diff = (center - backward) / h
    central_diff = (forward - backward) / (2 * h)

    print(f'forward difference {forward_diff}\nbackward difference: {backward_diff}\ncentral difference: {central_diff}')
    return forward_diff, backward_diff, central_diff

In [None]:
def softmax_t(x, t=1):
    """Softmax of ``x / t`` taken over all elements of ``x``.

    Args:
        x: array-like of scores.
        t: temperature that divides the scores before exponentiation.

    Returns:
        Array of the same shape as ``x`` whose entries sum to 1.
    """
    scaled = np.asarray(x) / t
    if scaled.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(scaled)
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps exp() from overflowing to inf (and the ratio from becoming NaN).
    weights = np.exp(scaled - np.max(scaled))
    return weights / np.sum(weights)

In [2]:
def softmax_pool(x, t):
    """Return the dot product of ``x`` with its temperature-``t`` softmax weights."""
    weights = softmax_t(x, t)
    return x.dot(weights)

In [134]:
# Scratch check: .item() hands back a plain Python number, so overwriting the
# tensor in place afterwards does not alter the value captured in `b`.
a = torch.tensor(0.05)
b = a.item()
overwrite = torch.tensor(3)
a.copy_(overwrite)
print(b, a)


0.05000000074505806 tensor(3.)
