In [8]:
from glob import glob
import os
import pandas as pd
import json
# Collect the best accuracy of every HPO trial under `path` and write a ranking to best.csv.
path = 'HPO/tiny-imagenet/vgg16aw'
# Without recursive=True, '**' behaves like '*', i.e. it matches exactly one directory level.
# glob() gives no ordering guarantee, so sort to make the traversal deterministic.
csv_path = sorted(glob(os.path.join(path, '**', 'progress.csv')))
json_path = []
tems = []
accs = []

for c in csv_path:
    # Take params.json from the SAME trial directory as progress.csv instead of zipping
    # two independent glob results, which mispairs files as soon as one trial lacks a file.
    j = os.path.join(os.path.dirname(c), 'params.json')
    if not os.path.isfile(j):
        continue
    json_path.append(j)

    csv_f = pd.read_csv(c)
    # Series.max() skips NaN rows; the built-in max() is order-dependent when NaN is present.
    accs.append(csv_f['accuracy'].max())

    # Context manager closes the file handle (the handle used to be leaked once per trial).
    with open(j) as f:
        json_f = json.load(f)

    # Label the trial with the last five entries of params.json
    # (presumably the tuned hyperparameters; verify against the HPO config).
    trial_name = "".join(f'{t}={v},' for t, v in list(json_f.items())[-5:])
    tems.append(trial_name)

df = pd.DataFrame({'tems': tems, 'accs': accs})
df = df.sort_values(by=['accs'], ascending=False)
df.to_csv(os.path.join(path, 'best.csv'), index=False)
    
    

In [52]:
import torch
import torch.nn as nn
from models.awpooling import *
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from torch.utils.tensorboard import SummaryWriter

from models.vggaw import VGG11AW, VGG11AWT
import os

# Finite-difference step size; it also names the TensorBoard run directory.
h = 1e-4
log_dir = os.path.join('Gradient_analysis', f'delta={h}')
writer = SummaryWriter(log_dir=log_dir)

# Per-channel normalisation with the hard-coded mean/std constants below.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Input pipeline: random horizontal flip -> tensor conversion -> normalisation.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# NOTE(review): absolute, user-specific dataset path; the notebook only runs where it exists.
ds = ImageFolder(
    root='/home/larry/Datasets/tiny-imagenet-200/train',
    transform=train_transform,
)
loader = torch.utils.data.DataLoader(ds, shuffle=True, batch_size=128, num_workers=2)

In [42]:
class Net(nn.Module):
    """Small CNN used for the gradient analysis.

    Layout: two Conv-ReLU-BatchNorm stages (3 -> 64 -> 64 channels), one
    ``AWPool2d_`` layer, global average pooling and a three-layer MLP head
    that produces ``num_class`` logits.
    """

    def __init__(self, num_class=200):
        """Build the layers; ``num_class`` is the width of the final linear layer."""
        super().__init__()

        feature_layers = [
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
        ]
        self.conv1 = nn.Sequential(*feature_layers)
        self.aw1 = AWPool2d_()

        # Disabled second stage, kept for reference:
        # self.conv2 = nn.Sequential(
        #     nn.Conv2d(64, 128, kernel_size=3, padding=1),
        #     nn.ReLU(),
        #     nn.BatchNorm2d(128),
        #     nn.Conv2d(128, 128, kernel_size=3, padding=1),
        #     nn.ReLU(),
        #     nn.BatchNorm2d(128),
        # )
        # self.aw2 = AWPool2d_()

        self.globalavg = nn.AdaptiveAvgPool2d(1)

        head_layers = [
            nn.Linear(64, 256),
            nn.ReLU(),
            # nn.Dropout(),
            nn.Linear(256, 256),
            nn.ReLU(),
            # nn.Dropout(),
            nn.Linear(256, num_class),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run conv1 -> aw1 -> global average pool -> flatten -> classifier."""
        features = self.aw1(self.conv1(x))
        pooled = self.globalavg(features)
        # Collapse everything after the batch dimension before the linear head.
        flat = pooled.flatten(start_dim=1)
        return self.classifier(flat)

    def _initialize_weights(self) -> None:
        """Re-initialise Conv2d, BatchNorm2d and Linear sub-modules in place.

        NOTE(review): nothing in this class calls this method, so the layers
        keep PyTorch's default initialisation unless a caller invokes it.
        """
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    def set_t(self, t):
        """Replace the ``t`` attribute of the ``aw1`` layer with ``t``.

        NOTE(review): if ``aw1.t`` is registered as an ``nn.Parameter``, plain
        assignment of a non-Parameter value may be rejected by ``nn.Module``;
        confirm against the AWPool2d_ implementation.
        """
        self.aw1.t = t


In [43]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net()

# One SGD parameter group (lr=0.1) per parameter, skipping every parameter whose
# name contains 'aw' so the optimizer never updates those.
params = []
for param_name, param in model.named_parameters():
    if 'aw' in param_name:
        continue
    params.append({'params': param, 'lr': 0.1})

optimizer = torch.optim.SGD(params)
criterion = nn.CrossEntropyLoss()

# Last expression of the cell: moves the model and displays its structure.
model.to(device)

Net(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (aw1): AWPool2d_()
  (globalavg): AdaptiveAvgPool2d(output_size=1)
  (classifier): Sequential(
    (0): Linear(in_features=64, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=200, bias=True)
  )
)

In [44]:
# For every batch: log the autograd gradient of the loss w.r.t. aw1.t, log forward/backward/
# central finite-difference estimates of the same quantity, then take one SGD step on the
# remaining parameters.
for i, data in enumerate(loader):
    model.aw1.t.requires_grad = True
    images, label = data
    images = images.to(device)
    label = label.to(device)

    # Autograd
    logits = model(images)
    base_loss = criterion(logits, label)
    base_loss.backward()
    writer.add_scalar('Automatic gardient', model.aw1.t.grad, i)

    # Calculate numerical differences by re-evaluating the loss at t + h and t - h.
    # NOTE(review): if the model runs in PyTorch's default float32, a loss of magnitude ~5
    # is resolved to roughly 5e-7, so dividing by a small h can leave these estimates
    # dominated by round-off; consider a larger h or float64 when interpreting the curves.
    with torch.no_grad():
        model.aw1.t.requires_grad = False
        origin_t = model.aw1.t.item()

        try:
            # forward
            model.aw1.t.copy_(torch.tensor(origin_t + h))
            logits = model(images)
            forward_loss = criterion(logits, label)

            # backward
            model.aw1.t.copy_(torch.tensor(origin_t - h))
            logits = model(images)
            backward_loss = criterion(logits, label)
        finally:
            # Always put the original value back, even if a forward pass raises or the
            # cell is interrupted; otherwise t stays shifted by +/- h for later cells.
            model.aw1.t.copy_(torch.tensor(origin_t))

        writer.add_scalar('Forward difference', (forward_loss - base_loss) / h, i)
        writer.add_scalar('Backward difference', (base_loss - backward_loss) / h, i)
        writer.add_scalar('Central difference', (forward_loss - backward_loss) / (2 * h), i)

    optimizer.step()
    optimizer.zero_grad()
    # aw1.t is cleared by hand here, after optimizer.zero_grad().
    model.aw1.t.grad.zero_()
    

In [33]:
# Load the TensorBoard notebook extension (provides the %tensorboard magic)
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [34]:
# Open TensorBoard on the directory the SummaryWriter logs into.
%tensorboard --logdir Gradient_analysis

In [5]:
# Autograd reference: backpropagate the loss of one batch and print the gradient
# accumulated on model.aw1.t.
# NOTE(review): `x` and `y` are not defined in any visible cell; presumably a single
# batch of images and labels left over in the kernel. Confirm before re-running.
logits = model(x)
loss = criterion(logits, y)
loss.backward()
print(model.aw1.t.grad)

tensor([0.0037], device='cuda:0')


In [None]:
# Turn off gradient tracking for t, then compute the unperturbed loss `base`.
# NOTE(review): relies on `x` and `y` from kernel state; no visible cell defines them.
model.aw1.t.requires_grad = False
logits = model(x)
base = criterion(logits, y)

In [12]:
# Evaluate the loss with t shifted to origin_t + h and origin_t - h, restore t, then
# print the three losses and the forward / backward / central difference quotients.
# NOTE(review): `origin_t`, `base`, `x` and `y` come from kernel state, not from this cell.

# Loss at t + h
model.aw1.t.copy_(torch.tensor(origin_t + h))
print(model.aw1.t.item())
logits = model(x)
forward = criterion(logits, y)

# Loss at t - h
model.aw1.t.copy_(torch.tensor(origin_t - h))
print(model.aw1.t.item())
logits = model(x)
backward = criterion(logits, y)

# Put the original value back before reporting.
model.aw1.t.copy_(torch.tensor(origin_t))

report = [
    f'base loss: {base.item()}',
    f'forward loss: {forward.item()}',
    f'backward loss: {backward.item()}',
    f'forward difference: {(forward - base) / h}',
    f'backward difference: {(base - backward) / h}',
    f'central difference: {(forward - backward) / (2 * h)}',
]
print('\n'.join(report))


1.0019999742507935
0.9980000257492065
base loss: 5.295560836791992
forward loss: 5.295570373535156
backward loss: 5.29556131362915
forward difference: 0.004768371116369963
backward difference: -0.00023841856454964727
central difference: 0.0022649762686342


In [13]:
def compute_numerical(model, func, x, h=1e-4):
    """Print and return finite-difference estimates of the derivative of ``func`` at ``x``.

    Args:
        model: Unused; kept so existing call sites keep working.
        func: Callable evaluated at ``x``, ``x + h`` and ``x - h``.
        x: Point at which the derivative is estimated.
        h: Step size of the perturbation.

    Returns:
        Tuple ``(forward_difference, backward_difference, central_difference)``.
        Earlier versions only printed these values and returned ``None``.
    """
    base = func(x)
    forward = func(x + h)
    backward = func(x - h)

    forward_diff = (forward - base) / h
    backward_diff = (base - backward) / h
    central_diff = (forward - backward) / (2 * h)

    print(f'forward difference {forward_diff}\nbackward difference: {backward_diff}\ncentral difference: {central_diff}')
    return forward_diff, backward_diff, central_diff

In [None]:
def softmax_t(x, t=1):
    """Softmax of ``x / t`` taken over all elements of ``x``.

    Args:
        x: Array-like of scores.
        t: Divisor applied to ``x`` before exponentiation (temperature).

    Returns:
        NumPy array with the shape of ``x`` whose entries sum to 1.
    """
    # Local import: no visible cell imports numpy, so the function brings it in itself.
    import numpy as np

    scaled = np.asarray(x) / t
    if scaled.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(scaled)
    # Subtracting the maximum leaves the softmax unchanged mathematically but keeps
    # exp() from overflowing to inf (and the result from becoming NaN) for large x / t.
    exps = np.exp(scaled - np.max(scaled))
    return exps / np.sum(exps)

In [2]:
def softmax_pool(x, t):
    """Return the dot product of ``x`` with its own ``softmax_t(x, t)`` weights."""
    weights = softmax_t(x, t)
    return x.dot(weights)

In [134]:
# Sanity check: the value read with .item() is a plain Python number, so it is not
# affected when the tensor is later overwritten in place with copy_().
a = torch.tensor(0.05)
b = a.item()
a.copy_(torch.tensor(3))
print(b, a)


0.05000000074505806 tensor(3.)
