In [None]:
import torch
import torchvision
from torch import nn
import numpy as np
from tqdm import tqdm
from torchvision.datasets import CIFAR10
from torchvision.models import resnet
from torch.utils.data import DataLoader
from torchvision import transforms
from datetime import datetime
from google.colab import drive
import random
# Mount Google Drive under /content/drive; later cells read and write model
# checkpoints beneath this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Mini-batch size used by every DataLoader in this notebook.
batch_size = 128

# Per-channel mean / std handed to Normalize for both splits.
# NOTE(review): these look like the commonly quoted CIFAR-10 statistics - confirm.
_norm_mean = [0.4914, 0.4822, 0.4465]
_norm_std = [0.2023, 0.1994, 0.2010]

# Training pipeline: random crop-and-resize to 32x32 and random horizontal
# flip, followed by tensor conversion and normalisation.
_train_steps = [
    transforms.RandomResizedCrop(32),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]
train_transform = transforms.Compose(_train_steps)

# Evaluation pipeline: tensor conversion and normalisation only.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]
test_transform = transforms.Compose(_test_steps)

# Training split (downloaded into ./data on first use).
train_data = CIFAR10(
    root='data',
    train=True,
    transform=train_transform,
    download=True,
)


def get_indices(dataset, num_samples, num_classes=10):
  """Draw ``num_samples`` random dataset indices for each class label.

  Indices are drawn by rejection sampling with the global NumPy RNG
  (``np.random.randint``), so the result is controlled by ``np.random.seed``.
  Sampling is with replacement: the same index may appear more than once.

  Args:
    dataset: object exposing ``targets``, an indexable sequence of integer
      class labels, one per sample.
    num_samples: how many indices to draw for every class.
    num_classes: labels ``0 .. num_classes - 1`` are sampled (default 10).

  Returns:
    A flat list of ``num_classes * num_samples`` indices, grouped by class in
    increasing label order.

  Raises:
    ValueError: if samples are requested for a label that never occurs in
      ``dataset.targets`` (the rejection loop would otherwise never finish).
  """
  targets = dataset.targets
  # Size the sampling range from the data instead of assuming 50000 samples.
  pool_size = len(targets)

  if num_samples > 0:
    present = {int(label) for label in targets}
    missing = [label for label in range(num_classes) if label not in present]
    if missing:
      raise ValueError(f"no samples available for class label(s) {missing}")

  indices = []
  for label in range(num_classes):
    for _ in range(num_samples):
      # Redraw until the candidate carries the wanted label.
      candidate = np.random.randint(0, pool_size)
      while targets[candidate] != label:
        candidate = np.random.randint(0, pool_size)
      indices.append(candidate)
  return indices



# Held-out split, read through the augmentation-free transform.
test_data = CIFAR10(
    root='data',
    train=False,
    transform=test_transform,
    download=True,
)

# Fixed-order loader over the held-out split.
# NOTE(review): the truncated warning in this cell's captured output may be the
# DataLoader worker-count warning - confirm 16 workers suits the runtime.
test_loader = DataLoader(
    dataset=test_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=16,
    pin_memory=True,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


  cpuset_checked))


In [None]:
class ModelBase(nn.Module):
    """
    torchvision ResNet re-assembled for small (CIFAR-style) inputs.

    Compared with the stock architecture returned by ``torchvision.models.resnet``:
    (i) ``conv1`` is replaced by a 3x3, stride-1, padding-1 convolution
    (ii) the max-pool layer is dropped
    (iii) an ``nn.Flatten(1)`` is inserted in front of the final ``nn.Linear``
          so that all children can be chained in one ``nn.Sequential``.

    ``feature_dim`` is forwarded as ``num_classes`` (size of the final linear
    layer).  ``bn_splits`` is accepted but not used: the norm layer is always
    plain ``nn.BatchNorm2d``.
    """
    def __init__(self, feature_dim=10, arch='resnet18', bn_splits=16):
        super().__init__()

        # Instantiate the stock architecture selected by name.
        backbone = getattr(resnet, arch)(num_classes=feature_dim, norm_layer=nn.BatchNorm2d)

        # Walk the stock children in order and collect the adapted stack.
        layers = []
        for child_name, child in backbone.named_children():
            if child_name == 'conv1':
                # Swap the stem convolution for the small-image variant.
                layers.append(nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False))
            elif isinstance(child, nn.MaxPool2d):
                # Max-pool is left out entirely.
                pass
            elif isinstance(child, nn.Linear):
                # Flatten everything after the batch dim before the linear head.
                layers.extend([nn.Flatten(1), child])
            else:
                layers.append(child)

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        # Output is returned as-is (no normalisation applied).
        return self.net(x)

def train_val(net, data_loader, train_optimizer):
    """Run one pass over ``data_loader`` and return top-1 accuracy in percent.

    The pass is a training epoch when ``train_optimizer`` is given and an
    evaluation pass (eval mode, ``torch.no_grad()``) when it is ``None``.

    Notebook globals used:
      * ``epoch`` / ``epochs`` - read for the progress text; ``epoch`` also
        drives the learning-rate schedule.
      * ``lr`` - read for the progress text and, after a training pass,
        multiplied by 0.1 when ``epoch`` is one of the milestones (60, 80) and
        written into every optimizer param group.

    Args:
        net: model to train or evaluate; must already live on the GPU.
        data_loader: iterable of ``(data, target)`` batches.
        train_optimizer: optimizer to step, or ``None`` for evaluation.

    Returns:
        Top-1 accuracy over all seen samples, in percent (``0.0`` if the
        loader yielded no samples).
    """
    global lr
    schedule = [60, 80]
    criterion = nn.CrossEntropyLoss().cuda()
    is_train = train_optimizer is not None
    net.train(is_train)

    total_loss, total_correct_1, total_correct_3, total_num = 0.0, 0.0, 0.0, 0
    data_bar = tqdm(data_loader, position=0, leave=True)
    with (torch.enable_grad() if is_train else torch.no_grad()):
        for data, target in data_bar:
            data, target = data.cuda(non_blocking=True), target.cuda(non_blocking=True)
            out = net(data)
            loss = criterion(out, target)

            if is_train:
                train_optimizer.zero_grad()
                loss.backward()
                train_optimizer.step()

            total_num += data.size(0)
            total_loss += loss.item() * data.size(0)
            # Class indices ordered from highest to lowest logit.
            prediction = torch.argsort(out, dim=-1, descending=True)
            hits = prediction == target.unsqueeze(dim=-1)
            total_correct_1 += hits[:, 0:1].any(dim=-1).float().sum().item()
            total_correct_3 += hits[:, 0:3].any(dim=-1).float().sum().item()

            # The second metric is top-3 accuracy, so it is labelled ACC@3.
            data_bar.set_description('{} Epoch: [{}/{}] lr: {:.4f} Loss: {:.4f} ACC@1: {:.2f}% ACC@3: {:.2f}%'
                                     .format('Train' if is_train else 'Test', epoch, epochs, lr, total_loss / total_num,
                                             total_correct_1 / total_num * 100, total_correct_3 / total_num * 100))
        if is_train:
            # Step decay: applied once the milestone epoch has finished.
            if epoch in schedule:
                lr *= 0.1
            for param_group in train_optimizer.param_groups:
                param_group['lr'] = lr

    if total_num == 0:
        # Nothing was processed; avoid dividing by zero.
        return 0.0
    return total_correct_1 / total_num * 100


In [None]:
# Final-epoch accuracies of this run (one entry each, appended after the epoch loop).
train_acc = []
test_acc = []

seed = 1
print(f"Random Seed: {seed}")
best_train = 0
best_test = 0
# Seed every RNG the run draws from, not only NumPy: NumPy drives
# get_indices, while torch / random drive weight initialisation, sampler
# order and the random transforms.
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)

# Class-balanced random subset of the training split (1000 draws per class).
indices = get_indices(train_data, 1000)
sampler = torch.utils.data.SubsetRandomSampler(indices)

train_loader = DataLoader(train_data, batch_size=batch_size, sampler = sampler , num_workers=16, pin_memory=True)

resnet18 = ModelBase()
resnet18.cuda()
# `lr`, `epoch` and `epochs` are read as globals by train_val.
lr = 1e-3
optimizer = torch.optim.Adam(resnet18.parameters(), lr=lr, weight_decay = 1e-5)
epoch_start = 1
epochs = 100
for epoch in range(epoch_start, epochs+1):
  acc1 = train_val(resnet18, train_loader, optimizer)
  acc2 = train_val(resnet18, test_loader, None)
  best_train = max(best_train, acc1)
  best_test = max(best_test, acc2)
train_acc.append(acc1)
test_acc.append(acc2)

torch.save(resnet18.state_dict(), "/content/drive/My Drive/Models/Supervised/supervisedresnet18v1.1seed1.pth")
# NOTE: both lists hold a single value here, so the "top10", mean and std
# figures below are all computed over that one final-epoch accuracy.
print(train_acc)
print(test_acc)
top10 = np.sort(train_acc)[::-1]
print("top10 trainacc",np.mean(top10[:10]))
top10 = np.sort(test_acc)[::-1]
print("top10 testacc",np.mean(top10[:10]))
print("best test",np.sort(test_acc)[::-1][0])
train_acc = np.array(train_acc)
test_acc = np.array(test_acc)
print(np.mean(train_acc))
print(np.mean(test_acc))
print(np.std(train_acc))
print(np.std(test_acc))


Random Seed: 1


  cpuset_checked))
Train Epoch: [1/100] lr: 0.0010 Loss: 1.9373 ACC@1: 28.75% ACC@5: 63.05%: 100%|██████████| 79/79 [00:10<00:00,  7.39it/s]
Test Epoch: [1/100] lr: 0.0010 Loss: 1.8579 ACC@1: 34.79% ACC@5: 67.37%: 100%|██████████| 79/79 [00:04<00:00, 18.16it/s]
Train Epoch: [2/100] lr: 0.0010 Loss: 1.7537 ACC@1: 35.57% ACC@5: 69.92%: 100%|██████████| 79/79 [00:10<00:00,  7.47it/s]
Test Epoch: [2/100] lr: 0.0010 Loss: 1.7108 ACC@1: 39.32% ACC@5: 73.18%: 100%|██████████| 79/79 [00:04<00:00, 17.67it/s]
Train Epoch: [3/100] lr: 0.0010 Loss: 1.6478 ACC@1: 40.16% ACC@5: 73.40%: 100%|██████████| 79/79 [00:10<00:00,  7.40it/s]
Test Epoch: [3/100] lr: 0.0010 Loss: 1.4719 ACC@1: 46.52% ACC@5: 79.87%: 100%|██████████| 79/79 [00:04<00:00, 17.41it/s]
Train Epoch: [4/100] lr: 0.0010 Loss: 1.5583 ACC@1: 43.23% ACC@5: 76.22%: 100%|██████████| 79/79 [00:11<00:00,  7.13it/s]
Test Epoch: [4/100] lr: 0.0010 Loss: 1.4088 ACC@1: 49.58% ACC@5: 80.95%: 100%|██████████| 79/79 [00:04<00:00, 16.26it/s]
Train Epo

[88.11]
[83.94]
top10 trainacc 88.11
top10 testacc 83.94
best test 83.94
88.11
83.94
0.0
0.0


In [None]:
import random
def rand_bbox(size, lam):
    """Sample a random box inside a ``size`` x ``size`` square.

    The box side is ``int(size * sqrt(1 - lam))`` before clipping, so smaller
    ``lam`` gives a larger box.  The centre is drawn uniformly with the global
    NumPy RNG and the box is clipped to the square, so boxes near the border
    come out smaller.

    Args:
        size: side length of the square, in pixels.
        lam: value in ``[0, 1]``; ``1 - lam`` is the target area fraction.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)`` with ``0 <= bbx1 <= bbx2 <= size`` and
        ``0 <= bby1 <= bby2 <= size``.
    """
    W = size
    H = size
    cut_rat = np.sqrt(1. - lam)
    # Built-in int() replaces the np.int alias, which NumPy deprecated in 1.20
    # and removed in 1.24; the truncation behaviour is identical.
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # Uniformly sampled box centre.
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)
    return bbx1, bby1, bbx2, bby2

def train_val(net, data_loader, train_optimizer):
    """Run one pass over ``data_loader`` and return top-1 accuracy in percent.

    Same contract as a plain train/eval epoch, except that during training a
    random box is zeroed out in roughly half of the images of every batch
    (see ``_apply_cutout``).  Evaluation batches are left untouched.

    Notebook globals used:
      * ``epoch`` / ``epochs`` - read for the progress text; ``epoch`` also
        drives the learning-rate schedule.
      * ``lr`` - read for the progress text and, after a training pass,
        multiplied by 0.1 when ``epoch`` is one of the milestones (60, 80) and
        written into every optimizer param group.

    Args:
        net: model to train or evaluate; must already live on the GPU.
        data_loader: iterable of ``(data, target)`` batches.
        train_optimizer: optimizer to step, or ``None`` for evaluation.

    Returns:
        Top-1 accuracy over all seen samples, in percent (``0.0`` if the
        loader yielded no samples).
    """
    global lr
    schedule = [60, 80]
    criterion = nn.CrossEntropyLoss().cuda()
    is_train = train_optimizer is not None
    net.train(is_train)

    total_loss, total_correct_1, total_correct_3, total_num = 0.0, 0.0, 0.0, 0
    data_bar = tqdm(data_loader, position=0, leave=True)
    with (torch.enable_grad() if is_train else torch.no_grad()):
        for data, target in data_bar:
            if is_train:
                data = _apply_cutout(data)
            data, target = data.cuda(non_blocking=True), target.cuda(non_blocking=True)
            out = net(data)
            loss = criterion(out, target)

            if is_train:
                train_optimizer.zero_grad()
                loss.backward()
                train_optimizer.step()

            total_num += data.size(0)
            total_loss += loss.item() * data.size(0)
            # Class indices ordered from highest to lowest logit.
            prediction = torch.argsort(out, dim=-1, descending=True)
            hits = prediction == target.unsqueeze(dim=-1)
            total_correct_1 += hits[:, 0:1].any(dim=-1).float().sum().item()
            total_correct_3 += hits[:, 0:3].any(dim=-1).float().sum().item()

            # The second metric is top-3 accuracy, so it is labelled ACC@3.
            data_bar.set_description('{} Epoch: [{}/{}] lr: {:.4f} Loss: {:.4f} ACC@1: {:.2f}% ACC@3: {:.2f}%'
                                     .format('Train' if is_train else 'Test', epoch, epochs, lr, total_loss / total_num,
                                             total_correct_1 / total_num * 100, total_correct_3 / total_num * 100))
        if is_train:
            # Step decay: applied once the milestone epoch has finished.
            if epoch in schedule:
                lr *= 0.1
            for param_group in train_optimizer.param_groups:
                param_group['lr'] = lr

    if total_num == 0:
        # Nothing was processed; avoid dividing by zero.
        return 0.0
    return total_correct_1 / total_num * 100


def _apply_cutout(batch):
    """Return a copy of ``batch`` with a random box zeroed in about half the samples.

    Each sample is skipped with probability 0.5 (``random.random()``); for the
    others ``lam`` is drawn from Beta(1, 1) and ``rand_bbox`` picks the box,
    which is zeroed across the leading (channel) axis.  The input is not modified.

    # assumes batch is (N, C, H, W) with H == W (rand_bbox takes one side
    # length) - TODO confirm for non-square inputs
    """
    augmented = batch.clone()
    side = augmented.shape[-1]
    for i in range(augmented.shape[0]):
        if random.random() < 0.5:
            continue
        lam = np.random.beta(1, 1)
        x1, y1, x2, y2 = rand_bbox(side, lam)
        # Bounds are clipped to [0, side], so this slice is always valid.
        augmented[i][:, x1:x2, y1:y2] = 0
    return augmented

# Final-epoch accuracies, one entry per seed.
train_acc = []
test_acc = []
for i in range(1):
  print(f"Random Seed: {i}")
  best_train = 0
  best_test = 0
  # Seed every RNG the run draws from, not only NumPy: the cut-out
  # augmentation uses both `random` and NumPy, and torch drives weight
  # initialisation and sampler order.
  np.random.seed(i)
  random.seed(i)
  torch.manual_seed(i)

  # Class-balanced random subset of the training split (1000 draws per class).
  indices = get_indices(train_data, 1000)
  sampler = torch.utils.data.SubsetRandomSampler(indices)

  train_loader = DataLoader(train_data, batch_size=batch_size, sampler = sampler , num_workers=16, pin_memory=True)

  resnet18 = ModelBase()
  resnet18.cuda()
  # `lr`, `epoch` and `epochs` are read as globals by train_val.
  lr = 1e-3
  optimizer = torch.optim.Adam(resnet18.parameters(), lr=lr, weight_decay = 1e-5)
  epoch_start = 1
  epochs = 100
  for epoch in range(epoch_start, epochs+1):
    acc1 = train_val(resnet18, train_loader, optimizer)
    acc2 = train_val(resnet18, test_loader, None)
    best_train = max(best_train, acc1)
    best_test = max(best_test, acc2)
  train_acc.append(acc1)
  test_acc.append(acc2)

  # NOTE: the path does not include the seed, so each seed overwrites the last.
  torch.save(resnet18.state_dict(), "/content/drive/My Drive/Models/Supervised/supervisedcutoutresnet18v1.1.pth")
print(train_acc)
print(test_acc)
import numpy as np
train_acc = np.array(train_acc)
test_acc = np.array(test_acc)
print(np.mean(train_acc))
print(np.mean(test_acc))
print(np.std(train_acc))
print(np.std(test_acc))


Random Seed: 0


  cpuset_checked))
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  import sys
Train Epoch: [1/100] lr: 0.0010 Loss: 1.9736 ACC@1: 26.90% ACC@5: 60.18%: 100%|██████████| 79/79 [00:12<00:00,  6.35it/s]
Test Epoch: [1/100] lr: 0.0010 Loss: 1.7584 ACC@1: 37.16% ACC@5: 70.44%: 100%|██████████| 79/79 [00:04<00:00, 16.19it/s]
Train Epoch: [2/100] lr: 0.0010 Loss: 1.8042 ACC@1: 33.54% ACC@5: 68.02%: 100%|██████████| 79/79 [00:12<00:00,  6.09it/s]
Test Epoch: [2/100] lr: 0.0010 Loss: 1.5462 ACC@1: 43.64% ACC@5: 76.55%: 100%|██████████| 79/79 [00:04<00:00, 16.04it/s]
Train Epoch: [3/100] lr: 0.0010 Loss: 1.7363 ACC@1: 36.45% ACC@5: 70.87%: 100%|██████████| 79/79 [00:12<00:00,  6.29it/s]
Test Epoch: [3/100] lr: 0.0010 Loss: 1.6230 ACC@1: 39.81% ACC@5: 78.08%: 100%|██████████| 79/79 [00:04<00:00, 16.

[82.22]
[82.91]
82.22
82.91
0.0
0.0
[82.22]
[82.91]
82.22
82.91
0.0
0.0






In [None]:
  class ModelBase(nn.Module):
      """
      torchvision ResNet re-assembled for small (CIFAR-style) inputs.

      Compared with the stock architecture returned by ``torchvision.models.resnet``:
      (i) ``conv1`` is replaced by a 3x3, stride-1, padding-1 convolution
      (ii) the max-pool layer is dropped
      (iii) an ``nn.Flatten(1)`` is inserted in front of the final ``nn.Linear``

      ``forward`` additionally flattens the result from dim 1 onward, which is
      a no-op when the output is already 2-D.  ``bn_splits`` is accepted but
      not used: the norm layer is always plain ``nn.BatchNorm2d``.
      """
      def __init__(self, feature_dim=10, arch='resnet18', bn_splits=16):
          super().__init__()

          # Instantiate the stock architecture selected by name.
          backbone = getattr(resnet, arch)(num_classes=feature_dim, norm_layer=nn.BatchNorm2d)

          # Walk the stock children in order and collect the adapted stack.
          layers = []
          for child_name, child in backbone.named_children():
              if child_name == 'conv1':
                  # Swap the stem convolution for the small-image variant.
                  layers.append(nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False))
              elif isinstance(child, nn.MaxPool2d):
                  # Max-pool is left out entirely.
                  pass
              elif isinstance(child, nn.Linear):
                  # Flatten everything after the batch dim before the linear head.
                  layers.extend([nn.Flatten(1), child])
              else:
                  layers.append(child)

          self.net = nn.Sequential(*layers)

      def forward(self, x):
          # Output is returned without normalisation, flattened to (batch, -1).
          features = self.net(x)
          return torch.flatten(features, start_dim=1)
class classifier(nn.Module):
    """Single linear layer mapping a feature vector to class logits.

    Args:
        feature_dim: number of output logits (default 10).  Previously this
            argument was ignored and the output size was fixed at 10.
        arch: accepted for signature compatibility; unused.
        bn_splits: accepted for signature compatibility; unused.
        in_features: length of the input feature vector (default 1024).
            # presumably two concatenated backbone feature vectors - confirm
    """

    def __init__(self, feature_dim=10, arch='resnet18', bn_splits=16, in_features=1024):
        super(classifier, self).__init__()

        self.net = nn.Linear(in_features, feature_dim)

    def forward(self, x):
        # Raw logits; no normalisation or softmax is applied here.
        return self.net(x)
# Load two trained networks to use as frozen feature extractors.
supervised = ModelBase()
supervised.load_state_dict(torch.load("/content/drive/My Drive/Models/Supervised/supervisedresnet18v1.1.pth"))
cutout = ModelBase()
# NOTE(review): `cutout` is loaded from the "...seed1.pth" checkpoint, which is
# the path the plain supervised run in this notebook saves to, not from
# "supervisedcutoutresnet18v1.1.pth" written by the cut-out run - confirm this
# is the intended checkpoint.
cutout.load_state_dict(torch.load("/content/drive/My Drive/Models/Supervised/supervisedresnet18v1.1seed1.pth"))
# Drop the last child (the linear head) so each network outputs features.
supervised.net =supervised.net[:-1]
cutout.net =cutout.net[:-1]
for backbone in (supervised, cutout):
  for param in backbone.parameters():
    param.requires_grad = False
  # Frozen extractors must run in eval mode; otherwise BatchNorm normalises
  # with per-batch statistics and keeps updating its running averages.
  backbone.eval()
supervised=supervised.cuda()
cutout = cutout.cuda()
finallayer = classifier()

In [None]:



def train_val(base1,base2,final, data_loader, train_optimizer):
    """Train or evaluate ``final`` on the concatenated outputs of two backbones.

    Each batch is passed through ``base1`` and ``base2``; their outputs are
    concatenated along dim 1 and fed to ``final``.  Only ``final`` is put in
    train/eval mode according to ``train_optimizer``; the backbones are always
    switched to eval mode and run without autograd, since they act as fixed
    feature extractors here.

    Notebook globals used:
      * ``epoch`` / ``epochs`` - read for the progress text; ``epoch`` also
        drives the learning-rate schedule.
      * ``lr`` - read for the progress text and, after a training pass,
        multiplied by 0.1 when ``epoch`` is one of the milestones (60, 80) and
        written into every optimizer param group.

    Args:
        base1: first feature extractor (already on the GPU).
        base2: second feature extractor (already on the GPU).
        final: head operating on the concatenated features.
        data_loader: iterable of ``(data, target)`` batches.
        train_optimizer: optimizer for ``final``, or ``None`` for evaluation.

    Returns:
        Top-1 accuracy over all seen samples, in percent (``0.0`` if the
        loader yielded no samples).
    """
    global lr
    schedule = [60, 80]
    criterion = nn.CrossEntropyLoss().cuda()
    is_train = train_optimizer is not None
    final.train(is_train)
    # Keep BatchNorm in the backbones on its stored running statistics and
    # stop it from updating them while the head is being trained.
    base1.eval()
    base2.eval()

    total_loss, total_correct_1, total_correct_3, total_num = 0.0, 0.0, 0.0, 0
    data_bar = tqdm(data_loader, position=0, leave=True)
    with (torch.enable_grad() if is_train else torch.no_grad()):
        for data, target in data_bar:
            data, target = data.cuda(non_blocking=True), target.cuda(non_blocking=True)
            # Backbones are not optimised here, so no graph is needed for them.
            with torch.no_grad():
                features = torch.cat((base1(data), base2(data)), 1)
            out = final(features)
            loss = criterion(out, target)

            if is_train:
                train_optimizer.zero_grad()
                loss.backward()
                train_optimizer.step()

            total_num += features.size(0)
            total_loss += loss.item() * features.size(0)
            # Class indices ordered from highest to lowest logit.
            prediction = torch.argsort(out, dim=-1, descending=True)
            hits = prediction == target.unsqueeze(dim=-1)
            total_correct_1 += hits[:, 0:1].any(dim=-1).float().sum().item()
            total_correct_3 += hits[:, 0:3].any(dim=-1).float().sum().item()

            # The second metric is top-3 accuracy, so it is labelled ACC@3.
            data_bar.set_description('{} Epoch: [{}/{}] lr: {:.4f} Loss: {:.4f} ACC@1: {:.2f}% ACC@3: {:.2f}%'
                                     .format('Train' if is_train else 'Test', epoch, epochs, lr, total_loss / total_num,
                                             total_correct_1 / total_num * 100, total_correct_3 / total_num * 100))
        if is_train:
            # Step decay: applied once the milestone epoch has finished.
            if epoch in schedule:
                lr *= 0.1
            for param_group in train_optimizer.param_groups:
                param_group['lr'] = lr

    if total_num == 0:
        # Nothing was processed; avoid dividing by zero.
        return 0.0
    return total_correct_1 / total_num * 100

# Final-epoch accuracies, one entry per seed.
train_acc = []
test_acc = []
for i in range(1):
  print(f"Random Seed: {i}")
  best_train = 0
  best_test = 0
  # Seed every RNG the run draws from, not only NumPy: torch drives the head's
  # weight initialisation and the sampler order, and torch / random drive the
  # random transforms.
  np.random.seed(i)
  random.seed(i)
  torch.manual_seed(i)

  # Class-balanced random subset of the training split (1000 draws per class).
  indices = get_indices(train_data, 1000)
  sampler = torch.utils.data.SubsetRandomSampler(indices)

  train_loader = DataLoader(train_data, batch_size=batch_size, sampler = sampler , num_workers=16, pin_memory=True)
  finallayer = classifier()
  finallayer=finallayer.cuda()
  # `lr`, `epoch` and `epochs` are read as globals by train_val.
  lr = 1e-4
  optimizer = torch.optim.Adam(finallayer.parameters(), lr=lr, weight_decay = 1e-5)
  epoch_start = 1
  epochs = 100
  for epoch in range(epoch_start, epochs+1):
    acc1 = train_val(supervised,cutout,finallayer, train_loader, optimizer)
    acc2 = train_val(supervised,cutout,finallayer, test_loader, None)
    best_train = max(best_train, acc1)
    best_test = max(best_test, acc2)
  train_acc.append(acc1)
  test_acc.append(acc2)

  # NOTE: the path does not include the seed, so each seed overwrites the last.
  torch.save(finallayer.state_dict(), "/content/drive/My Drive/Models/Supervised/finallayerconcat1024.pth")
# NOTE: with a single seed both lists hold one value, so the "top10", mean and
# std figures below are all computed over that one final-epoch accuracy.
print(train_acc)
print(test_acc)
top10 = np.sort(train_acc)[::-1]
print("top10 trainacc",np.mean(top10[:10]))
top10 = np.sort(test_acc)[::-1]
print("top10 testacc",np.mean(top10[:10]))
print("best test",np.sort(test_acc)[::-1][0])
train_acc = np.array(train_acc)
test_acc = np.array(test_acc)
print(np.mean(train_acc))
print(np.mean(test_acc))
print(np.std(train_acc))
print(np.std(test_acc))


Random Seed: 0


Train Epoch: [1/100] lr: 0.0001 Loss: 1.5442 ACC@1: 58.76% ACC@5: 82.39%: 100%|██████████| 79/79 [00:07<00:00, 10.06it/s]
Test Epoch: [1/100] lr: 0.0001 Loss: 0.9724 ACC@1: 82.16% ACC@5: 96.20%: 100%|██████████| 79/79 [00:06<00:00, 11.42it/s]
Train Epoch: [2/100] lr: 0.0001 Loss: 0.8338 ACC@1: 84.46% ACC@5: 96.56%: 100%|██████████| 79/79 [00:07<00:00, 10.16it/s]
Test Epoch: [2/100] lr: 0.0001 Loss: 0.6760 ACC@1: 83.83% ACC@5: 96.69%: 100%|██████████| 79/79 [00:06<00:00, 11.30it/s]
Train Epoch: [3/100] lr: 0.0001 Loss: 0.6555 ACC@1: 85.13% ACC@5: 96.65%: 100%|██████████| 79/79 [00:08<00:00,  9.49it/s]
Test Epoch: [3/100] lr: 0.0001 Loss: 0.5749 ACC@1: 84.15% ACC@5: 96.94%: 100%|██████████| 79/79 [00:07<00:00, 10.15it/s]
Train Epoch: [4/100] lr: 0.0001 Loss: 0.5747 ACC@1: 85.34% ACC@5: 96.60%: 100%|██████████| 79/79 [00:07<00:00,  9.88it/s]
Test Epoch: [4/100] lr: 0.0001 Loss: 0.5265 ACC@1: 84.29% ACC@5: 97.02%: 100%|██████████| 79/79 [00:07<00:00, 11.24it/s]
Train Epoch: [5/100] lr: 0.0

[88.23]
[83.49]
top10 trainacc 88.23
top10 testacc 83.49
best test 83.49
88.23
83.49
0.0
0.0


In [None]:
!pip install torchcam

In [None]:

import matplotlib.pyplot as plt
from torchcam.utils import overlay_mask
# to_pil_image is used below but was never imported anywhere in the notebook.
from torchvision.transforms.functional import to_pil_image
# Rebuild the full network and restore the trained weights.
supervised = ModelBase()
supervised.load_state_dict(torch.load("/content/drive/My Drive/Models/Supervised/supervisedresnet18v1.1.pth"))

# NOTE(review): `img` and `activation_map` are not defined in any visible cell;
# this cell fails with NameError on a fresh kernel until the input image and
# the CAM extraction step are added above this line.
result = overlay_mask(to_pil_image(img), to_pil_image(activation_map[0].squeeze(0), mode='F'), alpha=0.5)
# Display it
plt.imshow(result); plt.axis('off'); plt.tight_layout(); plt.show()