# Segmentation using ResNet18

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.models import resnet18
from torchvision.datasets import CIFAR10
from tqdm import tqdm_notebook as tqdm
from torchvision.utils import save_image, make_grid
from matplotlib import pyplot as plt
from matplotlib.colors import hsv_to_rgb
from matplotlib.image import BboxImage
from matplotlib.transforms import Bbox, TransformedBbox
import numpy as np
from IPython import display
import requests
from io import BytesIO
from PIL import Image
from PIL import Image, ImageSequence
from IPython.display import HTML
import warnings
from matplotlib import rc
import gc
import matplotlib
import os
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
gc.enable()
plt.ioff()

In [None]:
num_classes = 10
resnet = resnet18(pretrained=True)
resnet.conv1 = nn.Conv2d(3,64,3,stride=1,padding=1)
resnet_ = list(resnet.children())[:-2]
resnet_[3] = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
classifier = nn.Conv2d(512,num_classes,1)
torch.nn.init.kaiming_normal_(classifier.weight)
resnet_.append(classifier)
resnet_.append(nn.Upsample(size=32, mode='bilinear', align_corners=False))
tiny_resnet = nn.Sequential(*resnet_)

In [None]:
def attention(x):
    return torch.sigmoid(torch.logsumexp(x,1, keepdim=True))

In [None]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = CIFAR10(root='.', train=True, download=True, transform=transform_train)
train_iter = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=16, pin_memory=True, drop_last=True)

testset = CIFAR10(root='.', train=False, download=True, transform=transform_test)
test_iter = DataLoader(testset, batch_size=100, shuffle=False, num_workers=16, pin_memory=True)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
mpath = "/content/drive/MyDrive/segmentCutMix/segment.pth"

model = nn.DataParallel(tiny_resnet).cuda()
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum=0.9, weight_decay=1e-4)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,78,eta_min=0.001)

In [None]:
num_epochs = 33

assert(os.path.isfile(mpath))
if(os.path.isfile(mpath)): 
  saved_state = torch.load(mpath)
  s_epoch = int(saved_state['epoch'])
  model.load_state_dict(saved_state['state'])
  optimizer.load_state_dict(saved_state['optim'])
  
else: 
  s_epoch = 0

In [None]:
s_epoch

In [None]:
losses = []
acces = []
v_losses = []
v_acces = []
for epoch in tqdm(range(s_epoch+1, num_epochs)):
    epoch_loss = 0.0
    acc = 0.0
    var = 0.0
    model.train()
    train_pbar = train_iter
    for i, (x, _label) in enumerate(train_pbar):
        x = x.cuda()
        _label = _label.cuda()
        label = F.one_hot(_label).float()
        seg_out = model(x)
        
        attn = attention(seg_out)
        # Smooth Max Aggregation
        logit = torch.log(torch.exp(seg_out*0.5).mean((-2,-1)))*2
        loss = criterion(logit, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        epoch_loss += loss.item()
        acc += (logit.argmax(-1)==_label).sum()
        #train_pbar.set_description('Accuracy: {:.3f}%'.format(100*(logit.argmax(-1)==_label).float().mean()))
        
    avg_loss = epoch_loss / (i + 1)
    losses.append(avg_loss)
    avg_acc = acc.cpu().detach().numpy() / (len(trainset))
    acces.append(avg_acc)
    model.eval()
    epoch_loss = 0.0
    acc = 0.0
    num_seen = 0

    state = {
        "state": model.state_dict(),
        "epoch": epoch, 
        "optim": optimizer.state_dict()
    }
    torch.save(state, "/content/drive/MyDrive/segmentCutMix/segment.pth")
    
    test_pbar = tqdm(test_iter)
    for i, (x, _label) in enumerate(test_pbar):
        x = x.cuda()
        _label = _label.cuda()
        label = F.one_hot(_label).float()
        seg_out = model(x)
        attn = attention(seg_out)
        logit = torch.log(torch.exp(seg_out*0.5).mean((-2,-1)))*2
        loss = criterion(logit, label)
        epoch_loss += loss.item()
        acc += (logit.argmax(-1)==_label).sum()
        num_seen += label.size(0)
        test_pbar.set_description('Accuracy: {:.3f}%'.format(100*(acc.float()/num_seen)))
    
    avg_loss_val = epoch_loss / (i + 1)
    v_losses.append(avg_loss_val)
    avg_acc_val = acc.cpu().detach().numpy() / (len(testset))
    v_acces.append(avg_acc_val)
    plt.close('all')

    conf = torch.max(nn.functional.softmax(seg_out, dim=1), dim=1)[0]
    hue = (torch.argmax(seg_out, dim=1).float() + 0.5)/10
    x -= x.min()
    x /= x.max()
    gs_im = x.mean(1)
    gs_mean = gs_im.mean()
    gs_min = gs_im.min()
    gs_max = torch.max((gs_im-gs_min))
    gs_im = (gs_im - gs_min)/gs_max
    hsv_im = torch.stack((hue.float(), attn.squeeze().float(), gs_im.float()), -1)
    im = hsv_to_rgb(hsv_im.cpu().detach().numpy())
    ex = make_grid(torch.tensor(im).permute(0,3,1,2), normalize=True, nrow=25)
    attns = make_grid(attn, normalize=False, nrow=25)
    attns = attns.cpu().detach()
    inputs = make_grid(x, normalize=True, nrow=25).cpu().detach()
    display.clear_output(wait=True)
    plt.figure(figsize=(20,8))
    plt.imshow(np.concatenate((inputs.numpy().transpose(1,2,0),ex.numpy().transpose(1,2,0), attns.numpy().transpose(1,2,0)), axis=0))
    #plt.xticks(np.linspace(18,324,10), classes)
    #plt.xticks(fontsize=20) 
    plt.yticks([])
    plt.title('CIFAR10 Epoch:{:02d}, Train:{:.3f}, Test:{:.3f}'.format(epoch, avg_acc, avg_acc_val), fontsize=20)
    display.display(plt.gcf())
    fig, ax = plt.subplots(1,2, figsize=(20,8))
    ax[0].set_title('Crossentropy')
    ax[0].plot(losses, label='Train')
    ax[0].plot(v_losses, label='CIFAR10 Test')
    ax[0].legend()
    ax[1].set_title('Accuracy')
    ax[1].plot(acces, label='Train')
    ax[1].plot(v_acces, label='CIFAR10 Test')
    ax[1].legend()
    display.display(plt.gcf())

In [None]:
epoch_loss = 0.0
acc = 0.0
num_seen = 0

for i, (x, _label) in enumerate(test_iter):
        x = x.cuda()
        _label = _label.cuda()
        label = F.one_hot(_label).float()
        seg_out = model(x)
        attn = attention(seg_out)
        logit = torch.log(torch.exp(seg_out*0.5).mean((-2,-1)))*2
        loss = criterion(logit, label)
        epoch_loss += loss.item()
        acc += (logit.argmax(-1)==_label).sum()
        num_seen += label.size(0)
print(100*(acc.float()/num_seen))

In [None]:
images, out = next(iter(train_iter))
img = images[:2]
img = img * 0.2023 + 0.48

In [None]:
preds = model(img)
attn = attention(preds)
attn = torch.cat((attn, attn, attn), dim=1)
attn[attn < 0.3] = 0.0
attn[attn >= 0.3] = 1.0

In [None]:
img2 = img.cuda() * attn

In [None]:
mixed = 0.8 * img[0].cuda() + 0.2* img2[1]

In [None]:
plt.imshow(mixed.detach().cpu().numpy().transpose(1,2,0))

In [None]:
plt.imshow(img2[0].detach().cpu().numpy().transpose(1,2,0))

In [None]:
plt.imshow(img[1].detach().cpu().numpy().transpose(1,2,0))

In [None]:
plt.imshow(img2[1].detach().cpu().numpy().transpose(1,2,0))

In [None]:
plt.imshow(attn[0].detach().cpu().numpy().transpose(1,2,0).squeeze())

In [None]:
plt.imshow(attn[1].detach().cpu().numpy().transpose(1,2,0).squeeze())

In [None]:
x, y
attn (y)

attn[attn <0.3] =  1
attn[attn >= 0.3] = 0 

attn = attn.bool() 

y[attn] =  0 

mixed = lmbda *x + (1-lmbda)*y[attn]  

In [None]:
for i, (x, _label) in enumerate(train_pbar):
        x, label = next(iter(train_pbar))
        x = x.cuda()
        _label = _label.cuda()
        label = F.one_hot(_label).float()
        seg_out = model(x)
        attns = attention(seg_out)
        
        attns = attns[attns > 0.3] == 1
        attns = attns[attns > 0.3] == 1

In [None]:
make_grid(attn, normalize=False, nrow=25)

In [None]:
plt.imshow(x[66].detach().cpu().numpy().transpose(1,2,0)*0.2023 + 0.48)
plt.show()

In [None]:
plt.imshow(attns[66].detach().cpu().numpy().transpose(1,2,0).squeeze())

In [None]:
print(torch.max(attns))

In [None]:
print(torch.min(attns))

# Segmentation using ResNet50

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.models import resnet50
from torchvision.datasets import CIFAR10
from tqdm import tqdm_notebook as tqdm
from torchvision.utils import save_image, make_grid
from matplotlib import pyplot as plt
from matplotlib.colors import hsv_to_rgb
from matplotlib.image import BboxImage
from matplotlib.transforms import Bbox, TransformedBbox
import numpy as np
from IPython import display
import requests
from io import BytesIO
from PIL import Image
from PIL import Image, ImageSequence
from IPython.display import HTML
import warnings
from matplotlib import rc
import gc
import matplotlib
import os
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
gc.enable()
plt.ioff()

In [2]:
torch.set_default_tensor_type('torch.cuda.HalfTensor')

In [3]:
torch.cuda.is_available() 

True

In [4]:
num_classes = 10
resnet = resnet50(pretrained=True)
resnet.conv1 = nn.Conv2d(3,64,3,stride=1,padding=1)
resnet_ = list(resnet.children())[:-2]
resnet_[3] = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
classifier = nn.Conv2d(2048, num_classes,1)
torch.nn.init.kaiming_normal_(classifier.weight)
resnet_.append(classifier)
resnet_.append(nn.Upsample(size=32, mode='bilinear', align_corners=False))
tiny_resnet = nn.Sequential(*resnet_)

In [5]:
def attention(x):
    return torch.sigmoid(torch.logsumexp(x,1, keepdim=True))

In [6]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = CIFAR10(root='.', train=True, download=True, transform=transform_train)
train_iter = DataLoader(trainset, batch_size=256, shuffle=True, drop_last=True)

testset = CIFAR10(root='.', train=False, download=True, transform=transform_test)
test_iter = DataLoader(testset, batch_size=64, shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [7]:
mpath = "../data/segment_resnet50.pth"

model = nn.DataParallel(tiny_resnet).cuda()
criterion = nn.CrossEntropyLoss() 
optimizer = torch.optim.SGD(model.parameters(), lr = 0.05, momentum=0.9, weight_decay=1e-4)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,78,eta_min=0.001)

In [8]:
num_epochs = 15

# assert(os.path.isfile(mpath))
if(os.path.isfile(mpath)): 
  saved_state = torch.load(mpath)
  s_epoch = int(saved_state['epoch'])
  model.load_state_dict(saved_state['state'])
  optimizer.load_state_dict(saved_state['optim'])
  
else: 
  s_epoch = 0

In [9]:
s_epoch

11

In [10]:
!nvidia-smi

Mon Mar  8 22:56:53 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    39W / 300W |   1227MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [12]:
losses = []
acces = []
v_losses = []
v_acces = []
for epoch in tqdm(range(s_epoch+1, num_epochs)):
    epoch_loss = 0.0
    acc = 0.0
    var = 0.0
    model.train()
    train_pbar = train_iter
    for i, (x, label) in enumerate(train_pbar):
        
        x = x.type(torch.HalfTensor).cuda()
        label = label.cuda()
        seg_out = model(x)
        
        attn = attention(seg_out)
        # Smooth Max Aggregation
        logit = torch.log(torch.exp(seg_out*0.5).mean((-2,-1)))*2
        loss = criterion(logit, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        epoch_loss += loss.item()
        acc += (logit.argmax(-1)==label).sum()
        #train_pbar.set_description('Accuracy: {:.3f}%'.format(100*(logit.argmax(-1)==_label).float().mean()))
        
    avg_loss = epoch_loss / (i + 1)
    losses.append(avg_loss)
    avg_acc = acc.cpu().detach().numpy() / (len(trainset))
    acces.append(avg_acc)
    model.eval()
    epoch_loss = 0.0
    acc = 0.0
    num_seen = 0

    state = {
        "state": model.state_dict(),
        "epoch": epoch, 
        "optim": optimizer.state_dict()
    }
    torch.save(state, mpath)
    
    test_pbar = tqdm(test_iter)
    with torch.no_grad(): 
        for i, (x, label) in enumerate(test_pbar):
            x = x.type(torch.HalfTensor).cuda()
            label = label.cuda()
            seg_out = model(x)
            attn = attention(seg_out)
            logit = torch.log(torch.exp(seg_out*0.5).mean((-2,-1)))*2
            if label.shape != logit.shape:
                continue 
            loss = criterion(logit, label)
            epoch_loss += loss.item()
            acc += (logit.argmax(-1)==label).sum()
            num_seen += label.size(0)
            test_pbar.set_description('Accuracy: {:.3f}%'.format(100*(acc.float()/num_seen)))

    avg_loss_val = epoch_loss / (i + 1)
    v_losses.append(avg_loss_val)
    avg_acc_val = acc.cpu().detach().numpy() / (len(testset))
    v_acces.append(avg_acc_val)
    plt.close('all')

    conf = torch.max(nn.functional.softmax(seg_out, dim=1), dim=1)[0]
    hue = (torch.argmax(seg_out, dim=1).float() + 0.5)/10
    x -= x.min()
    x /= x.max()
    gs_im = x.mean(1)
    gs_mean = gs_im.mean()
    gs_min = gs_im.min()
    gs_max = torch.max((gs_im-gs_min))
    gs_im = (gs_im - gs_min)/gs_max
    hsv_im = torch.stack((hue.float(), attn.squeeze().float(), gs_im.float()), -1)
    im = hsv_to_rgb(hsv_im.cpu().detach().numpy())
    ex = make_grid(torch.tensor(im).permute(0,3,1,2), normalize=True, nrow=25)
    attns = make_grid(attn, normalize=False, nrow=25)
    attns = attns.cpu().detach()
    inputs = make_grid(x, normalize=True, nrow=25).cpu().detach()
    display.clear_output(wait=True)
    plt.figure(figsize=(20,8))
    plt.imshow(np.concatenate((inputs.cpu().numpy().transpose(1,2,0),ex.cpu().numpy().transpose(1,2,0), attns.cpu().numpy().transpose(1,2,0)), axis=0))
    #plt.xticks(np.linspace(18,324,10), classes)
    #plt.xticks(fontsize=20) 
    plt.yticks([])
    plt.title('CIFAR10 Epoch:{:02d}, Train:{:.3f}, Test:{:.3f}'.format(epoch, avg_acc, avg_acc_val), fontsize=20)
    display.display(plt.gcf())
    fig, ax = plt.subplots(1,2, figsize=(20,8))
    ax[0].set_title('Crossentropy')
    ax[0].plot(losses, label='Train')
    ax[0].plot(v_losses, label='CIFAR10 Test')
    ax[0].legend()
    ax[1].set_title('Accuracy')
    ax[1].plot(acces, label='Train')
    ax[1].plot(v_acces, label='CIFAR10 Test')
    ax[1].legend()
    display.display(plt.gcf())

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """


  0%|          | 0/3 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/157 [00:00<?, ?it/s]

AttributeError: 'float' object has no attribute 'cpu'