In [48]:
import torch
import torch.utils.data as data
import numpy as np
import torch.nn as nn
import torch.optim as optim
from google.colab import drive
import torch.nn.functional as F
import torchvision.transforms.functional as TF

# TODO 11 classes
# Model: UNET
# Running properly

In [49]:
class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)

        
class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=False):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits

In [50]:
class DataSet(data.Dataset):
  
  def __init__(self, val:bool):
    self.val = val
    if not val:
      self.images = np.load("/content/gdrive/My Drive/visual_recognition_data/train_X.npy")
      self.images = self.images.reshape([55000, 64, 64, 3])
      self.images = np.transpose(self.images, (0, 3, 1, 2))
      self.masks = np.load("/content/gdrive/My Drive/visual_recognition_data/train_seg.npy")
      self.masks = self.masks.reshape([55000, 64, 64])
    else:
      self.images = np.load("/content/gdrive/My Drive/visual_recognition_data/valid_X.npy")
      self.images = self.images.reshape([5000, 64, 64, 3])
      self.images = np.transpose(self.images,(0, 3, 1, 2))
      self.masks = np.load("/content/gdrive/My Drive/visual_recognition_data/valid_seg.npy")
      self.masks = self.masks.reshape([5000, 64, 64])
  
  def __len__(self):
    if self.val:
      return 5000
    return 55000
  
  def __getitem__(self, idx):
    img = self.images[idx]
    mask = self.masks[idx]
    mask = torch.from_numpy(mask).to(torch.int64)
    decomposed_mask = torch.nn.functional.one_hot(mask, 11).permute(2, 0 ,1)
    return (
      torch.as_tensor(img).float(),
      decomposed_mask.float()
    )

In [51]:
def train(epoch,model,opt,train_loader):
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.train()
    for i,(x,y) in enumerate(train_loader):
        x=x.to(device) #Nx1x64x64
        y=y.to(device) #Nx4096
        logit=model(x) #Nx4096
        criterion= nn.CrossEntropyLoss().to(device)
        loss=criterion(logit,y.float())
        opt.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm(model.parameters(), 0.4)
        opt.step()
        torch.cuda.empty_cache()
        if i%100:
            print("Epoch: [%d] [%d], loss %.5f"%(epoch,i,loss.item()))

In [52]:
def test(epoch,model,test_loader):
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.eval()
    loss_total=0
    total_acc1 = 0
    total_acc2 = 0
    with torch.no_grad():
      for i, (x, y) in enumerate(test_loader):
          x = x.to(device)
          y = y.to(device)
          criterion= nn.CrossEntropyLoss().to(device)
          logit = model(x)
          loss=criterion(logit,y.float())
          loss_total += loss
          pred = logit.argmax(1).view(-1).long().cpu().numpy()
          target = y.argmax(1).view(-1).cpu().numpy()
          acc1 = compute_seg1(pred, target)
          acc2 = compute_seg2(pred, target)
          total_acc1 += acc1
          total_acc2 += acc2
          print(i)
    print("loss:",loss_total/len(test_loader))
    print("acc1: ", total_acc1/len(test_loader))
    print("acc2: ", total_acc2/len(test_loader))
    
    save_model(model, "/content/gdrive/My Drive/visual_recognition_data/epcoch_%d_box%.4f.pth"%(epoch,loss_total/len(test_loader)))

def compute_seg1(pred, gt):
    # pred value should be from 0 to 10, where 10 is the background.
    # accuracy is calculated for only non background pixels.
    assert pred.shape == gt.shape
    mask = gt != 10
    return (pred[mask] == gt[mask]).astype(int).sum() / gt[mask].size
def compute_seg2(pred, gt):
    # pred value should be from 0 to 10, where 10 is the background.
    # accuracy is calculated for only non background pixels.
    assert pred.shape == gt.shape
    return (pred == gt).astype(int).sum() / gt.size

In [53]:
def save_model(model, filename):
    torch.save(model.state_dict(), filename)

In [54]:
def update_learning_rate(opt,decay_rate=0.9,min_value=1e-4):
    for pg in opt.param_groups:
        pg["lr"]=max(pg["lr"]*decay_rate,min_value)
    print("learning rate",pg["lr"])

In [55]:
def main():
  #############################
  #train dataloader
  drive.mount('/content/gdrive/', force_remount=True)
  train_dataset=DataSet(0)
  train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100, shuffle=True,
                                              num_workers=1)
  #val dataloader
  val_dataset=DataSet(1)
  val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=100, shuffle=True,
                                              num_workers=1)
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
  print(device)
  model = UNet(3,11).to(device)
  optimizer=optim.Adam(list(filter(lambda p: p.requires_grad, model.parameters())),lr=0.001,weight_decay=1e-4)
  for epoch in range(100):
      test(epoch,model,val_loader)
      train(epoch,model,optimizer,train_loader)
      update_learning_rate(optimizer,decay_rate=0.8,min_value=1e-4)

In [56]:
if __name__ == '__main__':  
    main()

Mounted at /content/gdrive/
cuda
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
loss: tensor(3.5832, device='cuda:0')
acc1:  0.06267083158428445
acc2:  0.003647802734374999


  if sys.path[0] == '':


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch: [9] [197], loss 0.00273
Epoch: [9] [198], loss 0.00285
Epoch: [9] [199], loss 0.00328
Epoch: [9] [201], loss 0.00267
Epoch: [9] [202], loss 0.00407
Epoch: [9] [203], loss 0.00269
Epoch: [9] [204], loss 0.00276
Epoch: [9] [205], loss 0.00274
Epoch: [9] [206], loss 0.00302
Epoch: [9] [207], loss 0.00271
Epoch: [9] [208], loss 0.00370
Epoch: [9] [209], loss 0.00237
Epoch: [9] [210], loss 0.00268
Epoch: [9] [211], loss 0.00225
Epoch: [9] [212], loss 0.00265
Epoch: [9] [213], loss 0.00285
Epoch: [9] [214], loss 0.00290
Epoch: [9] [215], loss 0.00286
Epoch: [9] [216], loss 0.00308
Epoch: [9] [217], loss 0.00341
Epoch: [9] [218], loss 0.00302
Epoch: [9] [219], loss 0.00380
Epoch: [9] [220], loss 0.00291
Epoch: [9] [221], loss 0.00221
Epoch: [9] [222], loss 0.00292
Epoch: [9] [223], loss 0.00236
Epoch: [9] [224], loss 0.00299
Epoch: [9] [225], loss 0.00313
Epoch: [9] [226], loss 0.00335
Epoch: [9] [227], loss 0.00306
Epoch

KeyboardInterrupt: ignored