In [1]:
import torch
import torch.utils.data as data
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import OrderedDict
from google.colab import drive
drive.mount('/content/gdrive/', force_remount=True)

Mounted at /content/gdrive/


Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>

In [2]:
def conv_batch(in_num, out_num, kernel_size=3, padding=1, stride=1):
    return nn.Sequential(
        nn.Conv2d(in_num, out_num, kernel_size=kernel_size, stride=stride, padding=padding, bias=False),
        nn.BatchNorm2d(out_num),
        nn.LeakyReLU())


# Residual block
class DarkResidualBlock(nn.Module):
    def __init__(self, in_channels):
        super(DarkResidualBlock, self).__init__()

        reduced_channels = int(in_channels/2)

        self.layer1 = conv_batch(in_channels, reduced_channels, kernel_size=1, padding=0)
        self.layer2 = conv_batch(reduced_channels, in_channels)

    def forward(self, x):
        residual = x

        out = self.layer1(x)
        out = self.layer2(out)
        out += residual
        return out


class Darknet53(nn.Module):
    def __init__(self, block, num_classes):
        super(Darknet53, self).__init__()

        self.num_classes = num_classes

        self.conv1 = conv_batch(3, 32)
        self.conv2 = conv_batch(32, 64, stride=2)
        self.residual_block1 = self.make_layer(block, in_channels=64, num_blocks=1)
        self.conv3 = conv_batch(64, 128, stride=2)
        self.residual_block2 = self.make_layer(block, in_channels=128, num_blocks=2)
        self.conv4 = conv_batch(128, 256, stride=2)
        self.residual_block3 = self.make_layer(block, in_channels=256, num_blocks=8)
        self.conv5 = conv_batch(256, 512, stride=2)
        self.residual_block4 = self.make_layer(block, in_channels=512, num_blocks=8)
        self.conv6 = conv_batch(512, 1024, stride=2)
        self.residual_block5 = self.make_layer(block, in_channels=1024, num_blocks=4)
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024, self.num_classes)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.residual_block1(out)
        out = self.conv3(out)
        out = self.residual_block2(out)
        out = self.conv4(out)
        out = self.residual_block3(out)
        out = self.conv5(out)
        out = self.residual_block4(out)
        out = self.conv6(out)
        out = self.residual_block5(out)
        out = self.global_avg_pool(out)
        out = out.view(-1, 1024)
        out = self.fc(out)

        return out

    def make_layer(self, block, in_channels, num_blocks):
        layers = []
        for i in range(0, num_blocks):
            layers.append(block(in_channels))
        return nn.Sequential(*layers)


def darknet53(num_classes):
    return Darknet53(DarkResidualBlock, num_classes)

In [3]:
class DataSet(data.Dataset):
    def __init__(self,x,y,bboxes):
        self.x=np.load(x)
        self.y=np.load(y)
        self.bboxes=np.load(bboxes)

    def __getitem__(self, index):
        tmp=self.x[index,:].reshape(64,64,3)
        img=torch.from_numpy(tmp.astype(np.float32)/255.).permute(2,0,1)
        label=torch.from_numpy(self.y[index,:].astype(np.int32)).long()
        bbox=torch.from_numpy(self.bboxes[index,:].astype(np.float32))
        return img,label,bbox

    def __len__(self):
        return self.x.shape[0]

def train(epoch,model,opt,train_loader):
    global device,criterion_classification,criterion_box
    model.train()
    for i,(x,y,box) in enumerate(train_loader):
        x=x.to(device) #Nx1x64x64
        y=y.to(device) #Nx2
        box=box.to(device) #Nx4
        logit=model(x)#Nx28
        # print(logit.shape,y.shape)
        loss_class=criterion_classification(logit[:,:20].contiguous().view(-1,10),y.contiguous().view(-1))
        # print(logit[:,20:].shape,box.shape)
        loss_box=criterion_box(logit[:,20:],box.view(-1,8))
        loss=loss_class+loss_box*0.05
        
        opt.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm(model.parameters(), 0.4)
        opt.step()
        if i%100:
            print("Epoch: [%d] [%d], loss class %.5f, loss box %.5f"%(epoch,i,loss_class.item(),
                                                                     loss_box.item()))

In [4]:
def test(epoch,model,test_loader):
    global device
    model.eval()
    correct=0
    loss_box_total=0
    for i, (x, y, box) in enumerate(test_loader):
        x = x.to(device)
        y = y.to(device)
        box = box.to(device)

        #logit Nx28
        logit = model(x)
        logit_class=logit[:,:20].contiguous().view(-1,10)
        logit_box=logit[:,20:]
        loss_box=criterion_box(logit[:,20:],box.view(-1,8)).item()
        loss_box_total+=loss_box
        pred_class=logit_class.argmax(1).view(-1).cpu().numpy()
        target_class=y.view(-1).cpu().numpy()
        correct+=np.sum(pred_class==target_class)
    acc=correct/len(test_loader)/2
    print("Test Acc:",acc)
    print("loss box:",loss_box_total/len(test_loader))
    
    save_model(model, "/content/gdrive/My Drive/visual_recognition_data/epcoch_%d_acc%.4f_box%.4f.pth"%(epoch,acc,loss_box_total/len(test_loader)))
    

In [5]:
def save_model(model, filename):
    torch.save(model.state_dict(), filename)

In [6]:
def update_learning_rate(opt,decay_rate=0.9,min_value=1e-4):
    for pg in opt.param_groups:
        pg["lr"]=max(pg["lr"]*decay_rate,min_value)
    print("learning rate",pg["lr"])

In [8]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
criterion_classification=nn.CrossEntropyLoss().to(device)
criterion_box= nn.MSELoss().to(device)
def main():
  #############################
  #train dataloader
  train_dataset=DataSet("/content/gdrive/My Drive/visual_recognition_data/train_X.npy",
                        "/content/gdrive/My Drive/visual_recognition_data/train_Y.npy",
                        "/content/gdrive/My Drive/visual_recognition_data/train_bboxes.npy",)
  train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100, shuffle=True,
                                              num_workers=1)
  #val dataloader
  val_dataset=DataSet("/content/gdrive/My Drive/visual_recognition_data/valid_X.npy",
                        "/content/gdrive/My Drive/visual_recognition_data/valid_Y.npy",
                        "/content/gdrive/My Drive/visual_recognition_data/valid_bboxes.npy",)
  val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=100, shuffle=True,
                                              num_workers=1)
  print(device)
  model=darknet53(28)
  model=model.to(device)
  optimizer=optim.Adam(list(filter(lambda p: p.requires_grad, model.parameters())),lr=0.001,weight_decay=1e-4)
  for epoch in range(100):
      test(epoch,model,val_loader)
      train(epoch,model,optimizer,train_loader)
      
      update_learning_rate(optimizer,decay_rate=0.9,min_value=1e-4)
main()

cuda
Test Acc: 8.03
loss box: 1326.035908203125




Epoch: [0] [1], loss class 2.82943, loss box 1187.39331
Epoch: [0] [2], loss class 3.67484, loss box 994.03961
Epoch: [0] [3], loss class 3.09586, loss box 844.57452
Epoch: [0] [4], loss class 2.94944, loss box 715.24738
Epoch: [0] [5], loss class 2.83321, loss box 669.78717
Epoch: [0] [6], loss class 2.55868, loss box 589.36658
Epoch: [0] [7], loss class 2.60587, loss box 522.02826
Epoch: [0] [8], loss class 2.49584, loss box 453.17172
Epoch: [0] [9], loss class 2.26828, loss box 384.32678
Epoch: [0] [10], loss class 2.28285, loss box 321.90479
Epoch: [0] [11], loss class 2.35551, loss box 276.79562
Epoch: [0] [12], loss class 2.31549, loss box 248.06058
Epoch: [0] [13], loss class 2.22820, loss box 216.86945
Epoch: [0] [14], loss class 2.18806, loss box 196.03340
Epoch: [0] [15], loss class 2.18608, loss box 171.67491
Epoch: [0] [16], loss class 2.29357, loss box 133.31393
Epoch: [0] [17], loss class 2.27672, loss box 136.23778
Epoch: [0] [18], loss class 2.31950, loss box 119.43322


KeyboardInterrupt: ignored