In [1]:
import torch.nn as nn
import numpy as np
import torch
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm_notebook

# Use the first CUDA device when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
class DarkNet(nn.Module):
    """Convolutional network mapping an image batch to a (B, 5, H', W', 25) tensor.

    Structure, as built below:
      * eight 3x3 convolutions (stride 1, padding 1, no bias), each followed
        by BatchNorm2d and LeakyReLU(0.1);
      * a 2x2 / stride-2 max-pool after each of the first five stages;
      * a stride-1 max-pool after the sixth stage;
      * a final 1x1 convolution (with bias) producing 125 channels.

    The 125 output channels are split into 5 groups of 25 values
    (presumably 5 anchor boxes x 25 predictions per box -- TODO confirm
    against the loss/decoding code).

    NOTE: the layers are registered in the order conv_k, batch_norm_k on
    purpose. ``WeightLoader.dfs`` pairs every bias-free convolution with the
    child module registered immediately after it, so this order must be kept.
    """

    def __init__(self):
        super(DarkNet, self).__init__()
        self.stride = 1
        self.pad = 1

        self.max_pool_kernel_size = 2
        self.max_pool_stride = 2

        # Activation function (shared by every conv stage; it has no parameters)
        self.activation = nn.LeakyReLU(0.1, inplace=True)
        # Maxpool functions
        # maxpool_1 halves each spatial dimension (kernel 2, stride 2).
        self.maxpool_1 = nn.MaxPool2d(self.max_pool_kernel_size, self.max_pool_stride)
        # maxpool_2: kernel 2, stride 1, padding 1, dilation 2 -> the output has
        # the same height/width as its input.
        # NOTE(review): with these settings each window samples offsets -1 and
        # +1 around a position rather than 0 and +1; confirm this matches the
        # reference network the pretrained weights come from.
        self.maxpool_2 = nn.MaxPool2d(self.max_pool_kernel_size, 1, 1, 2)

        self.conv_1 = nn.Conv2d(3, 16, 3, self.stride, self.pad, bias=False)
        self.batch_norm_1 = nn.BatchNorm2d(16)

        self.conv_2 = nn.Conv2d(16, 32, 3, self.stride, self.pad, bias=False)
        self.batch_norm_2 = nn.BatchNorm2d(32)

        self.conv_3 = nn.Conv2d(32, 64, 3, self.stride, self.pad, bias=False)
        self.batch_norm_3 = nn.BatchNorm2d(64)

        self.conv_4 = nn.Conv2d(64, 128, 3, self.stride, self.pad, bias=False)
        self.batch_norm_4 = nn.BatchNorm2d(128)

        self.conv_5 = nn.Conv2d(128, 256, 3, self.stride, self.pad, bias=False)
        self.batch_norm_5 = nn.BatchNorm2d(256)

        self.conv_6 = nn.Conv2d(256, 512, 3, self.stride, self.pad, bias=False)
        self.batch_norm_6 = nn.BatchNorm2d(512)

        self.conv_7 = nn.Conv2d(512, 1024, 3, self.stride, self.pad, bias=False)
        self.batch_norm_7 = nn.BatchNorm2d(1024)

        self.conv_8 = nn.Conv2d(1024, 1024, 3, self.stride, self.pad, bias=False)
        self.batch_norm_8 = nn.BatchNorm2d(1024)

        # 1x1 output convolution; unlike the stages above it keeps its bias
        # and is not followed by batch normalisation.
        self.conv_9 = nn.Conv2d(1024, 125, 1, self.stride)

    def forward(self, inp):
        """Run the network.

        Args:
            inp: float tensor of shape (B, 3, H, W).

        Returns:
            Contiguous tensor of shape (B, 5, H', W', 25), where H' and W' are
            H and W after five floor-halvings, and
            ``out[b, g, i, j, k]`` is channel ``g * 25 + k`` of the final
            convolution at spatial position ``(i, j)``.
        """
        x = self.conv_1(inp)
        x = self.batch_norm_1(x)
        x = self.activation(x)
        x = self.maxpool_1(x)

        x = self.conv_2(x)
        x = self.batch_norm_2(x)
        x = self.activation(x)
        x = self.maxpool_1(x)

        x = self.conv_3(x)
        x = self.batch_norm_3(x)
        x = self.activation(x)
        x = self.maxpool_1(x)

        x = self.conv_4(x)
        x = self.batch_norm_4(x)
        x = self.activation(x)
        x = self.maxpool_1(x)

        x = self.conv_5(x)
        x = self.batch_norm_5(x)
        x = self.activation(x)
        x = self.maxpool_1(x)

        x = self.conv_6(x)
        x = self.batch_norm_6(x)
        x = self.activation(x)
        x = self.maxpool_2(x)

        x = self.conv_7(x)
        x = self.batch_norm_7(x)
        x = self.activation(x)

        x = self.conv_8(x)
        x = self.batch_norm_8(x)
        x = self.activation(x)

        x = self.conv_9(x)

        batch, _, height, width = x.shape
        # The channel axis holds the 5 groups of 25 values, so it must be split
        # *before* the 25-axis is moved to the end. Reshaping (B, 125, H, W)
        # straight to (B, 5, H, W, 25) would only reinterpret memory and mix
        # values from neighbouring spatial positions into the last axis.
        x = x.reshape(batch, 5, 25, height, width).permute(0, 1, 3, 4, 2)
        return x.contiguous()

In [3]:
#net = DarkNet()
#net(torch.FloatTensor(1, 3, 416 ,416)).shape

torch.Size([1, 5, 13, 13, 25])

In [5]:
import numpy as np
import torch


class WeightLoader(object):
    """Copy values from a flat float32 weights file into a model's layers.

    The file is read as four int32 header values followed by a stream of
    float32 values. The stream is consumed strictly in order while walking the
    model's children, so the file layout must match the module registration
    order of the model.

    NOTE(review): the header is assumed to be exactly four int32 values
    (16 bytes). Presumably this is the older Darknet header; files written
    with a wider header would be mis-aligned -- confirm the file version.

    Attributes:
        start: index into ``buf`` of the next value that has not been consumed.
        buf: 1-D float32 numpy array holding every value after the header,
            or None before ``load`` has been called.
    """

    # Attributes a module must expose to be treated as the batch-norm partner
    # of a bias-free convolution.
    _BN_FIELDS = ("bias", "weight", "running_mean", "running_var")

    def __init__(self):
        super(WeightLoader, self).__init__()
        self.start = 0
        self.buf = None

    def _copy_next(self, target):
        """Fill ``target`` with the next ``target.numel()`` values and advance ``start``."""
        stop = self.start + target.numel()
        if stop > self.buf.size:
            # Same exception type the reshape below would raise on a short
            # slice, but with a message that names the real problem.
            raise RuntimeError(
                "weights buffer exhausted: need values [%d, %d) but only %d are available"
                % (self.start, stop, self.buf.size))
        values = torch.from_numpy(self.buf[self.start:stop])
        target.data.copy_(torch.reshape(values, target.size()))
        self.start = stop

    def load_conv_bn(self, conv_model, bn_model):
        """Load a bias-free convolution together with its batch-norm layer.

        Values are consumed in this order: batch-norm bias, batch-norm weight,
        running mean, running variance, then the convolution weight.
        """
        self._copy_next(bn_model.bias)
        self._copy_next(bn_model.weight)
        self._copy_next(bn_model.running_mean)
        self._copy_next(bn_model.running_var)
        self._copy_next(conv_model.weight)

    def load_conv(self, conv_model):
        """Load a convolution that has its own bias: bias first, then weight."""
        self._copy_next(conv_model.bias)
        self._copy_next(conv_model.weight)

    def dfs(self, m):
        """Walk the direct children of ``m`` in registration order and load them.

        ``Sequential`` containers are entered recursively. A ``Conv2d`` with a
        bias is loaded on its own; a ``Conv2d`` without a bias is loaded
        together with the child registered immediately after it, which must be
        a batch-norm-like module. Every other child is ignored.

        Raises:
            ValueError: if a bias-free convolution is not immediately followed
                by a module carrying batch-norm parameters and running stats.
        """
        children = list(m.children())
        for i, c in enumerate(children):
            if isinstance(c, torch.nn.Sequential):
                self.dfs(c)
            elif isinstance(c, torch.nn.Conv2d):
                if c.bias is not None:
                    self.load_conv(c)
                else:
                    partner = children[i + 1] if i + 1 < len(children) else None
                    if any(getattr(partner, field, None) is None for field in self._BN_FIELDS):
                        raise ValueError(
                            "bias-free Conv2d at child index %d is not followed by a "
                            "batch-norm layer" % i)
                    self.load_conv_bn(c, partner)

    def load(self, model, weights_file):
        """Read ``weights_file`` and copy its values into ``model`` in place.

        Args:
            model: module whose children are walked by ``dfs``.
            weights_file: path of the binary weights file.

        Raises:
            AssertionError: if the model did not consume exactly the number of
                float32 values stored in the file.
        """
        self.start = 0
        # The context manager closes the file even when reading fails.
        with open(weights_file, 'rb') as fp:
            # Read and discard the four int32 header values to position the
            # stream at the first float.
            np.fromfile(fp, count=4, dtype=np.int32)
            self.buf = np.fromfile(fp, dtype=np.float32)
        self.dfs(model)

        # make sure the loaded weight is right
        assert self.buf.size == self.start, (
            "weights file holds %d values but the model consumed %d"
            % (self.buf.size, self.start))

In [6]:
def load_weights(weights_file, model):
    """Fill ``model`` in place from ``weights_file`` using a fresh ``WeightLoader``.

    Args:
        weights_file: path of the binary weights file handed to ``WeightLoader.load``.
        model: module whose layers receive the loaded values.
    """
    WeightLoader().load(model, weights_file)

In [43]:
%history

import torch.nn as nn
import numpy as np
import torch
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm_notebook

# Use the first CUDA device when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class ReorgLayer(nn.Module):
    """Rearrange stride x stride spatial tiles of a 4-D tensor into channels.

    An input of shape (B, C, H, W) becomes (B, stride * stride * C,
    H / stride, W / stride). Output channel ``p * C + c`` holds, for input
    channel ``c``, the element at offset ``p = row * stride + col`` inside
    each tile.
    """

    def __init__(self, stride=2):
        super(ReorgLayer, self).__init__()
        self.stride = stride

    def forward(self, x):
        batch, channels, height, width = x.size()
        step = self.stride
        out_h = height // step
        out_w = width // step
        tile = step * step

        # (B, C, out_h, step, out_w, step) -> group the two in-tile offsets together
        tiles = x.view(batch, channels, out_h, step, out_w, step)
        tiles = tiles.transpose(3, 4).contiguous()

        # flatten tile positions and in-tile offsets, then put the offsets first
        tiles = tiles.view(batch, channels, out_h * out_w, tile)
        tiles = tiles.transpose(2, 3).contiguous()

        # move the in-tile offset axis in front of the channel axis
        tiles = tiles.view(batch, channels, tile, out_h, out_w)
        tiles = tiles.transpose(1, 2).contiguous()

        return tiles.view(batch, tile * channels, out_h, out_w)
class DarkNet(nn.Module):
    def __init__(self):
        super(DarkNet, self).__init__()
        self.stride = 1
        self.pad = 1
        
        self.max_pool_kernel_size = 2
        self.max_