# 1. Start with a procedural implementation

In [2]:
import numpy as np
import cv2
import mtcnn.caffe_pb2 as pb

In [3]:
# Parse the serialized caffemodel file into a NetParameter message.
net = pb.NetParameter()
with open("mtcnn/det1.caffemodel", "rb") as f:
    # The file is read fully into memory and parsed in one call.
    net.ParseFromString(f.read())

In [20]:
# Index every layer of the parsed network by its name for direct lookup.
layer_mapper = dict((layer.name, layer) for layer in net.layer)

In [21]:
def conv2d_forward(x, kernel, bias, in_feature, out_feature, kernel_size, padding=0, stride=1):
    """Run a 2-D convolution as an im2col matrix product.

    Args:
        x: Input ``numpy.ndarray`` of shape (batch, channels, height, width).
        kernel: Flat weight sequence (e.g. a caffemodel blob's ``data``) laid
            out as out_feature x in_feature x kernel_size x kernel_size.
        bias: Flat sequence holding ``out_feature`` bias values.
        in_feature: Number of input channels the kernel was built for.
        out_feature: Number of output channels.
        kernel_size: Side length of the square kernel.
        padding: Zero padding applied to every spatial border.
        stride: Step between neighbouring kernel positions.

    Returns:
        A float64 array of shape (batch, out_feature, out_h, out_w).
    """
    ib, ic, ih, iw = x.shape
    oh = (ih + padding * 2 - kernel_size) // stride + 1
    ow = (iw + padding * 2 - kernel_size) // stride + 1
    window = kernel_size * kernel_size

    # Allocate first so an impossible (negative) output size fails right here.
    column = np.zeros((ib, window * in_feature, oh * ow))
    output = np.zeros((ib, out_feature, oh, ow))

    # These conversions do not depend on the batch index, so do them once.
    kcol = np.array(kernel).reshape(out_feature, -1)
    bias_col = np.array(bias).reshape(out_feature, 1, 1)

    if padding > 0:
        border = (padding, padding)
        padded = np.pad(x, ((0, 0), (0, 0), border, border))
    elif padding < 0:
        # A negative padding shifts the sampling grid inwards, i.e. crops.
        padded = x[:, :, -padding:ih + padding, -padding:iw + padding]
    else:
        padded = x

    # Row of the column matrix for channel c and kernel offset (ky, kx) is
    # c * window + ky * kernel_size + kx, matching the flattened kernel layout.
    channel_rows = np.arange(ic) * window
    for ky in range(kernel_size):
        for kx in range(kernel_size):
            # All input pixels that kernel element (ky, kx) touches, for every
            # output position, batch item and channel at once.
            patch = padded[:, :, ky:ky + stride * oh:stride, kx:kx + stride * ow:stride]
            rows = channel_rows + (ky * kernel_size + kx)
            column[:, rows, :] = patch.reshape(ib, ic, oh * ow)

    for b in range(ib):
        output[b] = (kcol @ column[b]).reshape(out_feature, oh, ow) + bias_col
    return output

In [27]:
# Fetch the conv1 layer and inspect its two blobs; the kernel blob is laid
# out as output x input x height x width.
conv1 = layer_mapper["conv1"]
kernel_weight, bias_weight = conv1.blobs[0], conv1.blobs[1]
kernel_weight.shape, bias_weight.shape

(dim: 10
 dim: 3
 dim: 3
 dim: 3,
 dim: 10)

In [26]:
# cv2.imread reports an unreadable or missing file by returning None instead
# of raising, so fail here with a clear message rather than on .transpose.
image = cv2.imread("my.jpg")
if image is None:
    raise FileNotFoundError("could not read image file: my.jpg")
# Move the channel axis to the front and prepend a batch axis.
image = image.transpose(2, 0, 1)[None]
image.shape

(1, 3, 12, 12)

In [38]:
# First convolution: 3 input channels -> 10 output channels, 3x3 kernel.
conv1_result = conv2d_forward(
    image, kernel_weight.data, bias_weight.data,
    in_feature=3, out_feature=10, kernel_size=3, padding=0, stride=1,
)

In [49]:
def prelu(x, weigth):
    """Apply a per-channel PReLU and return the result as a new array.

    Negative entries of channel ``c`` (axis 1 of ``x``) are multiplied by
    ``weigth[c]``; non-negative entries pass through unchanged. The input
    array itself is not modified.
    """
    activated = x.copy()
    for channel_index in range(activated.shape[1]):
        # Basic slicing yields a view, so the in-place scaling lands in `activated`.
        plane = activated[:, channel_index, :, :]
        plane[plane < 0] *= weigth[channel_index]
    return activated

In [52]:
# Activation after conv1, using the slopes stored in the PReLU1 layer.
prelu1_weight = layer_mapper["PReLU1"].blobs[0].data
prelu1_result = prelu(x=conv1_result, weigth=prelu1_weight)

In [63]:
def max_pooling2d(x, kernel_size, stride):
    """Max-pool a (batch, channels, height, width) array in ceil mode.

    The output size is rounded up, so the last window along an axis may hang
    over the input border; only the pixels inside the input take part in the
    maximum. A trailing window that would start beyond the input is dropped,
    which keeps ``-inf`` placeholders out of the result when ``stride`` is
    larger than ``kernel_size``.

    Args:
        x: Input ``numpy.ndarray`` of shape (batch, channels, height, width).
        kernel_size: Side length of the square pooling window.
        stride: Step between neighbouring windows.

    Returns:
        A float64 array of shape (batch, channels, out_h, out_w).
    """
    ib, ic, ih, iw = x.shape

    def pooled_size(size):
        count = int(np.ceil((size - kernel_size) / stride) + 1)
        # Ceil rounding can add a window whose first pixel is already outside
        # the input; such a window is empty and must not produce an output.
        if count > 0 and (count - 1) * stride >= size:
            count -= 1
        return count

    output_height = pooled_size(ih)
    output_width = pooled_size(iw)
    output = np.zeros((ib, ic, output_height, output_width))
    minvalue = float("-inf")
    values = np.asarray(x, dtype=float)

    for oy in range(output_height):
        top = oy * stride
        for ox in range(output_width):
            left = ox * stride
            # Slicing clips at the border, so overhanging windows just shrink.
            window = values[:, :, top:top + kernel_size, left:left + kernel_size]
            # fmax skips NaN entries, so a NaN never wins the comparison.
            output[:, :, oy, ox] = np.fmax.reduce(window, axis=(2, 3), initial=minvalue)
    return output

In [66]:
# 2x2 max pooling with stride 2 on the first activation map.
pool1_result = max_pooling2d(prelu1_result, kernel_size=2, stride=2)
pool1_result.shape

(1, 10, 5, 5)

In [71]:
# Second convolution: 10 -> 16 channels, 3x3 kernel.
conv2 = layer_mapper["conv2"]
conv2_result = conv2d_forward(
    pool1_result, conv2.blobs[0].data, conv2.blobs[1].data,
    in_feature=10, out_feature=16, kernel_size=3, padding=0, stride=1,
)
conv2_result.shape

(1, 16, 3, 3)

In [72]:
# Activation after conv2, using the slopes stored in the PReLU2 layer.
prelu2_weight = layer_mapper["PReLU2"].blobs[0].data
prelu2_result = prelu(x=conv2_result, weigth=prelu2_weight)

In [73]:
# Third convolution: 16 -> 32 channels, 3x3 kernel.
conv3 = layer_mapper["conv3"]
conv3_result = conv2d_forward(
    prelu2_result, conv3.blobs[0].data, conv3.blobs[1].data,
    in_feature=16, out_feature=32, kernel_size=3, padding=0, stride=1,
)
conv3_result.shape

(1, 32, 1, 1)

In [76]:
# Activation after conv3, using the slopes stored in the PReLU3 layer.
prelu3_weight = layer_mapper["PReLU3"].blobs[0].data
prelu3_result = prelu(x=conv3_result, weigth=prelu3_weight)

In [77]:
# Output head conv4-1: 1x1 convolution producing 2 channels.
conv4 = layer_mapper["conv4-1"]
conv4_result = conv2d_forward(
    prelu3_result, conv4.blobs[0].data, conv4.blobs[1].data,
    in_feature=32, out_feature=2, kernel_size=1, padding=0, stride=1,
)
conv4_result.shape, conv4_result

((1, 2, 1, 1),
 array([[[[26.32538968]],
 
         [[50.0453394 ]]]]))

In [78]:
# Output head conv4-2: 1x1 convolution producing 4 channels.
conv42 = layer_mapper["conv4-2"]
conv42_result = conv2d_forward(
    prelu3_result, conv42.blobs[0].data, conv42.blobs[1].data,
    in_feature=32, out_feature=4, kernel_size=1, padding=0, stride=1,
)
conv42_result.shape, conv42_result

((1, 4, 1, 1),
 array([[[[-0.95326938]],
 
         [[ 4.97261597]],
 
         [[ 4.53516063]],
 
         [[ 8.28356677]]]]))

In [79]:
# Object wrapper around conv2d_forward(x, kernel, bias, in_feature,
# out_feature, kernel_size, padding, stride).
class Conv2d:
    """Convolution layer that stores its weights and hyper-parameters."""

    def __init__(self, kernel, bias, in_feature, out_feature, kernel_size, padding, stride):
        self.kernel, self.bias = kernel, bias
        self.in_feature, self.out_feature = in_feature, out_feature
        self.kernel_size = kernel_size
        self.padding, self.stride = padding, stride

    def forward(self, x):
        """Convolve ``x`` using the stored kernel, bias and settings."""
        return conv2d_forward(
            x,
            kernel=self.kernel,
            bias=self.bias,
            in_feature=self.in_feature,
            out_feature=self.out_feature,
            kernel_size=self.kernel_size,
            padding=self.padding,
            stride=self.stride,
        )
    
# Object wrapper around prelu(x, weigth).
class PReLU:
    """PReLU activation that stores its per-channel slopes."""

    def __init__(self, weight):
        self.weight = weight

    def forward(self, x):
        """Apply ``prelu`` to ``x`` with the stored slopes."""
        slopes = self.weight
        return prelu(x, slopes)
    
# Object wrapper around max_pooling2d(x, kernel_size, stride).
class MaxPooling2d:
    """Max-pooling layer that stores its window size and stride."""

    def __init__(self, kernel_size, stride):
        self.kernel_size, self.stride = kernel_size, stride

    def forward(self, x):
        """Pool ``x`` with the stored window size and stride."""
        return max_pooling2d(x, kernel_size=self.kernel_size, stride=self.stride)

In [80]:
# Build one layer object per network layer, each taking its weights from
# the caffemodel layer of the same name.
conv1_weight = layer_mapper["conv1"]
conv1 = Conv2d(
    conv1_weight.blobs[0].data, conv1_weight.blobs[1].data,
    in_feature=3, out_feature=10, kernel_size=3, padding=0, stride=1,
)

prelu1_weight = layer_mapper["PReLU1"]
prelu1 = PReLU(weight=prelu1_weight.blobs[0].data)

pool1 = MaxPooling2d(kernel_size=2, stride=2)

conv2_weight = layer_mapper["conv2"]
conv2 = Conv2d(
    conv2_weight.blobs[0].data, conv2_weight.blobs[1].data,
    in_feature=10, out_feature=16, kernel_size=3, padding=0, stride=1,
)

prelu2_weight = layer_mapper["PReLU2"]
prelu2 = PReLU(weight=prelu2_weight.blobs[0].data)

conv3_weight = layer_mapper["conv3"]
conv3 = Conv2d(
    conv3_weight.blobs[0].data, conv3_weight.blobs[1].data,
    in_feature=16, out_feature=32, kernel_size=3, padding=0, stride=1,
)

prelu3_weight = layer_mapper["PReLU3"]
prelu3 = PReLU(weight=prelu3_weight.blobs[0].data)

# The two output heads both consume the 32-channel feature map.
conv41_weight = layer_mapper["conv4-1"]
conv41 = Conv2d(
    conv41_weight.blobs[0].data, conv41_weight.blobs[1].data,
    in_feature=32, out_feature=2, kernel_size=1, padding=0, stride=1,
)

conv42_weight = layer_mapper["conv4-2"]
conv42 = Conv2d(
    conv42_weight.blobs[0].data, conv42_weight.blobs[1].data,
    in_feature=32, out_feature=4, kernel_size=1, padding=0, stride=1,
)

In [86]:
# Run the network layer by layer, feeding each result into the next layer.
conv1_result = conv1.forward(image)
prelu1_result = prelu1.forward(conv1_result)
pool1_result = pool1.forward(prelu1_result)
conv2_result = conv2.forward(pool1_result)
prelu2_result = prelu2.forward(conv2_result)
conv3_result = conv3.forward(prelu2_result)
prelu3_result = prelu3.forward(conv3_result)
# Both output heads read the same final activation.
conv41.forward(prelu3_result), conv42.forward(prelu3_result)

(array([[[[26.32538968]],
 
         [[50.0453394 ]]]]),
 array([[[[-0.95326938]],
 
         [[ 4.97261597]],
 
         [[ 4.53516063]],
 
         [[ 8.28356677]]]]))

In [87]:
class PNet:
    """P-Net assembled from the layers of a parsed caffemodel.

    A shared stack of convolution, PReLU and pooling layers feeds two 1x1
    convolution heads, ``conv41`` (2 channels) and ``conv42`` (4 channels).
    """

    def __init__(self, layer_mapper):
        """Build every layer from ``layer_mapper`` (layer name -> caffe layer)."""

        def conv(name, in_feature, out_feature, kernel_size):
            # Unpadded, stride-1 convolution with the weights of layer `name`.
            caffe_layer = layer_mapper[name]
            return Conv2d(
                kernel=caffe_layer.blobs[0].data,
                bias=caffe_layer.blobs[1].data,
                in_feature=in_feature,
                out_feature=out_feature,
                kernel_size=kernel_size,
                padding=0,
                stride=1,
            )

        def activation(name):
            # PReLU with the slopes of layer `name`.
            return PReLU(layer_mapper[name].blobs[0].data)

        self.layers = [
            conv("conv1", 3, 10, 3),
            activation("PReLU1"),
            MaxPooling2d(2, 2),
            conv("conv2", 10, 16, 3),
            activation("PReLU2"),
            conv("conv3", 16, 32, 3),
            activation("PReLU3"),
        ]
        self.conv41 = conv("conv4-1", 32, 2, 1)
        self.conv42 = conv("conv4-2", 32, 4, 1)

    def forward(self, image):
        """Return the ``(conv41, conv42)`` head outputs for ``image``."""
        features = image
        for layer in self.layers:
            features = layer.forward(features)
        return self.conv41.forward(features), self.conv42.forward(features)

In [88]:
# Build the network from the parsed layers and run it on the loaded image.
pnet = PNet(layer_mapper)
pnet.forward(image)

(array([[[[26.32538968]],
 
         [[50.0453394 ]]]]),
 array([[[[-0.95326938]],
 
         [[ 4.97261597]],
 
         [[ 4.53516063]],
 
         [[ 8.28356677]]]]))

In [99]:
class Module:
    """Base class that makes a layer callable: ``layer(x)`` runs ``forward``."""

    def __call__(self, *args, **kwargs):
        # Pass keyword arguments through as well, so calling a layer accepts
        # exactly what its forward() accepts.
        return self.forward(*args, **kwargs)

    def forward(self, *args, **kwargs):
        """Compute the layer output; every subclass must override this.

        Raises:
            NotImplementedError: Always, when a subclass does not override it.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement forward()")

# Object wrapper around conv2d_forward(x, kernel, bias, in_feature,
# out_feature, kernel_size, padding=0, stride=1).
class Conv2d(Module):
    """Convolution layer whose weights are assigned after construction.

    ``kernel`` and ``bias`` start as ``None`` and must be set (for example by
    a weight loader) before ``forward`` is called.
    """

    def __init__(self, in_feature, out_feature, kernel_size, padding=0, stride=1):
        self.in_feature = in_feature
        self.out_feature = out_feature
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        # Declared up front so an unloaded layer is detectable.
        self.kernel = None
        self.bias = None

    def forward(self, x):
        """Convolve ``x`` with the loaded kernel and bias.

        Raises:
            RuntimeError: If ``kernel`` or ``bias`` has not been assigned yet.
        """
        if self.kernel is None or self.bias is None:
            raise RuntimeError("Conv2d weights are not loaded: set kernel and bias before forward()")
        return conv2d_forward(x, self.kernel, self.bias, self.in_feature, self.out_feature, self.kernel_size, self.padding, self.stride)
    
# Object wrapper around prelu(x, weigth).
class PReLU(Module):
    """PReLU activation whose slopes are assigned after construction.

    ``weight`` starts as ``None`` and must be set (for example by a weight
    loader) before ``forward`` is called.
    """

    def __init__(self):
        # Declared up front so an unloaded layer is detectable.
        self.weight = None

    def forward(self, x):
        """Apply ``prelu`` to ``x`` with the loaded slopes.

        Raises:
            RuntimeError: If ``weight`` has not been assigned yet.
        """
        if self.weight is None:
            raise RuntimeError("PReLU weight is not loaded: set weight before forward()")
        return prelu(x, self.weight)
    
# Object wrapper around max_pooling2d(x, kernel_size, stride).
class MaxPooling2d(Module):
    """Max-pooling layer that stores its window size and stride."""

    def __init__(self, kernel_size, stride):
        self.kernel_size, self.stride = kernel_size, stride

    def forward(self, x):
        """Pool ``x`` with the stored window size and stride."""
        return max_pooling2d(x, kernel_size=self.kernel_size, stride=self.stride)

class PNet(Module):
    """P-Net whose layer weights are filled in from a parsed caffemodel.

    A shared stack of convolution, PReLU and pooling layers feeds two 1x1
    convolution heads, ``conv41`` (2 channels) and ``conv42`` (4 channels).
    """

    def __init__(self, layer_mapper):
        """Create the layers, then load weights from ``layer_mapper``.

        Args:
            layer_mapper: Mapping from caffe layer name to the caffe layer.
        """
        self.layers = [
            Conv2d(3, 10, 3),
            PReLU(),
            MaxPooling2d(2, 2),
            Conv2d(10, 16, 3),
            PReLU(),
            Conv2d(16, 32, 3),
            PReLU(),
        ]
        self.conv41 = Conv2d(32, 2, 1)
        self.conv42 = Conv2d(32, 4, 1)

        # Caffe layer names, in the same order as the layers handed to load_param.
        layer_names = ["conv1", "PReLU1", "pool1", "conv2", "PReLU2", "conv3", "PReLU3", "conv4-1", "conv4-2"]
        self.load_param(self.layers + [self.conv41, self.conv42], layer_names, layer_mapper)

    def load_param(self, layers, names, layer_mapper):
        """Assign caffemodel weights to ``layers``, pairing them with ``names``.

        Args:
            layers: Layer objects to fill in.
            names: Caffe layer names; ``names[i]`` belongs to ``layers[i]``.
            layer_mapper: Mapping from caffe layer name to the caffe layer.

        Raises:
            ValueError: If ``layers`` and ``names`` differ in length, which
                would otherwise leave trailing layers silently unloaded.
            KeyError: If a layer that needs weights is missing from
                ``layer_mapper``.
        """
        layers = list(layers)
        names = list(names)
        if len(layers) != len(names):
            raise ValueError(f"got {len(layers)} layers but {len(names)} layer names")

        for name, layer in zip(names, layers):
            # Only layers that carry weights are looked up, so parameter-free
            # layers (pooling) need no entry in layer_mapper.
            if isinstance(layer, Conv2d):
                blobs = layer_mapper[name].blobs
                layer.kernel = blobs[0].data
                layer.bias = blobs[1].data
            elif isinstance(layer, PReLU):
                layer.weight = layer_mapper[name].blobs[0].data

    def forward(self, image):
        """Return the ``(conv41, conv42)`` head outputs for ``image``."""
        x = image
        for layer in self.layers:
            x = layer(x)

        return self.conv41(x), self.conv42(x)

In [100]:
# Rebuild the network with the Module-based classes and call it directly.
pnet = PNet(layer_mapper)
pnet(image)

(array([[[[26.32538968]],
 
         [[50.0453394 ]]]]),
 array([[[[-0.95326938]],
 
         [[ 4.97261597]],
 
         [[ 4.53516063]],
 
         [[ 8.28356677]]]]))