In [1]:
import numpy as np
import time

In [2]:
# 初始化输入矩阵，标准化
X_ = np.arange(214*214*3).reshape(3,214,214)
X = (X_-np.mean(X_))/np.std(X_)

In [3]:
# 初始化kernel
# 第一维为输出通道数，第二维为输入通道数
# 三四维为卷积核的长宽
H = np.random.rand(64, 3, 3, 3)

In [4]:
# 第一种矩阵乘法，展平Kernel，处理输入矩阵
def func1(H, X):
    channel_in, height, width = X.shape[0:3]
    assert channel_in == H.shape[1], "输入通道数不匹配"
    channel_out = H.shape[0]
    assert H.shape[2] == H.shape[3], "卷积核不为方阵"
    kernel_size = H.shape[2]
    
    Yy = []
    # 以矩阵为单位进行处理        
    for i in range(channel_out):
        Y_ = np.zeros((height - kernel_size +1) * (width - kernel_size +1))
        for j in range(channel_in):
            # 处理输入矩阵
            X_ = np.zeros(((height - kernel_size +1) * (width - kernel_size +1), kernel_size**2))
            Xx = X[j]
            row_num = 0
            for m in range(height - kernel_size + 1):
                for n in range(width - kernel_size + 1):
                    # 将输入矩阵的一块子矩阵展平放到对应的列中
                    X_[row_num,:] = Xx[m:m+kernel_size, n:n+kernel_size].flatten()
                    row_num += 1
            
            # 将Kernel展平
            Hh = H[i][j]
            H_ = Hh.flatten()
            
            # 得到输入和卷积核一个通道上的结果
            Y_ += X_ @ H_  
        Yy.append(Y_.reshape(height - kernel_size +1,  width - kernel_size +1))
    
    Y = np.array(Yy)
    return Y

In [5]:
T1 = time.time()
print(func1(H, X).shape)
T1_ = time.time()
print("运行时间:%s ms" % ((T1_-T1)*1000))

(64, 212, 212)
运行时间:8740.14401435852 ms


In [6]:
# 第二种矩阵乘法，把Kernel变为稀疏矩阵，展平输入
def func2(H, X):
    channel_in, height, width = X.shape[0:3]
    assert channel_in == H.shape[1], "输入通道数不匹配"
    channel_out = H.shape[0]
    assert H.shape[2] == H.shape[3], "卷积核不为方阵"
    kernel_size = H.shape[2]
    
    Yy = []
    # 同样以矩阵为单位进行处理
    for i in range(channel_out):
        Y_ = np.zeros((height - kernel_size +1) * (width - kernel_size +1))
        for j in range(channel_in):
            # 得到稀疏矩阵
            H_ = np.zeros(((height - kernel_size +1) * (width - kernel_size +1), height * width))
            
            # 将核展开并在行之间插入0
            length = kernel_size ** 2 + kernel_size -1
            Hh = np.zeros(length)
            m = 0
            count = 0
            while m < length:
                Hh[m:m+kernel_size] = H[i][j][count]
                m += kernel_size + 1
                count += 1
            
            for m in range((height - kernel_size +1) * (width - kernel_size +1)):
                H_[m][m:m+length] = Hh
            
            # 展平X
            X_ = X[j].flatten()
            Y_ += H_ @ X_
        Yy.append(Y_.reshape(height - kernel_size +1,  width - kernel_size +1))
    
    Y = np.array(Yy)
    return Y

In [7]:
T2 = time.time()
print(func2(H, X).shape)
T2_ = time.time()
print("运行时间:%s ms" % ((T2_-T2)*1000))

(64, 212, 212)
运行时间:476282.5219631195 ms
