In [5]:
import numpy as np
def con_beginner(image, kernel):
    """Valid-mode 2D sliding-window correlation using explicit scalar loops.

    The kernel is slid over the image without padding and without flipping;
    each output element is the sum of element-wise products between the
    kernel and the image patch underneath it.

    Args:
        image: 2D array of shape (iH, iW).
        kernel: 2D array of shape (kH, kW).

    Returns:
        float64 array of shape (iH - kH + 1, iW - kW + 1).
    """
    img_rows, img_cols = image.shape
    ker_rows, ker_cols = kernel.shape
    out_rows = img_rows - ker_rows + 1
    out_cols = img_cols - ker_cols + 1
    result = np.zeros((out_rows, out_cols))

    # np.ndindex walks indices in row-major order, i.e. the same order as
    # two nested range() loops, so the accumulation order is unchanged.
    for out_r, out_c in np.ndindex(out_rows, out_cols):
        for dr, dc in np.ndindex(ker_rows, ker_cols):
            result[out_r, out_c] += image[out_r + dr, out_c + dc] * kernel[dr, dc]

    return result

if __name__ == "__main__":
    # Demo for con_beginner: a random 10x10 image and a fixed 2x2 kernel.
    # The RNG is not seeded, so the image differs on every run; only the
    # output shape is printed, which does not depend on the values.
    img = np.random.rand(10, 10)
    ker = np.array([[1, 0], [0, -1]])
    res_beg = con_beginner(img, ker)
    # Header text reads "output shape check"; the label below reads "beginner version".
    print("--- 輸出形狀檢查 ---")
    print(f"新手版: {res_beg.shape}")


--- 輸出形狀檢查 ---
新手版: (9, 9)


In [13]:
#普通版
import numpy as np
def con_normal(image, kernel):
    """Valid-mode 2D sliding-window correlation, one patch per output element.

    For every output position the matching image patch is sliced out,
    multiplied element-wise with the kernel and summed with ``np.sum``.
    No padding is applied and the kernel is not flipped.

    Args:
        image: 2D array of shape (iH, iW).
        kernel: 2D array of shape (kH, kW).

    Returns:
        float64 array of shape (iH - kH + 1, iW - kW + 1).
    """
    img_rows, img_cols = image.shape
    ker_rows, ker_cols = kernel.shape
    out_rows = img_rows - ker_rows + 1
    out_cols = img_cols - ker_cols + 1
    result = np.zeros((out_rows, out_cols))

    for top in range(out_rows):
        bottom = top + ker_rows
        for left in range(out_cols):
            patch = image[top:bottom, left:left + ker_cols]
            result[top, left] = np.sum(patch * kernel)

    return result

if __name__ == "__main__":
    # Demo for con_normal: same setup as the other demo cells (random 10x10
    # image, fixed 2x2 kernel). The RNG is unseeded, so values vary per run.
    img = np.random.rand(10, 10)
    ker = np.array([[1, 0], [0, -1]])

    res_normal= con_normal(img,ker)
    # Only the output shape is printed.
    print(f"{res_normal.shape}")

(9, 9)


In [15]:
#高手版
def con_master(image, kernel):
    """Valid-mode 2D sliding-window correlation via patch unfolding + one dot product.

    Every kernel-sized patch of the image is flattened into one row of a
    matrix (row-major over output positions); the whole result is then a
    single matrix-vector product with the flattened kernel. No padding is
    applied and the kernel is not flipped.

    Args:
        image: 2D array of shape (iH, iW).
        kernel: 2D array of shape (kH, kW).

    Returns:
        Array of shape (iH - kH + 1, iW - kW + 1); the dtype is whatever
        ``np.dot`` produces for the input dtypes.
    """
    img_rows, img_cols = image.shape
    ker_rows, ker_cols = kernel.shape
    out_rows = img_rows - ker_rows + 1
    out_cols = img_cols - ker_cols + 1

    # One flattened patch per output position, in row-major order.
    patches = np.array([
        image[top:top + ker_rows, left:left + ker_cols].reshape(-1)
        for top in range(out_rows)
        for left in range(out_cols)
    ])
    flat_kernel = kernel.reshape(-1)

    flat_result = np.dot(patches, flat_kernel)
    return flat_result.reshape(out_rows, out_cols)

if __name__ == "__main__":
    # Demo for con_master: random 10x10 image, fixed 2x2 kernel.
    # The RNG is unseeded, so the printed values differ on every run.
    img = np.random.rand(10, 10)
    ker = np.array([[1, 0], [0, -1]])
    res_mas = con_master(img,ker)
    # Prints the full 9x9 result array rather than just its shape.
    print(f"{res_mas}")

[[-0.2434514   0.046903    0.7225932  -0.01438662 -0.02299765 -0.352007
   0.08196908  0.04865084 -0.00619129]
 [ 0.36563219  0.69493058 -0.1588962  -0.11527824 -0.50536142  0.31118418
   0.67415849  0.46786374  0.87345368]
 [-0.63304069  0.13631314 -0.85762023  0.28850968 -0.23148809  0.155451
  -0.3602997  -0.41812172 -0.02346378]
 [-0.25603515 -0.02558821 -0.31627644  0.60330062  0.35115468  0.15052575
  -0.45178116  0.13237584  0.16987224]
 [-0.10067683 -0.16517999 -0.03670637  0.02991428 -0.05357517 -0.37474048
  -0.59143179  0.50564024  0.45217177]
 [ 0.08768485  0.04584749 -0.10885686  0.81116073  0.26670342 -0.34361138
  -0.21573087  0.47298669  0.33816653]
 [ 0.31466596 -0.33020926  0.11015169 -0.05630265 -0.77305242 -0.80031692
   0.02711996 -0.05888038  0.3784238 ]
 [-0.51917324  0.08478105 -0.01326555 -0.65603948  0.67595249  0.27088699
   0.81903092  0.16382669  0.58893445]
 [ 0.10755734  0.67421924 -0.42781973  0.58414415  0.52822611 -0.06429834
   0.19487206 -0.64989738 

In [16]:
import numpy as np

def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold a 4D image batch into a 2D matrix of flattened receptive fields.

    Args:
        input_data: array of shape (N, C, H, W).
        filter_h: filter height.
        filter_w: filter width.
        stride: step between neighbouring windows.
        pad: number of zero rows/columns added on each side of H and W.

    Returns:
        float64 array of shape (N * out_h * out_w, C * filter_h * filter_w).
        Rows are ordered by (sample, output row, output column); within a
        row the values are ordered by (channel, filter row, filter column).
    """
    batch, channels, height, width = input_data.shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # Zero-pad only the two spatial axes.
    pad_spec = [(0, 0), (0, 0), (pad, pad), (pad, pad)]
    padded = np.pad(input_data, pad_spec, 'constant')
    patches = np.zeros((batch, channels, filter_h, filter_w, out_h, out_w))

    row_span = stride * out_h
    col_span = stride * out_w
    # For each offset inside the filter, copy the strided grid of pixels that
    # this offset touches across all window positions at once.
    for fy in range(filter_h):
        row_sel = slice(fy, fy + row_span, stride)
        for fx in range(filter_w):
            col_sel = slice(fx, fx + col_span, stride)
            patches[:, :, fy, fx, :, :] = padded[:, :, row_sel, col_sel]

    # (N, C, fh, fw, oh, ow) -> (N, oh, ow, C, fh, fw) -> 2D
    unfolded = patches.transpose(0, 4, 5, 1, 2, 3)
    return unfolded.reshape(batch * out_h * out_w, -1)

def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    """Fold a 2D unfolded matrix back into a 4D image-shaped array.

    Values belonging to overlapping windows are summed at the pixel they map
    to, so this is the accumulation step needed when propagating gradients
    back through an im2col-style unfolding.

    Args:
        col: 2D array with N * out_h * out_w rows and
            C * filter_h * filter_w columns.
        input_shape: target shape (N, C, H, W).
        filter_h: filter height.
        filter_w: filter width.
        stride: step between neighbouring windows.
        pad: padding that was applied on each side of H and W.

    Returns:
        float64 array of shape (N, C, H, W) with the padding border removed.
    """
    batch, channels, height, width = input_shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # 2D -> (N, oh, ow, C, fh, fw) -> (N, C, fh, fw, oh, ow)
    patches = col.reshape(batch, out_h, out_w, channels, filter_h, filter_w)
    patches = patches.transpose(0, 3, 4, 5, 1, 2)

    # The canvas is the padded image plus (stride - 1) spare rows/columns so
    # the strided slices below never run past the end.
    canvas_h = height + 2 * pad + stride - 1
    canvas_w = width + 2 * pad + stride - 1
    canvas = np.zeros((batch, channels, canvas_h, canvas_w))

    row_span = stride * out_h
    col_span = stride * out_w
    for fy in range(filter_h):
        row_sel = slice(fy, fy + row_span, stride)
        for fx in range(filter_w):
            col_sel = slice(fx, fx + col_span, stride)
            canvas[:, :, row_sel, col_sel] += patches[:, :, fy, fx, :, :]

    # Crop away the padding border (and the spare rows/columns).
    return canvas[:, :, pad:height + pad, pad:width + pad]

class ConvLayerExpert:
    """2D convolution layer with forward and backward passes built on im2col + GEMM.

    The forward pass unfolds the input with ``im2col`` and performs a single
    matrix multiplication against the flattened filters. The backward pass
    reuses the cached unfolded input to compute the weight/bias gradients and
    folds the input gradient back to image shape with ``col2im``.
    """

    def __init__(self, weight_shape, stride=1, pad=0):
        """Create the layer with small random weights and zero biases.

        Args:
            weight_shape: (Filter_Num, Channels, Kernel_H, Kernel_W).
            stride: step between neighbouring windows.
            pad: zero padding added on each side of the spatial axes.
        """
        # Weights are drawn from a standard normal and scaled by 0.01.
        self.W = np.random.randn(*weight_shape) * 0.01
        # One bias per filter.
        self.b = np.zeros(weight_shape[0])
        self.stride = stride
        self.pad = pad

        # State cached by forward() for use in backward().
        self.x = None
        self.col = None
        self.col_W = None

        # Gradients filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Apply the convolution to a batch.

        Args:
            x: array of shape (N, C, H, W); C must equal the channel count of
                the layer's weights.

        Returns:
            Array of shape (N, FN, out_h, out_w).

        Raises:
            ValueError: if the channel count of ``x`` does not match the
                weights, or if the filter does not fit in the padded input.
        """
        FN, C, FH, FW = self.W.shape
        N, in_C, H, W = x.shape
        # Fail early with a readable message instead of an opaque matmul
        # shape error further down.
        if in_C != C:
            raise ValueError(
                f"input has {in_C} channels but the layer weights expect {C}"
            )

        # Integer floor division, the same formula im2col uses, so the
        # reshape below always agrees with the number of rows in `col`.
        out_h = (H + 2 * self.pad - FH) // self.stride + 1
        out_w = (W + 2 * self.pad - FW) // self.stride + 1
        if out_h < 1 or out_w < 1:
            raise ValueError(
                f"filter {FH}x{FW} does not fit input {H}x{W} "
                f"with pad={self.pad}, stride={self.stride}"
            )

        # 1. Unfold the images into a matrix (the core optimisation).
        col = im2col(x, FH, FW, self.stride, self.pad)
        # 2. Flatten the filters into a matrix with one column per filter.
        col_W = self.W.reshape(FN, -1).T

        # 3. Single matrix multiplication (GEMM) plus bias.
        out = np.dot(col, col_W) + self.b
        # 4. Rearrange to (N, FN, out_h, out_w).
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.col = col
        self.col_W = col_W

        return out

    def backward(self, dout):
        """Back-propagate a gradient through the layer.

        Stores the weight gradient in ``self.dW`` (same shape as ``self.W``)
        and the bias gradient in ``self.db``.

        Args:
            dout: gradient with respect to the output of ``forward``,
                of shape (N, FN, out_h, out_w).

        Returns:
            Gradient with respect to the input, with the same shape as the
            ``x`` last passed to ``forward``.

        Raises:
            RuntimeError: if called before a successful ``forward``.
        """
        if self.x is None or self.col is None or self.col_W is None:
            raise RuntimeError("backward() called before forward()")

        FN, C, FH, FW = self.W.shape
        # Move the filter axis last and flatten so rows line up with `col`.
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

        # 1. Bias gradient: sum over every sample and output position.
        self.db = np.sum(dout, axis=0)

        # 2. Weight gradient: col^T @ dout (the matrix product sums the
        #    contributions of the whole batch).
        self.dW = np.dot(self.col.T, dout)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

        # 3. Input gradient: dcol = dout @ W^T, then fold back to image shape.
        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)

        return dx

# Expert-level unit test (checks output shapes only, not values)
if __name__ == "__main__":
    # Simulate 8 three-channel 32x32 images (CIFAR-10 style)
    x_test = np.random.randn(8, 3, 32, 32)
    # Build 16 convolution kernels of size 3x3
    expert_layer = ConvLayerExpert((16, 3, 3, 3), stride=1, pad=1)
    
    # Forward pass
    forward_out = expert_layer.forward(x_test)
    print(f"前向傳播輸出形狀 (Batch, FN, H, W): {forward_out.shape}")
    
    # Simulate the error (gradient) propagated back from the next layer
    dout_test = np.random.randn(*forward_out.shape)
    # Backward pass
    backward_out = expert_layer.backward(dout_test)
    print(f"反向傳播輸出形狀 (應與輸入相同): {backward_out.shape}")

前向傳播輸出形狀 (Batch, FN, H, W): (8, 16, 32, 32)
反向傳播輸出形狀 (應與輸入相同): (8, 3, 32, 32)


In [17]:
if __name__ == "__main__":
    # Create a random image and a filter
    img = np.random.rand(10, 10)
    ker = np.array([[1, 0], [0, -1]])
    
    # Run all three implementations on the same inputs
    res_beg = con_beginner(img, ker)
    res_int = con_normal(img, ker)
    res_adv = con_master(img, ker)
    
    print("--- 輸出形狀檢查 ---")
    print(f"新手版: {res_beg.shape}")
    print(f"普通版: {res_int.shape}")
    print(f"高手版: {res_adv.shape}")
    
    print("\n--- 數值正確性驗證 (與新手版對比) ---")
    # Use np.allclose (default tolerances) to check that the values match
    # the beginner version within floating-point error
    is_int_correct = np.allclose(res_beg, res_int)
    is_adv_correct = np.allclose(res_beg, res_adv)
    
    print(f"普通版運算正確: {is_int_correct}")
    print(f"高手版運算正確: {is_adv_correct}")
    
    # The success message is printed only when both comparisons pass;
    # on a mismatch only the False flags above are shown.
    if is_int_correct and is_adv_correct:
        print("\n✅ 所有版本計算結果完全一致！")

--- 輸出形狀檢查 ---
新手版: (9, 9)
普通版: (9, 9)
高手版: (9, 9)

--- 數值正確性驗證 (與新手版對比) ---
普通版運算正確: True
高手版運算正確: True

✅ 所有版本計算結果完全一致！
