In [6]:
import numpy as np

class Perceptron:
    def __init__(self, input_size, output_size):
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.zeros((output_size, 1))

    def forward(self, x):
        z = np.dot(self.weights, x) + self.bias
        a = self.sigmoid(z)
        return a

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        s = self.sigmoid(x)
        return s * (1 - s)

    def backward(self, x, y, a):
        dz = (a - y) * self.sigmoid_derivative(a)
        dw = np.dot(dz, x.T)
        db = dz
        dx = np.dot(self.weights.T, dz)
        return dx, dw, db

class TwoLayerPerceptron:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.perceptron1 = Perceptron(input_size, hidden_size)
        self.perceptron2 = Perceptron(hidden_size, output_size)

    def forward(self, x):
        a1 = self.perceptron1.forward(x)
        a2 = self.perceptron2.forward(a1)
        return a2

    def backward(self, x, y, a):
        da2, dw2, db2 = self.perceptron2.backward(a1, y, a)
        da1, dw1, db1 = self.perceptron1.backward(x, a1, da2)
        return dw1, db1, dw2, db2


In [7]:
import numpy as np

class Perceptron:
    def __init__(self, input_size, output_size):
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.zeros((output_size, 1))

    def forward(self, x):
        z = np.dot(self.weights, x) + self.bias
        a = self.sigmoid(z)
        return a

    def sigmoid(self, x):
        return np.max(0,x)

    def sigmoid_derivative(self, x):
        s = self.sigmoid(x)
        return s * (1 - s)

    def backward(self, x, y, a):
        dz = (a - y) * self.sigmoid_derivative(a)
        dw = np.dot(dz, x.T)
        db = dz
        dx = np.dot(self.weights.T, dz)
        return dx, dw, db

class TwoLayerPerceptron:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.perceptron1 = Perceptron(input_size, hidden_size)
        self.perceptron2 = Perceptron(hidden_size, output_size)

    def forward(self, x):
        a1 = self.perceptron1.forward(x)
        a2 = self.perceptron2.forward(a1)
        return a2

    def backward(self, x, y, a):
        da2, dw2, db2 = self.perceptron2.backward(a1, y, a)
        da1, dw1, db1 = self.perceptron1.backward(x, a1, da2)
        return dw1, db1, dw2, db2

# 使用示例数据
X = np.array([[0,0,1],[0,1,1],[1,0,1],[1,1,1]])
y = np.array([[0],[1],[1],[0]])

# 创建一个双层感知器
model = TwoLayerPerceptron(input_size = 3, hidden_size = 4,output_size = 1)

# 训练模型
epochs = 10000
learning_rate = 0.1
for i in range(epochs):
    # 前向传播
    a = model.forward(X.T)

    # 计算误差和梯度
    error = y.T - a
    dw1, db1, dw2, db2 = model.backward(X.T, y.T, a)

    # 更新权重和偏置
    model.perceptron1.weights += learning_rate * dw1
    model.perceptron1.bias += learning_rate * db1
    model.perceptron2.weights += learning_rate * dw2
    model.perceptron2.bias += learning_rate * db2

# 输出预测结果
print(model.forward(X.T))


NameError: name 'a1' is not defined

In [8]:
H = 256
W = 256
C = 3
FH = 4
FW = 4

x_col = np.zeros((C * FH * FW, H * W))
x_col.shape

(48, 65536)

In [9]:
w_row = w.reshape(F, C * FH * FW)
w_row

NameError: name 'w' is not defined

In [10]:
def conv_forward(x, w, b):
  N, C, H, W = x.shape 
  F, _, FH, FW = w.shape

  # other parameters. Set so that the input shape remains unchanged 
  stride = 1                  # stride to apply filter
  padding =  (FH - 1) // 2     # padding on each side 

  out = np.zeros((N, F, H, W))

  padded_x = np.pad(x, ((0, 0), (0, 0), (padding, padding), (padding, padding)), mode='constant')
  _, _, padded_H, padded_W = padded_x.shape

  x_col = np.zeros((C * FH * FW, H * W))
  w_row = w.reshape(F, C * FH * FW)

  for i in range(N):
    c = 0
    for j in range(0, padded_H - FH + 1, stride):
      for k in range(0, padded_W - FW + 1, stride):
        x_col[:, c] = padded_x[i, :, j:j+FH, k:k+FW].reshape(C * FH * FW)
        c += 1
    out[i, :] = (np.dot(w_row, x_col) + b.reshape(-1, 1)).reshape(F, H, W)
  
  cache = (x, w, b, stride, padding)
  return out, cache 