In [1]:
import numpy as np

In [2]:
arr1 = np.array([1, 2, 3])
arr2 = np.array([[4, 5, 6], [7, 8, 9]])
arr3 = arr1*arr2
print(arr3)

[[ 4 10 18]
 [ 7 16 27]]


In [3]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = a.reshape(1, -1)
print(b)

[[1 2 3 4 5 6 7 8 9]]


In [5]:
class MultiLayerPerceptron:
    def __init__(self, layer_dims, learning_rate=0.01):
        self.layer_dims = layer_dims
        self.learning_rate = learning_rate
        self.parameters = {}
        self.caches = []
        
        # 初始化参数
        for l in range(1, len(layer_dims)):
            self.parameters[f"W{l}"] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
            self.parameters[f"b{l}"] = np.zeros((layer_dims[l], 1))
    
    def relu(self, Z):
        return np.maximum(0, Z)
    
    def sigmoid(self, Z):
        return 1 / (1 + np.exp(-Z))
    
    def sigmoid_derivative(self, A):
        return A * (1 - A)
    
    def relu_derivative(self, Z):
        return np.where(Z > 0, 1, 0)
    
    def forward_activation(self, A_prev, W, b, activation):
        Z = np.dot(W, A_prev) + b
        if activation == "relu":
            A = self.relu(Z)
        elif activation == "sigmoid":
            A = self.sigmoid(Z)
        cache = (A_prev, W, b, Z)
        return A, cache
    
    def forward_propagation(self, X):
        A = X
        for l in range(1, len(self.layer_dims)):
            A_prev = A
            A, cache = self.forward_activation(A_prev, self.parameters[f"W{l}"], self.parameters[f"b{l}"], "relu" if l < len(self.layer_dims) - 1 else "sigmoid")
            self.caches.append(cache)
        return A
    
    def compute_loss(self, Y, AL):
        m = Y.shape[1]
        loss = -1/m * (np.dot(Y, np.log(AL).T) + np.dot(1-Y, np.log(1-AL).T))
        return np.squeeze(loss)
    
    def backward_activation(self, dA, cache, activation):
        A_prev, W, b, Z = cache
        m = A_prev.shape[1]
        
        if activation == "relu":
            dZ = dA * self.relu_derivative(Z)
        elif activation == "sigmoid":
            dZ = dA * self.sigmoid_derivative(A_prev)
        
        dW = 1/m * np.dot(dZ, A_prev.T)
        db = 1/m * np.sum(dZ, axis=1, keepdims=True)
        dA_prev = np.dot(W.T, dZ)
        
        return dA_prev, dW, db
    
    def backward_propagation(self, Y):
        grads = {}
        m = Y.shape[1]
        L = len(self.layer_dims) - 1
        AL = self.caches[-1][0]
        dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
        current_cache = self.caches[-1]
        grads[f"dA{L-1}"], grads[f"dW{L}"], grads[f"db{L}"] = self.backward_activation(dAL, current_cache, "sigmoid")
        
        for l in reversed(range(L-1)):
            current_cache = self.caches[l]
            dA_prev_temp, dW_temp, db_temp = self.backward_activation(grads[f"dA{l+1}"], current_cache, "relu")
            grads[f"dA{l}"] = dA_prev_temp
            grads[f"dW{l+1}"] = dW_temp
            grads[f"db{l+1}"] = db_temp
        
        for l in range(1, L+1):
            self.parameters[f"W{l}"] -= self.learning_rate * grads[f"dW{l}"]
            self.parameters[f"b{l}"] -= self.learning_rate * grads[f"db{l}"]
            
    def train(self, X, Y, epochs=1000):
        for i in range(epochs):
            AL = self.forward_propagation(X)
            loss = self.compute_loss(Y, AL)
            self.backward_propagation(Y)
            if i % 100 == 0:
                print(f'Epoch {i}, Loss: {loss}')
    
    def predict(self, X):
        AL = self.forward_propagation(X)
        return np.round(AL)

In [6]:
# 生成示例数据
np.random.seed(0)
X = np.random.randn(2, 500)
Y = np.array([0 if x[0]**2 + x[1]**2 < 1 else 1 for x in X.T]).reshape(1, -1)

# 创建神经网络模型
model = MultiLayerPerceptron(layer_dims=[2, 4, 3, 1], learning_rate=0.01)

# 训练模型
model.train(X, Y, epochs=2000)

# 预测结果
predictions = model.predict(X)
print("Accuracy: " + str(np.mean(predictions == Y)))


  dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
  dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
  dZ = dA * self.sigmoid_derivative(A_prev)


ValueError: shapes (3,1) and (3,500) not aligned: 1 (dim 1) != 3 (dim 0)