In [4]:
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def d_sigmoid(a):
    return a * (1.0 - a)

class DenseLayer:
    """A fully-connected layer with sigmoid activation."""
    def __init__(self, n_in, n_out, lr=0.9, W=None, b=None, name=""):
        self.n_in = n_in
        self.n_out = n_out
        self.lr = lr
        self.name = name

        # Parameters (attributes)
        self.W = np.array(W, dtype=float) if W is not None else np.random.randn(n_in, n_out) * 0.1
        self.b = np.array(b, dtype=float) if b is not None else np.zeros(n_out)

        # Caches for backprop
        self.X = None          # input to this layer
        self.Z = None          # net input
        self.A = None          # activation (output)

    # ---- forward & backward ----
    def forward(self, X):
        self.X = X                       # (batch, n_in)
        self.Z = X @ self.W + self.b     # (batch, n_out)
        self.A = sigmoid(self.Z)         # (batch, n_out)
        return self.A

    def backward_output(self, T):
        """Backprop for output layer: Err = O*(1-O)*(T-O). Updates params."""
        O = self.A
        delta = d_sigmoid(O) * (T - O)                # (batch, n_out)
        dW = self.X.T @ delta                         # (n_in, n_out)
        db = delta.sum(axis=0)                        # (n_out,)
        self.W += self.lr * dW
        self.b += self.lr * db
        return delta

    def backward_hidden(self, next_W, next_delta):
        """Backprop for hidden layer: Err = O*(1-O) * (next_delta @ next_W^T)."""
        O = self.A
        backflow = next_delta @ next_W.T              # (batch, n_out_this)
        delta = d_sigmoid(O) * backflow
        dW = self.X.T @ delta
        db = delta.sum(axis=0)
        self.W += self.lr * dW
        self.b += self.lr * db
        return delta

class NeuralNetwork321:
    """3-2-1 network: input(3) -> hidden(2) -> output(1) with sigmoid and backprop."""
    def __init__(self, lr=0.9, init_from_table=True):
        self.lr = lr

        if init_from_table:
            # From your table:
            W_ih = [[ 0.2, -0.3],
                    [ 0.4,  0.1],
                    [-0.5,  0.2]]
            b_h  = [-0.4, 0.2]
            W_ho = [[-0.3],
                    [-0.2]]
            b_o  = [0.1]
        else:
            W_ih = b_h = W_ho = b_o = None

        self.hidden = DenseLayer(3, 2, lr=lr, W=W_ih, b=b_h, name="hidden")
        self.out    = DenseLayer(2, 1, lr=lr, W=W_ho, b=b_o, name="output")

    # ---- API ----
    def forward(self, X):
        H = self.hidden.forward(X)
        O = self.out.forward(H)
        return O

    def backward_one_step(self, T):
        delta_out = self.out.backward_output(T)
        _ = self.hidden.backward_hidden(self.out.W, delta_out)
        return delta_out, _

    def train_one_step_verbose(self, X, T):
        # Forward
        O_hidden = self.hidden.forward(X)
        O_out = self.out.forward(O_hidden)

        # Print Table 6.4 (net inputs & outputs)
        I4, I5 = self.hidden.Z.ravel()
        I6 = self.out.Z.ravel()[0]
        O4, O5 = O_hidden.ravel()
        O6 = O_out.ravel()[0]
        print(f"Net inputs (I4, I5, I6): {I4:.3f}, {I5:.3f}, {I6:.3f}")
        print(f"Outputs    (O4, O5, O6): {O4:.3f}, {O5:.3f}, {O6:.3f}")

        # Backward
        delta_out = self.out.backward_output(T)                         # Err6
        delta_hid = self.hidden.backward_hidden(self.out.W, delta_out)  # Err4, Err5

        # Print Table 6.5 (errors)
        Err6 = float(delta_out.ravel()[0])
        Err4, Err5 = delta_hid.ravel()[0], delta_hid.ravel()[1]
        print(f"Err6: {Err6:.4f}")
        print(f"Err5: {Err5:.4f}, Err4: {Err4:.4f}")

        # Print Table 6.6 (updated params)
        W_ih, b_h = self.hidden.W, self.hidden.b
        W_ho, b_o = self.out.W, self.out.b
        print("\nUpdated weights/biases (η=0.9):")
        print(f"w46: {W_ho[0,0]:.3f}, w56: {W_ho[1,0]:.3f}")
        print("w14..w35 rows (x1,x2,x3 -> [4,5]):")
        print(np.round(W_ih, 3))
        print(f"θ6: {b_o[0]:.3f}")
        print(f"θ5: {b_h[1]:.3f}, θ4: {b_h[0]:.3f}")

# ------------------- demo -------------------
if __name__ == "__main__":
    # Input and target from your example
    X = np.array([[1.0, 0.0, 1.0]])   # (x1, x2, x3)
    T = np.array([[1.0]])             # target

    nn = NeuralNetwork321(lr=0.9, init_from_table=True)
    nn.train_one_step_verbose(X, T)


Net inputs (I4, I5, I6): -0.700, 0.100, -0.105
Outputs    (O4, O5, O6): 0.332, 0.525, 0.474
Err6: 0.1312
Err5: -0.0045, Err4: -0.0076

Updated weights/biases (η=0.9):
w46: -0.261, w56: -0.138
w14..w35 rows (x1,x2,x3 -> [4,5]):
[[ 0.193 -0.304]
 [ 0.4    0.1  ]
 [-0.507  0.196]]
θ6: 0.218
θ5: 0.196, θ4: -0.407


In [8]:
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def d_sigmoid(a):
    return a * (1.0 - a)

class DenseLayer:
    def __init__(self, n_in, n_out, lr=0.9, W=None, b=None, name=""):
        self.n_in = n_in
        self.n_out = n_out
        self.lr = lr
        self.name = name
        self.W = np.array(W, dtype=float) if W is not None else np.random.randn(n_in, n_out) * 0.1
        self.b = np.array(b, dtype=float) if b is not None else np.zeros(n_out)
        self.X = None
        self.Z = None
        self.A = None

    def forward(self, X):
        self.X = X
        self.Z = X @ self.W + self.b
        self.A = sigmoid(self.Z)
        return self.A

    def backward_output(self, T):
        O = self.A
        delta = d_sigmoid(O) * (T - O)
        dW = self.X.T @ delta
        db = delta.sum(axis=0)
        self.W += self.lr * dW
        self.b += self.lr * db
        return delta

    def backward_hidden(self, next_W, next_delta):
        O = self.A
        backflow = next_delta @ next_W.T
        delta = d_sigmoid(O) * backflow
        dW = self.X.T @ delta
        db = delta.sum(axis=0)
        self.W += self.lr * dW
        self.b += self.lr * db
        return delta

class NeuralNetwork432:
    """4 -> 3 -> 2"""
    def __init__(self, lr=0.9, init_params=None):
        self.lr = lr
        # init_params: [(W_in_hidden, b_hidden), (W_hidden_out, b_out)]
        if init_params is not None:
            (W1, b1), (W2, b2) = init_params
        else:
            W1 = b1 = W2 = b2 = None
        self.hidden = DenseLayer(4, 3, lr=lr, W=W1, b=b1, name="hidden")
        self.out    = DenseLayer(3, 2, lr=lr, W=W2, b=b2, name="output")

    def forward(self, X):
        H = self.hidden.forward(X)
        O = self.out.forward(H)
        return O

    def backward_one_step(self, T):
        delta_out = self.out.backward_output(T)
        _ = self.hidden.backward_hidden(self.out.W, delta_out)
        return delta_out, _

    def train_one_step_verbose(self, X, T):
        # forward
        H = self.hidden.forward(X)
        O = self.out.forward(H)
        print("[Forward] hidden Z:", np.round(self.hidden.Z, 6))
        print("[Forward] hidden A:", np.round(self.hidden.A, 6))
        print("[Forward] output Z:", np.round(self.out.Z, 6))
        print("[Forward] output A:", np.round(self.out.A, 6))
        # backward (output)
        O_last = self.out.A
        delta_out = d_sigmoid(O_last) * (T - O_last)
        print("[Backward] output delta:", np.round(delta_out, 6))
        dW = self.out.X.T @ delta_out
        db = delta_out.sum(axis=0)
        self.out.W += self.lr * dW
        self.out.b += self.lr * db
        print("[Update] output W:\n", np.round(self.out.W, 6))
        print("[Update] output b:", np.round(self.out.b, 6))
        # backward (hidden)
        backflow = delta_out @ self.out.W.T
        delta_h = d_sigmoid(self.hidden.A) * backflow
        print("[Backward] hidden delta:", np.round(delta_h, 6))
        dW_h = self.hidden.X.T @ delta_h
        db_h = delta_h.sum(axis=0)
        self.hidden.W += self.lr * dW_h
        self.hidden.b += self.lr * db_h
        print("[Update] hidden W:\n", np.round(self.hidden.W, 6))
        print("[Update] hidden b:", np.round(self.hidden.b, 6))

if __name__ == "__main__":
    np.set_printoptions(suppress=True)

    # Example sample (batch=1): 4 inputs → 2 targets
    X = np.array([[1.0, 0.0, 1.0, 0.0]])  # shape (1,4)
    T = np.array([[1.0, 0.0]])            # shape (1,2)

    nn = NeuralNetwork432(lr=0.9)
    print("[Before] predict:", np.round(nn.forward(X), 6))
    nn.train_one_step_verbose(X, T)
    print("[After ] predict:", np.round(nn.forward(X), 6))


[Before] predict: [[0.524598 0.523482]]
[Forward] hidden Z: [[0.043064 0.091206 0.133167]]
[Forward] hidden A: [[0.510764 0.522786 0.533243]]
[Forward] output Z: [[0.098472 0.093998]]
[Forward] output A: [[0.524598 0.523482]]
[Backward] output delta: [[ 0.118563 -0.130582]]
[Update] output W:
 [[ 0.150261 -0.022465]
 [ 0.12236  -0.026617]
 [ 0.084575  0.04349 ]]
[Update] output b: [ 0.106707 -0.117524]
[Backward] hidden delta: [[0.005185 0.004486 0.001082]]
[Update] hidden W:
 [[ 0.052225  0.017541  0.110679]
 [-0.045499 -0.167539 -0.055455]
 [ 0.000171  0.081741  0.024436]
 [ 0.054336  0.033211  0.034501]]
[Update] hidden b: [0.004666 0.004038 0.000974]
[After ] predict: [[0.572847 0.470073]]


In [9]:
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def d_sigmoid(a):
    return a * (1.0 - a)

class DenseLayer:
    def __init__(self, n_in, n_out, lr=0.9, W=None, b=None, name=""):
        self.n_in = n_in
        self.n_out = n_out
        self.lr = lr
        self.name = name
        self.W = np.array(W, dtype=float) if W is not None else np.random.randn(n_in, n_out) * 0.1
        self.b = np.array(b, dtype=float) if b is not None else np.zeros(n_out)
        self.X = None
        self.Z = None
        self.A = None

    def forward(self, X):
        self.X = X
        self.Z = X @ self.W + self.b
        self.A = sigmoid(self.Z)
        return self.A

    def backward_output(self, T):
        O = self.A
        delta = d_sigmoid(O) * (T - O)
        dW = self.X.T @ delta
        db = delta.sum(axis=0)
        self.W += self.lr * dW
        self.b += self.lr * db
        return delta

    def backward_hidden(self, next_W, next_delta):
        O = self.A
        backflow = next_delta @ next_W.T
        delta = d_sigmoid(O) * backflow
        dW = self.X.T @ delta
        db = delta.sum(axis=0)
        self.W += self.lr * dW
        self.b += self.lr * db
        return delta

class NeuralNetwork431:
    """4 -> 3 -> 1"""
    def __init__(self, lr=0.9, init_params=None):
        self.lr = lr
        if init_params is not None:
            (W1, b1), (W2, b2) = init_params
        else:
            W1 = b1 = W2 = b2 = None
        self.hidden = DenseLayer(4, 3, lr=lr, W=W1, b=b1, name="hidden")
        self.out    = DenseLayer(3, 1, lr=lr, W=W2, b=b2, name="output")

    def forward(self, X):
        H = self.hidden.forward(X)
        O = self.out.forward(H)
        return O

    def backward_one_step(self, T):
        delta_out = self.out.backward_output(T)
        _ = self.hidden.backward_hidden(self.out.W, delta_out)
        return delta_out, _

    def train_one_step_verbose(self, X, T):
        # forward
        H = self.hidden.forward(X)
        O = self.out.forward(H)
        print("[Forward] hidden Z:", np.round(self.hidden.Z, 6))
        print("[Forward] hidden A:", np.round(self.hidden.A, 6))
        print("[Forward] output Z:", np.round(self.out.Z, 6))
        print("[Forward] output A:", np.round(self.out.A, 6))
        # backward (output)
        O_last = self.out.A
        delta_out = d_sigmoid(O_last) * (T - O_last)
        print("[Backward] output delta:", np.round(delta_out, 6))
        dW = self.out.X.T @ delta_out
        db = delta_out.sum(axis=0)
        self.out.W += self.lr * dW
        self.out.b += self.lr * db
        print("[Update] output W:\n", np.round(self.out.W, 6))
        print("[Update] output b:", np.round(self.out.b, 6))
        # backward (hidden)
        backflow = delta_out @ self.out.W.T
        delta_h = d_sigmoid(self.hidden.A) * backflow
        print("[Backward] hidden delta:", np.round(delta_h, 6))
        dW_h = self.hidden.X.T @ delta_h
        db_h = delta_h.sum(axis=0)
        self.hidden.W += self.lr * dW_h
        self.hidden.b += self.lr * db_h
        print("[Update] hidden W:\n", np.round(self.hidden.W, 6))
        print("[Update] hidden b:", np.round(self.hidden.b, 6))

if __name__ == "__main__":
    np.set_printoptions(suppress=True)

    # Example sample (batch=1): 4 inputs → 1 target
    X = np.array([[0.0, 1.0, 1.0, 0.0]])  # shape (1,4)
    T = np.array([[1.0]])                 # shape (1,1)

    nn = NeuralNetwork431(lr=0.9)
    print("[Before] predict:", np.round(nn.forward(X), 6))
    nn.train_one_step_verbose(X, T)
    print("[After ] predict:", np.round(nn.forward(X), 6))


[Before] predict: [[0.518249]]
[Forward] hidden Z: [[-0.045214 -0.372256  0.008378]]
[Forward] hidden A: [[0.488698 0.407996 0.502094]]
[Forward] output Z: [[0.073029]]
[Forward] output A: [[0.518249]]
[Backward] output delta: [[0.120277]]
[Update] output W:
 [[0.011212]
 [0.024957]
 [0.255985]]
[Update] output b: [0.10825]
[Backward] hidden delta: [[0.000337 0.000725 0.007697]]
[Update] hidden W:
 [[ 0.123219  0.074843 -0.024056]
 [-0.01255  -0.221387  0.001769]
 [-0.032058 -0.149564  0.020464]
 [ 0.080371  0.057306 -0.033827]]
[Update] hidden b: [0.000303 0.000653 0.006927]
[After ] predict: [[0.563108]]
