In [36]:
import numpy as np
import sys
import matplotlib.pyplot as plt

from lib import (
    Net, NetConfig,
    SymbolicActivation, SymbolicLoss, WeightInit,
    CheckpointConfig, compute_data_hash
)

In [37]:
# Training inputs: two independent features sampled uniformly between -1 and 1.
# The global NumPy seed is fixed first so both draws are reproducible.
N_SAMPLES = 1000
np.random.seed(42)

q1 = np.random.uniform(low=-1, high=1, size=N_SAMPLES)
q2 = np.random.uniform(low=-1, high=1, size=N_SAMPLES)

def target_function(q1, q2):
    """Ground-truth mapping that the network is trained to reproduce.

    Args:
        q1: First input (used in this notebook with both scalars and NumPy arrays).
        q2: Second input (same kind as q1).

    Returns:
        (X1, X2) with X1 = q1 + q2**2 - 1 and X2 = q1 - q2 + 1.
    """
    return q1 + (q2**2) - 1, q1 - q2 + 1

# Evaluate the target function on every sampled (q1, q2) pair.
X1, X2 = target_function(q1, q2)

# Pair features and targets row-wise: one row per sample, two columns each.
inputs = np.array(list(zip(q1, q2)))
targets = np.array(list(zip(X1, X2)))

# Per-sample lists (one 1-D row array per entry) for the hashing/training calls.
input_list = list(inputs)
target_list = list(targets)

print(f"Generated {len(inputs)} training samples")
print(f"Input shape: {inputs.shape}")
print(f"Target shape: {targets.shape}")

Generated 1000 training samples
Input shape: (1000, 2)
Target shape: (1000, 2)


In [38]:
# Fingerprint the training set. Per the original note, this hash is what
# enables checkpoint sharing; it is passed to CheckpointConfig below.
data_hash = compute_data_hash(input_list, target_list)
print(f"Data hash: {data_hash}")

# Checkpoint settings gathered in one place, then unpacked into the config.
checkpoint_settings = dict(
    enabled=True,
    cache_dir='.net_cache',
    checkpoint_interval=20,
    data_hash=data_hash,
    overwrite=False,
)
checkpoint_config = CheckpointConfig(**checkpoint_settings)
print("Checkpointing enabled")

Data hash: a1ef014a45550963
Checkpointing enabled


In [39]:
# Network definition: 2 inputs -> one hidden layer of 10 tanh neurons -> 2 linear outputs.
config = NetConfig(
    hidden_activation=SymbolicActivation.tanh(),
    output_activation=SymbolicActivation.linear(),
    n_inputs=2,
    n_outputs=2,
    n_hidden_layers=1,
    n_neurons_per_hidden=10,
    loss_func=SymbolicLoss.mse(),
    weight_init=WeightInit.XAVIER,
    bias_init_std=0.01,
    seed=0,
)

net = Net.fully_connected(config)

# Training hyperparameters, kept together so they are easy to tune.
# NOTE(review): with checkpointing enabled, the recorded run loaded a cached
# checkpoint (epoch 110220) instead of training from scratch - confirm that
# this is the intended behaviour when reproducing results.
training_options = dict(
    learning_rate=0.01,
    epochs=100000,
    batch_size=32,
    checkpoint_config=checkpoint_config,
)
result = net.train(inputs=input_list, targets=target_list, **training_options)

Loaded checkpoint from epoch 110220, returning first 100000 epochs of loss history.


In [40]:
# Display the trained network's repr (layers, their neurons, and each neuron's connections).
net

Net(layers=[Layer(activation_func=SymbolicActivation(expression=tanh(z), derivative=1 - tanh(z)**2, forward_func=<function _lambdifygenerated at 0x7f354d5c2200>, deriv_func=<function _lambdifygenerated at 0x7f354d5c02c0>, name='tanh'), neurons=[Neuron(bias=np.float64(0.4687027593493743), fixed_bias=False, connections=[Connection(from_index=0, weight=np.float64(0.27166931845108494), fixed_weight=False), Connection(from_index=1, weight=np.float64(-0.004254184162832678), fixed_weight=False)]), Neuron(bias=np.float64(-0.5422392385789817), fixed_bias=False, connections=[Connection(from_index=0, weight=np.float64(-0.31620828001157025), fixed_weight=False), Connection(from_index=1, weight=np.float64(0.15184211808051565), fixed_weight=False)]), Neuron(bias=np.float64(0.1537382973415393), fixed_bias=False, connections=[Connection(from_index=0, weight=np.float64(0.0917804000905386), fixed_weight=False), Connection(from_index=1, weight=np.float64(-0.370437651978326), fixed_weight=False)]), Neuron

## Weights and Biases

In [41]:
# Summarise the architecture, then dump every layer's trained parameters.
def _print_layer_params(title, weight_name, weight_matrix, bias_name, bias_vector):
    """Print a banner for one layer followed by its weight matrix and bias vector."""
    rule = "=" * 80
    print(rule)
    print(title)
    print(rule)
    print(f"\nWeights {weight_name} (shape {weight_matrix.shape}):")
    print(weight_matrix)
    print(f"\nBiases {bias_name} (shape {bias_vector.shape}):")
    print(bias_vector)


print("Network Architecture:")
for label, value in (
    ("Input neurons", config.n_inputs),
    ("Hidden layers", config.n_hidden_layers),
    ("Neurons per hidden layer", config.n_neurons_per_hidden),
    ("Output neurons", config.n_outputs),
):
    print(f"  {label}: {value}")
print()

# Per-layer arrays pulled out of the network object.
weights, biases = net._to_arrays()

# Input -> hidden: transposed so rows index inputs and columns index hidden neurons.
W1 = weights[0].T
b1 = biases[0]
_print_layer_params("Layer 1: Input -> Hidden (tanh activation)", "W1", W1, "b1", b1)
print()

# Hidden -> output: transposed so rows index hidden neurons and columns index outputs.
W2 = weights[1].T
b2 = biases[1]
_print_layer_params("Layer 2: Hidden -> Output (linear activation)", "W2", W2, "b2", b2)

Network Architecture:
  Input neurons: 2
  Hidden layers: 1
  Neurons per hidden layer: 10
  Output neurons: 2

Layer 1: Input -> Hidden (tanh activation)

Weights W1 (shape (2, 10)):
[[ 0.27166932 -0.31620828  0.0917804   0.32567764 -0.16074913 -0.48834191
   0.0144965   0.04745527 -0.02112384 -0.20787654]
 [-0.00425418  0.15184212 -0.37043765 -0.17241109 -0.23108189  0.20638085
   1.35953507  0.78044253  1.13710633  0.01425111]]

Biases b1 (shape (10,)):
[ 0.46870276 -0.54223924  0.1537383   0.50365899  0.21451754  0.82761637
 -1.51660902 -0.13699666  1.27016473 -0.23640293]

Layer 2: Hidden -> Output (linear activation)

Weights W2 (shape (10, 2)):
[[ 0.63124831  0.71249896]
 [-0.41419734 -1.07696792]
 [ 0.05287032  0.66006111]
 [ 1.22647416  0.59282931]
 [-0.45715935 -0.58104125]
 [-0.93861434 -0.95576768]
 [ 1.41523508 -0.18726013]
 [ 0.54715137 -0.54881026]
 [-1.64351412 -0.34308421]
 [-0.45460746 -0.30192443]]

Biases b2 (shape (2,)):
[1.33423875 0.52972723]


## Mathematical Equations for Each Output

In [42]:
# Build mathematical equations for the network
# Network: q1, q2 -> hidden neurons (tanh) -> X1, X2 (linear)
#
# Layer sizes are read from the weight matrices rather than hard-coded, so the
# printout stays correct if n_inputs / n_neurons_per_hidden / n_outputs change.
# W1 has one row per input and one column per hidden neuron; W2 has one row
# per hidden neuron and one column per output.
n_input_units, n_hidden_units = W1.shape
n_output_units = W2.shape[1]

print("Network Computation:")
print("=" * 80)
print(f"\nStep 1: Compute hidden layer activations (h0 to h{n_hidden_units - 1})")
print("-" * 80)

for i in range(n_hidden_units):
    # One "weight * q<j>" term per input, followed by the neuron's bias.
    weighted_inputs = " + ".join(
        f"{W1[j, i]:.8f} * q{j + 1}" for j in range(n_input_units)
    )
    print(f"\nh{i} = tanh({weighted_inputs} + {b1[i]:.8f})")

# One section per output; numbering continues after Step 1 (X1 -> Step 2, X2 -> Step 3, ...).
for k in range(n_output_units):
    print("\n" + "=" * 80)
    print(f"\nStep {k + 2}: Compute output X{k + 1}")
    print("-" * 80)
    print(f"\nX{k + 1} = ", end="")

    terms = [f"({W2[i, k]:.8f} * h{i})" for i in range(n_hidden_units)]

    print(" + ".join(terms))
    print(f"     + {b2[k]:.8f}")

Network Computation:

Step 1: Compute hidden layer activations (h0 to h9)
--------------------------------------------------------------------------------

h0 = tanh(0.27166932 * q1 + -0.00425418 * q2 + 0.46870276)

h1 = tanh(-0.31620828 * q1 + 0.15184212 * q2 + -0.54223924)

h2 = tanh(0.09178040 * q1 + -0.37043765 * q2 + 0.15373830)

h3 = tanh(0.32567764 * q1 + -0.17241109 * q2 + 0.50365899)

h4 = tanh(-0.16074913 * q1 + -0.23108189 * q2 + 0.21451754)

h5 = tanh(-0.48834191 * q1 + 0.20638085 * q2 + 0.82761637)

h6 = tanh(0.01449650 * q1 + 1.35953507 * q2 + -1.51660902)

h7 = tanh(0.04745527 * q1 + 0.78044253 * q2 + -0.13699666)

h8 = tanh(-0.02112384 * q1 + 1.13710633 * q2 + 1.27016473)

h9 = tanh(-0.20787654 * q1 + 0.01425111 * q2 + -0.23640293)


Step 2: Compute output X1
--------------------------------------------------------------------------------

X1 = (0.63124831 * h0) + (-0.41419734 * h1) + (0.05287032 * h2) + (1.22647416 * h3) + (-0.45715935 * h4) + (-0.93861434 * h5) + (1.4

## Manual Forward Pass

In [43]:
def forward_manual(q1, q2, W1, b1, W2, b2):
    """
    Manual forward pass through the network.

    Accepts either a single (q1, q2) pair or arrays of points. Array inputs
    are broadcast against each other and evaluated in one vectorised pass;
    a scalar pair goes through the plain matrix-vector computation.

    Args:
        q1: First input value (scalar or array-like)
        q2: Second input value (scalar or array-like, broadcastable with q1)
        W1: Weights from input to hidden layer (shape: 2 x n_hidden)
        b1: Biases for hidden layer (shape: n_hidden)
        W2: Weights from hidden to output layer (shape: n_hidden x 2)
        b2: Biases for output layer (shape: 2)

    Returns:
        (X1, X2): The two output values. Scalars when q1 and q2 are scalars,
        otherwise arrays with the broadcast shape of q1 and q2.
    """
    q1_b, q2_b = np.broadcast_arrays(q1, q2)
    batch_shape = q1_b.shape
    is_single_point = batch_shape == ()

    if is_single_point:
        # Input vector, Shape: (2,)
        input_vec = np.array([q1, q2])
    else:
        # One column per point, Shape: (2, n_points)
        input_vec = np.stack([q1_b.ravel(), q2_b.ravel()])

    # Layer 1: Input -> Hidden (with tanh activation)
    # .T is a no-op on 1-D arrays, so a single point is computed as
    # W1.T @ input_vec + b1; for a batch it puts points on the leading axis so
    # the bias broadcasts across points rather than across hidden units.
    hidden_pre = (W1.T @ input_vec).T + b1  # Shape: (n_hidden,) or (n_points, n_hidden)
    hidden = np.tanh(hidden_pre)

    # Layer 2: Hidden -> Output (linear activation)
    output = (W2.T @ hidden.T).T + b2  # Shape: (2,) or (n_points, 2)

    if is_single_point:
        return output[0], output[1]
    # Restore the caller's (broadcast) input shape for each output.
    return output[:, 0].reshape(batch_shape), output[:, 1].reshape(batch_shape)

# Cross-check forward_manual against the network's own forward pass
print("Testing the manual forward function:")
print("=" * 80)

test_inputs = [(0.5, 0.3), (-0.8, 0.6), (0.0, 0.0), (1.0, -1.0)]

for q1_test, q2_test in test_inputs:
    X1_manual, X2_manual = forward_manual(q1_test, q2_test, W1, b1, W2, b2)

    # Per the original note, net.forward returns (activations, pre_activations);
    # the last entry of the activations is taken as the network output.
    net_output = net.forward(np.array([q1_test, q2_test]))
    final_output = net_output[0][-1]
    X1_net, X2_net = final_output[0], final_output[1]

    delta_X1 = abs(X1_manual-X1_net)
    delta_X2 = abs(X2_manual-X2_net)

    report_lines = [
        f"\nInput: q1={q1_test:6.2f}, q2={q2_test:6.2f}",
        f"  Manual function:     X1={X1_manual:9.6f}, X2={X2_manual:9.6f}",
        f"  Network .forward():  X1={X1_net:9.6f}, X2={X2_net:9.6f}",
        f"  Difference:          ΔX1={delta_X1:9.6e}, ΔX2={delta_X2:9.6e}",
    ]
    print("\n".join(report_lines))

Testing the manual forward function:

Input: q1=  0.50, q2=  0.30
  Manual function:     X1=-0.409791, X2= 1.200539
  Network .forward():  X1=-0.409791, X2= 1.200539
  Difference:          ΔX1=0.000000e+00, ΔX2=0.000000e+00

Input: q1= -0.80, q2=  0.60
  Manual function:     X1=-1.442128, X2=-0.400750
  Network .forward():  X1=-1.442128, X2=-0.400750
  Difference:          ΔX1=0.000000e+00, ΔX2=0.000000e+00

Input: q1=  0.00, q2=  0.00
  Manual function:     X1=-0.998109, X2= 1.000284
  Network .forward():  X1=-0.998109, X2= 1.000284
  Difference:          ΔX1=0.000000e+00, ΔX2=0.000000e+00

Input: q1=  1.00, q2= -1.00
  Manual function:     X1= 0.985525, X2= 2.993411
  Network .forward():  X1= 0.985525, X2= 2.993411
  Difference:          ΔX1=0.000000e+00, ΔX2=0.000000e+00


## Example: Computing Outputs for Specific Input

In [44]:
# Worked example: push the single point q1=0.5, q2=0.3 through the network by hand
def _print_pair(heading, x1_value, x2_value, prefix=""):
    """Print a heading followed by the X1 and X2 values to 8 decimal places."""
    print(f"\n{heading}")
    print(f"  {prefix}X1 = {x1_value:.8f}")
    print(f"  {prefix}X2 = {x2_value:.8f}")


q1_example = 0.5
q2_example = 0.3

print(f"Computing outputs for q1={q1_example}, q2={q2_example}")
print("=" * 80)

# Hidden layer: affine map of the inputs followed by tanh
print("\nHidden layer activations:")
hidden_pre = W1.T @ np.array([q1_example, q2_example]) + b1
hidden = np.tanh(hidden_pre)
for i, h in enumerate(hidden):
    print(f"  h{i} = {h:.8f}")

# Output layer: linear combination of the hidden activations
output = W2.T @ hidden + b2
X1_result, X2_result = output[0], output[1]
_print_pair("Final outputs:", X1_result, X2_result)

# Reference values from the analytic target function
X1_target, X2_target = target_function(q1_example, q2_example)
_print_pair("Target values (from original function):", X1_target, X2_target)

# Absolute deviation of the network from the target
_print_pair(
    "Error:",
    abs(X1_result - X1_target),
    abs(X2_result - X2_target),
    prefix="Δ",
)

Computing outputs for q1=0.5, q2=0.3

Hidden layer activations:
  h0 = 0.53936607
  h1 = -0.57488623
  h2 = 0.08826689
  h3 = 0.54747925
  h4 = 0.06472778
  h5 = 0.56853784
  h6 = -0.80103727
  h7 = 0.12027862
  h8 = 0.92177907
  h9 = -0.32396065

Final outputs:
  X1 = -0.40979123
  X2 = 1.20053870

Target values (from original function):
  X1 = -0.41000000
  X2 = 1.20000000

Error:
  ΔX1 = 0.00020877
  ΔX2 = 0.00053870
