## Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.gridspec import GridSpec
from matplotlib.patches import Circle
from matplotlib.colors import Normalize

## Generate synthetic data


In [2]:
# Seed NumPy's global generator so the noisy samples are reproducible.
np.random.seed(0)

# 100 evenly spaced inputs on [-1, 1], kept as a (100, 1) column vector.
X = np.linspace(-1, 1, 100)[:, np.newaxis]

# Targets follow y = x^2 with additive Gaussian noise (mean 0, std 0.1).
y = np.square(X) + np.random.normal(loc=0, scale=0.1, size=X.shape)

# Quick sanity check: shapes first, then the first five rows of each array.
print("X shape:", X.shape)
print("y shape:", y.shape)
print(X[:5], y[:5], sep="\n")

X shape: (100, 1)
y shape: (100, 1)
[[-1.        ]
 [-0.97979798]
 [-0.95959596]
 [-0.93939394]
 [-0.91919192]]
[[1.17640523]
 [1.0000198 ]
 [1.0186982 ]
 [1.10655029]
 [1.03166958]]


## Initialize weights and biases


In [3]:
# Hidden layer (1 input -> 10 units): scaled Gaussian weights, zero biases.
w1 = 0.5 * np.random.randn(1, 10)
b1 = np.zeros(shape=(1, 10))

# Output layer (10 units -> 1 output). Drawn after w1 so the order of
# random draws is the same as before.
w2 = 0.5 * np.random.randn(10, 1)
b2 = np.zeros(shape=(1, 1))

# Print every initial parameter, each followed by a blank line.
for _tag, _param in (("w1: ", w1), ("b1: ", b1), ("w2: ", w2), ("b2: ", b2)):
    print(_tag, _param, "\n")

w1:  [[ 0.94157535 -0.67387953 -0.6352425   0.48469835 -0.5865617   0.97181059
  -0.20680949 -0.37372741  0.96147101  0.7402574 ]] 

b1:  [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]] 

w2:  [[ 0.93377948]
 [ 0.45302233]
 [-0.43061284]
 [ 0.95503248]
 [-0.13400169]
 [ 0.4012282 ]
 [ 0.47362598]
 [-0.07750505]
 [ 0.30703969]
 [ 0.46110334]] 

b2:  [[0.]] 



## Define hyperparameters and history buffers

In [4]:
# Step size applied to every gradient-descent update.
learning_rate = 0.1

# A snapshot for the animation is recorded every `display_interval` epochs.
display_interval = 5

# Records filled in by the training loop: parameter snapshots, the
# per-epoch loss values, and gradient snapshots.
history, loss_history, grad_history = [], [], []

## Activation function (ReLU)


In [5]:
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_derivative(x):
    """Derivative of ReLU: 1.0 where ``x > 0`` and 0.0 elsewhere.

    Parameters
    ----------
    x : array_like
        Pre-activation values. Lists and Python scalars are accepted as
        well as NumPy arrays, matching what ``relu`` accepts.

    Returns
    -------
    numpy.ndarray or numpy.float64
        Float mask with the same shape as ``x`` (the derivative at exactly
        0 is taken to be 0).
    """
    # np.asarray lets plain lists/scalars through; comparing a raw list
    # with 0 would raise TypeError and a Python bool has no .astype().
    return (np.asarray(x) > 0).astype(float)

## Training loop


In [6]:
# Start a fresh recording: without this, re-running the cell would append a
# second run to the lists and the animation's frame -> epoch mapping
# (epoch = frame * display_interval) would no longer hold.
# Note that w1/b1/w2/b2 are still updated in place, so a re-run continues
# from the current weights; re-run the initialisation cell to start over.
history.clear()
loss_history.clear()
grad_history.clear()

n_epochs = 101  # epochs 0..100 inclusive

for epoch in range(n_epochs):
    # Forward pass: linear -> ReLU -> linear (identity output).
    z1 = X @ w1 + b1
    a1 = relu(z1)
    z2 = a1 @ w2 + b2
    y_pred = z2

    # Mean squared error of the current weights.
    loss = np.mean((y_pred - y) ** 2)
    loss_history.append(loss)

    # Backpropagation (chain rule, output layer first).
    dloss = 2 * (y_pred - y) / y.shape[0]       # dL/dy_pred
    dw2 = a1.T @ dloss
    db2 = np.sum(dloss, axis=0, keepdims=True)
    da1 = dloss @ w2.T
    dz1 = da1 * relu_derivative(z1)             # gate by the ReLU slope
    dw1 = X.T @ dz1
    db1 = np.sum(dz1, axis=0, keepdims=True)

    if epoch % display_interval == 0:
        # Snapshot BEFORE the update so that the stored weights, the loss and
        # the gradients all describe the same state. Recording the weights
        # after the step paired post-update weights with a pre-update loss.
        # Copies are required because the arrays are modified in place below.
        history.append((w1.copy(), b1.copy(), w2.copy(), b2.copy(), loss))
        grad_history.append((dw1.copy(), db1.copy(), dw2.copy(), db2.copy()))

    # Gradient-descent step (in place).
    w1 -= learning_rate * dw1
    b1 -= learning_rate * db1
    w2 -= learning_rate * dw2
    b2 -= learning_rate * db2


## Set Up the Figure

In [7]:
# One large canvas divided into a 3x3 grid; the panels are placed on it
# in the following cell.
canvas_size = (22, 15)
fig = plt.figure(figsize=canvas_size)
gs = GridSpec(nrows=3, ncols=3, figure=fig)

<Figure size 2200x1500 with 0 Axes>

## Create the subplots


In [8]:
# Left column: data fit (top two rows) above the loss curve.
ax_data = fig.add_subplot(gs[:2, 0])
ax_loss = fig.add_subplot(gs[2, 0])

# Middle column, top two rows: network diagram.
ax_net = fig.add_subplot(gs[:2, 1])

# Right column, first two rows: one heat map per weight matrix.
ax_w1 = fig.add_subplot(gs[0, 2])
ax_w2 = fig.add_subplot(gs[1, 2])

# Bottom row, middle and right: bias bars and gradient bars.
ax_b = fig.add_subplot(gs[2, 1])
ax_grad = fig.add_subplot(gs[2, 2])


## Color scheme explanation


In [9]:
# Legend text shown in the bottom-right corner of the figure. Built line by
# line; the resulting string starts and ends with a newline.
color_info = (
    "\n"
    "Color Coding:\n"
    "- Forward Connections:\n"
    "  Blue (Negative weights) to Red (Positive weights)\n"
    "- Backward Flow: Purple dotted lines\n"
    "- Neurons:\n"
    "  Skyblue (Input), Lightgreen (Hidden), Salmon (Output)\n"
    "- Weight Values:\n"
    "  Black text (Hidden), White text (Output)\n"
)
fig.text(0.80, 0.01, color_info, fontsize=10,
         bbox={'facecolor': 'white', 'alpha': 0.7})

Text(0.8, 0.01, '\nColor Coding:\n- Forward Connections:\n  Blue (Negative weights) to Red (Positive weights)\n- Backward Flow: Purple dotted lines\n- Neurons:\n  Skyblue (Input), Lightgreen (Hidden), Salmon (Output)\n- Weight Values:\n  Black text (Hidden), White text (Output)\n')

## Color normalization for weights


In [10]:
# Gather every weight matrix that the animation will draw: the current
# weights plus the (w1, w2) pair of each recorded snapshot. Using only the
# final weights would let earlier frames fall outside the colour range.
_weight_arrays = [w1, w2]
for _snapshot in history:
    _weight_arrays.extend((_snapshot[0], _snapshot[2]))

# True extremes over all frames.
w_min = min(np.min(w) for w in _weight_arrays)
w_max = max(np.max(w) for w in _weight_arrays)

# Make the colour scale symmetric about zero so that the midpoint of the
# diverging 'coolwarm' map is exactly 0: negative weights are blue and
# positive weights are red, as the figure legend states. With
# vmin=w_min / vmax=w_max the midpoint was (w_min + w_max) / 2 instead.
_w_limit = max(abs(w_min), abs(w_max))
if _w_limit == 0:
    _w_limit = 1.0  # all-zero weights: avoid a degenerate vmin == vmax range
norm = Normalize(vmin=-_w_limit, vmax=_w_limit)


## Update function

In [11]:
def _draw_data_fit(w1, b1, w2, b2, loss, epoch):
    """Panel 1: training data with the network's prediction curve on top."""
    ax_data.scatter(X, y, color='blue', alpha=0.3, label='True Data')
    y_pred = relu(X @ w1 + b1) @ w2 + b2
    ax_data.plot(X, y_pred, color='red', linewidth=2, label='Prediction')
    ax_data.set_title(f'Data Fit (Epoch {epoch})\nLoss: {loss:.4f}', fontsize=12)
    ax_data.legend()
    ax_data.grid(True)


def _draw_loss_curve(loss, epoch):
    """Panel 2: loss history up to ``epoch`` with the current value marked."""
    ax_loss.plot(loss_history[:epoch+1], 'b-', label='Loss')
    ax_loss.scatter(epoch, loss, color='red', s=50)
    ax_loss.set_title('Training Loss', fontsize=12)
    ax_loss.set_xlabel('Epoch')
    ax_loss.grid(True)
    # Fixed axes for every frame so the curve grows instead of rescaling.
    ax_loss.set_xlim(0, max(len(loss_history) - 1, 1))
    ax_loss.set_ylim(0, max(loss_history)*1.1)


def _draw_network(w1, b1, w2, b2, epoch):
    """Panel 3: node/edge diagram of the 1 -> hidden -> 1 network."""
    n_hidden = w1.shape[1]
    mid_y = n_hidden / 2      # height of the input and output nodes
    layer_spacing = 1.5       # horizontal distance between layers
    radius = 0.4              # node radius; edges start/end at the rim

    ax_net.set_title(f'Network Architecture (Epoch {epoch})', fontsize=12)
    ax_net.set_xlim(-2.5, 2.5)
    ax_net.set_ylim(-1, n_hidden + 1)
    ax_net.axis('off')

    # Input node.
    ax_net.add_patch(Circle((-layer_spacing, mid_y), radius,
                            color='skyblue', alpha=0.8))
    ax_net.text(-layer_spacing, mid_y, 'Input\n(x)', ha='center', va='center')

    # Hidden nodes and the input -> hidden edges.
    for i in range(n_hidden):
        ax_net.add_patch(Circle((0, i), radius, color='lightgreen', alpha=0.8))

        # Each hidden node shows its outgoing weight and its bias.
        ax_net.text(0, i, f'h{i+1}\nw2={w2[i,0]:.2f}\nb={b1[0,i]:.2f}',
                    ha='center', va='center', fontsize=9,
                    bbox=dict(facecolor='white', alpha=0.7, boxstyle='round'))

        # Edge colour encodes the weight value, thickness its magnitude.
        weight = w1[0, i]
        ax_net.plot([-layer_spacing + radius, -radius], [mid_y, i],
                    color=plt.cm.coolwarm(norm(weight)),
                    linewidth=abs(weight)*5)
        ax_net.text(-layer_spacing/2, (mid_y + i)/2, f'{weight:.2f}',
                    ha='center', va='center', fontsize=8,
                    bbox=dict(facecolor='white', alpha=0.7))

    # Output node, labelled with the output bias.
    ax_net.add_patch(Circle((layer_spacing, mid_y), radius,
                            color='salmon', alpha=0.8))
    ax_net.text(layer_spacing, mid_y, f'Output\n(ŷ)\nb={b2[0,0]:.2f}',
                ha='center', va='center',
                bbox=dict(facecolor='white', alpha=0.7, boxstyle='round'))

    # Hidden -> output edges with a +/- marker at the midpoint.
    for i in range(n_hidden):
        weight = w2[i, 0]
        color = plt.cm.coolwarm(norm(weight))
        ax_net.plot([radius, layer_spacing - radius], [i, mid_y],
                    color=color, linewidth=abs(weight)*5)

        weight_sign = '+' if weight > 0 else '-'
        ax_net.text(layer_spacing/2, (mid_y + i)/2, weight_sign,
                    ha='center', va='center', fontsize=12,
                    color='white' if weight > 0 else 'black',
                    bbox=dict(facecolor=color, alpha=0.7, boxstyle='circle'))

    # Backward (gradient) flow: dotted purple lines from output to hidden.
    for i in range(n_hidden):
        ax_net.plot([layer_spacing - radius, radius], [mid_y, i],
                    color='purple', linestyle=':', alpha=0.7, linewidth=1.5)
        ax_net.text(layer_spacing/2 + 0.2, (mid_y + i)/2, f'∂h{i+1}',
                    ha='center', va='center', color='purple', fontsize=9)


def _draw_weight_matrices(w1, w2):
    """Panel 4: heat maps of both weight matrices, annotated with values."""
    # Annotations switch to white once |weight| exceeds half the weight range.
    half_range = (w_max - w_min) / 2
    n_hidden = w1.shape[1]

    # Input -> hidden weights, one row per hidden unit.
    ax_w1.imshow(w1.T, cmap='coolwarm', aspect='auto', norm=norm)
    ax_w1.set_title('Input → Hidden Weights', fontsize=12)
    ax_w1.set_ylabel('Hidden Units')
    ax_w1.set_xticks([])
    for i in range(n_hidden):
        weight = w1[0, i]
        ax_w1.text(0, i, f'{weight:.2f}',
                   ha='center', va='center',
                   color='white' if abs(weight) > half_range else 'black')

    # Hidden -> output weights; positive values get an explicit '+' sign.
    ax_w2.imshow(w2, cmap='coolwarm', aspect='auto', norm=norm)
    ax_w2.set_title('Hidden → Output Weights', fontsize=12)
    ax_w2.set_xlabel('Output Unit')
    ax_w2.set_ylabel('Hidden Units')
    for i in range(n_hidden):
        weight = w2[i, 0]
        ax_w2.text(0, i, f'{"+" if weight>0 else ""}{weight:.2f}',
                   ha='center', va='center',
                   color='white' if abs(weight) > half_range else 'black')


def _draw_biases(b1, b2):
    """Panel 5: bar chart of hidden biases plus the output bias."""
    n_hidden = b1.shape[1]
    colors = ['green' if b < 0 else 'red' for b in b1[0]]

    # Bars are centred on their tick positions so each bar sits directly
    # above its label (the output bar used to be drawn at 10.8 while its
    # 'out' label was at 10).
    ax_b.bar(np.arange(n_hidden), b1[0], width=0.4,
             label='Hidden Biases', color=colors)
    ax_b.bar(n_hidden, b2[0,0], width=0.4,
             label='Output Bias',
             color='green' if b2[0,0] < 0 else 'red')
    ax_b.set_title('Bias Values', fontsize=12)
    ax_b.set_xticks(np.arange(n_hidden + 1))
    ax_b.set_xticklabels([f'h{i+1}' for i in range(n_hidden)] + ['out'])
    ax_b.legend()
    ax_b.grid(True)


def _draw_gradients(dw1, dw2):
    """Panel 6: bars for each dL/dw1 entry and the mean of dL/dw2."""
    grad_values = np.concatenate([dw1.flatten(), [dw2.mean()]])
    positions = np.arange(grad_values.size)
    colors = ['blue' if g < 0 else 'red' for g in grad_values]

    ax_grad.bar(positions, grad_values, color=colors)
    ax_grad.set_title('Weight Gradients', fontsize=12)
    ax_grad.set_xticks(positions)
    ax_grad.set_xticklabels([f'w1_{i+1}' for i in range(dw1.size)] + ['w2_avg'])
    ax_grad.grid(True)

    # Value labels at half bar height; white text on the tallest bars.
    peak = np.max(np.abs(grad_values))
    for i, val in enumerate(grad_values):
        ax_grad.text(i, val/2, f'{val:.2f}',
                     ha='center', va='center',
                     color='white' if abs(val) > peak/2 else 'black')


def update(frame):
    """Redraw all seven panels for one animation frame.

    Intended as the ``func`` callback of ``FuncAnimation``.

    Parameters
    ----------
    frame : int
        Index into ``history`` and ``grad_history``; the corresponding
        epoch is ``frame * display_interval``.

    Notes
    -----
    Reads the module-level axes (``ax_*``), ``fig``, ``X``, ``y``,
    ``loss_history``, ``norm``, ``w_min`` and ``w_max``. The names ``w1``,
    ``b1``, ``w2`` and ``b2`` used here are the snapshot for ``frame`` and
    deliberately shadow the live training weights.
    """
    for ax in (ax_data, ax_loss, ax_net, ax_w1, ax_w2, ax_b, ax_grad):
        ax.clear()

    w1, b1, w2, b2, loss = history[frame]
    dw1, _, dw2, _ = grad_history[frame]   # bias gradients are not plotted
    epoch = frame * display_interval

    _draw_data_fit(w1, b1, w2, b2, loss, epoch)
    _draw_loss_curve(loss, epoch)
    _draw_network(w1, b1, w2, b2, epoch)
    _draw_weight_matrices(w1, w2)
    _draw_biases(b1, b2)
    _draw_gradients(dw1, dw2)

    # Lay out THIS figure. plt.tight_layout() targets pyplot's current
    # figure, which need not be `fig` (and is created empty if none exists).
    fig.tight_layout()

## Create and save animation


In [12]:
# Render one frame per recorded snapshot through update() and write the
# result to a GIF (interval=500 ms and fps=2 describe the same frame rate).
ani = animation.FuncAnimation(fig, update, frames=len(history), interval=500)
ani.save('NN_vis.gif', writer='pillow', dpi=120, fps=2)

# plt.draw()/plt.show() act on pyplot's *current* figure, which in the
# recorded run was a new, empty 640x480 figure rather than `fig`.
# Close any such stray figures and show the animated figure itself
# (in the state left by the last rendered frame) as the cell output.
plt.close('all')
fig

<Figure size 640x480 with 0 Axes>