# Imports

In [1]:
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Data

In [29]:
# Create a training set of 1024 examples.
# For instance, side lengths between 0 and 10.
np.random.seed(509)  # Fix the random state for reproducibility
num_samples = 2**20
x_train = np.random.uniform(low=0, high=10, size=(num_samples, 2)).astype(np.float32)
y_train = np.expand_dims(np.prod(x_train, axis=1), axis=-1)  # area = side1 * side2

# samples
print(f"Data: {x_train.shape}")
print(x_train[1])
print(y_train[1])

Data: (1048576, 2)
[7.0583777 3.7767043]
[26.657406]


# Model

In [31]:
def square_activation(x):
    return tf.math.pow(x, 2)

model = Sequential([
    Dense(2, activation=square_activation, input_shape=(2,), use_bias=False),
    Dense(1, use_bias=False)
])

model.summary()


In [32]:
model.compile(
    loss='mean_squared_error',
    optimizer=keras.optimizers.Adam(0.1),
    metrics=['mae']  # Mean Absolute Error for additional monitoring
    
)


In [34]:
history = model.fit(
    x_train, y_train,
    epochs=700,
    batch_size=2**12,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=10, monitor='mae', min_delta=0.001),
               tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=3, monitor='mae')],
    verbose=1
)


Epoch 1/700


[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 715.2131 - mae: 12.1323 - learning_rate: 0.1000
Epoch 2/700
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 10.0334 - mae: 2.0326 - learning_rate: 0.1000
Epoch 3/700
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2.7672e-05 - mae: 0.0021 - learning_rate: 0.1000
Epoch 4/700
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 7.6108e-11 - mae: 5.9704e-06 - learning_rate: 0.1000
Epoch 5/700
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 6.9763e-11 - mae: 5.8064e-06 - learning_rate: 0.1000
Epoch 6/700
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.7160e-11 - mae: 5.1737e-06 - learning_rate: 0.1000
Epoch 7/700
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 4.2379e-11 - mae: 4.3911e-06 - learning_rate:

In [35]:
# Test with a known example: side lengths 3 and 4 (expected area = 12)
test_input = np.array([[3, 4]], dtype=np.float32)
predicted_area = model.predict(test_input)
print("Input sides:", test_input)
print("Predicted area:", predicted_area[0, 0])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
Input sides: [[3. 4.]]
Predicted area: 12.000002


# Weights

In [36]:
# print the weights
for layer in model.layers:
    print(layer.get_weights())

[array([[-0.6046531,  0.4209206],
       [-1.11246  , -0.7744229]], dtype=float32)]
[array([[ 0.37166297],
       [-0.76694095]], dtype=float32)]


In [37]:
print("=== Weights for the First Layer (Dense with square activation) ===")
W1 = model.layers[0].get_weights()
print("W1:\n", W1)

print("\n=== Weights for the Second Layer (Output Dense Layer) ===")
W2 = model.layers[1].get_weights()
print("W2:\n", W2)

# Now, let's manually calculate the output for an example input.
# We choose an input of two side lengths. For example:
input_example = np.array([[3, 4]], dtype=np.float32)  # shape (1,2)
print("\nInput example:", input_example)

# ---- Step 1: First Layer Linear Combination ----
# Compute z1 = input * W1 + b1
z1 = np.dot(input_example, W1)
print("\nStep 1: Compute z1 (linear output of first layer):")
print("z1 =", z1)

# ---- Step 2: Apply the Squaring Activation ----
# Our custom activation squares each element: a1 = z1^2 (elementwise)
a1 = np.power(z1, 2)
print("\nStep 2: Apply square activation (a1 = z1^2 elementwise):")
print("a1 =", a1)

# ---- Step 3: Second Layer Calculation ----
# Compute the final output: output = a1 * W2 + b2
output_manual = np.dot(a1, W2)
print("\nStep 3: Compute final output (a1 dot W2 + b2):")
print("Output =", output_manual)

# Compare with model.predict
output_model = model.predict(input_example)
print("\nOutput from model.predict:", output_model)

=== Weights for the First Layer (Dense with square activation) ===
W1:
 [array([[-0.6046531,  0.4209206],
       [-1.11246  , -0.7744229]], dtype=float32)]

=== Weights for the Second Layer (Output Dense Layer) ===
W2:
 [array([[ 0.37166297],
       [-0.76694095]], dtype=float32)]

Input example: [[3. 4.]]

Step 1: Compute z1 (linear output of first layer):
z1 = [[[-6.2637997 -1.8349297]]]

Step 2: Apply square activation (a1 = z1^2 elementwise):
a1 = [[[39.235188  3.366967]]]

Step 3: Compute final output (a1 dot W2 + b2):
Output = [[[[12.000002]]]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step

Output from model.predict: [[12.000002]]
