In [1]:
import torch
import torch.nn.functional as F


# Seed PyTorch's global RNG. Nothing below draws random numbers, so the printed
# gradients do not depend on this call; it is kept so the RNG state this cell
# leaves behind is unchanged.
torch.manual_seed(44)

# Shared input for all three activations. requires_grad=True makes it a leaf
# tensor whose .grad is populated by backward().
x_in = torch.tensor([0.5, -1.0, 2.0], requires_grad=True)
# Upstream gradient ∂E/∂χ_out. It is all ones, so every gradient computed below
# equals the activation's own derivative evaluated at x_in.
error_grad = torch.tensor([1.0, 1.0, 1.0])

# Each activation call builds its own graph from x_in and that graph is
# back-propagated exactly once, so backward() is called without
# retain_graph=True and autograd can free the saved tensors right away.

# Sigmoid: dσ/dx = σ(x) * (1 - σ(x))
sigmoid_out = torch.sigmoid(x_in)
sigmoid_out.backward(error_grad)
sigmoid_grad = x_in.grad.clone()  # copy: .grad is zeroed in place on the next line
x_in.grad.zero_()  # backward() accumulates into .grad, so reset before the next run

# Tanh: d tanh/dx = 1 - tanh(x)^2
tanh_out = torch.tanh(x_in)
tanh_out.backward(error_grad)
tanh_grad = x_in.grad.clone()
x_in.grad.zero_()

# ReLU: derivative is 1 for positive inputs and 0 for negative inputs
relu_out = F.relu(x_in)
relu_out.backward(error_grad)
relu_grad = x_in.grad.clone()

# Print results
print("Sigmoid Gradient:\n", sigmoid_grad)
print("Tanh Gradient:\n", tanh_grad)
print("ReLU Gradient:\n", relu_grad)


Sigmoid Gradient:
 tensor([0.2350, 0.1966, 0.1050])
Tanh Gradient:
 tensor([0.7864, 0.4200, 0.0707])
ReLU Gradient:
 tensor([1., 0., 1.])


2. ### Example of the sigmoid activation function: gradient with respect to its input (∂E/∂χ_in)

In [2]:
import torch

# Input values (χ_in). requires_grad=True makes this a leaf tensor whose .grad
# is populated by backward().
x_in = torch.tensor([0.5, -1.0, 2.0], requires_grad=True)
# Example gradient of the loss w.r.t. the activation output (∂E/∂χ_out)
error_grad = torch.tensor([1.0, 1.0, 1.0])

# Apply sigmoid activation: χ_out = σ(χ_in)
sigmoid_out = torch.sigmoid(x_in)

# Compute gradients using backpropagation:
# ∂E/∂χ_in = ∂E/∂χ_out * σ(χ_in) * (1 - σ(χ_in))
# The graph is back-propagated only once, so retain_graph=True is not needed.
sigmoid_out.backward(error_grad)

# Extract computed gradients (a copy, independent of later changes to x_in.grad)
sigmoid_grad = x_in.grad.clone()

# Print results
print("Input (χ_in):", x_in)
print("Sigmoid Output (χ_out):", sigmoid_out)
print("Computed Gradient (∂E/∂χ_in):", sigmoid_grad)


Input (χ_in): tensor([ 0.5000, -1.0000,  2.0000], requires_grad=True)
Sigmoid Output (χ_out): tensor([0.6225, 0.2689, 0.8808], grad_fn=<SigmoidBackward0>)
Computed Gradient (∂E/∂χ_in): tensor([0.2350, 0.1966, 0.1050])


3. ### Example of the tanh activation function: gradient with respect to its input (∂E/∂χ_in)

In [3]:
import torch

# Upstream gradient handed back by the next layer (∂E/∂χ_out); all ones here
error_grad = torch.tensor([1.0] * 3)

# Leaf tensor that autograd tracks, so its .grad is filled in by backward()
x_in = torch.tensor([0.5, -1.0, 2.0]).requires_grad_()

# Forward pass: χ_out = tanh(χ_in)
tanh_out = x_in.tanh()

# Backward pass. Autograd stores in x_in.grad:
#   ∂E/∂χ_in = ∂E/∂χ_out * (1 - tanh(χ_in)^2)
tanh_out.backward(gradient=error_grad)

print("Input (χ_in):", x_in)
print("Tanh Output (χ_out):", tanh_out)
print("Computed Gradient (∂E/∂χ_in):", x_in.grad)


Input (χ_in): tensor([ 0.5000, -1.0000,  2.0000], requires_grad=True)
Tanh Output (χ_out): tensor([ 0.4621, -0.7616,  0.9640], grad_fn=<TanhBackward0>)
Computed Gradient (∂E/∂χ_in): tensor([0.7864, 0.4200, 0.0707])


4. ### Example of the ReLU activation function: gradient with respect to its input (∂E/∂χ_in)

In [4]:
import torch

# Upstream gradient from the next layer (∂E/∂χ_out); the error is assumed to be
# propagated equally to every element
error_grad = torch.tensor([1.0] * 3)

# Leaf tensor tracked by autograd; it covers a negative, a zero and a positive input
x_in = torch.tensor([-1.0, 0.0, 2.0]).requires_grad_()

# Forward pass: χ_out = ReLU(χ_in)
relu_out = x_in.relu()

# Backward pass. Autograd stores in x_in.grad:
#   ∂E/∂χ_in = ∂E/∂χ_out * ReLU'(χ_in)
# The printed gradient is 0 for the input that is exactly 0.
relu_out.backward(gradient=error_grad)

print("Input (χ_in):", x_in)
print("ReLU Output (χ_out):", relu_out)
print("Computed Gradient (∂E/∂χ_in):", x_in.grad)


Input (χ_in): tensor([-1.,  0.,  2.], requires_grad=True)
ReLU Output (χ_out): tensor([0., 0., 2.], grad_fn=<ReluBackward0>)
Computed Gradient (∂E/∂χ_in): tensor([0., 0., 1.])
