In [None]:
# Essential imports for gradient analysis
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import seaborn as sns
from scipy import optimize
import warnings
warnings.filterwarnings('ignore')

# Set style.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'.
# Fall back to the legacy name when the new one is not registered so the
# notebook still starts on older Matplotlib installs instead of raising.
_mpl_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_mpl_style)
sns.set_palette("husl")

print("🚀 Ready to explore gradient dynamics!")
print(f"TensorFlow version: {tf.__version__}")

# Set random seed for both NumPy's global generator and TensorFlow so that a
# fresh "Restart & Run All" reproduces the same numbers.
np.random.seed(42)
tf.random.set_seed(42)


In [None]:
# Simulate gradient flow through a deep network
def simulate_gradient_flow(activation_func, derivative_func, num_layers=10, input_gradient=1.0, rng=None):
    """
    Simulate how a gradient is rescaled while flowing through a deep network.

    One pre-activation value per layer is drawn from a standard normal
    distribution; the running gradient is then multiplied by the activation
    derivative evaluated at that value (chain rule). Only the activation
    derivative is modelled -- no weight matrices are involved.

    Args:
        activation_func: Activation function. Kept in the signature so callers
            can pass (activation, derivative) pairs; it is not evaluated here.
        derivative_func: Derivative of the activation function. Called once
            per layer with a single scalar pre-activation.
        num_layers: Number of layers to simulate.
        input_gradient: Gradient value entering the first layer. It is never
            modified, even when it is a NumPy array.
        rng: Optional ``numpy.random.Generator`` used to draw the
            pre-activations. When ``None`` (default) NumPy's global random
            state is used, exactly as before.

    Returns:
        Array whose first entry is ``input_gradient`` followed by the (signed)
        gradient after each of the ``num_layers`` layers.
    """
    # Simulate typical pre-activations, one per layer.
    if rng is None:
        layer_activations = np.random.normal(0, 1, num_layers)
    else:
        layer_activations = rng.normal(0, 1, num_layers)

    gradients = [input_gradient]
    current_gradient = input_gradient

    for pre_activation in layer_activations:
        # Chain rule: scale by the local derivative. A new object is created
        # on purpose (no in-place ``*=``): with an array-valued gradient an
        # in-place update would overwrite the caller's input and make every
        # stored history entry alias the same array.
        current_gradient = current_gradient * derivative_func(pre_activation)
        gradients.append(current_gradient)

    return np.array(gradients)

# Define common activation functions and their derivatives
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """
    Derivative of ReLU: 1.0 where ``x > 0`` and 0.0 elsewhere (including 0).

    Accepts NumPy arrays, NumPy scalars and plain Python numbers.
    ``np.greater`` is used instead of the ``>`` operator because a plain
    Python comparison yields a built-in ``bool``, which has no ``astype``.
    """
    return np.greater(x, 0).astype(float)

def sigmoid_activation(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))`` with ``x`` clipped to [-500, 500]."""
    # Clipping bounds the exponent so np.exp cannot overflow.
    clipped = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-clipped)
    return 1 / denominator

def sigmoid_derivative(x):
    """Derivative of the sigmoid at ``x``, computed as ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    sig = sigmoid_activation(x)
    complement = 1 - sig
    return complement * sig

def tanh_activation(x):
    """Hyperbolic tangent activation (thin wrapper around ``np.tanh``)."""
    squashed = np.tanh(x)
    return squashed

def tanh_derivative(x):
    """Derivative of tanh at ``x``: ``1 - tanh(x)^2``."""
    t = np.tanh(x)
    return 1 - t ** 2

# Registry of the activations to compare: display name -> (activation, derivative).
# Insertion order is the order in which the curves are simulated and plotted.
activation_functions = dict(
    ReLU=(relu, relu_derivative),
    Sigmoid=(sigmoid_activation, sigmoid_derivative),
    Tanh=(tanh_activation, tanh_derivative),
)

print("🔍 Gradient Flow Analysis:")
print("=" * 50)

# Re-seed NumPy's global generator at the top of this cell so the cell is
# idempotent: re-running it on its own reproduces the same curves instead of
# continuing from whatever random state earlier executions left behind.
# The value matches the seed set in the setup cell, so a fresh
# "Restart & Run All" produces exactly the same numbers as before.
np.random.seed(42)

# Run simulation multiple times and average
num_simulations = 100
num_layers = 15

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# name -> (mean gradient per layer, std of gradient per layer)
all_results = {}
for name, (func, deriv) in activation_functions.items():
    # One entry per simulation run; each entry holds num_layers + 1 values
    # (the input gradient followed by the gradient after every layer).
    layer_gradients = []

    for sim in range(num_simulations):
        gradients = simulate_gradient_flow(func, deriv, num_layers)
        layer_gradients.append(gradients)

    # Average across simulations (axis 0 = simulation run)
    avg_gradients = np.mean(layer_gradients, axis=0)
    std_gradients = np.std(layer_gradients, axis=0)
    all_results[name] = (avg_gradients, std_gradients)

    # Plot gradient magnitude vs layer depth
    layers = range(len(avg_gradients))
    ax1.plot(layers, np.abs(avg_gradients), label=f'{name}', linewidth=2, alpha=0.8)
    # NOTE(review): the lower band edge (mean - std) can be <= 0, which a log
    # axis cannot represent; Matplotlib's log scale is documented to clip such
    # values by default, so the band may reach the bottom of the plot --
    # confirm this is the intended look.
    ax1.fill_between(layers,
                     np.abs(avg_gradients) - std_gradients,
                     np.abs(avg_gradients) + std_gradients,
                     alpha=0.2)

ax1.set_xlabel('Layer Depth')
ax1.set_ylabel('Gradient Magnitude')
ax1.set_title('Gradient Flow Through Deep Networks')
ax1.set_yscale('log')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Plot gradient ratio (current/initial) on a linear axis
for name, (avg_gradients, std_gradients) in all_results.items():
    # Index 0 holds the gradient that entered the first layer.
    gradient_ratios = avg_gradients / avg_gradients[0]
    layers = range(len(gradient_ratios))
    ax2.plot(layers, gradient_ratios, label=f'{name}', linewidth=2, alpha=0.8)

ax2.set_xlabel('Layer Depth')
ax2.set_ylabel('Gradient Ratio (Current/Initial)')
ax2.set_title('Gradient Preservation Through Layers')
# Reference lines: ratio 1 means the gradient is unchanged, 0.1 means only a
# tenth of it is left.
ax2.axhline(y=1, color='red', linestyle='--', alpha=0.5, label='No change')
ax2.axhline(y=0.1, color='orange', linestyle='--', alpha=0.5, label='90% loss')
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Report, per activation, how much of the input gradient is left after the
# last layer and classify the outcome by the final/initial ratio.
print("\nGradient Analysis Results:")
for name, (avg_gradients, _) in all_results.items():
    initial_gradient, final_gradient = avg_gradients[0], avg_gradients[-1]
    ratio = final_gradient / initial_gradient

    # Below 1% of the input counts as vanishing, above 100x as exploding.
    if abs(ratio) < 0.01:
        verdict = "  ⚠️  Vanishing gradient problem detected!"
    elif abs(ratio) > 100:
        verdict = "  💥 Exploding gradient problem detected!"
    else:
        verdict = "  ✅ Gradient flow is stable"

    report_lines = [
        f"\n{name}:",
        f"  Initial gradient: {initial_gradient:.4f}",
        f"  Final gradient: {final_gradient:.4f}",
        f"  Ratio (final/initial): {ratio:.6f}",
        verdict,
    ]
    print("\n".join(report_lines))
