In [12]:
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def generate_y(X, config):
    """Evaluate one of the synthetic, noise-free target functions on X.

    Parameters
    ----------
    X : 2-D NumPy array
        Input matrix with at least 5 columns; only columns 0 through 4
        are read.
    config : {1, 2, 3, "interaction"}
        Selects the target. Configs 1, 2 and 3 are sums of terms that each
        depend on a single column, while "interaction" combines columns
        through products.

    Returns
    -------
    The selected target evaluated row by row (one value per row of X).

    Raises
    ------
    ValueError
        If ``config`` is none of the supported values.
    """
    a, b, c, d, e = (X[:, col] for col in range(5))

    if config == 1:
        # Sine, shifted quadratic, exponential, linear and log terms.
        target = (
            2.0 * np.sin(np.pi * a)
            + 3.0 * (b - 0.5) ** 2
            + np.exp(c)
            + 2.0 * d
            + np.log1p(np.abs(e))
        )
        return target

    if config == 2:
        # Oscillating, saturating, cubic and square-root terms.
        target = (
            3.0 * np.cos(2 * np.pi * a)
            + 2.0 * b * np.sin(np.pi * b)
            + 1.5 * np.tanh(3 * c - 1.5)
            + 0.5 * d ** 3
            + 2.0 * np.sqrt(np.abs(e))
        )
        return target

    if config == 3:
        # Step, hinge, spike at zero, damped sine and arctan terms.
        target = (
            4.0 * (a > 0.5).astype(float)
            + 3.0 * np.maximum(0, b - 0.3) ** 1.5
            + -1.0 * np.exp(-5 * np.abs(c))
            + 2.5 * np.sin(np.pi * d) * np.exp(-d)
            + 1.5 * np.arctan(5 * (e - 0.5))
        )
        return target

    if config == "interaction":
        # Every term couples at least two columns.
        target = (
            np.sin(np.pi * (a + b * c))
            + 3.0 * c * d
            + np.exp(-e * a)
            + 0.7 * np.tanh(b * e + a * c)
        )
        return target

    raise ValueError("Unknown config")


def estimate_standardized_contributions(config, D=5, sigma=1.0, samples_per_feature=10000, seed=42):
    """Monte Carlo estimate of per-feature variance shares plus a noise share.

    The total variance is estimated from ``samples_per_feature`` standard
    normal input rows passed through ``generate_y`` with Gaussian noise of
    standard deviation ``sigma`` added. For each feature j, a fresh input
    matrix is drawn, every other column is overwritten with a single random
    constant, and the variance of the noise-free output is recorded. The
    noise variance ``sigma**2`` is appended last and everything is divided by
    the estimated total variance.

    Parameters
    ----------
    config : passed through to ``generate_y``.
    D : number of input columns to draw.
    sigma : standard deviation of the additive noise.
    samples_per_feature : number of rows drawn for every estimate.
    seed : value used to reseed the global NumPy random state (side effect).

    Returns
    -------
    numpy array of length D + 1: one normalised variance per feature,
    followed by the normalised noise variance.

    Notes
    -----
    The entries are only expected to add up to roughly 1 when the target is a
    sum of single-feature terms; with interacting features the one-at-a-time
    variances depend on where the other columns happen to be fixed.
    """
    np.random.seed(seed)
    n_rows = samples_per_feature

    # Reference: variance of the noisy response over fully random inputs.
    reference_inputs = np.random.normal(0, 1, size=(n_rows, D))
    noiseless = generate_y(reference_inputs, config=config)
    total_variance = np.var(noiseless + np.random.normal(0, sigma, size=n_rows))

    shares = np.empty(D + 1)
    for active in range(D):
        design = np.random.normal(0, 1, size=(n_rows, D))
        for other in range(D):
            if other == active:
                continue
            # Freeze this column at one scalar draw so only `active` varies.
            design[:, other] = np.random.normal(0, 1)
        shares[active] = np.var(generate_y(design, config=config))

    # Last slot carries the variance of the additive noise.
    shares[D] = sigma**2

    # Express everything as a fraction of the estimated total variance.
    return shares / total_variance


In [None]:
# Variance-share bar chart for the "interaction" target at sigma = 1.0.
D = 5
sigma = 1.0
config = "interaction"  # single source of truth for both the estimate and the title
labels = [f"$x_{i+1}$" for i in range(D)] + ["noise"]

contribs = estimate_standardized_contributions(config=config, D=D, sigma=sigma)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(labels, contribs, color='skyblue')
# Raw f-string: "\s" is not a valid escape sequence in a normal string literal.
ax.set_title(rf"Config {config}, $\sigma=${sigma}")
ax.set_ylabel("Proportion of Output Variance")
ax.set_ylim(0, 1.05)
ax.grid(True)

#plt.suptitle("Standardized Feature Contributions + Noise", fontsize=14)
# rect keeps the top 5% of the figure free (room for the optional suptitle above).
fig.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
# Variance-share bar chart for the "interaction" target at sigma = 3.0.
D = 5
sigma = 3.0
config = "interaction"  # single source of truth for both the estimate and the title
labels = [f"$x_{i+1}$" for i in range(D)] + ["noise"]

contribs = estimate_standardized_contributions(config=config, D=D, sigma=sigma)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(labels, contribs, color='skyblue')
# Raw f-string: "\s" is not a valid escape sequence in a normal string literal.
ax.set_title(rf"Config {config}, $\sigma={sigma}$")
ax.set_ylabel("Proportion of Output Variance")
ax.set_ylim(0, 1.05)
ax.grid(True)

#plt.suptitle("Standardized Feature Contributions + Noise", fontsize=14)
# rect keeps the top 5% of the figure free (room for the optional suptitle above).
fig.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
# Variance-share bar chart for target config 2 at sigma = 1.0.
D = 5
sigma = 1.0
config = 2  # single source of truth for both the estimate and the title
labels = [f"$x_{i+1}$" for i in range(D)] + ["noise"]

contribs = estimate_standardized_contributions(config=config, D=D, sigma=sigma)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(labels, contribs, color='skyblue')
# Raw f-string: "\s" is not a valid escape sequence in a normal string literal.
ax.set_title(rf"Config {config}, $\sigma=${sigma}")
ax.set_ylabel("Proportion of Output Variance")
ax.set_ylim(0, 1.05)
ax.grid(True)

#plt.suptitle("Standardized Feature Contributions + Noise", fontsize=14)
# rect keeps the top 5% of the figure free (room for the optional suptitle above).
fig.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
# Variance-share bar chart for target config 2 at sigma = 3.0.
D = 5
sigma = 3.0
config = 2  # single source of truth for both the estimate and the title
labels = [f"$x_{i+1}$" for i in range(D)] + ["noise"]

contribs = estimate_standardized_contributions(config=config, D=D, sigma=sigma)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(labels, contribs, color='skyblue')
# Raw f-string: "\s" is not a valid escape sequence in a normal string literal.
ax.set_title(rf"Config {config}, $\sigma=${sigma}")
ax.set_ylabel("Proportion of Output Variance")
ax.set_ylim(0, 1.05)
ax.grid(True)

#plt.suptitle("Standardized Feature Contributions + Noise", fontsize=14)
# rect keeps the top 5% of the figure free (room for the optional suptitle above).
fig.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
# Variance-share bar chart for target config 1 at sigma = 1.0.
D = 5
sigma = 1.0
config = 1  # single source of truth for both the estimate and the title
labels = [f"$x_{i+1}$" for i in range(D)] + ["noise"]

contribs = estimate_standardized_contributions(config=config, D=D, sigma=sigma)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(labels, contribs, color='skyblue')
# The title is derived from `config` so it cannot disagree with the plotted data.
# Raw f-string: "\s" is not a valid escape sequence in a normal string literal.
ax.set_title(rf"Config {config}, $\sigma=${sigma}")
ax.set_ylabel("Proportion of Output Variance")
ax.set_ylim(0, 1.05)
ax.grid(True)

#plt.suptitle("Standardized Feature Contributions + Noise", fontsize=14)
# rect keeps the top 5% of the figure free (room for the optional suptitle above).
fig.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Target function definition
# def generate_y(X, config):
#     x0, x1, x2, x3, x4 = X[:, 0], X[:, 1], X[:, 2], X[:, 3], X[:, 4]
#     if config == 1:
#         return (
#             2.0 * np.sin(np.pi * x0) +
#             3.0 * (x1 - 0.5)**2 +
#             np.exp(x2) +
#             2.0 * x3 +
#             np.log1p(np.abs(x4))
#         )
#     elif config == 2:
#         return (
#             3.0 * np.cos(2 * np.pi * x0) +
#             2.0 * x1 * np.sin(np.pi * x1) +
#             1.5 * np.tanh(3 * x2 - 1.5) +
#             0.5 * x3**3 +
#             2.0 * np.sqrt(np.abs(x4))
#         )
#     elif config == 3:
#         return (
#             4.0 * (x0 > 0.5).astype(float) +
#             3.0 * np.maximum(0, x1 - 0.3)**1.5 +
#             -1.0 * np.exp(-5 * np.abs(x2)) +
#             2.5 * np.sin(np.pi * x3) * np.exp(-x3) +
#             1.5 * np.arctan(5 * (x4 - 0.5))
#         )

# Resolution of each one-dimensional sweep, and the labels of the five inputs
n_points = 200
var_names = ['x0', 'x1', 'x2', 'x3', 'x4']

# Baseline design: every input pinned at 0 (the mean of a standard normal),
# repeated once per sweep point
fixed_vals = np.zeros(5)
X = np.repeat(fixed_vals[np.newaxis, :], n_points, axis=0)

# One sweep grid per input; all five cover the same interval [-1.96, 1.96]
x_ranges = [np.linspace(-1.96, 1.96, n_points) for _ in var_names]

# One panel per input: move that input along its grid while the others stay
# at the baseline, and trace the "interaction" target
fig, axes = plt.subplots(1, 5, figsize=(20, 4))

for col, (ax, name, sweep) in enumerate(zip(axes, var_names, x_ranges)):
    X_sweep = X.copy()
    X_sweep[:, col] = sweep
    y = generate_y(X_sweep, config="interaction")
    ax.plot(sweep, y)
    ax.set_title(f'y vs {name}')
    ax.set_xlabel(name)
    ax.set_ylabel('y')
    ax.grid(True)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# --- Target function ---
def generate_y(X, config="interaction"):
    """Evaluate the noise-free "interaction" target on X.

    Parameters
    ----------
    X : 2-D NumPy array
        Input matrix with at least 5 columns; only columns 0 through 4
        are read.
    config : str, default "interaction"
        Must be "interaction". This definition implements only that target,
        so any other value is rejected instead of being silently ignored
        (which would return interaction values under a different label).

    Returns
    -------
    The target evaluated row by row (one value per row of X).

    Raises
    ------
    ValueError
        If ``config`` is anything other than "interaction".
    """
    if config != "interaction":
        raise ValueError(
            f"Unknown config: {config!r} (this definition only implements 'interaction')"
        )

    x0, x1, x2, x3, x4 = X[:, 0], X[:, 1], X[:, 2], X[:, 3], X[:, 4]
    return (
        np.sin(np.pi * (x0 + x1 * x2)) +
        3.0 * x2 * x3 +
        np.exp(-x4 * x0) +
        0.7 * np.tanh(x1 * x4 + x0 * x2)
    )

# --- 2D Surface Plots ---
# Each panel shows the "interaction" target over two inputs while the other
# three stay at 0.
var_pairs = [(0, 1), (0, 2), (1, 2), (2, 3), (0, 4), (1, 4)]  # (x_i, x_j) pairs
fig = plt.figure(figsize=(18, 12))

# The evaluation grid is identical for every pair, so it is built once.
grid_points = 50
xi_vals = np.linspace(-1.5, 1.5, grid_points)
xj_vals = np.linspace(-1.5, 1.5, grid_points)
XI, XJ = np.meshgrid(xi_vals, xj_vals)

for panel, (i, j) in enumerate(var_pairs, start=1):
    # Fresh all-zero design per pair; only columns i and j carry the grid.
    X_grid = np.zeros((XI.size, 5))
    X_grid[:, i] = XI.ravel()
    X_grid[:, j] = XJ.ravel()

    surface = generate_y(X_grid, config="interaction")
    y_vals = surface.reshape(XI.shape)

    ax = fig.add_subplot(3, 2, panel, projection='3d')
    ax.plot_surface(XI, XJ, y_vals, cmap='viridis', edgecolor='none')
    ax.set(
        xlabel=f'x{i}',
        ylabel=f'x{j}',
        zlabel='y',
        title=f'y vs x{i} and x{j}',
    )

plt.tight_layout()
plt.show()

# --- Partial Dependence Plots (PDPs) ---
# For each input, fix it at every grid value in turn and average the
# "interaction" target over a background sample of the remaining inputs.
pdp_vars = [0, 1, 2, 3, 4]
n_points = 100
samples_for_avg = 100

# Seeded local generator: the curves are reproducible on every run and the
# global np.random state is left alone.
pdp_rng = np.random.default_rng(42)

# One background sample shared by every grid value. Drawing a new sample per
# grid value would add independent Monte Carlo noise to each point and make
# the curves jagged for reasons unrelated to the target function.
X_background = pdp_rng.normal(0, 1, size=(samples_for_avg, 5))

fig, axes = plt.subplots(1, 5, figsize=(20, 4))

for idx, var in enumerate(pdp_vars):
    x_vals = np.linspace(-1.96, 1.96, n_points)
    pdp_vals = np.zeros_like(x_vals)

    for i, val in enumerate(x_vals):
        # Copy so the shared background is never modified.
        X_random = X_background.copy()
        X_random[:, var] = val
        y_vals = generate_y(X_random, config="interaction")
        pdp_vals[i] = y_vals.mean()

    axes[idx].plot(x_vals, pdp_vals, color='orange')
    axes[idx].set_title(f'PDP for x{var}')
    axes[idx].set_xlabel(f'x{var}')
    axes[idx].set_ylabel('y (avg)')
    axes[idx].grid(True)

plt.tight_layout()
plt.show()


In [19]:
import numpy as np

# Simulate a large noisy sample from a strongly interacting target.
# A seeded local generator makes the sample (and any statistic computed from
# it) reproducible without reseeding the global np.random state.
SIM_SEED = 42
sim_rng = np.random.default_rng(SIM_SEED)

N, D = 1_000_000, 8
X = sim_rng.normal(0, 1, size=(N, D))
# Only the first five columns enter the target; columns 5-7 are not used
# (presumably irrelevant/distractor features; confirm intent).
x0, x1, x2, x3, x4 = X[:, 0], X[:, 1], X[:, 2], X[:, 3], X[:, 4]

# Noise-free response: every term couples at least two inputs.
y_strong_interactions = (
    np.sin(np.pi * (x0 + x1 * x2)) +
    3.0 * x2 * x3 +
    np.cos(2 * np.pi * x4 * x0) +
    0.7 * np.tanh(x1 * x4 + x0 * x2)
)

# Observed response: add unit-variance Gaussian noise.
y = y_strong_interactions + sim_rng.normal(0, 1, size=N)


In [None]:
# Empirical variance of the noisy response `y` (NumPy default: ddof=0)
y.var()