In [28]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import pairwise_distances_argmin_min
import seaborn as sns

# Seed NumPy's global RNG; get_y draws its noise from it.
np.random.seed(42)

# Create output directories (one per figure family written by the loop below)
for _plot_dir in ('scatter', 'raster', 'contour'):
    os.makedirs(_plot_dir, exist_ok=True)

# Functions
def get_x(n, seed=None, n_x2=51):
    """Build the full X1 x X2 design grid as a two-column DataFrame.

    Parameters
    ----------
    n : int-like
        Number of equally spaced X1 values on [-0.5, 0.5].
    seed : int or None, optional
        If given, NumPy's global RNG is reseeded with it. The grid itself is
        deterministic; the reseeding is kept only so existing callers that
        rely on that side effect behave as before.
    n_x2 : int-like, optional
        Number of equally spaced X2 values on [0, 1]. Defaults to 51, the
        value this function previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        ``int(n) * int(n_x2)`` rows with columns ``'X1'`` and ``'X2'``.
        X1 varies slowest: each X1 value is paired with the whole X2 sweep
        before moving to the next X1 value.
    """
    if seed is not None:
        np.random.seed(seed)
    X1 = np.linspace(-0.5, 0.5, int(n))
    X2 = np.linspace(0, 1, int(n_x2))
    # indexing='ij' makes the first axis follow X1, so ravel() yields
    # X1-major ordering directly.
    g1, g2 = np.meshgrid(X1, X2, indexing='ij')
    X_grid = np.column_stack([g1.ravel(), g2.ravel()])
    return pd.DataFrame(X_grid, columns=['X1', 'X2'])

def get_x_grid(n):
    """Return an n-by-n square grid over [-0.5, 0.5] x [-0.5, 0.5].

    The result is a DataFrame with columns 'X1' and 'X2' and n*n rows,
    ordered with X1 varying slowest.
    """
    axis = np.linspace(-0.5, 0.5, n)
    first, second = np.meshgrid(axis, axis, indexing='ij')
    return pd.DataFrame({'X1': first.ravel(), 'X2': second.ravel()})

def theta_triangle(X, width=0.2):
    """Triangle-shaped bump in X1: elementwise max(1 - |X1| / width, 0).

    Only the 'X1' entry of X is read; the value is 1 at X1 = 0 and reaches
    0 once |X1| >= width.
    """
    distance_from_peak = np.abs(X['X1'])
    unclipped = 1 - distance_from_peak / width
    return np.maximum(unclipped, 0)

def get_y(X, theta_fn, sigma):
    """Evaluate theta_fn on X and add Gaussian noise with std-dev sigma.

    The noise is drawn from NumPy's global RNG (np.random.normal), one draw
    per row of X, so results depend on the current global seed/state.
    """
    sample_count = len(X)
    signal = theta_fn(X)
    noise = np.random.normal(0, sigma, sample_count)
    return signal + noise

# Start plotting loop
# Query locations (X1, X2) whose forest weights are visualised. Defined once,
# before the loops, because both the weight computation and the plotting
# stage need them.
x1s_subset = np.array([[0, 1], [0.18, 0.5], [0.2, 0.5], [0.22, 0.5], [0.4, 0.5], [0.5, 0.5]])


def theta_fn(X):
    """Noise-free regression function: the triangle bump with width 0.2."""
    return theta_triangle(X, width=0.2)


for sigma in [0, 0.1]:
    # round() before int(): int() alone truncates, so a product such as
    # 0.29 * 100 (= 28.999...) would otherwise be tagged 028.
    sigma_tag = f"sigma{int(round(sigma * 100)):03d}"
    rfs = {}
    # alpha_dict[n] has shape (len(X), len(x1s_subset)); column q holds the
    # weights of every training point with respect to query point q.
    alpha_dict = {}
    for n in [50, 100, 200]:
        X = get_x(n)
        Y = get_y(X, theta_fn, sigma)

        rf = RandomForestRegressor(n_estimators=100, random_state=42)
        rf.fit(X, Y)
        rfs[n] = rf

        # leaf_indices[i, t] identifies the leaf of tree t that training
        # point i falls into.
        leaf_indices = rf.apply(X)

        # Weight of training point i for a query = fraction of trees in which
        # i shares a leaf with the training point nearest to the query.
        # Only the queried rows are computed: the full len(X) x len(X) matrix
        # (about 830 MB at n=200) was never read anywhere else. Each query's
        # own anchor point always has weight 1, so alpha_max below is
        # unaffected by skipping the other rows.
        query_columns = []
        for x1, x2 in x1s_subset:
            idx, _ = pairwise_distances_argmin_min(np.array([[x1, x2]]), X)
            shares_leaf = leaf_indices == leaf_indices[idx[0]]
            query_columns.append(shares_leaf.mean(axis=1))
        alpha_dict[n] = np.column_stack(query_columns)

        # SCATTER PLOT
        scatter_df = X.copy()
        scatter_df['Y'] = Y
        plt.figure(figsize=(6, 5))
        plt.scatter(scatter_df['X1'], scatter_df['X2'], c=scatter_df['Y'], cmap='Blues', edgecolor='k')
        plt.axis('equal')
        plt.grid(False)
        plt.xticks([])
        plt.yticks([])
        plt.tight_layout()
        plt.savefig(f'scatter/RF_theta_triangle_{sigma_tag}_n{n:03d}_scatter.png', dpi=300, transparent=True)
        plt.close()

    # One colour scale shared by every figure produced for this sigma.
    alpha_max = max(np.max(a) for a in alpha_dict.values())

    for n in rfs:
        X_orig = get_x(n)
        for q, (x1, x2) in enumerate(x1s_subset):
            point_tag = f"{int(round(x1 * 100)):03d}_{int(round(x2 * 100)):03d}"

            alpha_df = X_orig.copy()
            alpha_df['alpha'] = alpha_dict[n][:, q]

            # RASTER PLOT
            pivot = alpha_df.pivot(index='X2', columns='X1', values='alpha')
            plt.figure(figsize=(6, 5))
            plt.imshow(pivot, origin='lower', cmap='Blues', extent=[-0.5, 0.5, 0, 1], vmin=0, vmax=alpha_max)
            plt.scatter(x1, x2, color='red', marker='x')
            plt.axis('off')
            plt.tight_layout()
            fn = f"raster/RF_theta_triangle_effective_weights_{sigma_tag}_n{n:03d}_x_{point_tag}.png"
            plt.savefig(fn, dpi=300, transparent=True)
            plt.close()

            # CONTOUR PLOT
            # Contours the same forest weights as the raster, on the training
            # grid. The previous version drew exp(-10 * distance) on a
            # [-0.5, 0.5]^2 grid, which neither used the forest nor covered
            # the X2 range [0, 1] the query points live in.
            plt.figure(figsize=(6, 5))
            plt.tricontourf(
                alpha_df['X1'], alpha_df['X2'], alpha_df['alpha'],
                levels=np.linspace(0, alpha_max, 11),  # 10 bands on the raster's colour scale
                cmap='Blues',
            )
            plt.scatter(x1, x2, color='red', marker='x')
            plt.axis('off')
            plt.tight_layout()
            fn = f"contour/RF_theta_triangle_effective_weights_{sigma_tag}_n{n:03d}_contour_x_{point_tag}.png"
            plt.savefig(fn, dpi=300, transparent=True)
            plt.close()


In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import animation
from sklearn.ensemble import RandomForestRegressor

# --- Data functions ---
def get_x(n):
    """Return the design grid: n X1 values on [-0.5, 0.5] crossed with 51 X2 values on [0, 1].

    The DataFrame has columns 'X1' and 'X2' and n * 51 rows, with X1
    varying slowest.
    """
    first_axis = np.linspace(-0.5, 0.5, n)
    second_axis = np.linspace(0, 1, 51)
    # Each X1 value is repeated for one full sweep of X2.
    pairs = np.column_stack([
        np.repeat(first_axis, second_axis.size),
        np.tile(second_axis, first_axis.size),
    ])
    return pd.DataFrame(pairs, columns=['X1', 'X2'])

def theta_triangle(X, width=0.2):
    """Return max(1 - |X1| / width, 0) elementwise, reading only X['X1']."""
    scaled_offset = np.abs(X['X1']) / width
    return np.maximum(1 - scaled_offset, 0)

def get_y(X, theta_fn, sigma):
    """Return theta_fn(X) plus N(0, sigma) noise drawn from NumPy's global RNG."""
    clean = theta_fn(X)
    return clean + np.random.normal(loc=0, scale=sigma, size=len(X))

# --- Training data ---
n, sigma = 100, 0.1
X = get_x(n)


def theta_fn(X):
    """Noise-free response: the triangle bump of width 0.2 in X1."""
    return theta_triangle(X, width=0.2)


# Noise comes from NumPy's global RNG, which this cell does not reseed.
Y = get_y(X, theta_fn, sigma)

# --- Train Random Forest ---
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X, Y)

# --- Query points ---
# One (x1, x2) pair per animation frame.
x1s_subset = np.array([
    [0, 1],
    [0.18, 0.5],
    [0.2, 0.5],
    [0.22, 0.5],
    [0.4, 0.5],
    [0.5, 0.5],
])

# --- Precompute grid ---
# (grid_n * grid_n, 2) array of evaluation points, first coordinate varying slowest.
grid_n = 100
grid = np.stack(
    np.meshgrid(
        np.linspace(-0.5, 0.5, grid_n),
        np.linspace(0, 1, grid_n),
        indexing='ij',
    ),
    axis=-1,
).reshape(-1, 2)

# --- Setup figure ---
fig, ax = plt.subplots(figsize=(6, 5))
# Fully transparent figure and axes backgrounds.
fig.patch.set_alpha(0)
ax.set_facecolor((0, 0, 0, 0))

# --- Update function ---
def update(frame):
    """Redraw the shared axes `ax` for animation frame number `frame`.

    Draws a filled contour of exp(-10 * distance to the frame's query point)
    over the module-level `grid`, marks the query point, and labels it.
    The plotted surface is a mock stand-in for weights, not a model output.
    """
    ax.clear()
    ax.set_facecolor((0, 0, 0, 0))  # clear() resets the face colour

    qx, qy = x1s_subset[frame]
    offsets = grid - x1s_subset[frame]
    dists = np.linalg.norm(offsets, axis=1)
    mock_alpha = np.exp(-dists * 10)  # Mock "alpha" weights

    # Contour plot
    ax.tricontourf(grid[:, 0], grid[:, 1], mock_alpha, levels=15, cmap='Blues')

    # Query point marker
    ax.scatter(qx, qy, color='red', marker='x', s=80)

    # Label (x1, x2) just above the axes
    ax.text(
        0.5, 1.02,
        f"x1 = {qx:.2f}, x2 = {qy:.2f}",
        transform=ax.transAxes,
        ha='center', va='bottom', fontsize=12, color='black',
    )

    ax.set_xlim(-0.5, 0.5)
    ax.set_ylim(0, 1)
    ax.set_aspect('equal')
    ax.axis('off')

# --- Build animation ---
ani = animation.FuncAnimation(
    fig, update, frames=len(x1s_subset), interval=1000, repeat=False
)

# --- Save as H.264 MP4 using FFmpeg ---
# libx264 has no pixel format with an alpha channel, so the previously
# requested 'yuva420p' could not be honoured and the file never carried
# transparency. Request the format that is actually encoded. A transparent
# video needs an alpha-capable codec/container instead, which would change
# the output file name and is therefore not done here.
ani.save(
    "contour_film.mp4",
    writer=animation.FFMpegWriter(
        fps=1,
        extra_args=['-vcodec', 'libx264', '-pix_fmt', 'yuv420p']
    ),
    dpi=150
)

plt.close(fig)
print("Saved: contour_film.mp4.")


Saved: contour_film.mp4.


In [30]:
import matplotlib.pyplot as plt
from matplotlib import animation
import numpy as np
import pandas as pd

# Same data setup
def get_x(n):
    """Cross n X1 values on [-0.5, 0.5] with 51 X2 values on [0, 1].

    Returns a DataFrame with columns 'X1' and 'X2'; rows are ordered with
    X1 as the slow index and X2 as the fast index.
    """
    a, b = np.meshgrid(
        np.linspace(-0.5, 0.5, n),
        np.linspace(0, 1, 51),
        indexing='ij',
    )
    return pd.DataFrame({'X1': a.ravel(), 'X2': b.ravel()})

def theta_triangle(X, width=0.2):
    """Triangle bump centred at X1 = 0: 1 - |X1| / width, floored at 0."""
    x1 = X['X1']
    bump = 1 - np.abs(x1) / width
    floor = 0
    return np.maximum(bump, floor)

def get_y(X, theta_fn, sigma):
    """Noisy response: theta_fn(X) plus one N(0, sigma) draw per row of X.

    Draws come from NumPy's global RNG.
    """
    mean_response = theta_fn(X)
    disturbance = np.random.normal(0, sigma, len(X))
    return mean_response + disturbance

# Data
n, sigma = 100, 0.1
X = get_x(n)


def theta_fn(X):
    """Noise-free response: the triangle bump of width 0.2 in X1."""
    return theta_triangle(X, width=0.2)


# Noise comes from NumPy's global RNG, which this cell does not reseed.
Y = get_y(X, theta_fn, sigma)
X['Y'] = Y  # X now carries the response as a third column

# Setup animation
fig, ax = plt.subplots(figsize=(6, 5))
# One (x1, x2) marker position per frame.
query_points = np.array([
    [0, 1],
    [0.18, 0.5],
    [0.2, 0.5],
    [0.22, 0.5],
    [0.4, 0.5],
    [0.5, 0.5],
])

def update_scatter(frame):
    """Redraw `ax` for frame `frame`: the data scatter coloured by Y plus the frame's query marker."""
    ax.clear()

    # Training points, coloured by their response value.
    ax.scatter(X['X1'], X['X2'], c=X['Y'], cmap='Blues', edgecolor='k')

    # Query point for this frame.
    marker_x, marker_y = query_points[frame]
    ax.scatter(marker_x, marker_y, color='red', marker='x', s=100)

    ax.set_xlim(-0.5, 0.5)
    ax.set_ylim(0, 1)
    ax.set_title(f'Scatter Frame {frame+1}')
    ax.axis('off')

# One frame per query point, rendered by update_scatter.
ani_scatter = animation.FuncAnimation(
    fig,
    update_scatter,
    frames=len(query_points),
    interval=1000,
)

# Encode at one frame per second via the ffmpeg writer.
ani_scatter.save(
    "scatter_film.mp4",
    writer='ffmpeg',
    fps=1,
    dpi=150,
)

plt.close(fig)
print("Scatter film saved as 'scatter_film.mp4'")


Scatter film saved as 'scatter_film.mp4'


In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import animation

# --- Prepare data ---
def get_x(n):
    """Grid of n X1 values on [-0.5, 0.5] by 51 X2 values on [0, 1] as a DataFrame.

    Columns are 'X1' and 'X2'; X1 is the slow-varying coordinate.
    """
    xs = np.linspace(-0.5, 0.5, n)
    ys = np.linspace(0, 1, 51)
    coords = np.empty((xs.size * ys.size, 2))
    coords[:, 0] = np.repeat(xs, ys.size)  # each X1 held for a full X2 sweep
    coords[:, 1] = np.tile(ys, xs.size)    # X2 sweep restarted for every X1
    return pd.DataFrame(coords, columns=['X1', 'X2'])

n = 100
X_orig = get_x(n)
# One (x1, x2) query location per animation frame.
query_points = np.array([
    [0, 1],
    [0.18, 0.5],
    [0.2, 0.5],
    [0.22, 0.5],
    [0.4, 0.5],
    [0.5, 0.5],
])

# --- Setup figure ---
fig, ax = plt.subplots(figsize=(6, 5))
# Make both the figure and the axes backgrounds fully transparent.
fig.patch.set_alpha(0)
ax.set_facecolor((0, 0, 0, 0))

# --- Update function ---
def update_raster(frame):
    """Redraw `ax` for frame `frame` as a raster of exp(-10 * distance to the query point).

    The kernel is evaluated at every row of the module-level `X_orig`,
    arranged into an X2-by-X1 table, and shown with imshow on a fixed
    0..1 colour scale. The query point is marked and labelled.
    """
    ax.clear()
    ax.set_facecolor((0, 0, 0, 0))  # clear() resets the face colour

    qx, qy = query_points[frame]
    offsets = X_orig.values - query_points[frame]
    kernel = np.exp(-np.linalg.norm(offsets, axis=1) * 10)

    # Rows indexed by X2, columns by X1, so the image is oriented like the plane.
    heat = X_orig.assign(alpha=kernel).pivot_table(index='X2', columns='X1', values='alpha')

    image_options = dict(
        origin='lower',
        cmap='Blues',
        extent=[-0.5, 0.5, 0, 1],
        vmin=0,
        vmax=1,
        interpolation='none',
    )
    ax.imshow(heat.values, **image_options)

    ax.scatter(qx, qy, color='red', marker='x', s=100)

    # Label with x1 and x2, placed just above the axes
    ax.text(
        0.5, 1.02,
        f"x1 = {qx:.2f}, x2 = {qy:.2f}",
        transform=ax.transAxes,
        ha='center', va='bottom', fontsize=12, color='black',
    )

    ax.axis('off')
    ax.set_aspect('equal')

# --- Create animation ---
ani_raster = animation.FuncAnimation(
    fig, update_raster, frames=len(query_points), interval=1000, repeat=False
)

# --- Save as H.264 MP4 using FFmpeg ---
# libx264 has no pixel format with an alpha channel, so the previously
# requested 'yuva420p' could not be honoured and the file never carried
# transparency. Request the format that is actually encoded. The output
# name is kept unchanged so anything that expects this path still finds it.
raster_film_path = "raster_film_transparent.mp4"
ani_raster.save(
    raster_film_path,
    writer=animation.FFMpegWriter(
        fps=1,
        extra_args=['-vcodec', 'libx264', '-pix_fmt', 'yuv420p']
    ),
    dpi=150
)

plt.close(fig)
# Report the path that was actually written (the old message named a
# different file, 'raster_film.mp4').
print(f"Raster film saved as '{raster_film_path}'")


Raster film saved as 'raster_film.mp4'
