In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler
from sklearn.neighbors import NearestNeighbors
from numba import jit

@jit(nopython=True)
def rossler(xyz, a=0.2, b=0.2, c=5.7):
    """Evaluate the Rossler vector field at a single state.

    Args:
        xyz: Sequence of three values ``(x, y, z)``.
        a, b, c: System parameters.

    Returns:
        Array ``[dx/dt, dy/dt, dz/dt]`` with
        ``dx/dt = -y - z``, ``dy/dt = x + a*y``, ``dz/dt = b + z*(x - c)``.
    """
    x, y, z = xyz
    dx_dt = -y - z
    dy_dt = x + a * y
    dz_dt = b + z * (x - c)
    return np.array([dx_dt, dy_dt, dz_dt])

def rossler_dm_x(x, y, z, a=0.2, b=0.2, c=5.7):
    """Derivative-coordinate map of the observable ``x``.

    Args:
        x, y, z: State coordinates (scalars or equally shaped arrays).
        a, b, c: Rossler parameters.

    Returns:
        Array stacking ``[x, dx/dt, d2x/dt2]`` along the first axis.
    """
    first_deriv = -y - z
    second_deriv = -x - a*y - b - z*(x - c)
    return np.array([x, first_deriv, second_deriv])

def rossler_dm_y(x, y, z, a=0.2, b=0.2, c=5.7):
    """Derivative-coordinate map of the observable ``y``.

    Args:
        x, y, z: State coordinates (scalars or equally shaped arrays).
        a: Rossler parameter used by the ``y`` equation.
        b, c: Accepted for a uniform signature; not used here.

    Returns:
        Array stacking ``[y, dy/dt, d2y/dt2]`` along the first axis.
    """
    first_deriv = x + a*y
    second_deriv = -y - z + a*x + a*a*y
    return np.array([y, first_deriv, second_deriv])

def rossler_dm_z(x, y, z, a=0.2, b=0.2, c=5.7):
    """Derivative-coordinate map of the observable ``z``.

    Args:
        x, y, z: State coordinates (scalars or equally shaped arrays).
        b, c: Rossler parameters used by the ``z`` equation.
        a: Accepted for a uniform signature; not used here.

    Returns:
        Array stacking ``[z, dz/dt, d2z/dt2]`` along the first axis.
    """
    shift = x - c
    first_deriv = b + z*shift
    # d2z/dt2 = (dz/dt)*(x - c) + z*(dx/dt), with dx/dt = -y - z.
    second_deriv = first_deriv*shift + z*(-y - z)
    return np.array([z, first_deriv, second_deriv])

def rossler_dm_yz(x, y, z, a=0.2, b=0.2, c=5.7):
    """Derivative-coordinate map of the observable ``s = y + z``.

    The derivatives are taken along the Rossler flow
    ``dx/dt = -y - z``, ``dy/dt = x + a*y``, ``dz/dt = b + z*(x - c)``
    and assembled with the chain rule rather than from a hand-expanded
    polynomial, so every term can be checked against the flow equations.

    Args:
        x, y, z: State coordinates (scalars or equally shaped arrays).
        a, b, c: Rossler parameters.

    Returns:
        Array stacking ``[s, ds/dt, d2s/dt2]`` along the first axis.
    """
    # First time derivatives of the state.
    dx = -y - z
    dy = x + a * y
    dz = b + z * (x - c)

    # Second time derivatives: differentiate dy and dz once more.
    ddy = dx + a * dy
    ddz = dz * (x - c) + z * dx

    return np.array([y + z, dy + dz, ddy + ddz])

def rossler_dm_zx(x, y, z, a=0.2, b=0.2, c=5.7):
    """Derivative-coordinate map of the observable ``s = z + x``.

    The derivatives are taken along the Rossler flow
    ``dx/dt = -y - z``, ``dy/dt = x + a*y``, ``dz/dt = b + z*(x - c)``
    and assembled with the chain rule rather than from a hand-expanded
    polynomial, so every term can be checked against the flow equations.

    Args:
        x, y, z: State coordinates (scalars or equally shaped arrays).
        a, b, c: Rossler parameters.

    Returns:
        Array stacking ``[s, ds/dt, d2s/dt2]`` along the first axis.
    """
    # First time derivatives of the state.
    dx = -y - z
    dy = x + a * y
    dz = b + z * (x - c)

    # Second time derivatives: differentiate dx and dz once more.
    ddx = -dy - dz
    ddz = dz * (x - c) + z * dx

    return np.array([z + x, dz + dx, ddz + ddx])

def rossler_dm_xy(x, y, z, a=0.2, b=0.2, c=5.7):
    """Derivative-coordinate map of the observable ``s = x + y``.

    The derivatives are taken along the Rossler flow
    ``dx/dt = -y - z``, ``dy/dt = x + a*y``, ``dz/dt = b + z*(x - c)``
    and assembled with the chain rule rather than from a hand-expanded
    polynomial, so every term can be checked against the flow equations.

    Args:
        x, y, z: State coordinates (scalars or equally shaped arrays).
        a, b, c: Rossler parameters.

    Returns:
        Array stacking ``[s, ds/dt, d2s/dt2]`` along the first axis.
    """
    # First time derivatives of the state.
    dx = -y - z
    dy = x + a * y
    dz = b + z * (x - c)

    # Second time derivatives: differentiate dx and dy once more.
    ddx = -dy - dz
    ddy = dx + a * dy

    return np.array([x + y, dx + dy, ddx + ddy])

def rossler_y_z_y(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return the mixed coordinates ``[y, z, dy/dt]`` with ``dy/dt = x + a*y``.

    ``b`` and ``c`` are accepted for a uniform signature and are not used.
    """
    dy_dt = x + a*y
    return np.array([y, z, dy_dt])

def rossler_x_y_x(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return the mixed coordinates ``[x, y, dx/dt]`` with ``dx/dt = -y - z``.

    ``a``, ``b`` and ``c`` are accepted for a uniform signature and are not used.
    """
    dx_dt = -y - z
    return np.array([x, y, dx_dt])

def rossler_x_y_y(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return the mixed coordinates ``[x, y, dy/dt]`` with ``dy/dt = x + a*y``.

    ``z``, ``b`` and ``c`` are accepted for a uniform signature and are not used.
    """
    dy_dt = x + a*y
    return np.array([x, y, dy_dt])

def rossler_y_z_z(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return the mixed coordinates ``[y, z, dz/dt]`` with ``dz/dt = b + z*(x - c)``.

    ``a`` is accepted for a uniform signature and is not used.
    """
    dz_dt = b + z*(x - c)
    return np.array([y, z, dz_dt])

def rossler_x_z_z(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return the mixed coordinates ``[x, z, dz/dt]`` with ``dz/dt = b + z*(x - c)``.

    ``y`` and ``a`` are accepted for a uniform signature and are not used.
    """
    dz_dt = b + z*(x - c)
    return np.array([x, z, dz_dt])

def rossler_x_z_x(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return the mixed coordinates ``[x, z, dx/dt]`` with ``dx/dt = -y - z``.

    ``a``, ``b`` and ``c`` are accepted for a uniform signature and are not used.
    """
    dx_dt = -y - z
    return np.array([x, z, dx_dt])

def build_embeddings_from_y(y_arr):
    """Build every candidate embedding from a trajectory array.

    Args:
        y_arr: 2-D array whose columns 0, 1, 2 are read as the ``x``, ``y``
            and ``z`` coordinates of the trajectory.

    Returns:
        Dict mapping an embedding label to the transposed output of the
        corresponding map function, i.e. one row per time sample.
    """
    x, y, z = y_arr[:, 0], y_arr[:, 1], y_arr[:, 2]

    # Label -> map function, in the order the labels appear in the result.
    embedding_maps = (
        ("x_x_x", rossler_dm_x),
        ("y_y_y", rossler_dm_y),
        ("z_z_z", rossler_dm_z),
        ("y_z_y", rossler_y_z_y),
        ("x_y_x", rossler_x_y_x),
        ("x_y_y", rossler_x_y_y),
        ("y_z_z", rossler_y_z_z),
        ("x_z_z", rossler_x_z_z),
        ("x_z_x", rossler_x_z_x),
        ("y_plus_z", rossler_dm_yz),
        ("x_plus_z", rossler_dm_zx),
        ("x_plus_y", rossler_dm_xy),
    )
    return {label: embed(x, y, z).T for label, embed in embedding_maps}

@jit(nopython=True, fastmath=True)
def compute_errors_numba(neighbor_clouds, max_iter=50, eps=1e-5):
    """Mean distance of each point cloud to its (approximate) geometric median.

    For every cloud the centre is initialised at the centroid and refined
    with a Weiszfeld-style fixed-point iteration (inverse-distance weighted
    mean). The reported error is the average Euclidean distance of the
    cloud's points to that centre.

    Args:
        neighbor_clouds: 3-D array indexed as ``[cloud, point, dimension]``.
        max_iter: Maximum number of refinement iterations per cloud.
        eps: Iteration stops once the centre moves by less than this amount.

    Returns:
        1-D float64 array with one error value per cloud.
    """
    n_clouds, n_pts, n_dims = neighbor_clouds.shape
    errors = np.empty(n_clouds, dtype=np.float64)

    for row in range(n_clouds):
        pts = neighbor_clouds[row]

        # Initial guess: coordinate-wise mean of the cloud.
        center = np.zeros(n_dims)
        for d in range(n_dims):
            acc = 0.0
            for j in range(n_pts):
                acc += pts[j, d]
            center[d] = acc / n_pts

        for _ in range(max_iter):
            weight_total = 0.0
            weighted_sum = np.zeros(n_dims)

            for j in range(n_pts):
                sq = 0.0
                for d in range(n_dims):
                    delta = pts[j, d] - center[d]
                    sq += delta * delta
                radius = np.sqrt(sq)

                # The small offset keeps the weight finite when a point
                # coincides with the current centre.
                inv_radius = 1.0 / (radius + 1e-12)
                weight_total += inv_radius
                for d in range(n_dims):
                    weighted_sum[d] += pts[j, d] * inv_radius

            # Normalise the update and measure how far the centre moved.
            step_sq = 0.0
            for d in range(n_dims):
                weighted_sum[d] /= weight_total
                delta = center[d] - weighted_sum[d]
                step_sq += delta * delta
                center[d] = weighted_sum[d]

            if np.sqrt(step_sq) < eps:
                break

        # Average distance from the final centre to every point.
        spread = 0.0
        for j in range(n_pts):
            sq = 0.0
            for d in range(n_dims):
                delta = pts[j, d] - center[d]
                sq += delta * delta
            spread += np.sqrt(sq)

        errors[row] = spread / n_pts

    return errors

def compute_ground_truth_fnn(embedding_neighbors_idx, true_state, query_indices, macroscopic_threshold):
    """Measure how many embedding neighbours are far apart in the true state.

    Args:
        embedding_neighbors_idx: Integer array ``[query, neighbour]`` of row
            indices into ``true_state``.
        true_state: Array of true states, one row per time index.
        query_indices: Row indices into ``true_state`` for the query points.
        macroscopic_threshold: A neighbour counts as false when its true-state
            distance to the query is strictly greater than this value.

    Returns:
        Tuple ``(fraction of false neighbours over all pairs,
        fraction of queries with at least one false neighbour)``.
    """
    query_states = true_state[query_indices]
    neighbor_states = true_state[embedding_neighbors_idx]

    # Broadcast each query against its own row of neighbours.
    offsets = query_states[:, None, :] - neighbor_states
    true_dists = np.linalg.norm(offsets, axis=-1)

    false_mask = true_dists > macroscopic_threshold
    per_pair_rate = np.mean(false_mask)
    per_query_rate = np.mean(false_mask.any(axis=1))
    return per_pair_rate, per_query_rate

def _select_theiler_neighbors(idx_raw, query_idx, k, theiler_w):
    """Keep, per query row, the first ``k`` candidates outside the Theiler window."""
    candidates = np.asarray(idx_raw, dtype=np.int64)
    q_col = np.asarray(query_idx, dtype=np.int64)[:, np.newaxis]

    # A candidate is usable when it is not the query itself and is more than
    # ``theiler_w`` time steps away from it.
    allowed = (candidates != q_col) & (np.abs(candidates - q_col) > theiler_w)

    n_allowed = allowed.sum(axis=1)
    if np.any(n_allowed < k):
        raise ValueError(
            f"only {int(n_allowed.min())} neighbours remain outside the Theiler "
            f"window for some query point, but k={k} are required; use a longer "
            "series, a smaller k/theiler_w, or a larger buffer_mult"
        )

    # A stable sort on the negated mask moves allowed candidates to the front
    # while preserving their nearest-first order.
    order = np.argsort(~allowed, axis=1, kind="stable")[:, :k]
    return np.take_along_axis(candidates, order, axis=1)


def estimate_intrinsic_stochasticity(embedding_data, pushforward, k=50, n_samples=5000,
                                     random_state=0, theiler_w=0, buffer_mult=6,
                                     true_state=None, delta_state=None):
    """Estimate how unpredictable the future is from an embedding's neighbours.

    The embedding is scaled with ``RobustScaler``. For randomly drawn query
    points, the ``k`` nearest neighbours outside the Theiler window are found
    among the rows that still have a future ``pushforward`` steps ahead. The
    spread of the neighbours' future positions is scored with
    ``compute_errors_numba`` and averaged over the queries.

    Args:
        embedding_data: 2-D array-like, one row per time sample.
        pushforward: Number of rows to look ahead; must be non-negative and
            smaller than the number of rows.
        k: Number of neighbours kept per query.
        n_samples: Maximum number of query points (drawn without replacement).
        random_state: Seed for the query-point draw.
        theiler_w: Candidates within this many rows of the query are excluded.
        buffer_mult: The neighbour search requests at least ``k * buffer_mult``
            candidates so that enough survive the Theiler exclusion.
        true_state: Optional array of true states aligned row-for-row with
            ``embedding_data``; used only for the fold diagnostics.
        delta_state: Optional true-state distance threshold for the fold
            diagnostics.

    Returns:
        Dict with ``"E_star_k"`` (mean local error), ``"fold_frac"`` and
        ``"fold_any"``. The two fold entries are NaN when ``true_state`` or
        ``delta_state`` is not supplied.

    Raises:
        ValueError: If ``pushforward`` is out of range, or if fewer than ``k``
            neighbours remain for some query after the Theiler exclusion.
    """
    data_norm = RobustScaler().fit_transform(np.asarray(embedding_data))

    T = data_norm.shape[0]
    if pushforward < 0 or pushforward >= T:
        raise ValueError(
            f"pushforward={pushforward} must satisfy 0 <= pushforward < {T} "
            "(the number of samples)"
        )
    max_start = T - pushforward
    X_curr = data_norm[:max_start]
    Y_fut = data_norm[pushforward:]

    rng = np.random.default_rng(random_state)
    n_q = int(min(n_samples, max_start))
    query_idx = rng.choice(max_start, size=n_q, replace=False)

    # Ask for extra candidates: up to 2 * theiler_w + 1 of them (the query and
    # its temporal neighbours) can be discarded afterwards.
    k_query = int(min(max_start, max(k * buffer_mult, k + 2 * theiler_w + 5)))
    nbrs = NearestNeighbors(n_neighbors=k_query, algorithm="auto", n_jobs=-1).fit(X_curr)
    idx_raw = nbrs.kneighbors(X_curr[query_idx], return_distance=False)

    nn_idx = _select_theiler_neighbors(idx_raw, query_idx, k, theiler_w)

    # Row i of Y_fut is the state ``pushforward`` steps after row i of X_curr.
    neighbor_clouds = Y_fut[nn_idx]
    local_errors = compute_errors_numba(neighbor_clouds)
    E_star_k = float(np.mean(local_errors))

    if true_state is None or delta_state is None:
        # The fold diagnostics need a reference state; report them as missing.
        fold_frac = float("nan")
        fold_any = float("nan")
    else:
        fold_frac, fold_any = compute_ground_truth_fnn(nn_idx, true_state, query_idx, delta_state)

    return {
        "E_star_k": E_star_k,
        "fold_frac": fold_frac,
        "fold_any": fold_any
    }

# Integrate the Rossler system with explicit Euler steps.
dt = 0.01
num_steps = int(2e4)
y_clean = np.zeros((num_steps + 1, 3), dtype=np.float64)
y_clean[0] = np.array([1.0, 1.0, 0.0])

for i in range(num_steps):
    y_clean[i + 1] = y_clean[i] + dt * rossler(y_clean[i])

# Threshold for "macroscopically far": 5% of the largest distance seen among
# 2000 randomly paired trajectory points.
rng_init = np.random.default_rng(42)
idx_A = rng_init.choice(len(y_clean), 2000)
idx_B = rng_init.choice(len(y_clean), 2000)
pair_dists = np.linalg.norm(y_clean[idx_A] - y_clean[idx_B], axis=1)
MACRO_THRESHOLD = 0.05 * np.max(pair_dists)

embeddings_clean = build_embeddings_from_y(y_clean)
# Noise amplitude reference: mean per-coordinate standard deviation.
y_scale = float(np.mean(np.std(y_clean, axis=0, ddof=0)))
all_rows = []

for p in [0.00]:
    for rep in range(10):
        rng = np.random.default_rng(123 + rep)
        for name, emb_clean in embeddings_clean.items():
            emb_arr = np.asarray(emb_clean)
            if emb_arr.ndim == 1:
                emb_arr = emb_arr.reshape(-1, 1)

            # Only draw noise when a non-zero level is requested.
            if p == 0.0:
                emb_noisy = emb_arr
            else:
                emb_noisy = emb_arr + rng.normal(0.0, p * y_scale, size=emb_arr.shape)

            out = estimate_intrinsic_stochasticity(
                embedding_data=emb_noisy,
                pushforward=20,
                k=50,
                n_samples=5000,
                random_state=rep,
                theiler_w=30,
                buffer_mult=6,
                true_state=y_clean,
                delta_state=MACRO_THRESHOLD,
            )

            all_rows.append({
                "noise_level": p,
                "embedding": name,
                "E_star_k": out["E_star_k"],
                "fold_frac": out["fold_frac"],
                "fold_any": out["fold_any"],
            })

# Aggregate the repetitions per noise level and embedding.
summary = (
    pd.DataFrame(all_rows)
    .groupby(["noise_level", "embedding"])
    .agg(
        E_star_mean=("E_star_k", "mean"),
        E_star_std=("E_star_k", "std"),
        Fold_Frac_mean=("fold_frac", "mean"),
        Fold_Any_mean=("fold_any", "mean"),
    )
    .reset_index()
)

pd.set_option('display.max_rows', None)
pd.set_option('display.precision', 4)
print(summary.sort_values("E_star_mean"))