In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import RobustScaler
from sklearn.neighbors import NearestNeighbors
from numba import jit

@jit(nopython=True)
def rossler(xyz, a=0.2, b=0.2, c=5.7):
    """Evaluate the Rössler vector field at a single state.

    Parameters
    ----------
    xyz : sequence of three numbers
        Current state ``(x, y, z)``.
    a, b, c : float
        Rössler system parameters.

    Returns
    -------
    numpy.ndarray
        The time derivatives ``[dx/dt, dy/dt, dz/dt]``.
    """
    px = xyz[0]
    py = xyz[1]
    pz = xyz[2]

    dx = -py - pz
    dy = px + a * py
    dz = b + pz * (px - c)
    return np.array([dx, dy, dz])

def rossler_dm_x(x, y, z, a=0.2, b=0.2, c=5.7):
    """Differential embedding of the observable ``x``.

    Returns ``np.array([x, dx/dt, d2x/dt2])`` where the derivatives are
    written out analytically from the Rössler equations
    ``dx/dt = -y - z``, ``dy/dt = x + a*y``, ``dz/dt = b + z*(x - c)``.
    Inputs may be scalars or equally shaped arrays; array inputs of length
    ``n`` give a result of shape ``(3, n)``.
    """
    first_derivative = -y - z
    # d/dt(-y - z) = -(x + a*y) - (b + z*(x - c)), expanded term by term.
    second_derivative = -x - a * y - b - z * (x - c)
    return np.array([x, first_derivative, second_derivative])

def rossler_dm_y(x, y, z, a=0.2, b=0.2, c=5.7):
    """Differential embedding of the observable ``y``.

    Returns ``np.array([y, dy/dt, d2y/dt2])`` with the derivatives written
    analytically from the Rössler equations (``dy/dt = x + a*y`` and
    ``dx/dt = -y - z``).  ``b`` and ``c`` are accepted for a uniform
    signature but are not used by this observable.
    """
    first_derivative = x + a * y
    # d/dt(x + a*y) = (-y - z) + a*(x + a*y), expanded term by term.
    second_derivative = -y - z + a * x + a * a * y
    return np.array([y, first_derivative, second_derivative])

def rossler_dm_z(x, y, z, a=0.2, b=0.2, c=5.7):
    """Differential embedding of the observable ``z``.

    Returns ``np.array([z, dz/dt, d2z/dt2])`` with the derivatives written
    analytically from the Rössler equations (``dz/dt = b + z*(x - c)`` and
    ``dx/dt = -y - z``).  ``a`` is accepted for a uniform signature but is
    not used by this observable.
    """
    first_derivative = b + z * (x - c)
    # Product rule on z*(x - c): dz/dt*(x - c) + z*dx/dt.
    second_derivative = (b + z * (x - c)) * (x - c) + z * (-y - z)
    return np.array([z, first_derivative, second_derivative])

def rossler_dm_yz(x, y, z, a=0.2, b=0.2, c=5.7):
    """Differential embedding of the observable ``s = y + z``.

    Returns ``np.array([s, ds/dt, d2s/dt2])`` along the Rössler flow
    ``dx/dt = -y - z``, ``dy/dt = x + a*y``, ``dz/dt = b + z*(x - c)``.
    Inputs may be scalars or equally shaped arrays; array inputs of length
    ``n`` give a result of shape ``(3, n)``.

    The second derivative is evaluated with the chain rule on the flow
    components rather than as an expanded polynomial.  The previous expanded
    form contained ``-(2c + 1)*x*z`` where the correct terms are
    ``-2c*x*z - y*z``.
    """
    # Rössler vector field at the given state(s).
    x_dot = -y - z
    y_dot = x + a * y
    z_dot = b + z * (x - c)

    s = y + z
    s_dot = y_dot + z_dot
    # d/dt(y_dot) = x_dot + a*y_dot
    # d/dt(z_dot) = z_dot*(x - c) + z*x_dot
    s_ddot = x_dot + a * y_dot + z_dot * (x - c) + z * x_dot
    return np.array([s, s_dot, s_ddot])

def rossler_dm_zx(x, y, z, a=0.2, b=0.2, c=5.7):
    """Differential embedding of the observable ``s = z + x``.

    Returns ``np.array([s, ds/dt, d2s/dt2])`` along the Rössler flow
    ``dx/dt = -y - z``, ``dy/dt = x + a*y``, ``dz/dt = b + z*(x - c)``.
    Inputs may be scalars or equally shaped arrays; array inputs of length
    ``n`` give a result of shape ``(3, n)``.

    The second derivative is evaluated with the chain rule on the flow
    components.  The previous expanded polynomial used ``(1 - 2c)*x*z``
    instead of ``-(1 + 2c)*x*z`` and omitted the ``x*x*z`` term.
    """
    # Rössler vector field at the given state(s).
    x_dot = -y - z
    y_dot = x + a * y
    z_dot = b + z * (x - c)

    s = z + x
    s_dot = x_dot + z_dot
    # d/dt(x_dot) = -y_dot - z_dot
    # d/dt(z_dot) = z_dot*(x - c) + z*x_dot
    s_ddot = -y_dot - z_dot + z_dot * (x - c) + z * x_dot
    return np.array([s, s_dot, s_ddot])

def rossler_dm_xy(x, y, z, a=0.2, b=0.2, c=5.7):
    """Differential embedding of the observable ``s = x + y``.

    Returns ``np.array([s, ds/dt, d2s/dt2])`` along the Rössler flow
    ``dx/dt = -y - z``, ``dy/dt = x + a*y``, ``dz/dt = b + z*(x - c)``.
    Inputs may be scalars or equally shaped arrays; array inputs of length
    ``n`` give a result of shape ``(3, n)``.

    The second derivative is evaluated with the chain rule on the flow
    components.  The previous expanded polynomial had the ``y`` coefficient
    ``a*a - a + 1``; the correct coefficient is ``a*a - a - 1``.
    """
    # Rössler vector field at the given state(s).
    x_dot = -y - z
    y_dot = x + a * y
    z_dot = b + z * (x - c)

    s = x + y
    s_dot = x_dot + y_dot
    # d/dt(x_dot) = -y_dot - z_dot
    # d/dt(y_dot) = x_dot + a*y_dot
    s_ddot = (-y_dot - z_dot) + (x_dot + a * y_dot)
    return np.array([s, s_dot, s_ddot])

def rossler_y_z_y(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return ``np.array([y, z, x + a*y])``: the coordinates ``y`` and ``z``
    followed by the Rössler expression for ``dy/dt``.
    """
    y_rate = x + a * y
    return np.array([y, z, y_rate])

def rossler_x_y_x(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return ``np.array([x, y, -y - z])``: the coordinates ``x`` and ``y``
    followed by the Rössler expression for ``dx/dt``.  The parameters
    ``a``, ``b`` and ``c`` are not used.
    """
    x_rate = -y - z
    return np.array([x, y, x_rate])

def rossler_x_y_y(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return ``np.array([x, y, x + a*y])``: the coordinates ``x`` and ``y``
    followed by the Rössler expression for ``dy/dt``.  ``z``, ``b`` and
    ``c`` are not used.
    """
    y_rate = x + a * y
    return np.array([x, y, y_rate])

def rossler_y_z_z(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return ``np.array([y, z, b + z*(x - c)])``: the coordinates ``y`` and
    ``z`` followed by the Rössler expression for ``dz/dt``.  ``a`` is not
    used.
    """
    z_rate = b + z * (x - c)
    return np.array([y, z, z_rate])

def rossler_x_z_z(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return ``np.array([x, z, b + z*(x - c)])``: the coordinates ``x`` and
    ``z`` followed by the Rössler expression for ``dz/dt``.  ``y`` and ``a``
    are not used.
    """
    z_rate = b + z * (x - c)
    return np.array([x, z, z_rate])

def rossler_x_z_x(x, y, z, a=0.2, b=0.2, c=5.7):
    """Return ``np.array([x, z, -y - z])``: the coordinates ``x`` and ``z``
    followed by the Rössler expression for ``dx/dt``.  The parameters
    ``a``, ``b`` and ``c`` are not used.
    """
    x_rate = -y - z
    return np.array([x, z, x_rate])

def build_embeddings_from_y(y_arr):
    """Build every candidate embedding from a state trajectory.

    Parameters
    ----------
    y_arr : array indexed as ``y_arr[:, 0..2]``
        Trajectory whose first three columns are read as ``x``, ``y``, ``z``.

    Returns
    -------
    dict
        Maps an embedding label to the transposed output of the matching
        observable function, i.e. one row per time sample and one column
        per embedding coordinate.
    """
    x, y, z = (y_arr[:, column] for column in range(3))

    # Label -> observable function; insertion order is preserved in the
    # returned dict.
    recipes = (
        ("x_x_x", rossler_dm_x),
        ("y_y_y", rossler_dm_y),
        ("z_z_z", rossler_dm_z),
        ("y_z_y", rossler_y_z_y),
        ("x_y_x", rossler_x_y_x),
        ("x_y_y", rossler_x_y_y),
        ("y_z_z", rossler_y_z_z),
        ("x_z_z", rossler_x_z_z),
        ("x_z_x", rossler_x_z_x),
        ("y_plus_z", rossler_dm_yz),
        ("x_plus_z", rossler_dm_zx),
        ("x_plus_y", rossler_dm_xy),
    )
    return {label: observable(x, y, z).T for label, observable in recipes}

@jit(nopython=True, fastmath=True)
def compute_errors_numba(neighbor_clouds, max_iter=50, eps=1e-5):
    """Mean distance of each point cloud to its (approximate) geometric median.

    Parameters
    ----------
    neighbor_clouds : 3-D array, unpacked as ``(n_samples, k, dim)``
        One cloud of ``k`` points in ``dim`` dimensions per sample.
    max_iter : int
        Maximum number of reweighting iterations per cloud.
    eps : float
        Iteration stops once the Euclidean norm of the update step falls
        below this value.

    Returns
    -------
    numpy.ndarray of float64, shape ``(n_samples,)``
        For each cloud, the average Euclidean distance from its ``k`` points
        to the final centre estimate.
    """
    n_samples, k, dim = neighbor_clouds.shape
    errors = np.empty(n_samples, dtype=np.float64)

    for i in range(n_samples):
        cloud = neighbor_clouds[i]

        # Start the iteration from the arithmetic mean (centroid) of the cloud.
        y = np.zeros(dim)
        for d in range(dim):
            sum_val = 0.0
            for j in range(k): sum_val += cloud[j, d]
            y[d] = sum_val / k

        # Weiszfeld-style fixed-point iteration: the next estimate is the
        # average of the points weighted by 1 / distance to the current one.
        for _ in range(max_iter):
            sum_weights = 0.0
            y_next = np.zeros(dim)
            all_non_zero = True

            for j in range(k):
                dist_sq = 0.0
                for d in range(dim):
                    diff = cloud[j, d] - y[d]
                    dist_sq += diff * diff
                dist = np.sqrt(dist_sq)

                # The weight 1/dist would blow up when the estimate sits on a
                # data point; snap the estimate to that point and stop.
                if dist < 1e-12: 
                    all_non_zero = False
                    for d in range(dim): y[d] = cloud[j, d]
                    break

                w = 1.0 / dist
                sum_weights += w
                for d in range(dim):
                    y_next[d] += cloud[j, d] * w

            if not all_non_zero: break 

            # Normalise the weighted sum, measure the step size and accept
            # the new estimate.
            diff_norm_sq = 0.0
            for d in range(dim):
                y_next[d] /= sum_weights
                diff = y[d] - y_next[d]
                diff_norm_sq += diff * diff
                y[d] = y_next[d]

            # Converged: the estimate moved by less than eps.
            if np.sqrt(diff_norm_sq) < eps:
                break

        # Error for this cloud: mean distance of its points to the estimate.
        total_dist = 0.0
        for j in range(k):
            dist_sq = 0.0
            for d in range(dim):
                diff = cloud[j, d] - y[d]
                dist_sq += diff * diff
            total_dist += np.sqrt(dist_sq)

        errors[i] = total_dist / k

    return errors

def compute_ground_truth_fnn(
    embedding_neighbors_idx,
    true_state,
    query_indices,
    macroscopic_threshold
):
    """Measure how often embedding neighbours are far apart in the true state.

    Parameters
    ----------
    embedding_neighbors_idx : integer array, one row of neighbour indices
        per query.
    true_state : array indexed by time along axis 0
        Reference states looked up for both queries and neighbours.
    query_indices : integer array
        Index into ``true_state`` of each query point.
    macroscopic_threshold : float
        A neighbour counts as false when its true-state distance to the
        query exceeds this value.

    Returns
    -------
    (fold_frac, fold_any)
        ``fold_frac`` is the fraction of all (query, neighbour) pairs that
        are false neighbours; ``fold_any`` is the fraction of queries with
        at least one false neighbour.
    """
    query_states = true_state[query_indices]
    neighbor_states = true_state[embedding_neighbors_idx]

    # Broadcast each query against its own row of neighbours.
    offsets = query_states[:, np.newaxis, :] - neighbor_states
    separation = np.linalg.norm(offsets, axis=2)

    too_far = separation > macroscopic_threshold
    per_query_hit = np.any(too_far, axis=1)

    return np.mean(too_far), np.mean(per_query_hit)

def estimate_intrinsic_stochasticity(
    embedding_data,
    pushforward,
    k=50,
    n_samples=5000,
    cond_threshold=1000,
    random_state=0,
    return_pointwise=False,
    theiler_w=0,
    buffer_mult=6,
    pre_scaled=False,
    strict_theiler=True,
    true_state=None,
    delta_state=None,
):
    """Estimate how spread out the futures of embedding-space neighbours are.

    For a random subset of time indices, the ``k`` nearest neighbours in the
    (scaled) embedding are located, each neighbour is advanced ``pushforward``
    samples in time, and ``compute_errors_numba`` measures the mean distance
    of those future points to their geometric-median estimate.

    Parameters
    ----------
    embedding_data : array-like, one row per time sample
        The embedding trajectory.
    pushforward : int
        Number of samples between a neighbour and its "future" point.
        Must be non-negative and smaller than the number of rows.
    k : int
        Neighbours per query; must be at least 2.
    n_samples : int
        Maximum number of query points (drawn without replacement).
    cond_threshold : float
        If the condition number of the centred, scaled data exceeds this
        value (or is infinite) the function returns ``None``.
    random_state : int
        Seed for the query-point sampler.
    return_pointwise : bool
        When true, the result also carries the per-query arrays
        ``"local_errors"``, ``"query_idx"`` and ``"rk"``.
    theiler_w : int
        Temporal exclusion half-width: candidates with
        ``abs(t - q) <= theiler_w`` are rejected when ``theiler_w > 0``.
    buffer_mult : int
        The neighbour search requests
        ``max(k * buffer_mult, k + 2 * theiler_w + 5)`` candidates (capped at
        the number of searchable points) so enough survive the exclusion.
    pre_scaled : bool
        Skip ``RobustScaler`` and use ``embedding_data`` as given.
    strict_theiler : bool
        If true, raise when a query has fewer than ``k`` admissible
        neighbours.  If false, the missing slots are filled with the nearest
        candidates that were rejected by the Theiler window.
    true_state, delta_state : optional
        When both are given, ``compute_ground_truth_fnn`` is called with the
        selected neighbours and ``delta_state`` as threshold.

    Returns
    -------
    dict or None
        ``None`` when the conditioning check fails; otherwise a dict with
        ``E_star_k``, ``median_rk``, ``q90_rk``, ``fold_frac``, ``fold_any``,
        ``n_fallback_queries`` and ``status`` (plus the pointwise arrays when
        requested).

    Raises
    ------
    ValueError
        If ``k < 2`` or ``pushforward`` is negative or leaves no sample.
    RuntimeError
        If a query cannot be given ``k`` neighbours.
    """
    if k <= 1:
        raise ValueError("k must be >= 2.")

    if pre_scaled:
        data_norm = np.asarray(embedding_data)
    else:
        data_norm = RobustScaler().fit_transform(np.asarray(embedding_data))

    # Conditioning check: a (nearly) rank-deficient embedding is rejected.
    data_centered = data_norm - np.mean(data_norm, axis=0)
    s_vals = np.linalg.svd(data_centered, compute_uv=False)
    if s_vals[-1] < 1e-12:
        cond_num = np.inf
    else:
        cond_num = s_vals[0] / s_vals[-1]

    if (cond_num > cond_threshold) or np.isinf(cond_num):
        return None

    T = data_norm.shape[0]
    if pushforward < 0:
        # A negative offset would silently mis-align "current" and "future".
        raise ValueError("pushforward must be >= 0.")
    max_start = T - pushforward
    if max_start <= 0:
        raise ValueError(
            f"pushforward ({pushforward}) must be smaller than the number of samples ({T})."
        )

    # Row i of X_curr is paired with row i of Y_fut (its state `pushforward`
    # samples later).
    X_curr = data_norm[:max_start]
    Y_fut = data_norm[pushforward:]

    rng = np.random.default_rng(random_state)
    n_q = int(min(n_samples, max_start))
    query_idx = rng.choice(max_start, size=n_q, replace=False)

    k_query = int(min(max_start, max(k * buffer_mult, k + 2 * theiler_w + 5)))
    nbrs = NearestNeighbors(n_neighbors=k_query, algorithm="auto", n_jobs=-1).fit(X_curr)
    dist_raw, idx_raw = nbrs.kneighbors(X_curr[query_idx])

    nn_idx = np.empty((n_q, k), dtype=np.int64)
    rk = np.empty(n_q, dtype=np.float64)  # distance to the farthest kept neighbour
    not_enough = 0

    for r in range(n_q):
        q = int(query_idx[r])
        picked = 0
        last_d = 0.0
        # Candidates rejected only by the Theiler window, nearest first;
        # used as fill-in when strict_theiler is False.
        theiler_rejected = []

        for t_raw, d_raw in zip(idx_raw[r], dist_raw[r]):
            t = int(t_raw)
            if t == q:
                continue
            if theiler_w > 0 and (abs(t - q) <= theiler_w):
                theiler_rejected.append((t, float(d_raw)))
                continue

            nn_idx[r, picked] = t
            last_d = float(d_raw)
            picked += 1
            if picked == k:
                break

        if picked < k:
            not_enough += 1
            if strict_theiler:
                raise RuntimeError(f"Not enough neighbors after Theiler exclusion for query {q}.")

            # Non-strict fallback: every slot of nn_idx must hold a valid
            # index, so top up with the nearest Theiler-rejected candidates.
            missing = k - picked
            if len(theiler_rejected) < missing:
                raise RuntimeError(
                    f"Only {picked + len(theiler_rejected)} candidate neighbors "
                    f"available for query {q}; need {k}."
                )
            for t, d in theiler_rejected[:missing]:
                nn_idx[r, picked] = t
                last_d = max(last_d, d)
                picked += 1
        rk[r] = last_d

    neighbor_clouds = Y_fut[nn_idx]  # (n_q, k, d)
    local_errors = compute_errors_numba(neighbor_clouds)
    E_star_k = float(np.mean(local_errors))

    fold_frac = np.nan
    fold_any = np.nan

    if (true_state is not None) and (delta_state is not None):
        fold_frac, fold_any = compute_ground_truth_fnn(
            embedding_neighbors_idx=nn_idx,
            true_state=true_state,
            query_indices=query_idx,
            macroscopic_threshold=delta_state
        )

    out = {
        "E_star_k": E_star_k,
        "median_rk": float(np.median(rk)),
        "q90_rk": float(np.quantile(rk, 0.90)),
        "fold_frac": fold_frac,
        "fold_any": fold_any,
        "n_fallback_queries": not_enough,
        "status": "ok"
    }
    if return_pointwise:
        out["local_errors"] = local_errors
        out["query_idx"] = query_idx
        out["rk"] = rk
    return out


# --- Simulate a reference trajectory of the Rössler system ---------------
print("Simulating Rössler system...")
dt = 0.01                 # integration step
num_steps = int(2e4)      # number of integration steps
y_clean = np.zeros((num_steps + 1, 3), dtype=np.float64)
y_clean[0] = np.array([1.0, 1.0, 0.0]) 

# Explicit (forward) Euler update: state += vector_field(state) * dt.
for i in range(num_steps):
    y_clean[i + 1] = y_clean[i] + rossler(y_clean[i]) * dt

# --- Approximate the attractor diameter ----------------------------------
# The diameter is estimated as the largest distance among n_sample_diam
# randomly drawn pairs of trajectory points (not an exhaustive search).
n_sample_diam = 2000
rng_init = np.random.default_rng(42)
idx_A = rng_init.choice(len(y_clean), n_sample_diam)
idx_B = rng_init.choice(len(y_clean), n_sample_diam)
dists = np.linalg.norm(y_clean[idx_A] - y_clean[idx_B], axis=1)

attractor_diameter = np.max(dists)
# True-state distance above which a neighbour is counted as "false":
# 5% of the estimated diameter.
MACRO_THRESHOLD = 0.05 * attractor_diameter

print(f"Attractor Diameter (approx): {attractor_diameter:.4f}")
print(f"Ground Truth FNN Threshold (5%): {MACRO_THRESHOLD:.4f}")

# All candidate embeddings computed from the noise-free trajectory.
embeddings_clean = build_embeddings_from_y(y_clean)

# Scalar scale of the trajectory (mean of the per-coordinate standard
# deviations); the noise standard deviation is expressed relative to it.
y_std = np.std(y_clean, axis=0, ddof=0)
y_scale = float(np.mean(y_std))

# --- Experiment configuration --------------------------------------------
PUSHFORWARD = 20        # samples between a neighbour and its future point
K = 50                  # neighbours per query
N_SAMPLES = 5000        # query points per run
COND_THRESHOLD = 1000   # embeddings with a larger condition number are skipped
THEILER_W = 30          # temporal exclusion half-width for neighbours
BUFFER_MULT = 6         # over-request factor for the neighbour search

noise_levels = [0.00]   # noise std as a fraction of y_scale
noise_seed = 123
n_repeats = 10

all_rows = []

print(f"Starting experiment on {len(embeddings_clean)} embeddings...")

for p in noise_levels:
    for rep in range(n_repeats):
        # One noise generator per repetition, shared by all embeddings.
        rng = np.random.default_rng(noise_seed + rep)

        for name, emb_clean in embeddings_clean.items():
            emb_arr = np.asarray(emb_clean)
            if emb_arr.ndim == 1:
                emb_arr = emb_arr.reshape(-1, 1)

            if p == 0.0:
                emb_noisy = emb_arr
            else:
                sigma = p * y_scale
                emb_noisy = emb_arr + rng.normal(0.0, sigma, size=emb_arr.shape)

            out = estimate_intrinsic_stochasticity(
                embedding_data=emb_noisy,
                pushforward=PUSHFORWARD,
                k=K,
                n_samples=N_SAMPLES,
                cond_threshold=COND_THRESHOLD,
                random_state=rep,
                theiler_w=THEILER_W,
                buffer_mult=BUFFER_MULT,
                pre_scaled=False,
                strict_theiler=True,
                true_state=y_clean,
                delta_state=MACRO_THRESHOLD
            )

            # Every row carries the same identifying fields, so skipped runs
            # can also be attributed to a repetition.
            row = {"noise_level": p, "repeat": rep, "embedding": name}
            if out is None:
                # The estimator rejected the embedding (conditioning check).
                row["status"] = "skipped"
            else:
                row["E_star_k"] = out["E_star_k"]
                row["fold_frac"] = out["fold_frac"]
                row["fold_any"] = out["fold_any"]
                row["status"] = "ok"
            all_rows.append(row)

# Fix the column set up front so the frame has the expected columns even
# when no run succeeded (or no run was executed at all).
df_raw = pd.DataFrame(
    all_rows,
    columns=[
        "noise_level",
        "repeat",
        "embedding",
        "E_star_k",
        "fold_frac",
        "fold_any",
        "status",
    ],
)

# Only successful runs enter the summary statistics.
df_ok = df_raw[df_raw["status"] == "ok"].copy()

if df_ok.empty:
    # Nothing to aggregate: keep `summary` defined with the usual columns.
    summary = pd.DataFrame(
        columns=[
            "noise_level",
            "embedding",
            "E_star_mean",
            "E_star_std",
            "Fold_Frac_mean",
            "Fold_Any_mean",
        ]
    )
else:
    # Mean/std over repetitions for each (noise level, embedding) pair.
    summary = (
        df_ok.groupby(["noise_level", "embedding"])
        .agg(
            E_star_mean=("E_star_k", "mean"),
            E_star_std=("E_star_k", "std"),
            Fold_Frac_mean=("fold_frac", "mean"),
            Fold_Any_mean=("fold_any", "mean"),
        )
        .reset_index()
    )

print("\n=== Final Results (Sorted by E*) ===")
pd.set_option('display.max_rows', None)
pd.set_option('display.precision', 4)
print(summary.sort_values("E_star_mean"))

Simulating Rössler system...
Attractor Diameter (approx): 29.9115
Ground Truth FNN Threshold (5%): 1.4956
Starting experiment on 12 embeddings...

=== Final Results (Sorted by E*) ===
    noise_level embedding  E_star_mean  E_star_std  Fold_Frac_mean  \
7           0.0     y_y_y       0.0452      0.0005          0.0241   
3           0.0     x_y_x       0.0455      0.0005          0.0243   
0           0.0  x_plus_y       0.0897      0.0022          0.0358   
2           0.0     x_x_x       0.1090      0.0031          0.0374   
6           0.0  y_plus_z       0.7332      0.0321          0.0489   
1           0.0  x_plus_z       1.8802      0.0885          0.0616   
4           0.0     x_z_x       4.7021      0.1789          0.1015   
8           0.0     y_z_y       4.7087      0.1795          0.0995   
9           0.0     y_z_z      12.1080      0.4089          0.1231   
5           0.0     x_z_z      12.1550      0.4106          0.1660   
10          0.0     z_z_z      14.3654      0.