# Baseline 1-PH: Synthetic Shapes

Three 1-parameter persistence baselines:
- **Alpha 1-PH**: standard Alpha complex persistence
- **Codensity lower-star**: lower-star filtration from codensity (distance to k-th neighbor)
- **Eccentricity lower-star**: lower-star filtration from eccentricity

Same data generation, same classification pipeline as `Synthetic_noise.ipynb`.

In [None]:
import numpy as np
import gudhi as gd
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import pairwise_distances as pdist_sklearn
from sklearn.model_selection import train_test_split
from collections import Counter
from tqdm import tqdm
import time, warnings
# Silence every warning for the rest of the notebook (no category filter is given).
warnings.filterwarnings('ignore')
# NOTE(review): global seeding is disabled here, so the synthetic point clouds
# drawn through np.random differ from run to run — re-enable to reproduce results.
#np.random.seed(42)

In [16]:
# --- Parameters (same as Synthetic_noise.ipynb) ---
# Number of points sampled per point cloud (first argument of every generator).
n = 1000
# Number of point clouds generated per class.
n_ex = 40
# k used for the codensity function (distance to the k-th nearest neighbour).
k_neighbors = 15
# Third argument forwarded to gd.bottleneck_distance through safe_bottleneck
# (presumably the approximation tolerance — confirm against the GUDHI docs).
BOTTLENECK_E = 0.01
# Number of neighbours used by the kNN classifier.
K_KNN = 3
# Standard deviations of the Gaussian jitter added to every coordinate; one full
# experiment is run per level.
NOISE_LEVELS = [0.0, 0.03, 0.06, 0.09, 0.12]

# Human-readable class labels.
class_names = ['Circle', 'Sphere', 'Torus', '3 Clusters', '2 Circles']
n_classes = len(class_names)

In [17]:
# --- Generators (identical to Synthetic_noise.ipynb) ---

def generate_circle(n, noise=0.0):
    """Sample `n` points on a circle embedded in 3-D.

    The circle has radius 0.4, is centred at (0.5, 0.5, 0.5) and lies in the
    plane y = 0.5. Angles are drawn uniformly with the global NumPy RNG.

    Parameters
    ----------
    n : int
        Number of points.
    noise : float, default 0.0
        If positive, isotropic Gaussian noise with this standard deviation is
        added to every coordinate.

    Returns
    -------
    numpy.ndarray of shape (n, 3)
    """
    angles = np.random.uniform(0, 2 * np.pi, n)
    x = 0.5 + 0.4 * np.cos(angles)
    y = 0.5 * np.ones(n)
    z = 0.5 + 0.4 * np.sin(angles)
    cloud = np.column_stack([x, y, z])
    if noise > 0:
        cloud += np.random.normal(0, noise, cloud.shape)
    return cloud

def generate_sphere(n, noise=0.0):
    """Sample `n` points on a sphere of radius 0.4 centred at (0.5, 0.5, 0.5).

    The azimuth is uniform on [0, 2*pi) and the cosine of the polar angle is
    uniform on [-1, 1]. Uses the global NumPy RNG.

    Parameters
    ----------
    n : int
        Number of points.
    noise : float, default 0.0
        If positive, isotropic Gaussian noise with this standard deviation is
        added to every coordinate.

    Returns
    -------
    numpy.ndarray of shape (n, 3)
    """
    azimuth = np.random.uniform(0, 2 * np.pi, n)
    z_unit = np.random.uniform(-1, 1, n)
    ring_unit = np.sqrt(1 - z_unit**2)
    x = 0.5 + 0.4 * ring_unit * np.cos(azimuth)
    y = 0.5 + 0.4 * ring_unit * np.sin(azimuth)
    z = 0.5 + 0.4 * z_unit
    cloud = np.column_stack([x, y, z])
    if noise > 0:
        cloud += np.random.normal(0, noise, cloud.shape)
    return cloud

def generate_torus(n, noise=0.0):
    """Sample `n` points on a torus centred at (0.5, 0.5, 0.5).

    Major radius is 0.3 and tube radius is 0.1; the axis of revolution is
    parallel to z. Both angles are drawn independently and uniformly on
    [0, 2*pi) with the global NumPy RNG.

    Parameters
    ----------
    n : int
        Number of points.
    noise : float, default 0.0
        If positive, isotropic Gaussian noise with this standard deviation is
        added to every coordinate.

    Returns
    -------
    numpy.ndarray of shape (n, 3)
    """
    major, minor = 0.3, 0.1
    around_axis = np.random.uniform(0, 2 * np.pi, n)
    around_tube = np.random.uniform(0, 2 * np.pi, n)
    x = 0.5 + (major + minor*np.cos(around_tube)) * np.cos(around_axis)
    y = 0.5 + (major + minor*np.cos(around_tube)) * np.sin(around_axis)
    z = 0.5 + minor * np.sin(around_tube)
    cloud = np.column_stack([x, y, z])
    if noise > 0:
        cloud += np.random.normal(0, noise, cloud.shape)
    return cloud

def generate_three_clusters(n, noise=0.0):
    """Sample `n` points from three Gaussian blobs (std 0.06) in the plane z = 0.5.

    The first two blobs receive n // 3 points each and the third takes the
    remainder, so the total is always exactly `n`. Uses the global NumPy RNG.

    Parameters
    ----------
    n : int
        Total number of points.
    noise : float, default 0.0
        If positive, additional isotropic Gaussian noise with this standard
        deviation is added on top of the blob spread.

    Returns
    -------
    numpy.ndarray of shape (n, 3)
    """
    centers = np.array([[0.25, 0.25, 0.5], [0.75, 0.25, 0.5], [0.5, 0.75, 0.5]])
    base = n // 3
    sizes = [base, base, n - 2*base]
    blobs = []
    # Blobs are drawn in centre order so the random stream is consumed sequentially.
    for center, size in zip(centers, sizes):
        blobs.append(center + np.random.normal(0, 0.06, (size, 3)))
    cloud = np.vstack(blobs)
    if noise > 0:
        cloud += np.random.normal(0, noise, cloud.shape)
    return cloud

def generate_two_circles(n, noise=0.0):
    """Sample `n` points on two concentric circles in the plane z = 0.5.

    Both circles are centred at (0.5, 0.5, 0.5). The inner one (radius 0.2)
    gets n // 2 points and the outer one (radius 0.4) gets the rest; inner
    points come first in the output. Uses the global NumPy RNG.

    Parameters
    ----------
    n : int
        Total number of points.
    noise : float, default 0.0
        If positive, isotropic Gaussian noise with this standard deviation is
        added to every coordinate.

    Returns
    -------
    numpy.ndarray of shape (n, 3)
    """
    n_inner = n // 2
    n_outer = n - n_inner
    rings = []
    for radius, count in ((0.2, n_inner), (0.4, n_outer)):
        angle = np.random.uniform(0, 2 * np.pi, count)
        rings.append(np.column_stack([0.5 + radius*np.cos(angle),
                                      0.5 + radius*np.sin(angle),
                                      0.5*np.ones(count)]))
    cloud = np.vstack(rings)
    if noise > 0:
        cloud += np.random.normal(0, noise, cloud.shape)
    return cloud

# One generator per class; the position in this list is used as the integer
# class label, so the order must stay aligned with `class_names`.
generators = [generate_circle, generate_sphere, generate_torus,
              generate_three_clusters, generate_two_circles]

In [18]:
# --- Core functions ---

def compute_codensity(points, k):
    """Return, for every point, the largest of its k+1 nearest-neighbour distances.

    k+1 neighbours are requested because the query set is the fitted set
    itself; presumably each point is returned as its own nearest neighbour at
    distance 0, which makes the last column the distance to the k-th *other*
    point — confirm against the scikit-learn documentation.

    Parameters
    ----------
    points : array-like of shape (n_points, dim)
    k : int
        Neighbour rank used for the codensity.

    Returns
    -------
    numpy.ndarray of shape (n_points,)
    """
    index = NearestNeighbors(n_neighbors=k+1)
    index.fit(points)
    neighbor_dists = index.kneighbors(points)[0]
    return neighbor_dists[:, -1]

def compute_eccentricity(points):
    """Return, for every point, its distance to the farthest point of the cloud.

    Builds the full pairwise distance matrix, so memory grows quadratically
    with the number of points.

    Parameters
    ----------
    points : array-like of shape (n_points, dim)

    Returns
    -------
    numpy.ndarray of shape (n_points,)
    """
    dist_matrix = pdist_sklearn(points)
    return dist_matrix.max(axis=1)

def normalize_01(f):
    """Affinely rescale the values of `f` to the interval [0, 1].

    Parameters
    ----------
    f : array-like of numbers
        Function values (e.g. one value per vertex).

    Returns
    -------
    numpy.ndarray of float
        A new array with the same shape as `f`.
        * Non-constant input: ``(f - min) / (max - min)``.
        * Constant input: all zeros, so the result still lies in [0, 1]
          (previously the raw values were returned unchanged, and the returned
          object aliased the caller's array).
        * Empty input: an empty array (previously ``min()`` raised).
        * Input containing NaN: an unchanged copy, so the problem stays visible.
    """
    values = np.asarray(f, dtype=float)
    if values.size == 0:
        return values.copy()
    fmin, fmax = values.min(), values.max()
    if fmax > fmin:
        return (values - fmin) / (fmax - fmin)
    if fmax == fmin:
        # Degenerate (constant) function: any constant is a valid choice; 0 keeps
        # the output inside the advertised range.
        return np.zeros_like(values)
    # Only reachable when min/max are NaN; do not hide it behind zeros.
    return values.copy()

def safe_bottleneck(pd1, pd2, e=0.0):
    """Bottleneck distance between two diagrams, with empty diagrams handled here.

    Parameters
    ----------
    pd1, pd2 : numpy.ndarray of shape (m, 2)
        Persistence diagrams as (birth, death) rows; either may have zero rows.
    e : float, default 0.0
        Forwarded unchanged as the third argument of ``gd.bottleneck_distance``.

    Returns
    -------
    float
        0.0 if both diagrams are empty; if exactly one is empty, the largest
        half-persistence ``(death - birth) / 2`` of the other; otherwise the
        value returned by ``gd.bottleneck_distance``.
    """
    n1, n2 = len(pd1), len(pd2)
    if n1 and n2:
        return gd.bottleneck_distance(pd1, pd2, e)
    if not n1 and not n2:
        return 0.0
    # Exactly one diagram is empty: every point of the other one must be
    # matched to the diagonal.
    nonempty = pd2 if not n1 else pd1
    half_persistence = (nonempty[:, 1] - nonempty[:, 0]) / 2
    return float(np.max(half_persistence))

def extract_finite_pd(st, dim):
    """Return the finite-death intervals of dimension `dim` as an (m, 2) array.

    Parameters
    ----------
    st : object exposing ``persistence_intervals_in_dimension(dim)``
        Its persistence must already have been computed by the caller.
    dim : int
        Homological dimension.

    Returns
    -------
    numpy.ndarray of shape (m, 2)
        Rows whose death value is finite. An array of shape (0, 2) is returned
        when there are no intervals at all or none with finite death.
    """
    intervals = st.persistence_intervals_in_dimension(dim)
    if intervals is None or len(intervals) == 0:
        return np.empty((0, 2))
    finite = intervals[np.isfinite(intervals[:, 1])]
    return finite if len(finite) > 0 else np.empty((0, 2))

def lower_star_pd(points, func_values):
    """Lower-star persistence of a vertex function on the Alpha complex of `points`.

    The Alpha complex supplies only the simplices; every simplex's filtration
    value is overwritten with the maximum of `func_values` over its vertices.

    Parameters
    ----------
    points : array-like of shape (n_points, dim)
    func_values : sequence indexed by vertex id
        One scalar per vertex (assumes vertex ids coincide with row indices of
        `points` — TODO confirm for degenerate inputs).

    Returns
    -------
    tuple (H0, H1)
        Finite intervals of dimension 0 and 1, each an (m, 2) array.
    """
    tree = gd.AlphaComplex(points=points).create_simplex_tree()
    for vertices, _alpha_value in tree.get_simplices():
        level = max(func_values[v] for v in vertices)
        tree.assign_filtration(vertices, float(level))
    tree.make_filtration_non_decreasing()
    tree.persistence()
    h0 = extract_finite_pd(tree, 0)
    h1 = extract_finite_pd(tree, 1)
    return (h0, h1)

def alpha_pd(points):
    """Persistence of the Alpha complex of `points` with its built-in filtration.

    NOTE(review): GUDHI's Alpha filtration values are, by default, squared
    radii and are not rescaled here, unlike the [0, 1]-normalised lower-star
    inputs used elsewhere in this notebook — confirm this is intended.

    Parameters
    ----------
    points : array-like of shape (n_points, dim)

    Returns
    -------
    tuple (H0, H1)
        Finite intervals of dimension 0 and 1, each an (m, 2) array.
    """
    tree = gd.AlphaComplex(points=points).create_simplex_tree()
    tree.persistence()
    h0 = extract_finite_pd(tree, 0)
    h1 = extract_finite_pd(tree, 1)
    return (h0, h1)


def classify_knn_most_confident(D_train_list, D_test_list,
                                labels_train, labels_test, k=3):
    """kNN over several distance matrices, keeping the most confident vote.

    For every test sample a separate k-nearest-neighbour majority vote is taken
    in each distance matrix (one per homological degree). The vote with the
    highest confidence (share of the k neighbours agreeing with the majority
    label) wins; ties in confidence are broken by the smaller mean distance to
    the k neighbours, and the earlier matrix wins a complete tie.

    Parameters
    ----------
    D_train_list : list
        Only its length is used (number of distance matrices).
    D_test_list : list of numpy.ndarray, each of shape (n_test, n_train)
        Test-to-train distances, one matrix per degree.
    labels_train, labels_test : array-like of int
        Class labels of the training and test samples.
    k : int, default 3
        Number of neighbours.

    Returns
    -------
    float
        Fraction of test samples classified correctly.
    """
    y_train = np.asarray(labels_train)
    y_test = np.asarray(labels_test)
    num_test = D_test_list[0].shape[0]
    predictions = np.zeros(num_test, dtype=int)

    for row in range(num_test):
        # (confidence, predicted label, mean neighbour distance) of the best vote so far
        top_conf, top_label, top_dist = -1.0, 0, np.inf
        for degree in range(len(D_train_list)):
            row_dists = D_test_list[degree][row]
            nearest = np.argsort(row_dists)[:k]
            votes = y_train[nearest]
            majority = Counter(votes).most_common(1)[0][0]
            confidence = np.sum(votes == majority) / k
            mean_dist = np.mean(row_dists[nearest])

            more_confident = confidence > top_conf
            tied_but_closer = confidence == top_conf and mean_dist < top_dist
            if more_confident or tied_but_closer:
                top_conf, top_label, top_dist = confidence, majority, mean_dist
        predictions[row] = top_label

    return float(np.mean(predictions == y_test))


def extract_distance_blocks(D_full, train_idx, test_idx):
    """Slice a full pairwise distance matrix into train/train and test/train blocks.

    Parameters
    ----------
    D_full : numpy.ndarray of shape (N, N)
    train_idx, test_idx : sequences of int
        Row/column indices of the training and test samples.

    Returns
    -------
    tuple (D_train, D_test)
        ``D_train`` has shape (n_train, n_train) and ``D_test`` has shape
        (n_test, n_train).
    """
    train_block = D_full[np.ix_(train_idx, train_idx)]
    test_block = D_full[np.ix_(test_idx, train_idx)]
    return train_block, test_block

## Run

In [19]:
# Display names of the three baselines; also the keys of PDs, Ds and accs below.
methods = ['Alpha 1-PH', 'Codensity lower-star', 'Ecc lower-star']
# Maps noise level -> {method name -> list of per-split accuracies}.
all_results = {}

# The whole pipeline (data, diagrams, distances, classification) is repeated
# independently for every noise level.
for noise in NOISE_LEVELS:
    print(f'\n{"="*60}')
    print(f'NOISE = {noise}')
    print(f'{"="*60}')

    # Generate data: n_ex point clouds of n points per class, grouped by class.
    # NOTE(review): seeding is disabled, so the point clouds (and therefore the
    # reported accuracies) change between runs; only the splits below are seeded.
    #np.random.seed(42)
    Data, Labels = [], []
    for cls_idx, gen in enumerate(generators):
        for _ in range(n_ex):
            Labels.append(cls_idx)
            Data.append(gen(n, noise=noise))
    Labels = np.array(Labels)
    N = len(Data)

    # Compute PDs for each method; every entry is an (H0, H1) pair of diagrams.
    PDs = {m: [] for m in methods}

    for i in tqdm(range(N), desc='Computing PDs'):
        # Alpha 1-PH (filtration values are used as returned, without rescaling)
        PDs['Alpha 1-PH'].append(alpha_pd(Data[i]))

        # Codensity lower-star (vertex function rescaled per point cloud)
        cod = normalize_01(compute_codensity(Data[i], k_neighbors))
        PDs['Codensity lower-star'].append(lower_star_pd(Data[i], cod))

        # Eccentricity lower-star (vertex function rescaled per point cloud)
        ecc = normalize_01(compute_eccentricity(Data[i]))
        PDs['Ecc lower-star'].append(lower_star_pd(Data[i], ecc))

    # Distance matrices: one symmetric N x N bottleneck matrix per method and
    # per degree; only the upper triangle is computed and then mirrored, and
    # the diagonal stays 0.
    Ds = {}
    for mname in methods:
        print(f'  Distances: {mname}')
        D_H0 = np.zeros((N, N))
        D_H1 = np.zeros((N, N))
        for i in tqdm(range(N), leave=False):
            for j in range(i+1, N):
                d0 = safe_bottleneck(PDs[mname][i][0], PDs[mname][j][0], BOTTLENECK_E)
                d1 = safe_bottleneck(PDs[mname][i][1], PDs[mname][j][1], BOTTLENECK_E)
                D_H0[i,j] = D_H0[j,i] = d0
                D_H1[i,j] = D_H1[j,i] = d1
        Ds[mname] = (D_H0, D_H1)

    # 10 repeated stratified 70/30 train/test splits (this is not a k-fold
    # partition: test sets of different repeats may overlap). The same splits
    # are used for all methods because the seed depends only on `fold`.
    accs = {m: [] for m in methods}
    for fold in range(10):
        idx_tr, idx_te, lab_tr, lab_te = train_test_split(
            np.arange(N), Labels, train_size=0.7, stratify=Labels, random_state=42+fold)
        for mname in methods:
            D0, D1 = Ds[mname]
            # blocks[d] = (train/train, test/train) distances for degree d
            blocks = [extract_distance_blocks(D, idx_tr, idx_te) for D in [D0, D1]]
            accs[mname].append(classify_knn_most_confident(
                [b[0] for b in blocks], [b[1] for b in blocks],
                lab_tr, lab_te, K_KNN))

    all_results[noise] = accs
    # Mean ± standard deviation of the accuracy over the 10 splits.
    for m in methods:
        print(f'  {m:>25}: {np.mean(accs[m]):.2%} ± {np.std(accs[m]):.2%}')


NOISE = 0.0


Computing PDs: 100%|██████████| 200/200 [01:08<00:00,  2.92it/s]


  Distances: Alpha 1-PH


                                                  

  Distances: Codensity lower-star


                                                  

  Distances: Ecc lower-star


                                                

                 Alpha 1-PH: 61.17% ± 14.16%
       Codensity lower-star: 93.17% ± 3.37%
             Ecc lower-star: 71.33% ± 7.85%

NOISE = 0.03


Computing PDs: 100%|██████████| 200/200 [00:45<00:00,  4.38it/s]


  Distances: Alpha 1-PH


                                                  

  Distances: Codensity lower-star


                                                  

  Distances: Ecc lower-star


                                       

                 Alpha 1-PH: 48.00% ± 8.69%
       Codensity lower-star: 95.67% ± 2.38%
             Ecc lower-star: 23.50% ± 4.18%

NOISE = 0.06


Computing PDs: 100%|██████████| 200/200 [00:47<00:00,  4.23it/s]


  Distances: Alpha 1-PH


                                                  

  Distances: Codensity lower-star


                                                  

  Distances: Ecc lower-star


                                       

                 Alpha 1-PH: 34.00% ± 9.20%
       Codensity lower-star: 81.67% ± 3.94%
             Ecc lower-star: 22.00% ± 2.87%

NOISE = 0.09


Computing PDs: 100%|██████████| 200/200 [00:47<00:00,  4.17it/s]


  Distances: Alpha 1-PH


                                                  

  Distances: Codensity lower-star


                                                  

  Distances: Ecc lower-star


                                       

                 Alpha 1-PH: 38.17% ± 4.04%
       Codensity lower-star: 78.00% ± 4.64%
             Ecc lower-star: 24.83% ± 6.85%

NOISE = 0.12


Computing PDs: 100%|██████████| 200/200 [00:46<00:00,  4.28it/s]


  Distances: Alpha 1-PH


                                                  

  Distances: Codensity lower-star


                                                  

  Distances: Ecc lower-star


                                       

                 Alpha 1-PH: 31.50% ± 3.83%
       Codensity lower-star: 69.33% ± 3.51%
             Ecc lower-star: 22.17% ± 6.41%




## Results

In [20]:
def fmt(v):
    """Format a sequence of accuracies as 'mean ± std', both as percentages with two decimals."""
    mean, spread = np.mean(v), np.std(v)
    return f'{mean:.2%} ± {spread:.2%}'

# Summary table: one row per noise level, one 25-character column per method.
rule = '=' * 80
header = f'{"Noise":<10}' + ''.join(f'{m:<25}' for m in methods)

print(rule)
print(header)
print(rule)
for noise in NOISE_LEVELS:
    per_method = all_results[noise]
    cells = ''.join(f'{fmt(per_method[m]):<25}' for m in methods)
    print(f'{noise:<10.2f}' + cells)
print(rule)

Noise     Alpha 1-PH               Codensity lower-star     Ecc lower-star           
0.00      61.17% ± 14.16%          93.17% ± 3.37%           71.33% ± 7.85%           
0.03      48.00% ± 8.69%           95.67% ± 2.38%           23.50% ± 4.18%           
0.06      34.00% ± 9.20%           81.67% ± 3.94%           22.00% ± 2.87%           
0.09      38.17% ± 4.04%           78.00% ± 4.64%           24.83% ± 6.85%           
0.12      31.50% ± 3.83%           69.33% ± 3.51%           22.17% ± 6.41%           


In [21]:
# Persist the raw per-split accuracies. `all_results` is a dict, so NumPy stores
# it as a pickled 0-d object array; presumably it must be read back with
# np.load(path, allow_pickle=True).item() — confirm against the NumPy docs.
# `:03d` only pads to three digits, so n = 1000 yields 'Synthetic_1ph_1000.npy'.
np.save(f'Synthetic_1ph_{n:03d}.npy', all_results)