# 🔬 Experiment 1: Per-Layer Ricci Coefficients (Complete Pipeline)

**End-to-end notebook** that:
1. Trains 45 DNN architectures on Fashion-MNIST (Sandals vs Boots) **until 98% accuracy**
2. Extracts hidden-layer activations
3. Builds kNN graphs & computes 3 curvature types
4. Computes per-layer Ricci coefficients
5. Saves CSV & generates plots

| Curvature | Prefix | Formula |
|-----------|--------|---------|
| Forman-Ricci | FR | $4 - \deg(i) - \deg(j)$ |
| Augmented-Forman-Ricci | AFR | $4 - \deg(i) - \deg(j) + 3\triangle_{ij}$ |
| Approx-Ollivier-Ricci | AOR | Tian et al. (2023) |

---
## 1. Setup & Imports

In [None]:
import os, time, warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from typing import List, Dict
from tqdm.notebook import tqdm

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.datasets import fashion_mnist

from sklearn.neighbors import kneighbors_graph
from scipy.sparse import csr_matrix, triu as sp_triu, lil_matrix
from scipy.sparse.csgraph import dijkstra
from scipy.stats import pearsonr

# Device discovery: turn on incremental GPU memory allocation when at least
# one GPU is visible, otherwise just report that training runs on the CPU.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print('‚ö†Ô∏è  No GPU found ‚Äî training will run on CPU')
else:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    print(f'‚úÖ GPU available: {[g.name for g in gpus]}')

# Notebook-wide cosmetics: hide UserWarnings and enlarge the plot font.
warnings.filterwarnings('ignore', category=UserWarning)
matplotlib.rcParams.update({'font.size': 12})
print(f'TensorFlow: {tf.__version__}, NumPy: {np.__version__}')

---
## 2. Configuration

In [None]:
# --- Experiment constants -------------------------------------------------
K = 350                      # neighbours per node when building the kNN graphs
TARGET_ACCURACY = 0.98       # accuracy threshold used for early stopping / caching
MAX_EPOCHS = 300             # hard cap on training epochs per network
BATCH_SIZE = 32
OUTPUT_DIR = 'output_experiment_1'   # CSV and plots are written here
ACTIVATIONS_DIR = 'output_k_sweep'   # per-network activations / accuracy cache

# --- Architecture grid ----------------------------------------------------
FLAT_DEPTHS = list(range(4, 13))
FLAT_WIDTHS = [16, 32, 64, 128]
BOTTLENECK_DEPTHS = list(range(4, 13))
BOTTLENECK_WIDTH = 128

# --- Curvature types and the column prefix used for each ------------------
CURVATURE_TYPES = ['Forman-Ricci', 'Augmented-Forman-Ricci', 'Approx-Ollivier-Ricci']
CURVATURE_PREFIXES = {'Forman-Ricci': 'FR', 'Augmented-Forman-Ricci': 'AFR', 'Approx-Ollivier-Ricci': 'AOR'}

# Every network is an (architecture, depth, width) triple: all flat
# depth/width combinations first (depth-major), then the bottleneck depths.
NETWORKS = [('flat', d, w) for d in FLAT_DEPTHS for w in FLAT_WIDTHS]
NETWORKS += [('bottleneck', d, BOTTLENECK_WIDTH) for d in BOTTLENECK_DEPTHS]

print(f'üìã K={K}, Target={TARGET_ACCURACY*100:.0f}%, MaxEpochs={MAX_EPOCHS}')
print(f'   Networks={len(NETWORKS)} ({len(FLAT_DEPTHS)*len(FLAT_WIDTHS)} flat + {len(BOTTLENECK_DEPTHS)} bottleneck)')
print(f'   Curvatures={CURVATURE_TYPES}')

---
## 3. Load Fashion-MNIST Data
Binary classification: **Sandals (5) vs Ankle Boots (9)**

In [None]:
# Load the full dataset and flatten every image into a 784-feature float32 row.
(x_train_full, y_train_full), (x_test_full, y_test_full) = fashion_mnist.load_data()
x_train_full = x_train_full.reshape(-1, 784).astype(np.float32)
x_test_full = x_test_full.reshape(-1, 784).astype(np.float32)

# Keep only the two classes of interest; class 9 becomes the positive label (1)
# and class 5 the negative label (0).
labels = [5, 9]
train_idx = np.isin(y_train_full, labels)
test_idx = np.isin(y_test_full, labels)

x_train, x_test = x_train_full[train_idx], x_test_full[test_idx]
y_train = (y_train_full[train_idx] == 9).astype(np.int32)
y_test = (y_test_full[test_idx] == 9).astype(np.int32)

print(f'üìä Train: {x_train.shape}, Test: {x_test.shape}')

---
## 4. DNN Architecture Builders

In [None]:
def build_flat_model(depth, width, input_dim=784):
    """Build a constant-width binary classifier.

    The network has ``depth`` ReLU hidden layers of ``width`` units each,
    followed by a single sigmoid output unit.

    Args:
        depth: Number of hidden layers.
        width: Units in every hidden layer.
        input_dim: Size of the flat input vector.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    stack = [Dense(width, activation='relu', input_shape=(input_dim,))]
    stack.extend(Dense(width, activation='relu') for _ in range(depth - 1))
    stack.append(Dense(1, activation='sigmoid'))

    model = Sequential()
    for layer in stack:
        model.add(layer)
    return model

def _bottleneck_layer_sizes(depth, width):
    """Return the hidden-layer widths of a bottleneck network, in order.

    For ``depth <= 4`` the profile is the first ``depth`` entries of
    ``[width, 32, 32, width]``. Otherwise the width is halved layer by layer
    (never going below 32) for ``depth // 2`` layers, followed by a 32-unit
    middle section and the mirrored widening half, truncated to ``depth``.
    """
    if depth <= 4:
        return [width, 32, 32, width][:depth]

    narrowing = []
    cur = width
    for _ in range(depth // 2):
        narrowing.append(cur)
        if cur > 32:
            cur = max(32, cur // 2)
    middle = [32] * max(1, depth - 2 * len(narrowing))
    # NOTE(review): for even depths the un-truncated list holds depth + 1
    # entries (the middle section is forced to length >= 1), so the slice
    # drops the last widening layer and the profile is not symmetric, e.g.
    # depth=6, width=128 -> 128, 64, 32, 32, 32, 64. Kept as-is so existing
    # cached results stay comparable; confirm this is intended.
    return (narrowing + middle + narrowing[::-1])[:depth]


def build_bottleneck_model(depth, width, input_dim=784):
    """Build a binary classifier whose hidden widths shrink then grow again.

    Args:
        depth: Number of hidden layers.
        width: Width of the first hidden layer.
        input_dim: Size of the flat input vector.

    Returns:
        An uncompiled ``Sequential`` model with ReLU hidden layers and a
        single sigmoid output unit.
    """
    sizes = _bottleneck_layer_sizes(depth, width)
    model = Sequential()
    model.add(Dense(sizes[0], activation='relu', input_shape=(input_dim,)))
    for s in sizes[1:]:
        model.add(Dense(s, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    return model


def get_layer_structure_str(arch, depth, width):
    """Return the hidden-layer widths of a network as an arrow-joined string.

    ``arch == 'flat'`` yields ``depth`` copies of ``width``; any other value
    is treated as a bottleneck network. The widths are computed directly, so
    no Keras model is instantiated just to describe the architecture.
    """
    if arch == 'flat':
        sizes = [width] * depth
    else:
        sizes = _bottleneck_layer_sizes(depth, width)
    return '‚Üí'.join(str(s) for s in sizes)


print('‚úÖ Architecture builders defined')

---
## 5. Train All Networks & Save Activations
Trains until **98% val accuracy** or max 300 epochs.

In [None]:
class StopAtAccuracy(tf.keras.callbacks.Callback):
    """Stop training once the logged ``val_accuracy`` reaches ``target``."""

    def __init__(self, target=0.98):
        super().__init__()
        self.target = target  # threshold compared against logs['val_accuracy']

    def on_epoch_end(self, epoch, logs=None):
        # ``logs`` defaults to None in the signature; treat that as "no metrics"
        # instead of failing on ``None.get``.
        logs = logs or {}
        if logs.get('val_accuracy', 0) >= self.target:
            self.model.stop_training = True

class TqdmProgressCallback(tf.keras.callbacks.Callback):
    """Show one tqdm bar per ``fit`` call, advanced once per epoch.

    The bar's postfix displays the latest ``loss`` and ``val_accuracy`` values
    taken from the epoch logs.
    """

    def __init__(self, target_acc):
        super().__init__()
        self.target_acc = target_acc  # stored only; not read by this class
        self.pbar = None

    def on_train_begin(self, logs=None):
        self.pbar = tqdm(total=self.params['epochs'], desc='    Training',
                         unit='ep', leave=False, bar_format='{l_bar}{bar:20}{r_bar}')

    def on_epoch_end(self, epoch, logs=None):
        if self.pbar is None:
            # No bar was opened (on_train_begin not called); nothing to update.
            return
        logs = logs or {}  # the signature allows None
        self.pbar.set_postfix({'loss': f'{logs.get("loss",0):.4f}',
                               'val_acc': f'{logs.get("val_accuracy",0):.3f}'})
        self.pbar.update(1)

    def on_train_end(self, logs=None):
        if self.pbar:
            self.pbar.close()
            self.pbar = None  # avoid touching a closed bar if the callback is reused

# Train every architecture in NETWORKS (or reuse a cached run) and store the
# hidden-layer activations on the test set for the curvature analysis.
os.makedirs(ACTIVATIONS_DIR, exist_ok=True)
training_results = []
total = len(NETWORKS)
print(f'üèãÔ∏è Training {total} networks (target: {TARGET_ACCURACY*100:.0f}%)\n')
t_start = time.time()

pbar = tqdm(NETWORKS, desc='Networks', unit='net', bar_format='{l_bar}{bar:30}{r_bar}')
for idx, (arch, depth, width) in enumerate(pbar):
    folder_name = f'{arch}_{depth}_{width}'
    pbar.set_postfix_str(folder_name)
    net_dir = os.path.join(ACTIVATIONS_DIR, folder_name)
    act_path = os.path.join(net_dir, 'activations.npy')
    acc_path = os.path.join(net_dir, 'accuracy.npy')

    # Reuse a cached run only if its stored test accuracy met the target;
    # otherwise fall through and retrain from scratch.
    if os.path.exists(act_path) and os.path.exists(acc_path):
        acc = np.load(acc_path)[0]
        if acc >= TARGET_ACCURACY:
            training_results.append((arch, depth, width, acc, 'skipped'))
            tqdm.write(f'  [{idx+1:2d}/{total}] {folder_name:20s} ‚Üí SKIP (acc={acc:.3f})')
            continue
        tqdm.write(f'  [{idx+1:2d}/{total}] {folder_name:20s} ‚Üí RETRAIN (acc={acc:.3f}<target)')

    model = build_flat_model(depth, width) if arch=='flat' else build_bottleneck_model(depth, width)
    model.compile(loss='binary_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])
    history = model.fit(x_train, y_train, epochs=MAX_EPOCHS, batch_size=BATCH_SIZE,
                        validation_split=0.2, verbose=0,
                        callbacks=[StopAtAccuracy(TARGET_ACCURACY), TqdmProgressCallback(TARGET_ACCURACY)])
    ep = len(history.history['loss'])  # epochs actually run
    accuracy = model.evaluate(x_test, y_test, verbose=0)[1]

    # Early stopping watches *validation* accuracy while ``accuracy`` is the
    # *test* accuracy, so a run can stop early and still miss the target on the
    # test set. Only claim "max epochs" when the epoch cap was really reached.
    if accuracy >= TARGET_ACCURACY:
        tag = f'‚úì ({ep}ep)'
    elif ep >= MAX_EPOCHS:
        tag = f'‚ö† (max {MAX_EPOCHS}ep)'
    else:
        tag = f'‚ö† ({ep}ep, test acc < target)'

    # Push the test set through the hidden layers one at a time and keep each
    # layer's output (the final output layer is excluded).
    activations = []
    inp = x_test
    for layer in model.layers[:-1]:
        inp = layer(inp)
        activations.append(inp.numpy())

    os.makedirs(net_dir, exist_ok=True)
    # Layers can have different widths, so store them in an object array
    # with one entry per layer.
    arr = np.empty(len(activations), dtype=object)
    for i, a in enumerate(activations):
        arr[i] = a
    np.save(act_path, arr)
    np.save(acc_path, np.array([accuracy]))

    training_results.append((arch, depth, width, accuracy, 'trained'))
    tqdm.write(f'  [{idx+1:2d}/{total}] {folder_name:20s} ‚Üí acc={accuracy:.3f} {tag}')
    # Drop references and reset Keras state before building the next model.
    del model, activations, arr, history
    tf.keras.backend.clear_session()

pbar.close()
trained = [r for r in training_results if r[4]=='trained']
print(f'\n‚úÖ Done in {time.time()-t_start:.0f}s ‚Äî Trained:{len(trained)}, Skipped:{len(training_results)-len(trained)}')

---
## 6. Curvature Computation Functions
Three types of discrete Ricci curvature, computed edge-wise on kNN graphs.

In [None]:
def build_knn_graph(X, k):
    """Return the symmetrised k-nearest-neighbour connectivity graph of ``X``.

    A 1-D input is treated as a single feature column, and any dtype other
    than float32/float64 is cast to float32. An edge is kept when either
    endpoint lists the other among its ``k`` neighbours (self excluded).

    Returns:
        A CSR sparse adjacency matrix.
    """
    features = X.reshape(-1, 1) if X.ndim == 1 else X
    if features.dtype not in (np.float32, np.float64):
        features = features.astype(np.float32, copy=False)
    directed = kneighbors_graph(features, k, mode='connectivity', include_self=False)
    # Element-wise max with the transpose makes the relation undirected.
    return directed.maximum(directed.T).tocsr()

def compute_forman_ricci_matrix(A):
    """Compute edge-wise Forman-Ricci curvature ``4 - deg(i) - deg(j)``.

    Edges are taken from the strict upper triangle of ``A`` and each value is
    written symmetrically to ``(i, j)`` and ``(j, i)``. The computation is
    vectorised over all edges instead of looping in Python.

    Args:
        A: Sparse adjacency matrix; node degree is its row sum.

    Returns:
        A float32 CSR matrix of the same shape as ``A`` holding the curvature
        of every edge (zero elsewhere).
    """
    deg = np.asarray(A.sum(axis=1)).ravel()
    rows, cols = sp_triu(A, k=1).nonzero()
    if rows.size == 0:
        return csr_matrix(A.shape, dtype=np.float32)

    vals = (4.0 - deg[rows] - deg[cols]).astype(np.float32)
    # Mirror every upper-triangle entry to obtain a symmetric matrix.
    Ric = csr_matrix(
        (np.concatenate([vals, vals]),
         (np.concatenate([rows, cols]), np.concatenate([cols, rows]))),
        shape=A.shape, dtype=np.float32)
    Ric.eliminate_zeros()  # do not keep edges whose curvature is exactly 0 as stored entries
    return Ric

def compute_augmented_forman_ricci_matrix(A):
    """Compute edge-wise augmented Forman-Ricci curvature.

    For every edge ``(i, j)`` in the strict upper triangle of ``A`` the value
    is ``4 - deg(i) - deg(j) + 3 * (A @ A)[i, j]``, written symmetrically.
    For a 0/1 adjacency matrix ``(A @ A)[i, j]`` is the number of common
    neighbours of ``i`` and ``j``. All edges are processed in one vectorised
    step instead of one sparse scalar lookup per edge.

    Args:
        A: Sparse adjacency matrix; node degree is its row sum.

    Returns:
        A float32 CSR matrix of the same shape as ``A``.
    """
    deg = np.asarray(A.sum(axis=1)).ravel()
    rows, cols = sp_triu(A, k=1).nonzero()
    if rows.size == 0:
        return csr_matrix(A.shape, dtype=np.float32)

    A2 = csr_matrix(A @ A)
    shared = np.asarray(A2[rows, cols]).ravel()  # (A @ A)[i, j] for every edge
    vals = (4.0 - deg[rows] - deg[cols] + 3.0 * shared).astype(np.float32)
    # Mirror every upper-triangle entry to obtain a symmetric matrix.
    Ric = csr_matrix(
        (np.concatenate([vals, vals]),
         (np.concatenate([rows, cols]), np.concatenate([cols, rows]))),
        shape=A.shape, dtype=np.float32)
    Ric.eliminate_zeros()  # do not keep edges whose curvature is exactly 0 as stored entries
    return Ric

def compute_approx_ollivier_ricci_matrix(A):
    """Compute the edge-wise approximate Ollivier-Ricci curvature.

    With ``t = (A @ A)[i, j]``, ``hi = max(deg_i, deg_j)``,
    ``lo = min(deg_i, deg_j)`` and ``base = 1 - 1/deg_i - 1/deg_j``, the value
    for each edge in the strict upper triangle of ``A`` is::

        0.5 * t/hi - 0.5 * (max(0, base - t/lo) + max(0, base - t/hi) - t/hi)

    i.e. the mean of the term ``t/hi`` and the term
    ``-max(0, base - t/lo) - max(0, base - t/hi) + t/hi``. Values are written
    symmetrically and all edges are processed in one vectorised step.

    Args:
        A: Sparse adjacency matrix; node degree is its row sum.

    Returns:
        A float32 CSR matrix of the same shape as ``A``.
    """
    deg = np.asarray(A.sum(axis=1)).ravel()
    rows, cols = sp_triu(A, k=1).nonzero()
    if rows.size == 0:
        return csr_matrix(A.shape, dtype=np.float32)

    A2 = csr_matrix(A @ A)
    t = np.asarray(A2[rows, cols]).ravel()
    # Both endpoints of an edge have degree >= 1, so the divisions are safe.
    di, dj = deg[rows], deg[cols]
    hi = np.maximum(di, dj)
    lo = np.minimum(di, dj)
    base = 1 - 1 / di - 1 / dj
    vals = (0.5 * (t / hi)
            - 0.5 * (np.maximum(0, base - t / lo)
                     + np.maximum(0, base - t / hi) - t / hi)).astype(np.float32)
    # Mirror every upper-triangle entry to obtain a symmetric matrix.
    Ric = csr_matrix(
        (np.concatenate([vals, vals]),
         (np.concatenate([rows, cols]), np.concatenate([cols, rows]))),
        shape=A.shape, dtype=np.float32)
    Ric.eliminate_zeros()  # do not keep edges whose curvature is exactly 0 as stored entries
    return Ric

def compute_curvature_matrix(A, curv, apsp=None):
    """Dispatch to the curvature routine selected by name.

    Args:
        A: Sparse adjacency matrix passed through to the chosen routine.
        curv: One of 'Forman-Ricci', 'Augmented-Forman-Ricci' or
            'Approx-Ollivier-Ricci'.
        apsp: Accepted but not used by any of the three routines.

    Raises:
        ValueError: If ``curv`` is not one of the supported names.
    """
    if curv == 'Approx-Ollivier-Ricci':
        return compute_approx_ollivier_ricci_matrix(A)
    if curv == 'Augmented-Forman-Ricci':
        return compute_augmented_forman_ricci_matrix(A)
    if curv == 'Forman-Ricci':
        return compute_forman_ricci_matrix(A)
    raise ValueError(f'Unknown: {curv}')


print('‚úÖ Curvature functions defined (FR, AFR, AOR)')

---
## 7. Layer Ricci Coefficient Computation (Optimized)
kNN graphs and APSP computed **once per network**, reused for all 3 curvature types.

In [None]:
def precompute_graphs(activations, k):
    """Build the kNN graph and all-pairs hop distances for every layer, once.

    Args:
        activations: Sequence with one activation matrix per layer.
        k: Number of neighbours for the kNN graphs.

    Returns:
        ``(knn_graphs, apsps)``: two lists aligned with ``activations``. Each
        ``apsps`` entry is the unweighted, undirected shortest-path distance
        matrix of the matching graph.
    """
    bar_fmt = '{l_bar}{bar:15}{r_bar}'

    knn_graphs = [
        build_knn_graph(layer_act, k)
        for layer_act in tqdm(activations, desc='      kNN graphs', leave=False, bar_format=bar_fmt)
    ]
    apsps = [
        dijkstra(csgraph=graph, directed=False, unweighted=True, return_predecessors=False)
        for graph in tqdm(knn_graphs, desc='      APSP      ', leave=False, bar_format=bar_fmt)
    ]
    return knn_graphs, apsps

def compute_ricci_for_curvature(knn_graphs, apsps, curv='Forman-Ricci'):
    """Compute per-layer Ricci coefficients using PRECOMPUTED graphs.

    For each pair of consecutive layers ``(i, i + 1)`` and each node ``x``
    with at least one neighbour in layer ``i``:

    * ``sc``  = mean curvature (layer ``i``) over the edges from ``x`` to its
      layer-``i`` neighbours;
    * ``eta`` = mean change in shortest-path distance between ``x`` and those
      same neighbours when going from layer ``i`` to layer ``i + 1``.

    Nodes with a neighbour that is unreachable (infinite distance) in layer
    ``i + 1`` are skipped. The coefficient of layer ``i`` is the Pearson
    correlation between ``sc`` and ``eta`` over the remaining nodes, or NaN
    when fewer than two nodes remain.

    Args:
        knn_graphs: One sparse adjacency matrix per layer.
        apsps: One dense distance matrix per layer, aligned with ``knn_graphs``.
        curv: Curvature name understood by ``compute_curvature_matrix``.

    Returns:
        A float32 array of length ``len(knn_graphs) - 1``; an empty array when
        fewer than two layers are given.
    """
    L = len(knn_graphs)
    # Guard first: with no layers there is no knn_graphs[0] to inspect.
    if L < 2:
        return np.array([])
    N = knn_graphs[0].shape[0]
    bar_fmt = '{l_bar}{bar:15}{r_bar}'

    curvatures = []
    for i in tqdm(range(L-1), desc=f'      Curvature ', leave=False, bar_format=bar_fmt):
        curvatures.append(compute_curvature_matrix(knn_graphs[i], curv, apsp=apsps[i]))

    layer_coeffs = np.empty(L-1, dtype=np.float32)
    for i in tqdm(range(L-1), desc=f'      Pearson r ', leave=False, bar_format=bar_fmt):
        graph, curv_mat = knn_graphs[i], curvatures[i]
        dist_cur, dist_next = apsps[i], apsps[i+1]
        sc, eta = [], []
        for x in range(N):
            nb = graph[x].indices
            if len(nb) == 0:
                continue
            # Fetch the distances to all neighbours in one indexing operation.
            d_next = dist_next[x, nb]
            if np.isinf(d_next).any():
                continue
            sc.append(curv_mat[x, nb].sum() / len(nb))
            eta.append((d_next - dist_cur[x, nb]).sum() / len(nb))
        layer_coeffs[i] = pearsonr(sc, eta)[0] if len(sc) >= 2 else np.nan
    return layer_coeffs

print('‚úÖ Optimized Ricci functions defined (graphs computed once, reused for all curvatures)')

---
## 8. Run Ricci Analysis on All Networks

In [None]:
# For every network with saved activations: build the per-layer graphs once,
# then compute the layer Ricci coefficients for each curvature type and
# collect everything into one result row per network.
all_rows = []
total = len(NETWORKS)
print(f'üî¨ Ricci analysis: {total} networks √ó {len(CURVATURE_TYPES)} curvatures\n')
t_start = time.time()

pbar = tqdm(NETWORKS, desc='Ricci Analysis', unit='net', bar_format='{l_bar}{bar:30}{r_bar}')
for idx, (arch, depth, width) in enumerate(pbar):
    folder_name = f'{arch}_{depth}_{width}'
    pbar.set_postfix_str(folder_name)
    net_dir = os.path.join(ACTIVATIONS_DIR, folder_name)
    act_path = os.path.join(net_dir, 'activations.npy')
    acc_path = os.path.join(net_dir, 'accuracy.npy')
    log_prefix = f'  [{idx+1:2d}/{total}] {folder_name:20s}'

    if not os.path.exists(act_path):
        tqdm.write(f'{log_prefix} ‚Üí SKIP (no activations)')
        continue

    # The activations file holds an object array (one matrix per layer), which
    # requires allow_pickle=True; only load files produced by this notebook.
    activations = list(np.load(act_path, allow_pickle=True))
    if os.path.exists(acc_path):
        accuracy = np.load(acc_path)[0]
    else:
        accuracy = np.nan

    row = {
        'network_id': idx+1,
        'architecture': arch,
        'depth': depth,
        'width': width,
        'layer_structure': get_layer_structure_str(arch, depth, width),
        'accuracy': accuracy,
    }

    tqdm.write(f'{log_prefix} ‚Äî building graphs ({len(activations)} layers)...')
    knn_graphs, apsps = precompute_graphs(activations, K)

    curv_status = []
    for curv_type in tqdm(CURVATURE_TYPES, desc='    Curvatures', leave=False, bar_format='{l_bar}{bar:15}{r_bar}'):
        prefix = CURVATURE_PREFIXES[curv_type]
        col_names = [f'{prefix}_L{i}' for i in range(1, 13)]
        try:
            lc = compute_ricci_for_curvature(knn_graphs, apsps, curv=curv_type)
            # Pad with NaN up to the fixed 12 layer columns.
            values = [float(lc[j]) if j < len(lc) else np.nan for j in range(12)]
            mark = f'{prefix}‚úì'
        except Exception as e:
            # Best effort: record the failure and keep going with the other curvatures.
            values = [np.nan] * 12
            mark = f'{prefix}‚úó'
            tqdm.write(f'      ‚ö† {curv_type}: {e}')
        row.update(zip(col_names, values))
        curv_status.append(mark)

    del knn_graphs, apsps, activations
    all_rows.append(row)
    tqdm.write(f'{log_prefix} acc={accuracy:.3f}  {", ".join(curv_status)}')

pbar.close()
print(f'\n‚úÖ Ricci analysis complete in {time.time()-t_start:.0f}s ({len(all_rows)} networks)')

---
## 9. Save CSV

In [None]:
# Assemble the result table with a fixed column order and write it to disk.
base_cols = ['network_id', 'architecture', 'depth', 'width', 'layer_structure', 'accuracy']
layer_cols = [f'{CURVATURE_PREFIXES[ct]}_L{i}' for ct in CURVATURE_TYPES for i in range(1, 13)]
all_cols = base_cols + layer_cols

if all_rows:
    # Keep only the expected columns that are actually present in the rows.
    cols = [c for c in all_cols if c in all_rows[0]]
    df = pd.DataFrame(all_rows)[cols]
else:
    # No network produced a row (e.g. no activations on disk): build an empty
    # table with the full schema instead of failing on all_rows[0].
    cols = all_cols
    df = pd.DataFrame(columns=cols)
    print('WARNING: no Ricci results available; writing an empty CSV')

os.makedirs(OUTPUT_DIR, exist_ok=True)
csv_path = os.path.join(OUTPUT_DIR, 'exp1_final_epoch.csv')
df.to_csv(csv_path, index=False)
print(f'üíæ Saved: {csv_path} ‚Äî {df.shape}')
df.head()

---
## 10. Plots — Layer-Ricci vs Layer & Accuracy vs Depth

In [None]:
def plot_layer_ricci(df, curv_type, prefix, architecture):
    """Plot layer-Ricci coefficients and test accuracy for one architecture.

    One figure is produced per width found in ``df`` for ``architecture``:
    the left panel shows the coefficient per layer (one line per depth), the
    right panel shows accuracy against depth. Each figure is saved as a PNG
    in ``OUTPUT_DIR`` and then shown.

    Args:
        df: Result table with 'architecture', 'width', 'depth', 'accuracy' and
            ``{prefix}_L{i}`` columns.
        curv_type: Curvature name used in the figure title.
        prefix: Column prefix of the curvature to plot.
        architecture: Architecture name to filter on.
    """
    df_arch = df[df['architecture'] == architecture]
    if df_arch.empty:
        return
    os.makedirs(OUTPUT_DIR, exist_ok=True)  # allow plotting before the CSV cell has run
    for w in sorted(df_arch['width'].unique()):
        df_w = df_arch[df_arch['width'] == w].sort_values('depth')
        if df_w.empty:
            continue
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5), gridspec_kw={'width_ratios': [2, 1]})
        depths = sorted(df_w['depth'].unique())
        colors = plt.cm.viridis(np.linspace(0.1, 0.9, len(depths)))
        for d, c in zip(depths, colors):
            row = df_w[df_w['depth']==d].iloc[0]
            # Keep each value paired with its real layer number so that a NaN
            # in the middle does not shift the following layers to the left.
            layers, vals = [], []
            for i in range(1, 13):
                col = f'{prefix}_L{i}'
                if col in row.index and not pd.isna(row[col]):
                    layers.append(i)
                    vals.append(row[col])
            if vals:
                ax1.plot(layers, vals, 'o-', color=c, label=f'D{d}', lw=1.5, ms=4)
        ax1.set_xlabel('Layer'); ax1.set_ylabel('Layer-Ricci coef')
        if ax1.get_legend_handles_labels()[0]:  # skip the legend when nothing was drawn
            ax1.legend(fontsize=9, ncol=2)
        ax1.axhline(0, color='gray', ls='--', alpha=.5); ax1.grid(True, alpha=.3)
        ax2.plot(df_w['depth'], df_w['accuracy']*100, 'o-', color='steelblue', lw=2, ms=6)
        ax2.set_xlabel('Depth'); ax2.set_ylabel('Test Accuracy (%)'); ax2.grid(True, alpha=.3)
        fig.suptitle(f'{curv_type} ‚Äî {architecture.capitalize()} (w={w})', fontsize=14, fontweight='bold')
        plt.tight_layout()
        fname = f'plot_{prefix}_{architecture}_w{w}.png'
        plt.savefig(os.path.join(OUTPUT_DIR, fname), dpi=150, bbox_inches='tight')
        plt.show()
        plt.close(fig)  # release the figure so repeated calls do not accumulate open figures
        print(f'  Saved: {fname}')

print('üìä Flat Networks\n')
for ct in CURVATURE_TYPES: plot_layer_ricci(df, ct, CURVATURE_PREFIXES[ct], 'flat')
print('\nüìä Bottleneck Networks\n')
for ct in CURVATURE_TYPES: plot_layer_ricci(df, ct, CURVATURE_PREFIXES[ct], 'bottleneck')

---
## 11. Curvature Comparison Plot

In [None]:
def plot_comparison(df, architecture='flat', width=128):
    """Plot all curvature types side by side for one architecture and width.

    One panel per entry of ``CURVATURE_TYPES`` (shared y-axis), one line per
    depth. The figure is saved as a PNG in ``OUTPUT_DIR`` and then shown. A
    message is printed and nothing is drawn when no rows match.

    Args:
        df: Result table with 'architecture', 'width', 'depth' and
            ``{prefix}_L{i}`` columns.
        architecture: Architecture name to filter on.
        width: Width to filter on.
    """
    df_s = df[(df['architecture']==architecture)&(df['width']==width)].sort_values('depth')
    if df_s.empty:
        print(f'No data for {architecture} w={width}')
        return
    os.makedirs(OUTPUT_DIR, exist_ok=True)  # allow plotting before the CSV cell has run
    n = len(CURVATURE_TYPES)
    fig, axes = plt.subplots(1, n, figsize=(5*n, 5), sharey=True)
    if n == 1:
        axes = [axes]
    depths = sorted(df_s['depth'].unique())
    colors = plt.cm.viridis(np.linspace(0.1, 0.9, len(depths)))
    for ax, ct in zip(axes, CURVATURE_TYPES):
        p = CURVATURE_PREFIXES[ct]
        for d, c in zip(depths, colors):
            row = df_s[df_s['depth']==d].iloc[0]
            # Keep each value paired with its real layer number so that a NaN
            # in the middle does not shift the following layers to the left.
            layers, vals = [], []
            for i in range(1, 13):
                col = f'{p}_L{i}'
                if col in row.index and not pd.isna(row[col]):
                    layers.append(i)
                    vals.append(row[col])
            if vals:
                ax.plot(layers, vals, 'o-', color=c, label=f'D{d}', lw=1.5, ms=3)
        ax.set_title(p, fontsize=12, fontweight='bold'); ax.set_xlabel('Layer')
        ax.axhline(0, color='gray', ls='--', alpha=.5); ax.grid(True, alpha=.3)
        if ax.get_legend_handles_labels()[0]:  # skip the legend when nothing was drawn
            ax.legend(fontsize=7, ncol=2)
    axes[0].set_ylabel('Layer-Ricci coef')
    fig.suptitle(f'Curvature Comparison ‚Äî {architecture.capitalize()} (w={width})', fontsize=14, fontweight='bold')
    plt.tight_layout()
    fname = f'plot_comparison_{architecture}_w{width}.png'
    plt.savefig(os.path.join(OUTPUT_DIR, fname), dpi=150, bbox_inches='tight')
    plt.show()
    plt.close(fig)  # release the figure so repeated calls do not accumulate open figures
    print(f'Saved: {fname}')

plot_comparison(df, 'flat', 128)
plot_comparison(df, 'flat', 64)
plot_comparison(df, 'bottleneck', 128)

---
## 12. Summary Statistics

In [None]:
# Per-curvature summary over every non-NaN layer coefficient in the table.
print(f'{"Curvature":30s}  {"Mean":>8s}  {"Std":>8s}  {"Min":>8s}  {"Max":>8s}  {"% < 0":>8s}')
print('-' * 80)
for ct in CURVATURE_TYPES:
    p = CURVATURE_PREFIXES[ct]
    cols = [name for name in (f'{p}_L{i}' for i in range(1, 13)) if name in df.columns]
    v = df[cols].values.flatten()
    v = v[~np.isnan(v)]
    if len(v) == 0:
        continue
    print(f'{ct:30s}  {v.mean():8.4f}  {v.std():8.4f}  {v.min():8.4f}  {v.max():8.4f}  {100*(v<0).mean():7.1f}%')
print()

# Accuracy overview for each architecture family.
for a in ['flat','bottleneck']:
    d = df[df['architecture']==a]
    acc = d["accuracy"]
    print(f'  {a.capitalize():12s}: acc mean={acc.mean():.3f}, min={acc.min():.3f}, max={acc.max():.3f}')

In [None]:
# Bundle every file under OUTPUT_DIR into a single ZIP and offer it for download.
import zipfile
from IPython.display import FileLink

zip_name = 'experiment_1_results.zip'
with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zf:
    for root, dirs, files in os.walk(OUTPUT_DIR):
        for f in files:
            zf.write(os.path.join(root, f))

# Count the PNG files sitting directly in OUTPUT_DIR for the closing message.
n_plots = sum(1 for entry in os.listdir(OUTPUT_DIR) if entry.endswith(".png"))
print(f'\nüèÅ Done! Output: {OUTPUT_DIR}/ ({n_plots} plots + CSV)')
FileLink(zip_name)