# Comprehensive Neural Analysis Example

This notebook demonstrates comprehensive neural analysis methods using synthetic datasets. It covers:

1. **Test Neural Analysis Methods** - Apply structure_index and shape_distance to various datasets
2. **Benchmark Dimensionality Reduction** - Compare PCA, UMAP, t-SNE, MDS, Isomap, LLE, Spectral
3. **Validate Decoding Approaches** - Test population vector and k-NN decoders with known ground truth
4. **Test Cell Type Classification** - Apply supervised and unsupervised classifiers to mixed populations
5. **Study Noise Effects** - Analyze how noise affects embedding quality and analysis methods

All visualizations use the **PlotGrid system** for consistency.


In [None]:
# Imports
import numpy as np
import pandas as pd
import time
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

# Neural analysis imports
from neural_analysis.data.synthetic_data import (
    generate_place_cells,
    generate_grid_cells,
    generate_head_direction_cells,
    generate_mixed_population_flexible,
)
from neural_analysis.learning.decoding import (
    population_vector_decoder,
    knn_decoder,
    evaluate_decoder,
)
from neural_analysis.learning.classification import (
    classify_cells,
    cluster_cells,
    extract_cell_features,
    compare_classifiers,
    compare_clusterers,
)
from neural_analysis.embeddings import compute_embedding, compute_multiple_embeddings
from neural_analysis.topology import compute_structure_index
from neural_analysis.metrics.distributions import shape_distance
from neural_analysis.plotting import (
    PlotGrid,
    PlotSpec,
    GridLayoutConfig,
    PlotConfig,
)

# Seed NumPy's legacy global RNG so anything drawing from np.random is repeatable.
# The generator and embedding calls in later cells are additionally given
# explicit seed= / random_state= values.
np.random.seed(42)

# Printed only after every import above has succeeded.
print("✓ Imports successful")


## Section 1: Test Neural Analysis Methods with Datasets

Generate multiple synthetic datasets and apply structure_index and shape_distance to quantify manifold organization.


In [None]:
# Build one synthetic dataset per cell type, plus a mixed population.
# Each generator returns an (activity, metadata) pair and gets its own seed.
print("Generating synthetic datasets...")

# Settings shared by the two arena-based generators (place and grid cells).
arena_kwargs = dict(n_cells=50, n_samples=1000, arena_size=(2.0, 2.0), plot=False)

place_activity, place_meta = generate_place_cells(seed=42, **arena_kwargs)
grid_activity, grid_meta = generate_grid_cells(seed=43, **arena_kwargs)

# Head direction cells are generated without an arena_size argument.
hd_activity, hd_meta = generate_head_direction_cells(
    n_cells=50, n_samples=1000, seed=44, plot=False
)

# The mixed population is generated without an explicit n_cells argument.
mixed_activity, mixed_meta = generate_mixed_population_flexible(
    n_samples=1000, seed=45, plot=False
)

# Report the shape of every activity array.
for population_name, activity_matrix in (
    ("Place cells", place_activity),
    ("Grid cells", grid_activity),
    ("Head direction cells", hd_activity),
    ("Mixed population", mixed_activity),
):
    print(f"{population_name}: {activity_matrix.shape}")


In [None]:
# Compute the structure index of each dataset's activity with respect to the
# labels stored in its metadata (positions, or head directions).
print("\nComputing Structure Index...")

# dataset name -> (activity, labels). Head directions are reshaped into a
# single column before being passed on.
datasets = dict(
    place=(place_activity, place_meta['positions']),
    grid=(grid_activity, grid_meta['positions']),
    head_direction=(hd_activity, hd_meta['head_directions'].reshape(-1, 1)),
    mixed=(mixed_activity, mixed_meta['positions']),
)
si_kwargs = dict(n_bins=10, n_neighbors=15, num_shuffles=10)

si_results = {}  # dataset name -> structure index, NaN when the computation failed
for dataset_name, (population_activity, behaviour) in datasets.items():
    try:
        # Only the first of the four returned values is kept.
        si_value, _, _, _ = compute_structure_index(
            population_activity, behaviour, **si_kwargs
        )
        si_results[dataset_name] = si_value
        print(f"  {dataset_name}: SI = {si_value:.3f}")
    except Exception as err:
        # Best effort: report the failure and keep going with the other datasets.
        print(f"  {dataset_name}: Error - {err}")
        si_results[dataset_name] = np.nan


In [None]:
# Procrustes shape distance between population activity matrices, computed on
# the first 500 samples of each dataset.
print("\nComputing shape similarity...")

n_compare = 500
place_sample = place_activity[:n_compare]
grid_sample = grid_activity[:n_compare]
mixed_sample = mixed_activity[:n_compare]

# Place vs grid. shape_distance returns a pair; only the first element is used.
shape_dist, _ = shape_distance(place_sample, grid_sample, method='procrustes')
print(f"Place vs Grid (Procrustes): {shape_dist:.3f}")

# Place vs mixed.
# NOTE(review): the mixed population's cell count is not set in this notebook;
# confirm shape_distance accepts inputs with different numbers of columns.
shape_dist2, _ = shape_distance(place_sample, mixed_sample, method='procrustes')
print(f"Place vs Mixed (Procrustes): {shape_dist2:.3f}")


## Section 2: Benchmark Dimensionality Reduction Algorithms

Compare multiple DR methods on synthetic datasets with known structure.


In [None]:
# Benchmark each dimensionality reduction method on the place-cell activity.
# `labels` (the positions) is reused by the evaluation and plotting cells below.
data = place_activity
labels = place_meta['positions']

print("Computing embeddings...")
methods = ['pca', 'umap', 'tsne', 'mds', 'isomap', 'lle', 'spectral']
embeddings = {}  # method -> 2-component embedding, or None if the method failed
times = {}       # method -> elapsed seconds, or NaN if the method failed

for method in methods:
    try:
        # perf_counter is a monotonic high-resolution clock, so the measured
        # interval cannot be distorted by system clock adjustments.
        start = time.perf_counter()
        emb = compute_embedding(
            data, method=method, n_components=2, random_state=42
        )
        elapsed = time.perf_counter() - start
        embeddings[method] = emb
        times[method] = elapsed
        print(f"  {method}: {elapsed:.3f}s")
    except Exception as e:
        # Best effort: a failing method is recorded and the benchmark continues.
        print(f"  {method}: Error - {e}")
        embeddings[method] = None
        times[method] = np.nan  # keep `times` keyed identically to `embeddings`


In [None]:
# Compute the structure index of every embedding that was successfully built,
# using the position labels of the benchmarked dataset.
print("\nEvaluating embeddings with structure index...")
embedding_si = {}  # method -> structure index, NaN when the computation failed
embedding_si_kwargs = dict(n_bins=10, n_neighbors=15, num_shuffles=5)

for method_name, embedding in embeddings.items():
    # Methods whose embedding failed are stored as None and get no entry here.
    if embedding is None:
        continue
    try:
        # Only the first of the four returned values is kept.
        si_value, _, _, _ = compute_structure_index(
            embedding, labels, **embedding_si_kwargs
        )
        embedding_si[method_name] = si_value
        print(f"  {method_name}: SI = {si_value:.3f}")
    except Exception as err:
        print(f"  {method_name}: SI computation failed - {err}")
        embedding_si[method_name] = np.nan


In [None]:
# Scatter-plot a subset of the embeddings side by side, coloured by X position.
plot_specs = []
methods_to_plot = ['pca', 'umap', 'tsne', 'isomap']

for method in methods_to_plot:
    emb = embeddings.get(method)
    if emb is None:
        # Embedding failed or was never computed; skip its panel.
        continue
    spec = PlotSpec(
        data={'x': emb[:, 0], 'y': emb[:, 1]},
        plot_type='scatter',
        # Position by the number of panels built so far rather than by the
        # index in methods_to_plot: with a skipped method, index-based
        # positions would leave a gap and the last one would equal `cols`.
        subplot_position=len(plot_specs),
        title=f'{method.upper()} Embedding',
        color_by=labels[:, 0],  # Color by X position
        cmap='viridis',
        marker_size=10,
        alpha=0.6,
        kwargs={'x_label': 'Dim 1', 'y_label': 'Dim 2'}
    )
    plot_specs.append(spec)

# Only build the grid when at least one embedding is available.
if plot_specs:
    grid = PlotGrid(
        plot_specs=plot_specs,
        config=PlotConfig(figsize=(16, 4)),
        layout=GridLayoutConfig(rows=1, cols=len(plot_specs)),
        backend='matplotlib'
    )
    fig = grid.plot()


## Section 3: Validate Decoding Approaches with Known Ground Truth

Test decoding methods on place cells with known ground-truth positions. Grid cells and head direction cells are not decoded in this section.


In [None]:
# Decode position from place-cell activity with two decoders and report the
# Euclidean error against the positions stored in the metadata.
true_positions = place_meta['positions']

# Population vector decoder: uses every sample and the place-field centres.
print("Testing Population Vector Decoder...")
decoded_pos = population_vector_decoder(
    place_activity, place_meta['field_centers'], method='weighted_average'
)
errors = np.linalg.norm(decoded_pos - true_positions, axis=1)
print(f"  Mean error: {errors.mean():.3f} m")
print(f"  Median error: {np.median(errors):.3f} m")

# k-NN decoder: fit on the first n_train samples, evaluate on the remainder.
# The train/test variables are reused by the next cell.
print("\nTesting k-NN Decoder...")
n_train = 700
train_act, test_act = place_activity[:n_train], place_activity[n_train:]
train_pos, test_pos = true_positions[:n_train], true_positions[n_train:]

decoded_knn = knn_decoder(train_act, train_pos, test_act, k=5)
errors_knn = np.linalg.norm(decoded_knn - test_pos, axis=1)
print(f"  Mean error: {errors_knn.mean():.3f} m")
print(f"  Median error: {np.median(errors_knn):.3f} m")


In [None]:
# Evaluate the same k-NN decoder on raw activity and on a 10-component PCA
# embedding, reusing the train/test split from the previous cell.
print("\nComparing High-D vs Low-D Decoding...")

knn_eval_kwargs = dict(decoder='knn', k=5)

# High-D: raw activity.
metrics_highd = evaluate_decoder(
    train_act, train_pos, test_act, test_pos, **knn_eval_kwargs
)

# Low-D: PCA embedding of the full recording, split at the same index.
# NOTE(review): the embedding is computed from train and test samples together;
# confirm this is acceptable for the comparison being made.
pca_emb = compute_embedding(
    place_activity, method='pca', n_components=10, random_state=42
)
train_emb, test_emb = pca_emb[:n_train], pca_emb[n_train:]

metrics_lowd = evaluate_decoder(
    train_emb, train_pos, test_emb, test_pos, **knn_eval_kwargs
)

for space_name, decoder_metrics in (("High-D", metrics_highd), ("Low-D", metrics_lowd)):
    print(
        f"{space_name} R²: {decoder_metrics['r2_score']:.3f}, "
        f"Error: {decoder_metrics['mean_error']:.3f}"
    )


## Section 4: Test Cell Type Classification on Mixed Populations

Apply supervised and unsupervised classifiers to identify cell types in mixed populations.


In [None]:
# Extract features from the mixed population and compare supervised
# classifiers on a held-out half of the data.
print("Extracting features from mixed population...")
mixed_features = extract_cell_features(mixed_activity, mixed_meta)
mixed_cell_types = mixed_meta['cell_types']

print(f"Features shape: {mixed_features.shape}")
print(f"Cell types: {np.unique(mixed_cell_types, return_counts=True)}")

# Shuffled 50/50 train/test split with a fixed seed.
# A sequential first-half / second-half split would put whole cell types in
# only one of the two sets if the rows are grouped by type, so the row order
# is permuted before splitting.
# NOTE(review): assumes mixed_features and mixed_cell_types are row-aligned,
# as the original sequential split also did.
feature_rows = np.asarray(mixed_features)
type_labels = np.asarray(mixed_cell_types)
shuffled_idx = np.random.default_rng(42).permutation(len(feature_rows))

n_train = len(feature_rows) // 2
train_idx, test_idx = shuffled_idx[:n_train], shuffled_idx[n_train:]
train_feat, test_feat = feature_rows[train_idx], feature_rows[test_idx]
train_labels, test_labels = type_labels[train_idx], type_labels[test_idx]

# Compare classifiers
print("\nComparing supervised classifiers...")
classifier_results = compare_classifiers(
    train_feat, train_labels, test_feat, test_labels,
    methods=['random_forest', 'svc', 'knn', 'logistic_regression'],
    random_state=42
)

# Display results; a classifier that reported an error is shown rather than
# silently dropped from the output.
for method, clf_metrics in classifier_results.items():
    if 'error' in clf_metrics:
        print(f"  {method}: Error - {clf_metrics['error']}")
        continue
    print(f"  {method}: Accuracy = {clf_metrics['accuracy']:.3f}, F1 = {clf_metrics['f1']:.3f}")


In [None]:
# Cluster the extracted features without labels, asking for as many clusters
# as there are distinct cell types; the true labels are passed for scoring.
print("\nComparing unsupervised clusterers...")
n_clusters = len(np.unique(mixed_cell_types))

clustering_results = compare_clusterers(
    mixed_features,
    n_clusters=n_clusters,
    true_labels=mixed_cell_types,
    methods=['kmeans', 'gaussian_mixture', 'agglomerative'],
    random_state=42
)

# Print one line per clusterer; entries carrying an 'error' key are skipped.
for method_name, cluster_scores in clustering_results.items():
    if 'error' in cluster_scores:
        continue
    # ARI falls back to NaN when absent; the silhouette score is required.
    ari = cluster_scores.get('adjusted_rand_score', np.nan)
    silhouette = cluster_scores['silhouette_score']
    print(f"  {method_name}: ARI = {ari:.3f}, Silhouette = {silhouette:.3f}")


## Section 5: Study How Noise Affects Embedding Quality

Generate datasets with varying noise levels and analyze how noise affects embedding quality, structure index, and decoding performance.


In [None]:
# Sweep noise levels on synthetic place cells. For each level record the
# structure index, the Procrustes shape distance between the noisy and the
# noise-free PCA embedding, and k-NN decoding performance.
print("Generating datasets with varying noise...")
noise_levels = np.linspace(0.0, 1.0, 6)
noise_results = []

# Generator settings shared by the clean reference and every noisy dataset,
# so the datasets differ only in noise_level.
noise_gen_kwargs = dict(
    n_cells=50, n_samples=1000, arena_size=(2.0, 2.0), seed=42, plot=False
)
noise_n_train = 700  # samples used to fit the k-NN decoder; the rest are test

# The noise-free PCA embedding does not depend on the loop variable, so it is
# computed once here instead of once per noise level.
clean_activity, _ = generate_place_cells(noise_level=0.0, **noise_gen_kwargs)
clean_emb = compute_embedding(
    clean_activity, method='pca', n_components=2, random_state=42
)

for noise in noise_levels:
    print(f"\nNoise level: {noise:.2f}")

    # Generate place cells with noise
    activity, meta = generate_place_cells(noise_level=noise, **noise_gen_kwargs)
    positions = meta['positions']

    # Structure index; a failure is reported and recorded as NaN.
    try:
        si, _, _, _ = compute_structure_index(
            activity, positions, n_bins=10, n_neighbors=15, num_shuffles=5
        )
    except Exception as e:
        print(f"  Structure index failed - {e}")
        si = np.nan

    # Embeddings of the noisy data. Only the PCA embedding feeds the metrics
    # below; the UMAP embedding is stored but not otherwise used in this cell.
    embeddings_noise = {}
    for method in ['pca', 'umap']:
        try:
            embeddings_noise[method] = compute_embedding(
                activity, method=method, n_components=2, random_state=42
            )
        except Exception as e:
            print(f"  {method}: Error - {e}")
            embeddings_noise[method] = None

    # Shape distance to the clean embedding over the first 500 samples.
    noisy_pca = embeddings_noise.get('pca')
    if not noise > 0:
        # The zero-noise level is the reference itself.
        noise_shape_dist = 0.0
    elif noisy_pca is None:
        # No embedding to compare: report NaN rather than a misleading 0.0.
        noise_shape_dist = np.nan
    else:
        noise_shape_dist, _ = shape_distance(
            clean_emb[:500], noisy_pca[:500], method='procrustes'
        )

    # k-NN decoding on a sequential split. Names are specific to this cell so
    # the split and metrics from earlier sections are not overwritten.
    noise_train_act, noise_test_act = activity[:noise_n_train], activity[noise_n_train:]
    noise_train_pos, noise_test_pos = positions[:noise_n_train], positions[noise_n_train:]

    decoding_metrics = evaluate_decoder(
        noise_train_act, noise_train_pos, noise_test_act, noise_test_pos,
        decoder='knn', k=5
    )

    noise_results.append({
        'noise': noise,
        'si': si,
        'shape_dist': noise_shape_dist,
        'decoding_error': decoding_metrics['mean_error'],
        'decoding_r2': decoding_metrics['r2_score'],
    })

    print(f"  SI: {si:.3f}, Shape dist: {noise_shape_dist:.3f}, "
          f"Decoding error: {decoding_metrics['mean_error']:.3f}")

# One row per noise level; consumed by the plotting cell below.
noise_df = pd.DataFrame(noise_results)
print("\nNoise Analysis Results:")
print(noise_df.round(3))


In [None]:
# Plot each noise-sweep metric against the noise level, one line panel per metric.
# Panels are described once in a table instead of three copy-pasted PlotSpec
# calls: (noise_df column, panel title, line colour, y-axis label).
noise_panels = [
    ('si', 'Structure Index vs Noise', 'steelblue', 'Structure Index'),
    ('decoding_error', 'Decoding Error vs Noise', 'coral', 'Mean Error (m)'),
    ('shape_dist', 'Shape Distance vs Noise', 'green', 'Shape Distance'),
]

plot_specs = [
    PlotSpec(
        data={'x': noise_df['noise'], 'y': noise_df[column]},
        plot_type='line',
        subplot_position=position,  # left-to-right in table order, starting at 0
        title=panel_title,
        color=line_color,
        marker='o',
        line_width=2,
        kwargs={'x_label': 'Noise Level', 'y_label': y_label}
    )
    for position, (column, panel_title, line_color, y_label) in enumerate(noise_panels)
]

grid = PlotGrid(
    plot_specs=plot_specs,
    config=PlotConfig(figsize=(15, 5)),
    # Column count follows the number of panels so the table above is the
    # single place to add or remove a metric.
    layout=GridLayoutConfig(rows=1, cols=len(plot_specs)),
    backend='matplotlib'
)
fig = grid.plot()


## Summary

This comprehensive example demonstrated:

1. **Structure Index and Shape Distance Analysis** - Quantified manifold organization across different cell types and compared population activity shapes
2. **Dimensionality Reduction Benchmarking** - Compared 7 DR methods on synthetic data
3. **Decoding Validation** - Tested population vector and k-NN decoders with ground truth
4. **Cell Type Classification** - Applied supervised and unsupervised methods to mixed populations
5. **Noise Impact Study** - Analyzed how noise affects embedding quality and decoding performance

All methods are integrated and ready for use with your neural data!
