# Manifold System v1 - Interactive Notebook

This notebook provides an interactive interface for exploring manifold projections.

## Quick Start

1. Run all cells
2. Load your data
3. Project and visualize
4. Explore interactively

In [None]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

from manifold_system import ManifoldSystem
from manifold_learning import ManifoldLearner, TrajectoryAnalyzer
from manifold_viz import ManifoldVisualizer

# Try to import interactive plotly
try:
    from interactive_plotly import InteractivePlotter
    PLOTLY_AVAILABLE = True
    print("✓ Interactive Plotly available")
except ImportError:
    PLOTLY_AVAILABLE = False
    print("⚠ Plotly not available. Install with: pip install plotly")

# Configure matplotlib
%matplotlib inline
plt.style.use('seaborn-v0_8-darkgrid')

print("✓ All imports successful")

## 1. Initialize System

In [None]:
# Instantiate the three core components (constructed left to right)
system, learner, viz = ManifoldSystem(), ManifoldLearner(), ManifoldVisualizer()

# The interactive plotter is only created when the optional import succeeded
if PLOTLY_AVAILABLE:
    plotter = InteractivePlotter()

print("✓ System initialized")
print(f"  Available methods: {learner.check_dependencies()}")

## 2. Load Data

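Choose **one** of the options below and run only that cell. Both cells assign `df`, so if you run both (for example via "Run All"), `df` ends up holding the synthetic data from Option B — and Option A stops with an error if the CSV path does not exist.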

### Option A: Load Your CSV File

In [None]:
# Option A: read a user-supplied CSV into `df`
filepath = 'your_data.csv'  # Change this!
df = pd.read_csv(filepath)

# Report the size and column names, then preview the first rows
n_rows, n_cols = df.shape
print(f"Loaded: {n_rows} rows, {n_cols} columns")
print(f"\nColumns: {list(df.columns)}")
df.head()

### Option B: Generate Synthetic Data

In [None]:
# Option B: build a synthetic dataset with the project's example generator
from example_usage import generate_synthetic_data

# Generator settings collected in one place so they are easy to tweak
synthetic_options = {
    'n_entities': 300,
    'n_features': 40,
    'n_timepoints': 1,  # Change to >1 for temporal data
    'add_clusters': True,
}
df = generate_synthetic_data(**synthetic_options)

print(f"Generated: {df.shape[0]} rows, {df.shape[1]} columns")
df.head()

## 3. Configure and Ingest

In [None]:
# Configure these for your data
ENTITY_ID_COL = 'entity_id'  # Column with entity IDs
TIMESTAMP_COL = 'timestamp'  # Column with timestamps (optional, can be None)

# Select features (modify as needed)
# str() keeps the prefix test working when a column label is not a string
# (e.g. integer labels) instead of raising AttributeError.
FEATURE_COLS = [col for col in df.columns if str(col).startswith('feature_')]
# Or manually: FEATURE_COLS = ['feature1', 'feature2', ...]

# Optional: metadata columns (not used in projection, but kept for reference)
METADATA_COLS = ['cluster'] if 'cluster' in df.columns else []

# Optional: label column for coloring visualizations
LABEL_COL = 'cluster' if 'cluster' in df.columns else None

# Fail fast with a readable message: later cells index df by ENTITY_ID_COL
# and project the selected features, so neither may be missing.
if ENTITY_ID_COL not in df.columns:
    raise KeyError(
        f"ENTITY_ID_COL={ENTITY_ID_COL!r} is not a column of df. "
        f"Available columns: {list(df.columns)}"
    )
if not FEATURE_COLS:
    raise ValueError(
        "No feature columns selected: no column name starts with 'feature_'. "
        "Set FEATURE_COLS manually."
    )

print("Configuration:")
print(f"  Entity ID: {ENTITY_ID_COL}")
print(f"  Timestamp: {TIMESTAMP_COL}")
print(f"  Features: {len(FEATURE_COLS)} selected")
print(f"  First 5 features: {FEATURE_COLS[:5]}")
if LABEL_COL:
    print(f"  Label column: {LABEL_COL}")

In [None]:
# Hand the DataFrame to the system using the column roles configured above
ingest_options = {
    'entity_id_col': ENTITY_ID_COL,
    'timestamp_col': TIMESTAMP_COL,
    'feature_cols': FEATURE_COLS,
    'metadata_cols': METADATA_COLS,
    'preprocess': True,
}
system.ingest_dataframe(df, **ingest_options)

print("✓ Data ingested")
print(f"  Summary: {system.summary()}")

## 4. Prepare Feature Matrix

In [None]:
# Get feature matrix
# Choose preprocessing: standardize=True (z-score) or normalize=True (0-1 scaling)
X, entity_ids, feature_names = system.get_feature_matrix(standardize=True)

print(f"Feature matrix: {X.shape}")
print(f"  Entities: {len(entity_ids)}")
print(f"  Features: {len(feature_names)}")

# Get labels if available
labels = None
if LABEL_COL:
    # One label per entity (first occurrence). groupby() yields groups in
    # sorted-key order, which need not be the order of `entity_ids`, so the
    # labels are re-ordered to follow `entity_ids` before being used for coloring.
    label_by_entity = df.groupby(ENTITY_ID_COL)[LABEL_COL].first()
    n_unmatched = int((~pd.Index(entity_ids).isin(label_by_entity.index)).sum())
    if n_unmatched == 0:
        labels = label_by_entity.reindex(entity_ids).values
    else:
        # NOTE(review): IDs returned by the system do not all appear in the
        # DataFrame column (perhaps converted during ingest — confirm).
        # Fall back to the grouped order rather than filling labels with NaN.
        print(f"⚠ {n_unmatched} entity IDs not found in df[{ENTITY_ID_COL!r}]; labels may be misaligned")
        labels = label_by_entity.values
    print(f"  Labels: {len(set(labels))} unique values")

## 5. Run Manifold Projections

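Run the cells below to create the different projections. Later sections reference `pca_proj`, `tsne_proj`, `isomap_proj`, and `umap_proj`, so run all four cells before moving on.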

### PCA (Fast, Linear)

In [None]:
%%time
pca_proj = learner.project_pca(X, n_components=2, entity_ids=entity_ids)
print(f"✓ PCA complete")
print(f"  Variance explained: {pca_proj.metadata['total_explained_variance']:.2%}")

### t-SNE (Good for visualization, slower)

In [None]:
%%time
# Adjust perplexity based on dataset size (5-50 typically)
tsne_proj = learner.project_tsne(
    X, 
    n_components=2, 
    perplexity=30,
    entity_ids=entity_ids
)
print("✓ t-SNE complete")

### UMAP (Best balance of speed and quality)

In [None]:
%%time
# Check if UMAP available
if learner.check_dependencies()['umap']:
    umap_proj = learner.project_umap(
        X,
        n_components=2,
        n_neighbors=15,
        min_dist=0.1,
        entity_ids=entity_ids
    )
    print("✓ UMAP complete")
else:
    print("⚠ UMAP not available. Install with: pip install umap-learn")
    umap_proj = None

### Isomap (Preserves geodesic distances)

In [None]:
%%time
isomap_proj = learner.project_isomap(
    X,
    n_components=2,
    n_neighbors=5,
    entity_ids=entity_ids
)
print("✓ Isomap complete")

## 6. Visualize (Static)

Matplotlib visualizations

In [None]:
# Static 2-D plot of the PCA projection (labels may be None)
fig, ax = viz.plot_projection_2d(
    pca_proj,
    labels=labels,
)
plt.show()

In [None]:
# Static 2-D plot of the t-SNE projection (labels may be None)
fig, ax = viz.plot_projection_2d(
    tsne_proj,
    labels=labels,
)
plt.show()

In [None]:
# UMAP (if available)
# `umap_proj` is None when UMAP is not installed; test the sentinel explicitly
# rather than relying on the truthiness of the projection object.
if umap_proj is not None:
    fig, ax = viz.plot_projection_2d(umap_proj, labels=labels)
    plt.show()

### Compare All Methods Side-by-Side

In [None]:
# Collect projections, keyed by the title shown for each one
projections = {
    'PCA': pca_proj,
    't-SNE': tsne_proj,
    'Isomap': isomap_proj,
}
# UMAP is optional: `umap_proj` is None when the dependency is missing.
# Test the sentinel explicitly instead of the object's truthiness.
if umap_proj is not None:
    projections['UMAP'] = umap_proj

# Compare side by side, two plots per row
fig, axes = viz.compare_projections(projections, labels=labels, ncols=2)
plt.show()

### Variance Explained (PCA)

In [None]:
# Variance-explained plot for the PCA projection
fig, axes = viz.plot_variance_explained(
    pca_proj,
)
plt.show()

## 7. Interactive Visualizations (Plotly)

Zoomable, hoverable, explorable plots

In [None]:
# PCA Interactive; without Plotly only the install hint is printed
if not PLOTLY_AVAILABLE:
    print("Install plotly for interactive plots: pip install plotly")
else:
    fig = plotter.plot_projection_2d(pca_proj, labels=labels)
    fig.show()

In [None]:
# t-SNE Interactive (cell does nothing when Plotly is unavailable)
if PLOTLY_AVAILABLE:
    fig = plotter.plot_projection_2d(
        tsne_proj,
        labels=labels,
    )
    fig.show()

In [None]:
# UMAP Interactive
# Needs both Plotly and a UMAP projection; `umap_proj` is None when UMAP is
# not installed, so test the sentinel explicitly.
if PLOTLY_AVAILABLE and umap_proj is not None:
    fig = plotter.plot_projection_2d(umap_proj, labels=labels)
    fig.show()

### Interactive Comparison

In [None]:
# Interactive comparison of every projection collected in `projections`
# (that dict is built in the static side-by-side comparison cell)
if PLOTLY_AVAILABLE:
    fig = plotter.compare_projections_interactive(
        projections,
        labels=labels,
    )
    fig.show()

## 8. Analyze Specific Entities

In [None]:
# Find entities in specific regions of the manifold
# For example, entities in upper-right quadrant of PCA projection

embedding = pca_proj.embedding
entity_ids_list = pca_proj.entity_ids

# Filter entities: both components strictly positive
mask = (embedding[:, 0] > 0) & (embedding[:, 1] > 0)
# Pair each ID with its mask flag directly instead of indexing by position
selected_entities = [eid for eid, keep in zip(entity_ids_list, mask) if keep]

print(f"Found {len(selected_entities)} entities in upper-right quadrant")
print(f"Sample entities: {selected_entities[:10]}")

# Get their original data (all rows of df belonging to the selected entities)
selected_data = df[df[ENTITY_ID_COL].isin(selected_entities)]
selected_data.head()

## 9. Export Results

In [None]:
# Export PCA projection
# One column per embedding dimension, so the export keeps working when the
# projection is re-run with more than two components (e.g. 3-D).
n_components = pca_proj.embedding.shape[1]
component_cols = [f'component_{i + 1}' for i in range(n_components)]
pca_df = pd.DataFrame(pca_proj.embedding, columns=component_cols)
pca_df.insert(0, 'entity_id', pca_proj.entity_ids)

# Labels are optional; they must have one entry per projected row
if labels is not None:
    pca_df['label'] = labels

pca_df.to_csv('pca_projection.csv', index=False)
print("✓ Saved pca_projection.csv")
pca_df.head()

In [None]:
# Save interactive plot as HTML
if PLOTLY_AVAILABLE:
    # Prefer the UMAP projection and fall back to t-SNE when UMAP was not
    # computed. `umap_proj` is None in that case, so test the sentinel
    # explicitly instead of relying on the projection object's truthiness.
    export_proj = umap_proj if umap_proj is not None else tsne_proj
    fig = plotter.plot_projection_2d(export_proj, labels=labels)
    fig.write_html('interactive_projection.html')
    print("✓ Saved interactive_projection.html")
    print("  Open this file in a web browser to explore interactively")

## 10. Trajectory Analysis (If Temporal Data)

In [None]:
# Only run if you have temporal data (n_timepoints > 1)
# The timestamp column is optional, so also check that it exists in df before
# indexing with it (avoids a KeyError when the name is set but the column is absent).
has_temporal_data = (
    bool(TIMESTAMP_COL)
    and TIMESTAMP_COL in df.columns
    and df[TIMESTAMP_COL].nunique() > 1
)

if has_temporal_data:
    trajectories = system.get_trajectories()
    print(f"Found {len(trajectories)} entity trajectories")

    if len(trajectories) > 0:
        # Analyze a specific trajectory (the first key, without copying all keys)
        sample_entity = next(iter(trajectories))
        traj = trajectories[sample_entity]

        print(f"\nAnalyzing entity: {sample_entity}")
        print(f"  Number of snapshots: {len(traj.snapshots)}")

        # Get trajectory in manifold space
        # (Note: This requires projecting each snapshot, simplified here)
        analyzer = TrajectoryAnalyzer(pca_proj)

else:
    print("No temporal data available for trajectory analysis")
    print("Generate data with n_timepoints > 1 to enable this feature")

## Next Steps

1. **Experiment with parameters**: Try different perplexity, n_neighbors, etc.
2. **Feature selection**: Try different subsets of features
3. **Preprocessing**: Compare standardize vs normalize
4. **Dimensionality**: Try 3D projections
5. **Analysis**: Look for patterns, clusters, outliers
6. **Export**: Save your results for further analysis