In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_digits, make_swiss_roll, make_s_curve, fetch_openml
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import trustworthiness
from scipy.sparse import csr_matrix
from scipy.spatial.distance import pdist, squareform
import time
import warnings
warnings.filterwarnings('ignore')

# Optional dependency: record whether the umap package can be imported so
# later cells can choose between it and the simplified implementation.
try:
    import umap
except ImportError:
    UMAP_AVAILABLE = False
    print("UMAP library not available - will use simplified implementation")
else:
    UMAP_AVAILABLE = True
    print("UMAP library available!")

# Plot theme and global NumPy seed for the rest of the notebook
plt.style.use('seaborn-v0_8')
np.random.seed(42)

print("Libraries imported successfully!")


In [None]:
class SimplifiedUMAP:
    """Simplified UMAP implementation for educational purposes.

    ``fit_transform`` runs two stages: a k-nearest-neighbour graph with
    exponential-decay membership weights is built from the input, then a
    randomly initialised low-dimensional layout is refined with per-edge
    attractive updates plus randomly sampled repulsive updates.

    Parameters
    ----------
    n_neighbors : int, default=15
        Number of nearest neighbours (self excluded) used to build the graph.
    min_dist : float, default=0.1
        Constant used in the repulsive-update coefficient.
    n_components : int, default=2
        Dimensionality of the returned embedding.
    learning_rate : float, default=1.0
        Step size applied to every attractive and repulsive update.
    n_epochs : int, default=500
        Number of optimisation passes.
    random_state : int or None, default=42
        Seed of the private random generator used during optimisation.

    Attributes
    ----------
    losses : list of float
        Average attractive loss per epoch of the most recent optimisation run.
    """

    def __init__(self, n_neighbors=15, min_dist=0.1, n_components=2,
                 learning_rate=1.0, n_epochs=500, random_state=42):
        self.n_neighbors = n_neighbors
        self.min_dist = min_dist
        self.n_components = n_components
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.random_state = random_state

        # Training history
        self.losses = []

    def _compute_membership_strengths(self, distances, sigmas, rhos):
        """Compute fuzzy set membership strengths.

        Each row of ``distances`` is shifted by that row's ``rho`` (clipped at
        zero) and passed through ``exp(-d / sigma)``.

        Parameters
        ----------
        distances : ndarray of shape (n_samples, k)
        sigmas, rhos : ndarray of shape (n_samples,)

        Returns
        -------
        ndarray of shape (n_samples, k)
        """
        # Adjust distances by subtracting rho (distance to nearest neighbor)
        adjusted_distances = np.maximum(distances - rhos[:, np.newaxis], 0)

        # Compute membership using exponential decay
        return np.exp(-adjusted_distances / sigmas[:, np.newaxis])

    def _smooth_knn_dist(self, distances, k, bandwidth=1.0):
        """Find, per point, the sigma whose memberships sum to ``log2(k) * bandwidth``.

        Parameters
        ----------
        distances : ndarray of shape (n_samples, n_neighbors)
            Neighbour distances sorted ascending with the self-distance
            already removed, so column 0 is the nearest neighbour.
        k : int
            Neighbour count that defines the target sum.
        bandwidth : float, default=1.0
            Multiplier on the target sum.

        Returns
        -------
        sigmas : ndarray of shape (n_samples,)
        rhos : ndarray of shape (n_samples,)
            Distance from each point to its nearest neighbour.
        """
        n_points = distances.shape[0]
        sigmas = np.ones(n_points)
        # The caller strips the self column, so the nearest neighbour is at
        # index 0 (index 1 would be the second-nearest neighbour).
        rhos = distances[:, 0]
        target = np.log2(k) * bandwidth

        for i in range(n_points):
            # Independent of the candidate sigma, so computed once per point
            adjusted_dist = np.maximum(distances[i] - rhos[i], 0)

            # Binary search for sigma; the bracket is open-ended until both
            # an under- and an over-shooting value have been seen.
            beta_min = 0.0
            beta_max = np.inf
            beta = 1.0

            for _ in range(50):  # Max iterations
                sum_membership = np.sum(np.exp(-adjusted_dist / beta))

                if abs(sum_membership - target) < 1e-5:
                    break

                if sum_membership > target:
                    # Memberships too large -> shrink sigma
                    beta_max = beta
                    beta = beta / 2 if beta_min == 0 else (beta_min + beta_max) / 2
                else:
                    # Memberships too small -> grow sigma
                    beta_min = beta
                    beta = beta * 2 if beta_max == np.inf else (beta_min + beta_max) / 2

            sigmas[i] = beta

        return sigmas, rhos

    def _compute_graph_weights(self, X):
        """Compute the high-dimensional fuzzy simplicial set.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)

        Returns
        -------
        scipy.sparse matrix of shape (n_samples, n_samples)
            Symmetric graph whose entries are the average of the two directed
            membership strengths.
        """
        n_samples = X.shape[0]

        # Ask for one extra neighbour because each query point is part of the
        # fitted data; the first column is then dropped as the self match.
        nn = NearestNeighbors(n_neighbors=self.n_neighbors + 1, metric='euclidean')
        nn.fit(X)
        distances, indices = nn.kneighbors(X)
        distances = distances[:, 1:]
        indices = indices[:, 1:]

        # Compute sigmas and rhos, then the directed membership strengths
        sigmas, rhos = self._smooth_knn_dist(distances, self.n_neighbors)
        membership = self._compute_membership_strengths(distances, sigmas, rhos)

        # Build sparse graph: row i holds the weights from i to its neighbours
        rows = np.repeat(np.arange(n_samples), self.n_neighbors)
        cols = indices.flatten()
        vals = membership.flatten()
        graph = csr_matrix((vals, (rows, cols)), shape=(n_samples, n_samples))

        # Create symmetric matrix by averaging both directions
        graph = graph + graph.T
        graph.data = graph.data / 2.0

        return graph

    def _optimize_embedding(self, graph, n_components):
        """Optimize the low-dimensional embedding.

        Parameters
        ----------
        graph : scipy.sparse matrix of shape (n_samples, n_samples)
            Weighted graph; every stored entry is treated as an edge.
        n_components : int
            Dimensionality of the embedding.

        Returns
        -------
        ndarray of shape (n_samples, n_components)

        Notes
        -----
        Resets ``self.losses`` and appends one average loss per epoch.
        Randomness comes from a private ``RandomState(self.random_state)``,
        which yields the same stream as seeding the global generator would
        while leaving the global NumPy random state untouched.
        """
        n_samples = graph.shape[0]

        # Initialize embedding
        rng = np.random.RandomState(self.random_state)
        embedding = rng.uniform(-10, 10, (n_samples, n_components))

        # Convert graph to coordinate format for efficiency
        graph_coo = graph.tocoo()
        n_edges = len(graph_coo.data)

        print(f"Optimizing embedding with {n_edges} edges...")

        # Start a fresh history so repeated fits do not concatenate losses
        self.losses = []
        n_samples_epoch = min(n_edges, n_samples * 5)

        for epoch in range(self.n_epochs):
            total_loss = 0.0

            # Sample edges for this epoch
            edge_indices = rng.choice(n_edges, n_samples_epoch, replace=False)

            for idx in edge_indices:
                i, j = graph_coo.row[idx], graph_coo.col[idx]
                weight = graph_coo.data[idx]

                # Compute distance in embedding space
                diff = embedding[i] - embedding[j]
                dist_sq = np.sum(diff ** 2)

                # UMAP force computation (attractive: pulls i and j together)
                if dist_sq > 0:
                    grad_coeff = -2.0 * weight / (1.0 + dist_sq)
                    total_loss += weight * np.log(1.0 + dist_sq)
                else:
                    grad_coeff = 0

                # Apply gradients
                grad = grad_coeff * diff
                embedding[i] += self.learning_rate * grad
                embedding[j] -= self.learning_rate * grad

                # Negative sampling (simplified): push i away from random points
                for _ in range(5):  # 5 negative samples per positive
                    k = rng.randint(n_samples)
                    if k != i and k != j:
                        diff_neg = embedding[i] - embedding[k]
                        dist_sq_neg = np.sum(diff_neg ** 2)

                        if dist_sq_neg > 0:
                            grad_coeff_neg = 2.0 * self.min_dist / ((self.min_dist + dist_sq_neg) * (1.0 + dist_sq_neg))
                            grad_neg = grad_coeff_neg * diff_neg
                            embedding[i] += self.learning_rate * grad_neg
                            embedding[k] -= self.learning_rate * grad_neg

            # Store loss and print progress (an edgeless graph has zero loss)
            avg_loss = total_loss / n_samples_epoch if n_samples_epoch else 0.0
            self.losses.append(avg_loss)

            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Average Loss: {avg_loss:.4f}")

        return embedding

    def fit_transform(self, X):
        """Fit UMAP and return the embedding.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples, n_components)
        """
        print(f"Fitting Simplified UMAP on {X.shape[0]} samples...")

        # Build high-dimensional graph
        print("Building high-dimensional graph...")
        graph = self._compute_graph_weights(X)

        # Optimize embedding
        embedding = self._optimize_embedding(graph, self.n_components)

        print("UMAP fitting completed!")
        return embedding

# Test simplified UMAP on sample data
def test_simplified_umap():
    """Run SimplifiedUMAP and t-SNE on a 500-point swiss roll and compare them.

    Shows a three-panel scatter figure (raw data, SimplifiedUMAP, t-SNE) and
    the SimplifiedUMAP loss curve, then prints the trustworthiness of both
    embeddings at 10 neighbours.

    Returns
    -------
    tuple
        ``(Y_simple, Y_tsne, simple_umap)``: the two embeddings and the
        fitted SimplifiedUMAP instance.
    """
    # Standardised swiss-roll test data; `color` is the position along the roll
    X, color = make_swiss_roll(n_samples=500, noise=0.1, random_state=42)
    X_scaled = StandardScaler().fit_transform(X)

    print("=== Testing Simplified UMAP ===")

    simple_umap = SimplifiedUMAP(n_neighbors=15, min_dist=0.1, n_epochs=300)
    Y_simple = simple_umap.fit_transform(X_scaled)

    # Reference embedding from scikit-learn's t-SNE
    print("\nFitting t-SNE for comparison...")
    Y_tsne = TSNE(n_components=2, random_state=42, perplexity=30).fit_transform(X_scaled)

    # One scatter panel per point set: (points, title, x label, y label)
    panels = [
        (X_scaled, 'Original Data (First 2 Dims)', 'Dimension 1', 'Dimension 2'),
        (Y_simple, 'Simplified UMAP', 'UMAP 1', 'UMAP 2'),
        (Y_tsne, 't-SNE', 't-SNE 1', 't-SNE 2'),
    ]
    fig, axes = plt.subplots(1, 3, figsize=(18, 5))
    for ax, (points, title, xlabel, ylabel) in zip(axes, panels):
        ax.scatter(points[:, 0], points[:, 1], c=color, cmap='viridis', alpha=0.7)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)

    plt.tight_layout()
    plt.show()

    # Loss recorded by SimplifiedUMAP, one value per epoch
    plt.figure(figsize=(10, 5))
    plt.plot(simple_umap.losses)
    plt.title('UMAP Loss Evolution')
    plt.xlabel('Epoch')
    plt.ylabel('Average Loss')
    plt.grid(True, alpha=0.3)
    plt.show()

    # Neighbourhood preservation of each embedding
    trust_scores = {
        'Simplified UMAP': trustworthiness(X_scaled, Y_simple, n_neighbors=10),
        't-SNE': trustworthiness(X_scaled, Y_tsne, n_neighbors=10),
    }

    print(f"\nTrustworthiness Comparison:")
    for label, score in trust_scores.items():
        print(f"{label}: {score:.4f}")

    return Y_simple, Y_tsne, simple_umap

# Run test
# The call fits both models and shows the figures immediately; the returned
# (Y_simple, Y_tsne, simple_umap) tuple is kept in `simple_results`.
simple_results = test_simplified_umap()


In [None]:
# Comprehensive comparison of UMAP vs t-SNE
def _build_comparison_datasets():
    """Build the four benchmark datasets, each standardised with StandardScaler.

    Returns
    -------
    dict
        Maps dataset name to ``(X_scaled, labels)`` where ``labels`` is the
        value used to colour the scatter plots.
    """
    datasets = {}

    # 1. Swiss Roll
    X_swiss, color_swiss = make_swiss_roll(n_samples=1000, noise=0.1, random_state=42)
    datasets['Swiss Roll'] = (StandardScaler().fit_transform(X_swiss), color_swiss)

    # 2. S-Curve
    X_s, color_s = make_s_curve(n_samples=1000, noise=0.1, random_state=42)
    datasets['S-Curve'] = (StandardScaler().fit_transform(X_s), color_s)

    # 3. Digits dataset (subset for speed)
    digits = load_digits()
    datasets['Digits'] = (StandardScaler().fit_transform(digits.data[:800]), digits.target[:800])

    # 4. High-dimensional Gaussian clusters
    np.random.seed(42)
    n_clusters, cluster_size, dims = 4, 200, 50
    X_clusters, y_clusters = [], []
    for i in range(n_clusters):
        center = np.random.randn(dims) * 3
        X_clusters.append(center + np.random.randn(cluster_size, dims) * 0.8)
        y_clusters.extend([i] * cluster_size)
    datasets['High-D Clusters'] = (
        StandardScaler().fit_transform(np.vstack(X_clusters)),
        np.array(y_clusters),
    )

    return datasets


def _plot_comparison_summary(results):
    """Draw the summary bar charts: run time, trustworthiness and speed ratio."""
    fig, axes = plt.subplots(1, 3, figsize=(18, 5))

    datasets_names = list(results.keys())
    x = np.arange(len(datasets_names))
    width = 0.35

    # Time comparison (log scale because run times can differ by orders of magnitude)
    tsne_times = [results[name]['tsne_time'] for name in datasets_names]
    umap_times = [results[name]['umap_time'] for name in datasets_names]
    axes[0].bar(x - width/2, tsne_times, width, label='t-SNE', alpha=0.7)
    axes[0].bar(x + width/2, umap_times, width, label='UMAP', alpha=0.7)
    axes[0].set_xlabel('Dataset')
    axes[0].set_ylabel('Time (seconds)')
    axes[0].set_title('Computation Time Comparison')
    axes[0].set_xticks(x)
    axes[0].set_xticklabels(datasets_names, rotation=45)
    axes[0].legend()
    axes[0].set_yscale('log')

    # Trustworthiness comparison
    tsne_trusts = [results[name]['trust_tsne'] for name in datasets_names]
    umap_trusts = [results[name]['trust_umap'] for name in datasets_names]
    axes[1].bar(x - width/2, tsne_trusts, width, label='t-SNE', alpha=0.7)
    axes[1].bar(x + width/2, umap_trusts, width, label='UMAP', alpha=0.7)
    axes[1].set_xlabel('Dataset')
    axes[1].set_ylabel('Trustworthiness')
    axes[1].set_title('Trustworthiness Comparison')
    axes[1].set_xticks(x)
    axes[1].set_xticklabels(datasets_names, rotation=45)
    axes[1].legend()

    # Speed ratio
    speed_ratios = [t / u for t, u in zip(tsne_times, umap_times)]
    bars = axes[2].bar(datasets_names, speed_ratios, alpha=0.7, color='green')
    axes[2].set_xlabel('Dataset')
    axes[2].set_ylabel('Speed Ratio (t-SNE/UMAP)')
    axes[2].set_title('UMAP Speed Advantage')
    axes[2].tick_params(axis='x', rotation=45)

    # Add value labels on bars
    for bar, ratio in zip(bars, speed_ratios):
        axes[2].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                    f'{ratio:.1f}x', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()


def compare_umap_tsne():
    """Compare UMAP and t-SNE on multiple datasets.

    For each dataset both methods are fitted and timed, their
    trustworthiness is computed, and a row of three scatter plots (raw data,
    t-SNE, UMAP) is drawn. A summary table and summary bar charts follow.
    ``umap.UMAP`` is used when ``UMAP_AVAILABLE`` is true, otherwise
    ``SimplifiedUMAP``.

    Returns
    -------
    results : dict
        Maps dataset name to a dict with keys ``'tsne_time'``,
        ``'umap_time'``, ``'trust_tsne'``, ``'trust_umap'``,
        ``'tsne_embedding'`` and ``'umap_embedding'``.
    comparison_df : pandas.DataFrame
        One row per dataset with the times, trustworthiness scores and the
        t-SNE/UMAP time ratio.
    """
    datasets = _build_comparison_datasets()

    # Comparison metrics
    results = {}

    fig, axes = plt.subplots(len(datasets), 3, figsize=(18, 4 * len(datasets)))
    if len(datasets) == 1:
        axes = axes.reshape(1, -1)

    for dataset_idx, (name, (X, y)) in enumerate(datasets.items()):
        print(f"\n=== Processing {name} Dataset ===")
        print(f"Shape: {X.shape}")

        # t-SNE (perf_counter is monotonic, so intervals are not affected by
        # wall-clock adjustments)
        print("Fitting t-SNE...")
        tsne_start = time.perf_counter()
        tsne = TSNE(n_components=2, random_state=42, perplexity=30, max_iter=300)
        Y_tsne = tsne.fit_transform(X)
        tsne_time = time.perf_counter() - tsne_start

        # UMAP (use library version if available, otherwise simplified)
        print("Fitting UMAP...")
        umap_start = time.perf_counter()
        if UMAP_AVAILABLE:
            umap_model = umap.UMAP(n_neighbors=15, min_dist=0.1, random_state=42, n_epochs=200)
            Y_umap = umap_model.fit_transform(X)
        else:
            # Use our simplified version with reduced epochs for speed
            simple_umap = SimplifiedUMAP(n_neighbors=15, min_dist=0.1, n_epochs=100)
            Y_umap = simple_umap.fit_transform(X)
        umap_time = time.perf_counter() - umap_start

        # Calculate trustworthiness with the same neighbourhood size for both
        trust_k = min(10, X.shape[0] // 10)
        trust_tsne = trustworthiness(X, Y_tsne, n_neighbors=trust_k)
        trust_umap = trustworthiness(X, Y_umap, n_neighbors=trust_k)

        # Store results
        results[name] = {
            'tsne_time': tsne_time,
            'umap_time': umap_time,
            'trust_tsne': trust_tsne,
            'trust_umap': trust_umap,
            'tsne_embedding': Y_tsne,
            'umap_embedding': Y_umap
        }

        # Plot original data (only the first two dimensions are shown)
        ax_orig, ax_tsne, ax_umap = axes[dataset_idx]
        if X.shape[1] >= 2:
            ax_orig.scatter(X[:, 0], X[:, 1], c=y, cmap='tab10', alpha=0.7)
        else:
            ax_orig.scatter(X[:, 0], np.zeros_like(X[:, 0]), c=y, cmap='tab10', alpha=0.7)
        ax_orig.set_title(f'{name}\nOriginal Data')
        ax_orig.set_xlabel('Dim 1')
        ax_orig.set_ylabel('Dim 2')

        # Plot t-SNE
        ax_tsne.scatter(Y_tsne[:, 0], Y_tsne[:, 1], c=y, cmap='tab10', alpha=0.7)
        ax_tsne.set_title(f't-SNE\nTrust: {trust_tsne:.3f}, Time: {tsne_time:.1f}s')
        ax_tsne.set_xlabel('t-SNE 1')
        ax_tsne.set_ylabel('t-SNE 2')

        # Plot UMAP
        ax_umap.scatter(Y_umap[:, 0], Y_umap[:, 1], c=y, cmap='tab10', alpha=0.7)
        ax_umap.set_title(f'UMAP\nTrust: {trust_umap:.3f}, Time: {umap_time:.1f}s')
        ax_umap.set_xlabel('UMAP 1')
        ax_umap.set_ylabel('UMAP 2')

        print(f"Results - t-SNE: {tsne_time:.1f}s, Trust: {trust_tsne:.3f}")
        print(f"        UMAP: {umap_time:.1f}s, Trust: {trust_umap:.3f}")

    plt.tight_layout()
    plt.show()

    # Summary comparison
    print("\n" + "="*60)
    print("SUMMARY COMPARISON")
    print("="*60)

    # Create comparison dataframe (built from a list of records in one go)
    comparison_df = pd.DataFrame([
        {
            'Dataset': name,
            't-SNE Time (s)': result['tsne_time'],
            'UMAP Time (s)': result['umap_time'],
            't-SNE Trust': result['trust_tsne'],
            'UMAP Trust': result['trust_umap'],
            'Speed Ratio': result['tsne_time'] / result['umap_time']
        }
        for name, result in results.items()
    ])
    print(comparison_df.round(3))

    # Plot summary metrics
    _plot_comparison_summary(results)

    return results, comparison_df

# Run comprehensive comparison
# The call fits every model and shows all figures; the per-dataset results
# dict and the summary DataFrame it returns are kept for later inspection.
print("=== UMAP vs t-SNE Comprehensive Comparison ===")
comparison_results, comparison_summary = compare_umap_tsne()


In [None]:
# Analyze UMAP parameter effects
def analyze_umap_parameters():
    """Analyze how different UMAP parameters affect the embedding.

    Uses the first 600 standardised samples of the digits dataset. When the
    UMAP library is available, ``n_neighbors`` and ``min_dist`` are swept one
    at a time (the other stays at 15 / 0.1). Each embedding is plotted and
    scored with trustworthiness at 10 neighbours, followed by trend plots and
    a printed summary. Without the library, a single SimplifiedUMAP embedding
    of the first 200 samples is plotted instead.

    Returns
    -------
    tuple of (dict, dict) or None
        ``(neighbor_results, mindist_results)``; each maps a tested value to
        ``{'embedding': ..., 'trustworthiness': ...}``. ``None`` when the
        UMAP library is not available.
    """
    # Use digits dataset for parameter analysis
    digits = load_digits()
    X = digits.data[:600]  # Subset for speed
    y = digits.target[:600]
    X_scaled = StandardScaler().fit_transform(X)

    print(f"Analyzing UMAP parameters on digits dataset: {X_scaled.shape}")

    # Parameter ranges to test
    param_tests = {
        'n_neighbors': [5, 15, 30, 50, 100],
        'min_dist': [0.01, 0.1, 0.3, 0.5, 0.99]
    }

    if not UMAP_AVAILABLE:
        print("UMAP library not available - using simplified analysis")
        # Create a simple demonstration with our simplified UMAP
        simple_umap = SimplifiedUMAP(n_neighbors=15, min_dist=0.1, n_epochs=100)
        Y_demo = simple_umap.fit_transform(X_scaled[:200])  # Smaller subset

        plt.figure(figsize=(10, 5))
        plt.scatter(Y_demo[:, 0], Y_demo[:, 1], c=y[:200], cmap='tab10', alpha=0.7)
        plt.title('Simplified UMAP on Digits Dataset')
        plt.xlabel('UMAP 1')
        plt.ylabel('UMAP 2')
        plt.colorbar(label='Digit Class')
        plt.show()

        return None

    def sweep(param_name):
        """Fit UMAP once per tested value of `param_name`, plot and score each fit."""
        values = param_tests[param_name]
        print(f"\n=== Testing {param_name} effect ===")
        fig, axes = plt.subplots(1, len(values), figsize=(20, 4))

        sweep_results = {}
        for ax, value in zip(axes, values):
            print(f"Testing {param_name} = {value}")

            # Baseline settings; only the swept parameter is overridden
            umap_kwargs = {'n_neighbors': 15, 'min_dist': 0.1,
                           'random_state': 42, 'n_epochs': 200}
            umap_kwargs[param_name] = value
            Y_umap = umap.UMAP(**umap_kwargs).fit_transform(X_scaled)

            trust_score = trustworthiness(X_scaled, Y_umap, n_neighbors=10)
            sweep_results[value] = {
                'embedding': Y_umap,
                'trustworthiness': trust_score
            }

            # Plot
            ax.scatter(Y_umap[:, 0], Y_umap[:, 1], c=y, cmap='tab10', alpha=0.7)
            ax.set_title(f'{param_name} = {value}\nTrust: {trust_score:.3f}')
            ax.set_xlabel('UMAP 1')
            ax.set_ylabel('UMAP 2')

        plt.tight_layout()
        plt.show()
        return sweep_results

    neighbor_results = sweep('n_neighbors')
    mindist_results = sweep('min_dist')

    # Parameter effect analysis: trustworthiness as a function of each parameter
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    trend_panels = [
        (axes[0], neighbor_results, 'n_neighbors', {}),
        (axes[1], mindist_results, 'min_dist', {'color': 'red'}),
    ]
    for ax, sweep_results, param_name, line_style in trend_panels:
        tested = list(sweep_results.keys())
        scores = [sweep_results[v]['trustworthiness'] for v in tested]
        ax.plot(tested, scores, 'o-', linewidth=2, markersize=8, **line_style)
        ax.set_xlabel(param_name)
        ax.set_ylabel('Trustworthiness')
        ax.set_title(f'Effect of {param_name} on Trustworthiness')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Summary analysis
    print("\n=== Parameter Analysis Summary ===")
    print("\nn_neighbors effect:")
    for n, result in neighbor_results.items():
        print(f"  {n:3d}: Trustworthiness = {result['trustworthiness']:.4f}")

    print("\nmin_dist effect:")
    for d, result in mindist_results.items():
        print(f"  {d:4.2f}: Trustworthiness = {result['trustworthiness']:.4f}")

    print("\nKey Insights:")
    print("- n_neighbors controls local vs global structure balance")
    print("- Smaller n_neighbors: more local structure, tighter clusters")
    print("- Larger n_neighbors: more global structure, may lose fine details")
    print("- min_dist controls how tightly points are packed")
    print("- Smaller min_dist: tighter clusters")
    print("- Larger min_dist: more spread out, clearer separation")

    return neighbor_results, mindist_results

# Run parameter analysis
print("=== UMAP Parameter Analysis ===")
# Bind the result unconditionally so `param_results` exists after this cell
# either way: analyze_umap_parameters() returns None when the UMAP library is
# unavailable (it then only shows the simplified demo).
param_results = analyze_umap_parameters()


In [None]:
# Analyze UMAP parameter effects
def analyze_umap_parameters():
    """Analyze how different UMAP parameters affect the embedding.

    Uses the first 600 standardised samples of the digits dataset. When the
    UMAP library is available, ``n_neighbors`` and ``min_dist`` are swept one
    at a time (the other stays at 15 / 0.1). Each embedding is plotted and
    scored with trustworthiness at 10 neighbours, followed by trend plots and
    a printed summary. Without the library, a single SimplifiedUMAP embedding
    of the first 200 samples is plotted instead.

    Returns
    -------
    tuple of (dict, dict) or None
        ``(neighbor_results, mindist_results)``; each maps a tested value to
        ``{'embedding': ..., 'trustworthiness': ...}``. ``None`` when the
        UMAP library is not available.
    """
    # NOTE(review): a function with this same name is already defined in the
    # preceding cell; this definition shadows it. Consider deleting one of
    # the two cells.

    # Use digits dataset for parameter analysis
    digits = load_digits()
    X = digits.data[:600]  # Subset for speed
    y = digits.target[:600]
    X_scaled = StandardScaler().fit_transform(X)

    print(f"Analyzing UMAP parameters on digits dataset: {X_scaled.shape}")

    # Parameter ranges to test
    param_tests = {
        'n_neighbors': [5, 15, 30, 50, 100],
        'min_dist': [0.01, 0.1, 0.3, 0.5, 0.99]
    }

    if not UMAP_AVAILABLE:
        print("UMAP library not available - using simplified analysis")
        # Create a simple demonstration with our simplified UMAP
        simple_umap = SimplifiedUMAP(n_neighbors=15, min_dist=0.1, n_epochs=100)
        Y_demo = simple_umap.fit_transform(X_scaled[:200])  # Smaller subset

        plt.figure(figsize=(10, 5))
        plt.scatter(Y_demo[:, 0], Y_demo[:, 1], c=y[:200], cmap='tab10', alpha=0.7)
        plt.title('Simplified UMAP on Digits Dataset')
        plt.xlabel('UMAP 1')
        plt.ylabel('UMAP 2')
        plt.colorbar(label='Digit Class')
        plt.show()

        return None

    def sweep(param_name):
        """Fit UMAP once per tested value of `param_name`, plot and score each fit."""
        values = param_tests[param_name]
        print(f"\n=== Testing {param_name} effect ===")
        fig, axes = plt.subplots(1, len(values), figsize=(20, 4))

        sweep_results = {}
        for ax, value in zip(axes, values):
            print(f"Testing {param_name} = {value}")

            # Baseline settings; only the swept parameter is overridden
            umap_kwargs = {'n_neighbors': 15, 'min_dist': 0.1,
                           'random_state': 42, 'n_epochs': 200}
            umap_kwargs[param_name] = value
            Y_umap = umap.UMAP(**umap_kwargs).fit_transform(X_scaled)

            trust_score = trustworthiness(X_scaled, Y_umap, n_neighbors=10)
            sweep_results[value] = {
                'embedding': Y_umap,
                'trustworthiness': trust_score
            }

            # Plot
            ax.scatter(Y_umap[:, 0], Y_umap[:, 1], c=y, cmap='tab10', alpha=0.7)
            ax.set_title(f'{param_name} = {value}\nTrust: {trust_score:.3f}')
            ax.set_xlabel('UMAP 1')
            ax.set_ylabel('UMAP 2')

        plt.tight_layout()
        plt.show()
        return sweep_results

    neighbor_results = sweep('n_neighbors')
    mindist_results = sweep('min_dist')

    # Parameter effect analysis: trustworthiness as a function of each parameter
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    trend_panels = [
        (axes[0], neighbor_results, 'n_neighbors', {}),
        (axes[1], mindist_results, 'min_dist', {'color': 'red'}),
    ]
    for ax, sweep_results, param_name, line_style in trend_panels:
        tested = list(sweep_results.keys())
        scores = [sweep_results[v]['trustworthiness'] for v in tested]
        ax.plot(tested, scores, 'o-', linewidth=2, markersize=8, **line_style)
        ax.set_xlabel(param_name)
        ax.set_ylabel('Trustworthiness')
        ax.set_title(f'Effect of {param_name} on Trustworthiness')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Summary analysis
    print("\n=== Parameter Analysis Summary ===")
    print("\nn_neighbors effect:")
    for n, result in neighbor_results.items():
        print(f"  {n:3d}: Trustworthiness = {result['trustworthiness']:.4f}")

    print("\nmin_dist effect:")
    for d, result in mindist_results.items():
        print(f"  {d:4.2f}: Trustworthiness = {result['trustworthiness']:.4f}")

    print("\nKey Insights:")
    print("- n_neighbors controls local vs global structure balance")
    print("- Smaller n_neighbors: more local structure, tighter clusters")
    print("- Larger n_neighbors: more global structure, may lose fine details")
    print("- min_dist controls how tightly points are packed")
    print("- Smaller min_dist: tighter clusters")
    print("- Larger min_dist: more spread out, clearer separation")

    return neighbor_results, mindist_results

# Run parameter analysis
# NOTE(review): the preceding cell already runs this same analysis, so this
# call repeats the whole sweep. Consider removing one of the two cells.
print("=== UMAP Parameter Analysis ===")
# Bind the result unconditionally so `param_results` exists after this cell
# either way: analyze_umap_parameters() returns None when the UMAP library is
# unavailable (it then only shows the simplified demo).
param_results = analyze_umap_parameters()
