In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_swiss_roll, make_s_curve, load_digits, fetch_openml
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE, MDS, Isomap, LocallyLinearEmbedding
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import trustworthiness
from mpl_toolkits.mplot3d import Axes3D
import warnings
# Suppress every warning for the rest of the session to keep cell output clean.
# NOTE(review): this is a blanket filter, so deprecation and convergence
# warnings raised by the libraries above are hidden too - consider narrowing it.
warnings.filterwarnings('ignore')

# Set random seed for reproducibility
# This seeds NumPy's global RNG; any later np.random.* call draws from it, so
# re-running a cell without re-running this one yields different random numbers.
np.random.seed(42)

print("Libraries imported successfully!")


In [None]:
# Create manifold datasets
def create_manifold_datasets(n_samples=1000, noise=0.1, random_state=42):
    """Create the manifold datasets used throughout the notebook.

    Parameters
    ----------
    n_samples : int, default=1000
        Number of points generated for each synthetic dataset (Swiss roll,
        S-curve and spiral). The digits dataset always has its native size.
    noise : float, default=0.1
        Standard deviation of the Gaussian noise passed to the Swiss-roll and
        S-curve generators, and of the extra noise dimensions appended to the
        spiral.
    random_state : int or None, default=42
        Seed for the generators. A dedicated RandomState is used for the
        spiral noise, so the output does not depend on (or advance) NumPy's
        global RNG and the cell can be re-run with identical results.

    Returns
    -------
    dict
        Maps a dataset name to a ``(X, labels)`` tuple, where ``X`` is the
        standardized feature matrix:

        - ``'swiss_roll'``: 3 features, labels = position along the roll
        - ``'s_curve'``: 3 features, labels = position along the curve
        - ``'spiral'``: 10 features (2 spiral coordinates + 8 noise columns),
          labels = spiral parameter ``t``
        - ``'digits'``: 64 pixel features, labels = digit class
    """
    # 1. Swiss Roll - 2D manifold in 3D space
    X_swiss, color_swiss = make_swiss_roll(
        n_samples=n_samples, noise=noise, random_state=random_state
    )

    # 2. S-Curve - 2D manifold in 3D space
    X_s_curve, color_s_curve = make_s_curve(
        n_samples=n_samples, noise=noise, random_state=random_state
    )

    # 3. Planar spiral (two turns), later embedded in higher dimensions
    t = np.linspace(0, 4 * np.pi, n_samples)
    X_spiral_2d = np.column_stack([t * np.cos(t), t * np.sin(t)])

    # Append 8 pure-noise dimensions. A local generator keeps this
    # reproducible regardless of what else has consumed the global RNG.
    rng = np.random.RandomState(random_state)
    X_spiral_hd = np.column_stack([
        X_spiral_2d,
        rng.normal(0, noise, (n_samples, 8)),
    ])

    # 4. Load digits dataset (high-dimensional real data)
    digits = load_digits()
    X_digits = digits.data
    y_digits = digits.target

    # Standardize every dataset (each gets its own scaler) so that all of
    # them are returned on the same zero-mean / unit-variance footing.
    X_swiss_scaled = StandardScaler().fit_transform(X_swiss)
    X_s_curve_scaled = StandardScaler().fit_transform(X_s_curve)
    X_spiral_scaled = StandardScaler().fit_transform(X_spiral_hd)
    X_digits_scaled = StandardScaler().fit_transform(X_digits)

    return {
        'swiss_roll': (X_swiss_scaled, color_swiss),
        's_curve': (X_s_curve_scaled, color_s_curve),
        'spiral': (X_spiral_scaled, t),
        'digits': (X_digits_scaled, y_digits)
    }

# Create datasets
datasets = create_manifold_datasets()


def _scatter_manifold_3d(ax, X, colors, title):
    """Scatter the first three columns of X on a 3D axis, coloured by `colors`."""
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=colors, cmap='viridis')
    ax.set_title(title)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')


X_swiss, color_swiss = datasets['swiss_roll']
X_s_curve, color_s_curve = datasets['s_curve']
X_spiral, t_spiral = datasets['spiral']
X_digits, y_digits = datasets['digits']

# Visualize the 3D manifolds
fig = plt.figure(figsize=(20, 5))

# Swiss Roll
ax1 = fig.add_subplot(141, projection='3d')
_scatter_manifold_3d(ax1, X_swiss, color_swiss, 'Swiss Roll Manifold')

# S-Curve
ax2 = fig.add_subplot(142, projection='3d')
_scatter_manifold_3d(ax2, X_s_curve, color_s_curve, 'S-Curve Manifold')

# High-dimensional spiral (show first 3 dimensions)
ax3 = fig.add_subplot(143, projection='3d')
_scatter_manifold_3d(ax3, X_spiral, t_spiral, 'Spiral in High Dimensions')

# Digits (show a sample image)
ax4 = fig.add_subplot(144)
# datasets['digits'] holds standardized features (per-pixel z-scores), which
# do not look like the digit when reshaped. Show the raw 8x8 pixel image of
# the same sample instead; the sample order is unchanged by standardization.
digit_img = load_digits().images[0]
ax4.imshow(digit_img, cmap='gray')
ax4.set_title(f'Sample Digit: {y_digits[0]}')
ax4.axis('off')

plt.tight_layout()
plt.show()

print("Manifold datasets created and visualized!")
print(f"Swiss Roll: {X_swiss.shape} -> 2D manifold in 3D space")
print(f"S-Curve: {X_s_curve.shape} -> 2D manifold in 3D space")
print(f"Spiral: {X_spiral.shape} -> 2D manifold in 10D space")
print(f"Digits: {X_digits.shape} -> Manifold in 64D space")


In [None]:
# Compare manifold learning techniques
def compare_manifold_techniques(X, colors, title, n_components=2,
                                max_samples=1000, n_neighbors=10, random_state=42):
    """Embed X with several manifold learners, plot them side by side and score them.

    PCA, t-SNE, MDS, Isomap and LLE are each fitted on (a subsample of) X.
    One scatter panel per method shows the first two embedding components,
    coloured by `colors`, with the method's trustworthiness in the title.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to embed.
    colors : array-like of shape (n_samples,)
        Per-sample values used to colour the scatter plots.
    title : str
        Dataset name shown in the figure title.
    n_components : int, default=2
        Embedding dimension. Only the first two components are plotted.
    max_samples : int, default=1000
        If X has more rows than this, a random subset of this size is used.
    n_neighbors : int, default=10
        Neighbourhood size for Isomap, LLE and the trustworthiness score.
    random_state : int or None, default=42
        Seed for the subsampling and for the stochastic methods.

    Returns
    -------
    results : dict
        Method name -> embedded coordinates. Methods that failed are absent.
    trustworthiness_scores : dict
        Method name -> trustworthiness (higher is better). A failed method is
        recorded as NaN so it cannot be mistaken for a genuine score of 0.
    """
    X = np.asarray(X)
    colors = np.asarray(colors)  # fancy indexing below needs an array, not a list

    # Reduce sample size for computational efficiency. A local generator keeps
    # the chosen subset reproducible and leaves the global RNG untouched.
    if X.shape[0] > max_samples:
        rng = np.random.RandomState(random_state)
        indices = rng.choice(X.shape[0], max_samples, replace=False)
        X_sample = X[indices]
        colors_sample = colors[indices]
    else:
        X_sample = X
        colors_sample = colors

    # Define techniques
    techniques = {
        'PCA': PCA(n_components=n_components),
        't-SNE': TSNE(n_components=n_components, random_state=random_state, perplexity=30),
        'MDS': MDS(n_components=n_components, random_state=random_state),
        'Isomap': Isomap(n_components=n_components, n_neighbors=n_neighbors),
        'LLE': LocallyLinearEmbedding(n_components=n_components, n_neighbors=n_neighbors,
                                      random_state=random_state)
    }

    # Apply techniques and measure performance
    results = {}
    trustworthiness_scores = {}

    fig, axes = plt.subplots(1, len(techniques), figsize=(20, 4))

    for ax, (name, technique) in zip(axes, techniques.items()):
        try:
            # Fit and transform
            X_embedded = technique.fit_transform(X_sample)

            # Calculate trustworthiness (higher is better)
            trust_score = trustworthiness(X_sample, X_embedded, n_neighbors=n_neighbors)
            trustworthiness_scores[name] = trust_score

            # Plot
            ax.scatter(X_embedded[:, 0], X_embedded[:, 1],
                       c=colors_sample, cmap='viridis', alpha=0.7)
            ax.set_title(f'{name}\nTrustworthiness: {trust_score:.3f}')
            ax.set_xlabel('Component 1')
            ax.set_ylabel('Component 2')

            results[name] = X_embedded

        except Exception as e:
            # Best effort: one failing method must not abort the comparison.
            # Show the (truncated) error in its panel and mark the score missing.
            ax.text(0.5, 0.5, f'{name}\nFailed: {str(e)[:20]}...',
                    ha='center', va='center', transform=ax.transAxes)
            ax.set_title(f'{name}\nFailed')
            trustworthiness_scores[name] = np.nan

    fig.suptitle(f'Manifold Learning Comparison: {title}', fontsize=16)
    plt.tight_layout()
    plt.show()

    return results, trustworthiness_scores

# Test on Swiss Roll
print("=== Swiss Roll Analysis ===")
X_swiss, color_swiss = datasets['swiss_roll']
swiss_results, swiss_trust = compare_manifold_techniques(X_swiss, color_swiss, "Swiss Roll")

# Test on Spiral
# ("\n" rather than "\\n": the escaped form prints a literal backslash-n
# instead of the intended blank line before each section header.)
print("\n=== High-Dimensional Spiral Analysis ===")
X_spiral, t_spiral = datasets['spiral']
spiral_results, spiral_trust = compare_manifold_techniques(X_spiral, t_spiral, "High-D Spiral")

# Test on Digits (subset for speed)
print("\n=== Digits Dataset Analysis ===")
X_digits, y_digits = datasets['digits']
# Use only first 500 samples for speed
X_digits_subset = X_digits[:500]
y_digits_subset = y_digits[:500]
digits_results, digits_trust = compare_manifold_techniques(X_digits_subset, y_digits_subset, "Digits")

# Summary of trustworthiness scores: one row per dataset, one column per method
print("\n=== Trustworthiness Summary ===")
summary_df = pd.DataFrame({
    'Swiss Roll': swiss_trust,
    'High-D Spiral': spiral_trust,
    'Digits': digits_trust
}).T

print(summary_df)

# Plot trustworthiness comparison on an explicit axis
summary_fig, summary_ax = plt.subplots(figsize=(12, 6))
summary_df.plot(kind='bar', ax=summary_ax)
summary_ax.set_title('Trustworthiness Scores by Dataset and Method')
summary_ax.set_ylabel('Trustworthiness Score')
summary_ax.set_xlabel('Dataset')
# Place the legend outside the axes so it does not cover the bars
summary_ax.legend(title='Method', bbox_to_anchor=(1.05, 1), loc='upper left')
summary_ax.tick_params(axis='x', labelrotation=45)
summary_fig.tight_layout()
plt.show()


In [None]:
class SimpleTSNE:
    """Simplified t-SNE implementation for educational purposes.

    The algorithm has three stages:

    1. For every point, a Gaussian bandwidth is found by bisection so that the
       conditional distribution p_{j|i} has the requested perplexity.
    2. The conditionals are symmetrised into joint probabilities P.
    3. An embedding Y is optimised with plain gradient descent on KL(P || Q),
       where Q is built from a Student-t kernel with one degree of freedom.

    There is no momentum or early exaggeration, and every step works on dense
    n x n matrices, so this is only meant for small inputs.

    Parameters
    ----------
    n_components : int, default=2
        Dimension of the embedding.
    perplexity : float, default=30
        Target perplexity of each conditional distribution. It should be
        smaller than the number of samples minus one, otherwise the bisection
        cannot reach it.
    learning_rate : float, default=200
        Step size of the gradient descent.
    n_iter : int, default=1000
        Number of gradient-descent iterations.
    random_state : int, numpy.random.RandomState or None, default=None
        Seed (or generator) for the initial embedding. ``None`` draws from
        NumPy's global RNG.

    Attributes
    ----------
    costs : list of float
        KL divergence at every iteration of the most recent ``fit_transform``.
    """

    def __init__(self, n_components=2, perplexity=30, learning_rate=200, n_iter=1000,
                 random_state=None):
        self.n_components = n_components
        self.perplexity = perplexity
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.random_state = random_state
        self.costs = []

    def _pairwise_distances(self, X):
        """Compute pairwise squared Euclidean distances"""
        sum_X = np.sum(X**2, axis=1)
        D = sum_X[:, np.newaxis] + sum_X[np.newaxis, :] - 2 * np.dot(X, X.T)
        # Round-off can make tiny distances slightly negative
        return np.maximum(D, 0)

    def _kernel_row(self, distances, i, beta):
        """Return the normalised Gaussian kernel row p_{.|i} for precision `beta`.

        The row is shifted by its smallest off-diagonal distance before
        exponentiating. The shift cancels in the normalisation, but it
        guarantees the nearest neighbour contributes exp(0) = 1, so the row
        sum can never underflow to zero however large the distances are.
        """
        Di = np.array(distances[i], dtype=float)
        Di[i] = np.inf            # exclude the point itself
        Di -= np.min(Di)          # stabilise; the self entry stays +inf
        Pi = np.exp(-beta * Di)
        Pi[i] = 0.0
        return Pi / np.sum(Pi)

    def _compute_conditional_probabilities(self, distances, sigmas):
        """Compute conditional probabilities p_{j|i}.

        `distances` holds squared distances and `sigmas` one bandwidth per
        row. Every row of the result sums to 1 and has a zero diagonal.
        """
        n = distances.shape[0]
        P = np.zeros((n, n))

        for i in range(n):
            beta = 1.0 / (2.0 * sigmas[i]**2)
            P[i] = self._kernel_row(distances, i, beta)

        return P

    def _binary_search_perplexity(self, distances, target_perplexity, tol=1e-5, max_iter=50):
        """Binary search to find the sigma of each point that gives the target perplexity.

        The search runs on the precision beta = 1 / (2 * sigma**2): a larger
        beta narrows the kernel and lowers the perplexity. Until both bounds
        are known, beta is doubled or halved; afterwards the bracket is
        bisected. Each row stops after `max_iter` steps or once the perplexity
        is within `tol` of the target.
        """
        n = distances.shape[0]
        sigmas = np.ones(n)

        for i in range(n):
            beta_min = -np.inf
            beta_max = np.inf
            beta = 1.0

            for _ in range(max_iter):
                Pi = self._kernel_row(distances, i, beta)

                # Perplexity = 2 ** (Shannon entropy in bits)
                H = -np.sum(Pi * np.log2(Pi + 1e-12))
                perp_diff = 2**H - target_perplexity

                if abs(perp_diff) < tol:
                    break

                if perp_diff > 0:
                    # Too flat: raise the precision
                    beta_min = beta
                    beta = beta * 2 if beta_max == np.inf else (beta + beta_max) / 2
                else:
                    # Too peaked: lower the precision
                    beta_max = beta
                    beta = beta / 2 if beta_min == -np.inf else (beta + beta_min) / 2

            sigmas[i] = 1.0 / np.sqrt(2.0 * beta)

        return sigmas

    def _compute_joint_probabilities(self, X):
        """Compute joint probabilities in high-dimensional space"""
        distances = self._pairwise_distances(X)
        n = X.shape[0]

        # Binary search for sigmas
        sigmas = self._binary_search_perplexity(distances, self.perplexity)

        # Compute conditional probabilities
        P = self._compute_conditional_probabilities(distances, sigmas)

        # Symmetrize to get joint probabilities: p_ij = (p_{j|i} + p_{i|j}) / 2n
        P_joint = (P + P.T) / (2.0 * n)
        P_joint = np.maximum(P_joint, 1e-12)  # Floor keeps log(P / Q) finite

        return P_joint

    def _compute_low_dim_probabilities(self, Y):
        """Compute probabilities in low-dimensional space using Student's t-distribution"""
        distances = self._pairwise_distances(Y)

        # Student's t-distribution with 1 degree of freedom (Cauchy distribution)
        Q = 1.0 / (1.0 + distances)

        # Set diagonal to zero
        np.fill_diagonal(Q, 0)

        # Normalize over all pairs
        Q = Q / np.sum(Q)
        Q = np.maximum(Q, 1e-12)  # Floor keeps log(P / Q) finite

        return Q

    def _compute_gradient(self, P, Q, Y):
        """Compute gradient of KL divergence with respect to the embedding Y.

        For each point i:
            grad_i = 4 * sum_j (p_ij - q_ij) * (1 + ||y_i - y_j||^2)^-1 * (y_i - y_j)
        """
        inv_distances = 1.0 / (1.0 + self._pairwise_distances(Y))
        W = (P - Q) * inv_distances

        # sum_j W_ij * (y_i - y_j) = (sum_j W_ij) * y_i - (W @ Y)_i,
        # which evaluates the formula for all rows without a Python loop.
        return 4 * (np.sum(W, axis=1, keepdims=True) * Y - np.dot(W, Y))

    def fit_transform(self, X):
        """Fit t-SNE and return embedded coordinates.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples, n_components)
            The centred embedding after `n_iter` gradient steps.

        Raises
        ------
        ValueError
            If X is not 2-dimensional or has fewer than two samples.
        """
        X = np.asarray(X, dtype=float)
        n, _ = X.shape
        if n < 2:
            raise ValueError("SimpleTSNE needs at least 2 samples")

        # Start a fresh cost history so repeated fits do not accumulate
        self.costs = []

        # Compute joint probabilities in high-dimensional space
        print("Computing high-dimensional probabilities...")
        P = self._compute_joint_probabilities(X)

        # Initialize low-dimensional embedding with small Gaussian noise
        if self.random_state is None:
            rng = np.random
        elif isinstance(self.random_state, np.random.RandomState):
            rng = self.random_state
        else:
            rng = np.random.RandomState(self.random_state)
        Y = rng.normal(0, 1e-4, (n, self.n_components))

        # Optimization loop
        print("Optimizing embedding...")
        for iteration in range(self.n_iter):
            # Compute low-dimensional probabilities
            Q = self._compute_low_dim_probabilities(Y)

            # Compute cost (KL divergence)
            cost = np.sum(P * np.log(P / Q))
            self.costs.append(cost)

            # Compute gradient
            gradient = self._compute_gradient(P, Q, Y)

            # Update Y using gradient descent
            Y = Y - self.learning_rate * gradient

            # Center the embedding (the cost is translation invariant)
            Y = Y - np.mean(Y, axis=0)

            if iteration % 100 == 0:
                print(f"Iteration {iteration}, Cost: {cost:.4f}")

        return Y

# Demo: run the hand-written t-SNE next to scikit-learn's on the same points
print("=== Testing Simple t-SNE Implementation ===")

# Keep only the first 200 Swiss-roll points (and their colours) for speed
X_test, colors_test = (arr[:200] for arr in datasets['swiss_roll'])

# Hand-written implementation
simple_tsne = SimpleTSNE(n_components=2, perplexity=15, learning_rate=100, n_iter=500)
Y_simple = simple_tsne.fit_transform(X_test)

# Reference implementation with the same perplexity
tsne_sklearn = TSNE(n_components=2, perplexity=15, random_state=42)
Y_sklearn = tsne_sklearn.fit_transform(X_test)

# One panel each: raw data (first two columns), our embedding, sklearn's embedding
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
panels = [
    (X_test, 'Original Data (First 2 Dims)'),
    (Y_simple, 'Our Simple t-SNE'),
    (Y_sklearn, 'Sklearn t-SNE'),
]
for ax, (points, panel_title) in zip(axes, panels):
    ax.scatter(points[:, 0], points[:, 1], c=colors_test, cmap='viridis')
    ax.set_title(panel_title)

plt.tight_layout()
plt.show()

# KL divergence recorded at every iteration of our implementation
plt.figure(figsize=(10, 5))
plt.plot(simple_tsne.costs)
plt.xlabel('Iteration')
plt.ylabel('Cost')
plt.title('t-SNE Cost Function (KL Divergence)')
plt.grid(True, alpha=0.3)
plt.show()

print("Simple t-SNE implementation completed!")
