### Define a custom function that implements Isometric Feature Mapping (ISOMAP)

In [1]:
# Imports
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler

from scipy.spatial import distance
from scipy.sparse.csgraph import dijkstra

import numpy as np
from numpy import linalg as LA

import matplotlib.pyplot as plt
from matplotlib import offsetbox

In [2]:
def _spectrum_is_psd(eigenvalues):
    """Return True if an ascending real spectrum is non-negative up to round-off."""
    # Scale the tolerance with the matrix size and the largest magnitude: a
    # centred kernel always has one (numerically noisy) zero eigenvalue, and a
    # bare machine-epsilon bound would reject it for almost every input.
    tol = eigenvalues.size * np.finfo(float).eps * np.abs(eigenvalues).max()
    return eigenvalues[0] >= -tol


def get_isomap(X, n_neighbors, d, nghbd_def='k'):
    """ Dimensionality reduction with (kernel) ISOMAP.

    Keyword arguments:
    X -- Data matrix with M rows (observations) and N columns (features)
    n_neighbors -- Number of nearest neighbors per sample used to build the
                   neighborhood graph (the sample itself is not counted, so
                   it must be smaller than M)
    d -- Reduced number of dimensions
    nghbd_def -- Selection of approach, either 'k' for k-Isomap or 'eps' for eps-Isomap (default 'k')

    Returns:
    Array with M rows and d columns holding the embedded coordinates, or None
    if nghbd_def is not a supported value or if the constant-shift adjusted
    kernel is still not positive semi-definite (a message is printed).

    Raises:
    NotImplementedError -- if nghbd_def is 'eps' (not implemented yet)
    """

    # Save number of samples
    n_samples = X.shape[0]

    # Step 1: Calculate the graph based on the chosen neighborhood definition
    # k-neighborhood
    if nghbd_def == 'k':

        # Distance-weighted k-nearest-neighbors graph. Calling
        # kneighbors_graph without a query returns the neighbors of the fitted
        # samples themselves and does not count a sample as its own neighbor.
        nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree').fit(X)
        neighbor_graph = nbrs.kneighbors_graph(mode='distance')

    # eps-neighborhood
    elif nghbd_def == 'eps':

        # Fail loudly instead of falling through with an undefined graph
        raise NotImplementedError("eps-Isomap (nghbd_def='eps') is not implemented yet")

    # Only 'k' or 'eps' are valid inputs
    else:

        print("Only k and eps are supported inputs for parameter nghbd_def")
        return None

    # Step 2: Graph distance computation (shortest paths on the undirected graph)
    DG = dijkstra(neighbor_graph, directed=False)

    # Pairs without a connecting path come back as inf; replace them with a
    # large finite stand-in so the matrix algebra below stays finite
    DG[np.isinf(DG)] = 1000000

    # Step 3: Calculate centering Gram matrix
    SG = DG ** 2

    # Calculate n-centralizing matrix
    H = np.identity(n_samples) - np.ones((n_samples, n_samples)) / n_samples

    # Define kernel of Isomap DR method; re-symmetrize to remove round-off
    # asymmetry before using the symmetric eigen-solver
    GC = -.5 * (H @ SG @ H)
    GC = (GC + GC.T) / 2

    # Step 4 (first pass): Eigen decomposition. eigh returns real eigenvalues
    # in ascending order for a symmetric matrix.
    eigenvalues, eigenvectors = LA.eigh(GC)

    # Check if GC is positive semi-definite and adjust GC if it is not
    if not _spectrum_is_psd(eigenvalues):

        # Step 3-1: Computation of shifting constant
        FC = -.5 * (H @ DG @ H)

        # Step 3-2: Constant-shift adjustment. The block matrix is not
        # symmetric, so its spectrum is complex in general; the shift is the
        # largest real part.
        shift_matrix = np.block([
            [np.zeros((n_samples, n_samples)), 2 * GC],
            [-np.identity(n_samples), -4 * FC],
        ])
        t = np.max(LA.eigvals(shift_matrix).real)

        GC = GC + 2 * t * FC + .5 * t * t * H
        GC = (GC + GC.T) / 2

        # Check if adjusted GC is positive semi-definite
        eigenvalues, eigenvectors = LA.eigh(GC)
        if not _spectrum_is_psd(eigenvalues):

            print("Constant-shift adjusted kernel is not psd. Process aborted,")
            return None

    # Select the d largest eigenvalues with their eigenvectors
    idx = np.argsort(eigenvalues)[::-1][:d]
    top_eigenvalues = eigenvalues[idx]
    top_eigenvectors = eigenvectors[:, idx]

    # Step 5: Define dimensionality reduction of X. Clip round-off negatives
    # to zero so the square root cannot produce NaN.
    Yprime = np.sqrt(np.clip(top_eigenvalues, 0, None)) * top_eigenvectors

    return Yprime

In [3]:
def plot_embedding(X, title):
    """ Plot an embedding with per-class markers and image thumbnails.

    Keyword arguments:
    X -- Embedded points, one row per observation; each column is rescaled
         to [0, 1] before plotting (presumably two columns, since the columns
         are unpacked as scatter coordinates)
    title -- Title of plot

    Reads the module-level names `digits` (its `target_names` and `images`)
    and `y` (labels compared against each target name); both must be defined
    before this function is called.
    """
    _, axes = plt.subplots()
    scaled = MinMaxScaler().fit_transform(X)

    # One scatter call per class, using the class label itself as the marker
    for label in digits.target_names:
        members = scaled[y == label]
        axes.scatter(
            *members.T,
            marker=f"${label}$",
            s=60,
            color=plt.cm.Dark2(label),
            alpha=0.425,
            zorder=2,
        )

    # Positions that already carry a thumbnail; seeded with the far corner
    placed = np.array([[1.0, 1.0]])
    min_sq_gap = 4e-3
    for idx, point in enumerate(scaled):
        # Squared distance from this point to every thumbnail placed so far
        sq_dists = ((point - placed) ** 2).sum(axis=1)
        if sq_dists.min() < min_sq_gap:
            # Too close to an existing thumbnail: skip to avoid clutter
            continue
        placed = np.concatenate([placed, point[np.newaxis]], axis=0)
        thumbnail = offsetbox.OffsetImage(digits.images[idx], cmap=plt.cm.gray_r)
        box = offsetbox.AnnotationBbox(thumbnail, point)
        box.set(zorder=1)
        axes.add_artist(box)

    axes.set_title(title)
    axes.axis("off")