In [98]:
import numpy as np

# Seed NumPy's global generator so a fresh "Restart & Run All" reproduces the same
# synthetic data set (and the same later draws from np.random's global state).
np.random.seed(42)
data = np.random.rand(100, 25)  # 100 samples x 25 features, uniform in [0, 1)

In [99]:
# Number of neighbours kept per sample. The k-NN query in this notebook asks for
# kNeighbors + 1 hits and discards the first column, so this count excludes that hit.
kNeighbors = 5

In [100]:
from sklearn.neighbors import NearestNeighbors

# Ask for one neighbour more than needed: the model is queried with the same points it
# was fitted on, so the first hit is expected to be the query point itself.
knn = NearestNeighbors(n_neighbors=kNeighbors + 1, algorithm='auto')
knn.fit(data)

# Both outputs have one row per sample and kNeighbors + 1 columns, nearest first.
distances, indices = knn.kneighbors(
    data,
    n_neighbors=kNeighbors + 1,
    return_distance=True,
)

In [101]:
# Discard column 0 of the k-NN result (presumably each sample matched with itself --
# NOTE(review): duplicate rows could put a different point first; confirm).
neighborDistances = distances[:, 1:]
neighborIndices = indices[:, 1:]

# First remaining column = nearest retained neighbour of every sample.
closestNeighborDistance = neighborDistances[:, 0]
closestNeighborIndex = neighborIndices[:, 0]

In [102]:
# Target for each sample's summed membership strengths (despite the name, this is the
# value the sum must reach, not a sigma). UMAP's smooth-kNN calibration uses log2(k),
# not the natural logarithm.
targetSigma = np.log2(kNeighbors)

In [103]:
def binarySearchSigma(neighborDistances, closestNeighborDistance, targetSigma, lowerBoundSigma = 1e-5, upperBoundSigma = 1000, tolerance = 1e-5, maxIters=64):
    """Calibrate one bandwidth (sigma) per sample by bisection.

    For every row, sigma is searched inside [lowerBoundSigma, upperBoundSigma] so that
    ``computeFuzzyProbability(row, closest, sigma).sum()`` is within ``tolerance`` of
    ``targetSigma``.

    Parameters
    ----------
    neighborDistances : array-like, shape (n_samples, n_neighbors)
        Distances from each sample to its neighbours.
    closestNeighborDistance : iterable of length n_samples
        Per-sample offset subtracted from the distances (paired row by row).
    targetSigma : float
        Value the summed memberships of a row should reach. Despite its name this is
        the target *sum*, not a sigma.
    lowerBoundSigma, upperBoundSigma : float
        Initial search bracket; the first guess is its midpoint.
    tolerance : float
        Absolute tolerance on ``|sum - targetSigma|`` that stops the search early.
    maxIters : int
        Maximum number of bisection steps per sample.

    Returns
    -------
    numpy.ndarray, shape (n_samples, 1)
        The sigma found for every sample. If the target is not reached within
        ``maxIters`` steps the last midpoint is returned for that sample.
    """
    neighborDistances = np.asarray(neighborDistances, dtype=float)
    sigma = np.full(
        (neighborDistances.shape[0], 1),
        (lowerBoundSigma + upperBoundSigma) / 2,
        dtype=float,
    )

    for idx, (distances, closestDistance) in enumerate(zip(neighborDistances, closestNeighborDistance)):
        # Keep the bracket and the current guess as plain Python floats. Binding
        # `upper = sigma[idx]` would store a *view* into `sigma`, so the bound would
        # silently follow every later write to sigma[idx] and the bracket would collapse.
        lower = float(lowerBoundSigma)
        upper = float(upperBoundSigma)
        current = (lower + upper) / 2

        for _ in range(maxIters):
            result = computeFuzzyProbability(distances, closestDistance, current).sum()

            if abs(result - targetSigma) < tolerance:
                break

            # exp(-(d - rho) / sigma) grows with sigma (for d >= rho), so a sum that is
            # too small needs a LARGER sigma: move the lower bound up. Otherwise shrink.
            if result < targetSigma:
                lower = current
            else:
                upper = current
            current = (lower + upper) / 2

        sigma[idx, 0] = current

    return sigma

def computeFuzzyProbability(distances, closestDistance, sigma):
    """Exponentially decaying membership strength for neighbour distances.

    Evaluates ``exp(-(distances - closestDistance) / sigma)`` element-wise; the
    arguments are combined with ordinary NumPy broadcasting.

    Parameters
    ----------
    distances : array-like or float
        Distances to convert.
    closestDistance : array-like or float
        Offset subtracted from ``distances`` before scaling.
    sigma : array-like or float
        Bandwidth dividing the shifted distances.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Membership values; a distance equal to ``closestDistance`` maps to 1.
    """
    scaledGap = (distances - closestDistance) / sigma
    return np.exp(-scaledGap)

def computeDirectedFuzzyGraph(neighborDistances, closestNeighborDistance, sigma):
    """Membership strength of every (sample, neighbour) edge.

    Parameters
    ----------
    neighborDistances : array-like, shape (n_samples, n_neighbors)
        Distance from each sample to each of its neighbours.
    closestNeighborDistance : array-like, n_samples values
        Per-sample offset; accepted as shape (n_samples,) or (n_samples, 1).
    sigma : array-like, n_samples values
        Per-sample bandwidth; accepted as shape (n_samples,) or (n_samples, 1).

    Returns
    -------
    numpy.ndarray, shape (n_samples, n_neighbors)
        ``computeFuzzyProbability`` applied row by row.
    """
    neighborDistances = np.asarray(neighborDistances)
    # Per-sample vectors must be columns to line up with the rows of the distance
    # matrix: a flat (n_samples,) vector would be broadcast along the neighbour axis,
    # which raises for n_samples != n_neighbors and mis-aligns values otherwise.
    closestColumn = np.asarray(closestNeighborDistance).reshape(-1, 1)
    sigmaColumn = np.asarray(sigma).reshape(-1, 1)

    return computeFuzzyProbability(neighborDistances, closestColumn, sigmaColumn)


In [104]:
from scipy.sparse import coo_matrix

def symmetricFuzzyGraph(fuzzyProbabilities, neighborIndices, kNeighbors):
    """Symmetrise the directed k-NN membership graph with a probabilistic union.

    Builds the sparse directed matrix ``A`` with ``A[i, neighborIndices[i, j]] =
    fuzzyProbabilities[i, j]`` and returns ``A + A.T - A * A.T`` (element-wise
    product), which equals ``1 - (1 - A) * (1 - A.T)`` entry by entry.

    Parameters
    ----------
    fuzzyProbabilities : array-like, shape (n_samples, kNeighbors)
        Membership strength of each directed edge.
    neighborIndices : array-like of int, shape (n_samples, kNeighbors)
        Column index (neighbour id) of each directed edge.
    kNeighbors : int
        Number of neighbours per sample, i.e. the column count of both inputs.

    Returns
    -------
    scipy.sparse matrix (CSR), shape (n_samples, n_samples)
        The symmetric membership matrix.
    """
    fuzzyProbabilities = np.asarray(fuzzyProbabilities)
    n_samples = fuzzyProbabilities.shape[0]

    rows = np.repeat(np.arange(n_samples), kNeighbors)
    cols = np.asarray(neighborIndices).ravel()
    weights = fuzzyProbabilities.ravel()

    directed = coo_matrix((weights, (rows, cols)), shape=(n_samples, n_samples)).tocsr()
    transposed = directed.T.tocsr()

    # scipy.sparse refuses `1 - sparse` (it would turn every implicit zero into a
    # stored 1), so use the algebraically identical a + b - a*b form, which stays sparse.
    symmetric = directed + transposed - directed.multiply(transposed)
    return symmetric



In [105]:
import numpy as np

# Random 2-D starting layout: one row per sample, tiny Gaussian jitter around the origin.
initialPoints = np.random.normal(loc=0, scale=1e-4, size=(len(data), 2))


In [106]:
def lowDimensionEmbeddingProbabilities(points, a=1.929, b=0.7915):
    """Low-dimensional similarity ``1 / (1 + a * d**(2*b))`` for point pairs.

    Parameters
    ----------
    points : numpy.ndarray, shape (n_pairs, 2, n_dims)
        ``points[:, 0]`` and ``points[:, 1]`` hold the two endpoints of each pair.
    a, b : float
        Shape parameters of the similarity curve.

    Returns
    -------
    numpy.ndarray, shape (n_pairs,)
        Similarity of every pair, where ``d`` is the Euclidean distance between its
        endpoints; coincident endpoints give 1.
    """
    heads, tails = points[:, 0], points[:, 1]
    pairDistance = np.linalg.norm(heads - tails, axis=1)
    return 1 / (1 + a * pairDistance ** (2 * b))

In [None]:
def umap_loss(positive_pairs, negative_pairs, embedding_positions, gamma, a=1.929, b=0.7915):
    """Cross-entropy style UMAP loss over sampled positive and negative pairs.

    ``loss = -sum(log q_pos) - gamma * sum(log(1 - q_neg))`` where ``q`` is
    ``lowDimensionEmbeddingProbabilities`` evaluated with the given ``a`` and ``b``.

    Parameters
    ----------
    positive_pairs, negative_pairs : integer index arrays
        Used to index ``embedding_positions``; the result must have shape
        (n_pairs, 2, n_dims), i.e. the pair arrays are expected to be (n_pairs, 2).
    embedding_positions : numpy.ndarray, shape (n_points, n_dims)
        Current low-dimensional coordinates.
    gamma : float
        Weight of the repulsive (negative-pair) term.
    a, b : float
        Curve parameters forwarded to ``lowDimensionEmbeddingProbabilities``.

    Returns
    -------
    float
        Attractive term plus weighted repulsive term.
    """
    # Floor applied before taking logs so that coincident points (q == 1 for a negative
    # pair) or extremely distant positives (q underflowing to 0) give a finite loss.
    eps = 1e-12

    positiveValues = embedding_positions[positive_pairs]
    negativeValues = embedding_positions[negative_pairs]

    # Forward a and b: without this the caller's curve parameters are silently ignored.
    qPositive = lowDimensionEmbeddingProbabilities(positiveValues, a, b)
    qNegative = lowDimensionEmbeddingProbabilities(negativeValues, a, b)

    # Positive pairs are rewarded for high similarity ...
    firstTerm = -np.log(np.clip(qPositive, eps, 1.0)).sum()
    # ... negative pairs for LOW similarity, hence log(1 - q) rather than log(q).
    secondTerm = -gamma * np.log(np.clip(1.0 - qNegative, eps, 1.0)).sum()
    return firstTerm + secondTerm