In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn_tda as sktda
import statmapper as stm
import networkx as nx

from sklearn.metrics import pairwise_distances
from sklearn.cluster import AgglomerativeClustering
from mpl_toolkits.mplot3d import Axes3D
from stochmapper import AgglomerativeCover, VoronoiCover, StochasticMapperComplex
from stochmapper import EntropyRegularizedWasserstein, KullbackLeiblerDivergence, EuclideanDistance

# Annulus

## Dataset

### Underlying manifold

In [None]:
# Number of sampled points and radius of the underlying circle.
num_pts = 5000
radius  = 1.

# Seed NumPy's global generator so that every random draw made further down
# is reproducible after a kernel restart.
seed = 42
np.random.seed(seed)

In [None]:
# Draw one angle per point, uniformly on [0, 2*pi), and place it on the circle.
theta = np.random.uniform(low=0., high=2 * np.pi, size=num_pts)
xs = radius * np.cos(theta)
ys = radius * np.sin(theta)

# NOTE(review): the perturbations below are drawn *around* xs / ys and are then
# added to xs / ys again, so the resulting points lie near a circle of radius
# 2 * radius rather than radius. Behavior is kept as is because the thresholds
# used later may rely on this scale -- confirm whether this is intended.
noise_x = np.random.normal(loc=xs, scale=.1, size=num_pts)
noise_y = np.random.normal(loc=ys, scale=.1, size=num_pts)

# Point cloud with one row per point and two columns.
X = np.column_stack([xs + noise_x, ys + noise_y])

### Probability distributions

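Now, assign a probability distribution (represented by a collection of samples) to each point. The cells below are alternative constructions that all append to the same `distributions` list, so re-run the initialization cell and then run only one of them.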

In [None]:
# Container that receives one collection of samples per data point.
distributions = list()

#### Ideal case

In [None]:
# Number of samples drawn for each point's distribution.
num_samples = 1_000

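Gaussian distribution: the samples of each point are drawn from a normal distribution centered at the point's first coordinate, with standard deviation 10.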

In [None]:
# For every point, draw num_samples values from a normal law centered at the
# point's first coordinate and append them to the shared list.
distributions.extend(
    np.random.normal(loc=X[i,0], scale=10, size=num_samples)
    for i in range(num_pts)
)

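Bimodal distribution: each sample is drawn from a normal distribution (standard deviation 10) centered, with equal probability, either at the point's first coordinate or at its negative.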

In [None]:
# Bimodal samples: every sample of point i comes from a normal law centered at
# +X[i,0] or -X[i,0], each mode being picked with probability 1/2.
# The modes of all samples of a point are drawn at once, which avoids
# num_samples Python-level RNG calls per point and stores a 1-D array per
# point, like the Gaussian case does.
for i in range(num_pts):
    signs = np.random.choice([1., -1.], size=num_samples)
    distributions.append(np.random.normal(loc=signs * X[i,0], scale=10, size=num_samples))

#### Filter values of nearest neighbors

In [None]:
# Maximal pairwise distance for two points to be considered neighbors.
threshold = 1.0

In [None]:
# One observed value per point, drawn from a normal law centered at the
# point's first coordinate (each entry is an array holding a single value).
real = [np.random.normal(loc=X[i,0], scale=10, size=1) for i in range(num_pts)]

In [None]:
# One observed value per point, drawn from a normal law centered at +X[i,0] or
# -X[i,0]; the mode is picked first, then the value is sampled.
real = []
for i in range(num_pts):
    sign = 1 if np.random.choice(2, 1) == 0 else -1
    real.append(np.random.normal(loc=sign * X[i,0], scale=10, size=1))

In [None]:
# Matrix of Euclidean distances between all pairs of points of X.
pdist = pairwise_distances(X, metric="euclidean")

In [None]:
# The distribution of point i is made of the observed values of all points
# lying within `threshold` of it (the point itself included).
for i in range(num_pts):
    # flatnonzero always returns a 1-D index array, even when a single point
    # passes the test; squeezing the output of argwhere collapses that case to
    # a 0-d array, which can be neither shuffled nor iterated over.
    neighbors = np.flatnonzero(pdist[i,:] <= threshold)
    np.random.shuffle(neighbors)
    distributions.append([real[n] for n in neighbors])

#### Distances between distributions

In [None]:
# Smallest and largest sample value over all distributions; used as common
# histogram bounds by the distances below.
m = np.min([np.min(d) for d in distributions])
M = np.max([np.max(d) for d in distributions])

In [None]:
# Pairwise matrix between distributions for the entropy-regularized
# Wasserstein distance, computed on 10 bins spanning [m, M].
erw = EntropyRegularizedWasserstein(
    epsilon=1,
    num_bins=10,
    bnds=(m, M),
)
wass = erw.compute_matrix(distributions)

In [None]:
# Pairwise matrix between distributions for the Kullback-Leibler divergence,
# computed on 1000 bins spanning [m, M].
kld = KullbackLeiblerDivergence(
    num_bins=1000,
    bnds=(m, M),
)
kldv = kld.compute_matrix(distributions)

In [None]:
# Pairwise matrix between distributions for the Euclidean distance, computed
# on 1000 bins spanning [m, M].
eud = EuclideanDistance(
    num_bins=1000,
    bnds=(m, M),
)
eucd = eud.compute_matrix(distributions)

### Visualization

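Visualize the point cloud with one height value `z` per point: either a given realization (first cell below) or the mean of each distribution (second cell). Whichever of the two cells is run last defines `z`.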

In [None]:
# Height of each point: the first sample of its distribution.
z = []
for i in range(num_pts):
    z.append(distributions[i][0])

In [None]:
# Height of each point: the mean of its distribution's samples.
z = list(map(np.mean, distributions))

In [None]:
%matplotlib notebook
fig = plt.figure()
ax  = fig.add_subplot(111, projection="3d")
ax.scatter(X[:,0], X[:,1], z, s=1.)
plt.show()

## Underlying Mapper

In [None]:
# Filter: the first coordinate of each point, as a single-column array.
flt = X[:,0].reshape(-1, 1)

In [None]:
# Mapper of X, with flt used both as filter and as node coloring, and
# single-linkage agglomerative clustering cut at distance 1.
mapper = sktda.MapperComplex(
    filters=flt,
    filter_bnds=np.array([[np.nan, np.nan]]),
    resolutions=np.array([5]),
    gains=np.array([.3]),
    colors=flt,
    clustering=AgglomerativeClustering(
        n_clusters=None, linkage="single", distance_threshold=1.
    ),
).fit(X)

In [None]:
# Convert the Mapper to a networkx graph and draw it; each node is colored by
# its first color value and sized by the number of points it contains.
G = stm.mapper2networkx(mapper)
node_colors = [mapper.node_info_[name]["colors"][0] for name in G.nodes()]
node_sizes = [len(mapper.node_info_[name]["indices"]) for name in G.nodes()]
nx.draw_networkx(G, with_labels=False, node_color=node_colors, node_size=node_sizes)

## Single realization Mapper

In [None]:
# Filter: one realization per point, namely the sample stored at index 10 of
# its distribution, as a single-column array.
flt = np.array([distributions[i][10] for i in range(num_pts)]).reshape(-1, 1)

In [None]:
# Mapper of X, with flt used both as filter and as node coloring, and
# single-linkage agglomerative clustering cut at distance 1.
mapper = sktda.MapperComplex(
    filters=flt,
    filter_bnds=np.array([[np.nan, np.nan]]),
    resolutions=np.array([5]),
    gains=np.array([.3]),
    colors=flt,
    clustering=AgglomerativeClustering(
        n_clusters=None, linkage="single", distance_threshold=1.
    ),
).fit(X)

In [None]:
# Convert the Mapper to a networkx graph and draw it; each node is colored by
# its first color value and sized by the number of points it contains.
G = stm.mapper2networkx(mapper)
node_colors = [mapper.node_info_[name]["colors"][0] for name in G.nodes()]
node_sizes = [len(mapper.node_info_[name]["indices"]) for name in G.nodes()]
nx.draw_networkx(G, with_labels=False, node_color=node_colors, node_size=node_sizes)

## Mean stochastic Mapper 

In [None]:
# Filter: the mean of each point's distribution, as a single-column array.
flt = np.array([np.mean(distrib) for distrib in distributions]).reshape(-1, 1)

In [None]:
# Mapper of X, with flt used both as filter and as node coloring, and
# single-linkage agglomerative clustering cut at distance 1.
mapper = sktda.MapperComplex(
    filters=flt,
    filter_bnds=np.array([[np.nan, np.nan]]),
    resolutions=np.array([5]),
    gains=np.array([.3]),
    colors=flt,
    clustering=AgglomerativeClustering(
        n_clusters=None, linkage="single", distance_threshold=1.
    ),
).fit(X)

In [None]:
# Convert the Mapper to a networkx graph and draw it; each node is colored by
# its first color value and sized by the number of points it contains.
G = stm.mapper2networkx(mapper)
node_colors = [mapper.node_info_[name]["colors"][0] for name in G.nodes()]
node_sizes = [len(mapper.node_info_[name]["indices"]) for name in G.nodes()]
nx.draw_networkx(G, with_labels=False, node_color=node_colors, node_size=node_sizes)

## Exact stochastic Mapper

In [None]:
# Stochastic Mapper of X built from the distributions and the precomputed
# Euclidean distance matrix between them, with a Voronoi cover; nodes are
# colored by the first coordinate of the points.
mapper = StochasticMapperComplex(
    distributions=distributions,
    distance=eucd,
    cover=VoronoiCover(n_patches=10, threshold=.02),
    colors=X[:,0].reshape(-1, 1),
    clustering=AgglomerativeClustering(
        n_clusters=None, linkage="single", distance_threshold=1.
    ),
).fit(X)

In [None]:
# Convert the Mapper to a networkx graph and draw it; each node is colored by
# its first color value and sized by the number of points it contains.
G = stm.mapper2networkx(mapper)
node_colors = [mapper.node_info_[name]["colors"][0] for name in G.nodes()]
node_sizes = [len(mapper.node_info_[name]["indices"]) for name in G.nodes()]
nx.draw_networkx(G, with_labels=False, node_color=node_colors, node_size=node_sizes)

# Application