# Appendix D - Spectral clustering when data is not a graph

In [None]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_circles
from sklearn.cluster import KMeans
from sklearn.neighbors import DistanceMetric

## Create data

In [None]:
# Sample a toy 2-D data set made of two noisy rings (the inner ring is the
# outer one scaled by `factor`). `labels` holds the ground-truth ring (0 or 1)
# of every sample.
n_nodes = 150
data, labels = make_circles(n_samples=n_nodes, factor=0.5, noise=0.05)

# Split the samples by ground-truth ring and draw each ring in its own colour.
cluster1, cluster2 = (data[labels == ring] for ring in (0, 1))
for cluster in (cluster1, cluster2):
    plt.scatter(cluster[:, 0], cluster[:, 1])

# Save before show(): the figure may be cleared once it has been displayed.
plt.savefig('../figures/ground-truth.eps')
plt.show()

## K-means clustering

In [None]:
# Run 2-cluster k-means directly on the raw 2-D coordinates.
kmeans = KMeans(n_clusters=2)
kmeans.fit(data)
# NOTE: from here on `labels` holds the k-means assignment, replacing the
# ground-truth labels returned by make_circles.
labels = kmeans.labels_

In [None]:
# Partition the samples according to the current `labels` (the k-means
# assignment) and draw each group in its own colour.
cluster1, cluster2 = (data[labels == group] for group in (0, 1))
for cluster in (cluster1, cluster2):
    plt.scatter(cluster[:, 0], cluster[:, 1])

# Save before show(): the figure may be cleared once it has been displayed.
plt.savefig('../figures/kmeans.eps')
plt.show()

## Spectral clustering

In [None]:
# Turn the point cloud into an unweighted graph: two samples are connected
# when their Euclidean distance is below 0.2.
dist = DistanceMetric.get_metric('euclidean')
distances = dist.pairwise(data)
W = (distances < 0.2).astype(int)
plt.imshow(W)

# Unnormalised graph Laplacian L = D - W, with D the diagonal matrix of the
# column sums of W. Every sample is at distance 0 from itself, so W has ones
# on its diagonal; those self-loops add 1 to both D and W and cancel in D - W.
D = np.diag(W.sum(axis=0))
laplacian = D - W

# eigh treats its input as symmetric and returns the eigenvalues in ascending
# order; column evects[:, i] is the eigenvector belonging to evals[i].
evals, evects = np.linalg.eigh(laplacian)

In [None]:
# Plot eigenvalues
plt.bar(np.arange(10), evals[0:10])
plt.xticks(np.arange(0, 10), labels=np.arange(1, 10+1))
plt.xlabel('Eigenvalues')
plt.ylabel('Intensity')
plt.savefig(f'../figures/eigenvals-on-comparison.eps')
plt.show()

In [None]:
# Spectral embedding: describe every sample by its entries in the first two
# columns of `evects` (the eigenvectors of the two smallest eigenvalues) and
# run 2-cluster k-means in that embedding instead of on the raw coordinates.
kmeans = KMeans(n_clusters=2)
kmeans.fit(evects[:, :2])
# NOTE: `labels` is overwritten with the spectral-clustering assignment.
labels = kmeans.labels_

In [None]:
# Partition the original 2-D samples according to the current `labels` (the
# spectral-clustering assignment) and draw each group in its own colour.
cluster1, cluster2 = (data[labels == group] for group in (0, 1))
for cluster in (cluster1, cluster2):
    plt.scatter(cluster[:, 0], cluster[:, 1])

# Save before show(): the figure may be cleared once it has been displayed.
plt.savefig('../figures/spectral-on-comparison.eps')
plt.show()