-
Notifications
You must be signed in to change notification settings - Fork 1
/
umap_commands.py
54 lines (42 loc) · 1.77 KB
/
umap_commands.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Load MNIST, embed it in two dimensions with UMAP, and plot the embedding
# coloured by mnist.target.
import sklearn  # scikit-learn is imported as `sklearn`; there is no `scikit` module to import
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Dimension reduction and clustering libraries
import umap
import hdbscan
import sklearn.cluster as cluster
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score

sns.set(style='white', rc={'figure.figsize': (10, 8)})

# fetch_mldata was deprecated in scikit-learn 0.20 and removed in 0.22;
# fetch_openml is its documented replacement.  as_frame=False keeps
# mnist.data / mnist.target as NumPy arrays, which the column slicing and
# boolean-mask indexing used later in this script rely on.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
# OpenML serves the labels as strings; cast them to integers so they can be
# used directly as colour values and as ground truth for the scores below.
mnist.target = mnist.target.astype(int)

# Fixed random_state so the embedding is the same from run to run.
standard_embedding = umap.UMAP(random_state=42).fit_transform(mnist.data)

plt.scatter(standard_embedding[:, 0], standard_embedding[:, 1],
            c=mnist.target, s=0.1, cmap='Spectral')
# Baseline: K-Means with 10 clusters fitted directly on mnist.data
# (no dimension reduction beforehand).
kmeans_labels = cluster.KMeans(n_clusters=10).fit_predict(mnist.data)

# Open a fresh figure: when this file runs as a script (rather than one
# notebook cell per plot) the scatter would otherwise be drawn on top of the
# previous plot's axes.
plt.figure()
plt.scatter(standard_embedding[:, 0], standard_embedding[:, 1],
            c=kmeans_labels, s=0.1, cmap='Spectral')

# Agreement between the K-Means assignment and mnist.target, as
# (adjusted Rand index, adjusted mutual information).  The result is bound
# and printed because a bare expression is silently discarded outside a
# notebook.
kmeans_scores = (
    adjusted_rand_score(mnist.target, kmeans_labels),
    adjusted_mutual_info_score(mnist.target, kmeans_labels),
)
print(kmeans_scores)
# Value recorded in the original file (presumably pasted from a notebook run;
# K-Means is unseeded here, so a new run may differ):
# (0.36675295135972552, 0.49614118437750965)
# Density-based alternative: project mnist.data onto 50 principal components,
# then cluster the projection with HDBSCAN.
lowd_mnist = PCA(n_components=50).fit_transform(mnist.data)
hdbscan_labels = hdbscan.HDBSCAN(min_samples=10, min_cluster_size=500).fit_predict(lowd_mnist)

# HDBSCAN marks points it leaves unclustered (noise) with the label -1, so a
# non-negative label means the point was assigned to some cluster.
clustered = (hdbscan_labels >= 0)

# Fresh figure so this plot is not overlaid on earlier ones when run as a script.
plt.figure()

# Unclustered points in a uniform grey.  `color=` is used rather than `c=`:
# matplotlib cannot distinguish a bare RGB triple passed as `c` from three
# scalar values to be colour-mapped.
plt.scatter(standard_embedding[~clustered, 0],
            standard_embedding[~clustered, 1],
            color=(0.5, 0.5, 0.5),
            s=0.1,
            alpha=0.5)
# Clustered points coloured by their HDBSCAN cluster label.
plt.scatter(standard_embedding[clustered, 0],
            standard_embedding[clustered, 1],
            c=hdbscan_labels[clustered],
            s=0.1,
            cmap='Spectral')
# Score the HDBSCAN clustering against mnist.target.  The original statements
# referred to `labels`, a name that is never defined in this file (NameError);
# the HDBSCAN result is stored in `hdbscan_labels`.  Results are bound and
# printed because bare expressions are discarded outside a notebook.

# Scores over every point, unclustered (-1) points included.
hdbscan_scores_all = (
    adjusted_rand_score(mnist.target, hdbscan_labels),
    adjusted_mutual_info_score(mnist.target, hdbscan_labels),
)
print(hdbscan_scores_all)

# Scores restricted to the points HDBSCAN actually assigned to a cluster.
clustered = (hdbscan_labels >= 0)
hdbscan_scores_clustered = (
    adjusted_rand_score(mnist.target[clustered], hdbscan_labels[clustered]),
    adjusted_mutual_info_score(mnist.target[clustered], hdbscan_labels[clustered]),
)
print(hdbscan_scores_clustered)

# Fraction of all samples that received a cluster label.
clustered_fraction = np.sum(clustered) / mnist.data.shape[0]
print(clustered_fraction)