In [None]:
from numpy import array, log, empty
from numba import jit, float64

@jit(float64[:, :](float64[:, :], float64[:, :]), nopython=True, fastmath=True)
def continuous_jaccard(x, w):
    """
    Pairwise continuous Jaccard (Tanimoto) distance between the rows of two matrices.

    For a row ``a`` of ``x`` and a row ``b`` of ``w`` the distance is::

        1 - (a . b) / (sum(a**2 + b**2) - a . b)

    When the denominator is exactly zero (for real inputs this only happens
    when both rows are all zeros) the ratio is undefined; the distance is then
    taken as ``a . b``, i.e. 0 for two all-zero rows.

    Parameters
    ----------
    x : 2-D float64 array, one vector per row.
    w : 2-D float64 array, one vector per row, with as many columns as ``x``.

    Returns
    -------
    2-D float64 array of shape ``(x.shape[0], w.shape[0])`` where entry
    ``[i, j]`` is the distance between ``x[i]`` and ``w[j]``.

    Raises
    ------
    ValueError
        If ``x`` and ``w`` do not have the same number of columns.
    """
    n_features = x.shape[1]

    # nopython mode does not bounds-check array indexing by default, so a
    # column mismatch would silently read past the end of a row of `w`.
    if w.shape[1] != n_features:
        raise ValueError("x and w must have the same number of columns")

    res = empty((x.shape[0], w.shape[0]), dtype=x.dtype)
    for i in range(x.shape[0]):
        for j in range(w.shape[0]):
            num = 0.0    # running dot product of x[i] and w[j]
            denum = 0.0  # running sum of squared components of both rows
            for k in range(n_features):
                num += x[i, k] * w[j, k]
                denum += x[i, k] ** 2 + w[j, k] ** 2

            denum -= num

            # Avoid 0 / 0: substitute values that make the final expression
            # below evaluate to the (zero) dot product.
            if denum == 0:
                num = 1. - num
                denum = 1.

            res[i, j] = 1 - num / denum

    return res

# Small hand-written sample (5 points, 2 features). It includes an all-zero row,
# so comparing the sample against itself hits the zero-denominator branch of
# continuous_jaccard.
XB = array([[0., 0.], [-10.1, -10.1], [-0.1, 0.9], [0.1, 0.3], [0.0, -10.]])
# Time a single call computing the pairwise distances of the sample with itself.
%time continuous_jaccard(XB, XB)

In [None]:
# import sys
# sys.path.insert(0, '../')

In [None]:
from neural_map import NeuralMap, _plot
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the blob dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('datasets/blob_data_5dims_10clusters.csv')
# Name of the first column. It is dropped before scaling, so it is presumably the
# cluster label rather than a feature -- TODO confirm against the CSV.
l_columns = df.columns[0]
# NOTE(review): `col` is not read by any of the visible cells.
col = None

In [None]:
# Drop the column named by `l_columns`, then rescale the remaining columns with
# a MinMaxScaler created with its default settings.
scaler = MinMaxScaler()
data = scaler.fit_transform(df.drop(l_columns, axis=1).values)

# Show the shape of the scaled array as the cell output.
data.shape

In [None]:
# Build the map: one input variable per column of `data`, a 20x20 grid,
# toroidal=True, and the numba-compiled continuous_jaccard as the metric.
som = NeuralMap(variables=data.shape[1], metric=continuous_jaccard, columns=20, rows=20, toroidal=True)

In [None]:
# Train for 40 epochs on the scaled data, passing the same array as eval_data
# and selecting the 'exponential' radius decay function.
som.train(data=data, n_epochs=40, eval_data=data, radius_decay_function='exponential')

In [None]:
# Fraction of the map width/height by which the plotted positions are shifted.
rot = 0.5

# NOTE(review): 0.866 is approximately sqrt(3)/2, presumably chosen to match the
# row spacing of a hexagonal grid -- confirm.
plt.figure(figsize=(9, 9 * 0.866))
# Shift each coordinate of the nodes' relative positions by `rot` times the map
# width (or height) and wrap the result with a modulo by that same size.
plt.scatter(
    (som.relative_positions[..., 0] + som.width * rot) % som.width,
    (som.relative_positions[..., 1] + som.height * rot) % som.height
)
plt.show()

In [None]:
# Draw the library's analysis plots for the scaled data, with
# min_cluster_size=3 and display_empty_nodes enabled.
som.plot_analysis(data, min_cluster_size=3, display_empty_nodes=True)

In [None]:
# Plot the unified distance matrix (U-matrix) of the map with default arguments.
som.plot_unified_distance_matrix()
# NeuralMap(variables=2, metric='euclidean', columns=som.columns, rows=som.rows, weights=som.relative_positions).plot_unified_distance_matrix(size=10, detailed=False, borders=False)

In [None]:
# Collect the quantities to visualise. They are presumably per-node arrays,
# since each one is drawn on the node grid below -- confirm against NeuralMap.
actfreq, q, mean_distance = som.analyse(data)
umatrix, distance_matrix = som.get_unified_distance_matrix()
# NOTE: the name `outlier_socer` (sic) is kept unchanged in case other cells read it.
labels, probabilities, outlier_socer = som.hdbscan(min_cluster_size=5)

# One tile plot per quantity. Titles are user-facing Spanish labels; the
# misspellings in three of them have been corrected.
_plot.tiles(som.positions, som.hexagonal, umatrix[..., -1], title='distancia')
_plot.tiles(som.positions, som.hexagonal, actfreq, title='frecuencia de activación')
_plot.tiles(som.positions, som.hexagonal, q, title='error de cuantización')
_plot.tiles(som.positions, som.hexagonal, mean_distance, title='distancia media')
_plot.tiles(som.positions, som.hexagonal, probabilities, title='probabilidad de pertenencia')
_plot.tiles(som.positions, som.hexagonal, outlier_socer, title='outlier score')

In [None]:
# Number of groups requested from k-means; also used for the plot title and
# for the list of label values passed to the plot.
clusters = 10

# Cluster with the map's k_means method, then draw the resulting labels as tiles.
# Note that this rebinds `labels`.
labels, centers = som.k_means(clusters)
_plot.tiles(
    som.positions,
    som.hexagonal,
    labels,
    norm=False,
    title=f'Clustering con K-means. Valor de K = {clusters}',
    labels=[*range(clusters)],
)

# labels, centers = som.k_medoids(clusters)
# _plot.tiles(som.positions, som.hexagonal, labels, norm=False, title='Clustering con K-medoids. Valor de K = ' + str(clusters), labels=list(range(clusters)))


In [None]:
# Plot the weight vectors for cluster 0, then the weight vector of the node at
# grid index (0, 0).
# NOTE(review): which clustering `cluster=0` refers to is decided inside
# NeuralMap and is not visible from this notebook.
som.plot_cluster_weights_vectors(cluster=0)
som.plot_weights_vector(node_index=(0, 0))

In [None]:
# from BootstrapCCpy import BootstrapCCpy
# from sklearn.cluster import KMeans
# from scipy.cluster.hierarchy import linkage, fcluster, dendrogram

# K_CLUSTERS = 15
# B_SAMPLINGS = som.columns * som.rows

# bcc = BootstrapCCpy(cluster=KMeans().__class__, K=K_CLUSTERS, B=B_SAMPLINGS)
# bcc.fit(som.weights.reshape((-1, som.variables)), verbose=True)
# bcc.plot_consensus_heatmap()

# for clusters in range(2, K_CLUSTERS + 1):
#     labels = (fcluster(linkage(bcc.Mk[clusters - 2], 'single'), clusters, criterion='maxclust') - 1).reshape((som.columns, som.rows))
# #     dendrogram(linkage(bcc.Mk[clusters - 2], 'single'))
#     _plot.tiles(som.positions, som.hexagonal, labels, norm=False, title='BootstrapCC with K = '+ str(clusters), labels=list(range(labels.max() + 1)), intensity=(labels >= 0.) * 1., color_map=plt.cm.get_cmap('hsv', labels.max()), size=4)
#     plt.show()

# bcc.get_best_k()