In [None]:
from numpy import array, log, empty
from numba import jit, float64

@jit(float64[:, :](float64[:, :], float64[:, :]), nopython=True, fastmath=True)
def continuous_jaccard(x, w):
    """Pairwise continuous Jaccard (Tanimoto) distance between the rows of two matrices.

    For row ``x[i]`` and row ``w[j]`` the distance is::

        1 - <x_i, w_j> / (|x_i|^2 + |w_j|^2 - <x_i, w_j>)

    When the denominator is exactly zero (which happens when both rows are
    all zeros) the ratio is replaced so that the distance evaluates to 0
    instead of 0/0.

    Parameters
    ----------
    x : 2-D float64 array, shape (n, d)
    w : 2-D float64 array, shape (m, d)

    Returns
    -------
    2-D float64 array, shape (n, m)
        ``res[i, j]`` holds the distance between ``x[i]`` and ``w[j]``.

    Raises
    ------
    ValueError
        If ``x`` and ``w`` do not have the same number of columns.
    """
    n_features = x.shape[1]

    # nopython mode does not bounds-check array indexing by default, so a
    # column mismatch would read past the end of a row without any error.
    if w.shape[1] != n_features:
        raise ValueError("x and w must have the same number of columns")

    res = empty((x.shape[0], w.shape[0]), dtype=x.dtype)
    for i in range(x.shape[0]):
        for j in range(w.shape[0]):
            num = 0.0    # running dot product <x_i, w_j>
            denom = 0.0  # running |x_i|^2 + |w_j|^2
            for k in range(n_features):
                num += x[i, k] * w[j, k]
                denom += x[i, k] ** 2 + w[j, k] ** 2

            denom -= num

            if denom == 0:
                # Degenerate pair: substitute the ratio so that the distance
                # below becomes 1 - (1 - num) / 1, i.e. 0 for all-zero rows.
                num = 1. - num
                denom = 1.

            res[i, j] = 1 - num / denom

    return res

XB = array([[0., 0.], [-10.1, -10.1], [-0.1, 0.9], [0.1, 0.3], [0.0, -10.]])
%time continuous_jaccard(XB, XB)

In [None]:
import sys
# Prepend the parent directory to the module search path — presumably so that the
# local `neural_map` package imported in the next cell resolves from the checkout.
# The path is relative, so it depends on the notebook's working directory.
sys.path.insert(0, '../')

In [None]:
from neural_map import NeuralMap, _plot
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the dataset from a CSV file (path is relative to the working directory).
df = pd.read_csv('datasets/blob_data_5dims_10clusters.csv')
# Name of the first column; it is dropped before scaling in the next cell —
# presumably the label column (verify against the CSV).
l_columns = df.columns[0]
# NOTE(review): `col` is not read by any cell visible here — confirm it is still needed.
col = None

In [None]:
# Fit a MinMaxScaler on every column except `l_columns` and keep the scaled array.
scaler = MinMaxScaler()
data = scaler.fit_transform(
    df.drop(columns=l_columns).values
)

# Show the dimensions of the scaled array as the cell output.
data.shape

In [None]:
# Build the map: `z` is the number of columns of `data`, `x`/`y` are both 10
# (presumably the grid size — confirm against NeuralMap), and the custom
# `continuous_jaccard` function is passed as the metric.
som = NeuralMap(
    z=data.shape[1],
    x=10,
    y=10,
    metric=continuous_jaccard,
)

In [None]:
# Train on the scaled data; the same array is also passed as evaluation data.
som.train(
    data=data,
    radius_decay_function='exponential',
    verbosity=True,
    eval_data=data,
    num_epochs=30,
)

In [None]:
# Analysis plots for the trained map, computed on the training data.
som.plot_analysis(
    data,
    min_cluster_size=5,
    show_empty_nodes=False,
)

In [None]:
# U-matrix plot with the detailed view and borders switched on.
som.plot_u_matrix(
    detailed=True,
    borders=True,
)

In [None]:
# Collect the quantities to display from the trained map.
actfreq, q, mean_distance = som.analyse(data)
umatrix, distance_matrix = som.unified_distance_matrix()
# NOTE(review): `outlier_socer` is a misspelling of "outlier score"; the name is kept
# unchanged so that any other cell referring to it keeps working.
labels, probabilities, outlier_socer = som.hdbscan(min_cluster_size=5)

# One tile plot per quantity. Titles are user-facing and written in Spanish.
_plot.tiles(som.cart_coord, som.hexagonal, umatrix[..., -1], title='distancia')
_plot.tiles(som.cart_coord, som.hexagonal, actfreq, title='frecuencia de activación')
_plot.tiles(som.cart_coord, som.hexagonal, q, title='error de cuantización')
_plot.tiles(som.cart_coord, som.hexagonal, mean_distance, title='distancia media')
_plot.tiles(som.cart_coord, som.hexagonal, probabilities, title='probabilidad de pertenencia')
_plot.tiles(som.cart_coord, som.hexagonal, outlier_socer, title='outlier score')

In [None]:
# Number of clusters requested from both clustering methods below.
clusters = 10

# K-means on the map, shown as a tile plot with one label per cluster index.
labels, centers = som.k_means(clusters)
_plot.tiles(
    som.cart_coord,
    som.hexagonal,
    labels,
    norm=False,
    title='Clustering con K-means. Valor de K = {}'.format(clusters),
    labels=list(range(clusters)),
)

# Same plot for K-medoids; `labels` and `centers` are overwritten.
labels, centers = som.k_medoids(clusters)
_plot.tiles(
    som.cart_coord,
    som.hexagonal,
    labels,
    norm=False,
    title='Clustering con K-medoids. Valor de K = {}'.format(clusters),
    labels=list(range(clusters)),
)


In [None]:
# Plot the weight vectors for cluster 0, then the weight vector of the node at index (0, 0).
# NOTE(review): which clustering result `cluster=0` refers to is decided inside
# NeuralMap and is not visible here — confirm.
som.plot_set_weight_vectors(cluster=0)
som.plot_node_weight_vector(node_index=(0, 0))