In [None]:
import sys
sys.path.insert(0, '../')

In [None]:
from neural_map import NeuralMap, _plot
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Read the tab-separated ancestry table.
df = pd.read_csv("datasets/ANCESTRY_LACRN.tsv", sep="\t")

# Columns that are dropped before building the training matrix.
l_columns = [
    'IndID',
    'SentrixID',
    'Sex',
    'Source',
    'Region',
    'Population',
    'Ancestry',
]

# Column whose values are used further down to label the samples on the map.
col = 'Ancestry'

# Every remaining column goes into the array handed to the map.
data = df.drop(l_columns, axis=1).values
data.shape

In [None]:
# One input variable per column of `data`, laid out on a 10 x 10 grid of nodes.
som = NeuralMap(
    variables=data.shape[1],
    columns=10,
    rows=10,
    metric='correlation',
)

In [None]:
# Train on the full matrix and evaluate on the same samples.
# `radius_decay_function='exponential'` was left commented out as an optional
# extra argument; it is not passed here.
som.train(
    data=data,
    eval_data=data,
    n_epochs=30,
    verbose=True,
)

In [None]:
# Overview plots for the trained map; nodes without samples are shown as well.
som.plot_analysis(
    data,
    min_cluster_size=10,
    display_empty_nodes=True,
)

In [None]:
# Per-sample label values and the distinct values found in the label column.
labels = df[col].values
types = df[col].unique()

# Number of distinct label values drawn together on a single map.
group_size = 3

# Walk over *all* distinct values in groups of `group_size`. The previous
# hard-coded `range(3)` only covered the first nine values and passed empty
# slices whenever fewer than seven distinct values were present.
for start in range(0, len(types), group_size):
    som.plot_labels(
        data,
        min_cluster_size=3,
        display_empty_nodes=False,
        labels=labels,
        labels_to_display=types[start:start + group_size],
    )

In [None]:
# Use the names of the non-metadata columns as headers for the weight plots.
som.plot_weights(
    headers=df.drop(l_columns, axis=1).columns,
    size=5,
)

In [None]:
# One map per input variable, with that variable's column of `data` attached
# as the value to display.
for feature_idx in range(som.variables):
    feature_values = data[:, feature_idx]
    som.plot_map_value(
        data,
        attached_values=feature_values,
        min_cluster_size=3,
        display_empty_nodes=False,
        size=5,
    )

In [None]:
# Names of the columns that were fed to the map (metadata columns removed).
labels = df.drop(columns=l_columns).columns

# `plt.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` accepts the same (name, lut) arguments and yields the same
# colormap resampled to one colour per label.
_plot.bubbles(
    diameters=som.weights.sum(axis=-1),
    positions=som.relative_positions,
    data=som.weights,
    borders=True,
    color_map=plt.get_cmap('Accent', len(labels)),
    labels=labels,
)

In [None]:
# Detailed unified distance matrix of the map, drawn with borders.
som.plot_unified_distance_matrix(
    detailed=True,
    borders=True,
)

In [None]:
# Number of clusters requested from both algorithms below.
clusters = 4

# --- K-means: cluster, then colour each tile by its assigned label.
labels, centers = som.k_means(clusters)
_plot.tiles(
    som.positions,
    som.hexagonal,
    labels,
    norm=False,
    title='Clustering con K-means. Valor de K = ' + str(clusters),
    labels=list(range(clusters)),
)

# --- K-medoids: same plot, using the labels from the second algorithm.
labels, centers = som.k_medoids(clusters)
_plot.tiles(
    som.positions,
    som.hexagonal,
    labels,
    norm=False,
    title='Clustering con K-medoids. Valor de K = ' + str(clusters),
    labels=list(range(clusters)),
)

In [None]:
# Plot the weight vectors for clusters 0 and 1.
# NOTE(review): which clustering these indices refer to is not visible in this
# notebook -- presumably the most recent one computed on `som`; confirm.
som.plot_cluster_weights_vectors(cluster=0)
som.plot_cluster_weights_vectors(cluster=1)

In [None]:
from BootstrapCCpy import BootstrapCCpy
from sklearn.cluster import KMeans
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram

# Largest number of clusters evaluated, and number of bootstrap samplings
# (one per node of the map).
K_CLUSTERS = 10
B_SAMPLINGS = som.columns * som.rows

# Hand over the estimator class itself; this is the very object that
# `KMeans().__class__` evaluated to, without building a throwaway instance.
bcc = BootstrapCCpy(cluster=KMeans, K=K_CLUSTERS, B=B_SAMPLINGS)

# Flatten the map weights into a 2-D array with `som.variables` columns.
bcc.fit(som.weights.reshape((-1, som.variables)), verbose=True)
bcc.plot_consensus_heatmap()
# NOTE(review): the return value is not captured or shown here because this is
# not the last expression of the cell -- confirm whether it should be displayed.
bcc.get_best_k()

for clusters in range(2, K_CLUSTERS + 1):
    # The loop starts at 2 clusters, so `clusters - 2` is the matching
    # zero-based position in `bcc.Mk`.
    consensus_linkage = linkage(bcc.Mk[clusters - 2], 'single')
    # To inspect the hierarchy itself, draw `dendrogram(consensus_linkage)`.

    # `fcluster` numbers clusters from 1; shift to 0-based ids and arrange
    # them as a (columns, rows) grid.
    labels = (
        fcluster(consensus_linkage, clusters, criterion='maxclust') - 1
    ).reshape((som.columns, som.rows))

    _plot.tiles(
        som.positions,
        som.hexagonal,
        labels,
        norm=False,
        title='BootstrapCC with K = '+ str(clusters),
        labels=list(range(labels.max() + 1)),
        # Labels are non-negative after the shift, so this is an all-ones array.
        intensity=(labels >= 0.) * 1.,
        size=4,
    )
    plt.show()