In [17]:
#Importing common libraries
import pandas as pd
import numpy as np
import collections
import copy
import random
import matplotlib.pyplot as plt
from datetime import datetime


#Importing text preprocessing methods:
from nlp.preprocessing import (
    clean_text,
    preprocess,
    tokenize,
    preprocess_document,
    tokenize_document,
    get_stopwords, 
    lemmatization_document,
    get_canonical_words)
from textpp_ptbr.preprocessing import TextPreProcessing as tpp
from gensim.parsing.preprocessing import (
    strip_multiple_whitespaces,
    strip_non_alphanum,
    strip_punctuation2,
    strip_short)

#Importing libraries to check spelling:
from item.spellcheckeropt import SpellcheckerOpt
from item.utils import get_tokens_set


#Importing text analysis:
from nlp.utils import (
    plot_histogram,
    get_completetext,
    plot_wordcloud,
    print_statistics,
    groups_frequency_sort)

#Importing text statistics:
from nlp.text_statistics import (
    count_tokens,
    unique_tokens
)

#Importing baseline approaches for clustering:
from nlp.grouping import (
    get_groups,
    get_groups_size,
    get_unigram_groups,
    get_two_tokens_groups,
    get_first_token_groups,
    get_bigram_groups,
    get_first_two_groups,
    groups_frequency_sort
)

#Importing the structure of the descriptions:
from utils.read_files import (
    get_items)
from item.item_list import (
    ItemList,
    Item
)

#Importing xmeans through pyclustering library:
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer;
from pyclustering.cluster.xmeans import xmeans

#Importing the HDBSCAN stand-alone method:
import hdbscan
#Importing UMAP dimensionality reduction method:
import umap

#Importing the multiprocessing library:
import multiprocessing

#Importing the libraries to save the final results and make it possible to load them:
import nltk
import pickle


def get_list_of_tokens_med_n(group_desc, itemlist, medicines, canonical_form, word_class):
    """Return the sorted, de-duplicated medicine and noun tokens of a group.

    Every token in the ``'palavras'`` entry of each description listed in
    ``group_desc`` is kept when it is in ``medicines`` or when ``word_class``
    tags it as ``'N'``.

    ``canonical_form`` is not used here; it is part of the signature shared
    by the token-selection functions.
    """
    selected = {
        token
        for desc_id in group_desc
        for token in itemlist.items_list[desc_id].get_item_dict()['palavras']
        if token in medicines or (token in word_class and word_class[token] == 'N')
    }

    return sorted(selected)


def get_list_of_tokens_med_n_v_a_num(group_desc, itemlist, medicines, canonical_form, word_class):
    """Return the sorted, de-duplicated tokens of a group for the wider BoW.

    A token from the ``'palavras'`` entry of a description is kept when it is
    in ``medicines``, or when its ``word_class`` tag is ``'N'``, ``'A'`` or
    ``'V'``, or when that tag is a numeric string.

    ``canonical_form`` is not used here; it is part of the signature shared
    by the token-selection functions.
    """
    def is_selected(token):
        if token in medicines:
            return True
        if token not in word_class:
            return False
        tag = word_class[token]
        # NOTE(review): the numeric test is applied to the tag, not to the
        # token itself - confirm this is how numerals are encoded.
        return tag in ('N', 'A', 'V') or tag.isnumeric()

    selected = {
        token
        for desc_id in group_desc
        for token in itemlist.items_list[desc_id].get_item_dict()['palavras']
        if is_selected(token)
    }

    return sorted(selected)
    
    
def get_list_of_tokens_med_n_v_a_num_adv(group_desc, itemlist, medicines, canonical_form, word_class):
    """Return the sorted, de-duplicated tokens of a group for the widest filtered BoW.

    A token from the ``'palavras'`` entry of a description is kept when it is
    in ``medicines``, or when its ``word_class`` tag is one of ``'N'``,
    ``'A'``, ``'V'``, ``'DET+Num'`` or ``'ADV'``, or when that tag is a
    numeric string.

    ``canonical_form`` is not used here; it is part of the signature shared
    by the token-selection functions.
    """
    accepted_tags = ('N', 'A', 'V', 'DET+Num', 'ADV')

    def is_selected(token):
        if token in medicines:
            return True
        if token not in word_class:
            return False
        tag = word_class[token]
        # NOTE(review): the numeric test is applied to the tag, not to the
        # token itself - confirm this is how numerals are encoded.
        return tag in accepted_tags or tag.isnumeric()

    selected = {
        token
        for desc_id in group_desc
        for token in itemlist.items_list[desc_id].get_item_dict()['palavras']
        if is_selected(token)
    }

    return sorted(selected)

    
def get_all_tokens(group_desc, itemlist, medicines, canonical_form, word_class):
    """Return every token of the group's descriptions, sorted.

    Tokens are read from the ``'palavras'`` entry of each description listed
    in ``group_desc``. Unlike the filtered selectors, no de-duplication is
    done: a token appears once per occurrence.

    ``medicines``, ``canonical_form`` and ``word_class`` are not used here;
    they are part of the signature shared by the token-selection functions.
    """
    every_token = [
        token
        for desc_id in group_desc
        for token in itemlist.items_list[desc_id].get_item_dict()['palavras']
    ]

    return sorted(every_token)

def define_zero_matrix(group_desc, itemlist, medicines, canonical_form, word_class, tokens):
    """Build an all-zero bag-of-words matrix and its vocabulary for one group.

    Parameters
    ----------
    group_desc : ids of the descriptions in the group (one matrix row each).
    itemlist, medicines, canonical_form, word_class : forwarded unchanged to
        the token-selection function.
    tokens : selects the vocabulary:
        0 - medicines and nouns;
        1 - all words;
        2 - medicines, nouns, adjectives, verbs and numerals;
        3 - same as 2 plus adverbs;
        anything else - falls back to all words.

    Returns
    -------
    (matrix_bow, list_words, rows, columns) where ``matrix_bow`` is a
    ``rows x columns`` array of zeros, ``rows == len(group_desc)`` and
    ``columns == len(list_words)``.
    """
    if tokens == 0:
        print('BOW composed only by nouns and medicines/medical terms of the descriptions!')
        list_words = get_list_of_tokens_med_n(group_desc, itemlist, medicines, canonical_form, word_class)
    elif tokens == 1:
        print('BOW composed by all words of the descriptions!')
        # The original called an undefined name here (NameError); the
        # all-words selector defined in this file is get_all_tokens.
        list_words = get_all_tokens(group_desc, itemlist, medicines, canonical_form, word_class)
    elif tokens == 2:
        print('BOW composed only by nouns, adjectives, numerals, verbs and medicines/medical terms of the descriptions!')
        list_words = get_list_of_tokens_med_n_v_a_num(group_desc, itemlist, medicines, canonical_form, word_class)
    elif tokens == 3:
        print('BOW composed only by nouns, adjectives, numerals, adverbs, verbs and medicines/medical terms of the descriptions!')
        list_words = get_list_of_tokens_med_n_v_a_num_adv(group_desc, itemlist, medicines, canonical_form, word_class)
    else:
        print('Option not available! Using all words of the descriptions to make the bag...')
        list_words = get_all_tokens(group_desc, itemlist, medicines, canonical_form, word_class)

    # NOTE(review): get_all_tokens does not de-duplicate, so for the
    # all-words options the vocabulary (and the column count) may contain
    # repeated words.
    rows = len(group_desc)
    columns = len(list_words)
    matrix_bow = np.zeros((rows, columns))
    print('Rows = ' + str(rows))
    return matrix_bow, list_words, rows, columns

def define_description_bow(group_desc, itemlist, medicines, canonical_form, word_class, tokens):
    """Build the binary bag-of-words matrix of one group of descriptions.

    Parameters
    ----------
    group_desc : ids of the descriptions in the group; row ``i`` of the
        result corresponds to the ``i``-th id.
    itemlist, medicines, canonical_form, word_class, tokens : forwarded to
        ``define_zero_matrix`` to choose the vocabulary.

    Returns
    -------
    (bow, rows, columns, descs) where ``bow[i, k]`` is 1.0 when the ``i``-th
    description contains the ``k``-th vocabulary word, and ``descs`` is a
    ``rows x 3`` object array holding, per row, the description id, its
    ``'original'`` text and its ``'original_prep'`` value flattened to a
    plain string.
    """
    zeros, list_words, rows, columns = define_zero_matrix(
        group_desc, itemlist, medicines, canonical_form, word_class, tokens)

    # Word -> column lookup built once. setdefault keeps the FIRST position
    # of a repeated word, which is exactly what list.index() returned, while
    # avoiding a linear scan of the vocabulary for every word.
    column_of = {}
    for col, word in enumerate(list_words):
        column_of.setdefault(word, col)

    preproc_descs = np.empty((rows, 1), dtype='object')
    original_descs = np.empty((rows, 1), dtype='object')
    ids_descs = np.empty((rows, 1), dtype='object')

    for i, desc_id in enumerate(group_desc):
        item_dict = itemlist.items_list[desc_id].get_item_dict()
        words = item_dict['palavras']
        desc_original = item_dict['original']
        desc_prep = item_dict['original_prep']
        # Strip the list punctuation from the printed form of the
        # preprocessed description so only its words remain.
        desc_prep_rep = str(desc_prep).replace('\'', '').replace('[', '').replace(']', '').replace(',', '')

        original_descs[i, 0] = desc_original
        preproc_descs[i, 0] = desc_prep_rep
        ids_descs[i, 0] = desc_id

        for w in words:
            col = column_of.get(w)
            if col is not None:
                zeros[i, col] = 1.0

    result_descs = np.concatenate((original_descs, preproc_descs), axis=1)
    result_descs_w_ids = np.concatenate((ids_descs, result_descs), axis=1)

    return zeros, rows, columns, result_descs_w_ids

def cluster_by_xmeans(bow, number_of_descriptions):
    """Cluster the bag of words with X-Means and return its clusters.

    The maximum number of clusters (``kmax``) is one per 30 descriptions,
    rounded. The value returned is whatever ``get_clusters()`` yields for the
    processed X-Means instance.
    """
    max_clusters = round(number_of_descriptions / 30)

    model = xmeans(bow, kmax=max_clusters, ccore=False)
    model.process()

    return model.get_clusters()

def transform_sklearn_to_pyclustering(output):
    """Convert a per-sample label sequence into lists of sample indices.

    ``output[i]`` is the cluster label of sample ``i``. The result groups the
    sample indices by label, one list per label, ordered by the first
    appearance of each label.

    Returns
    -------
    (clusters, negative_key) where ``negative_key`` is the position inside
    ``clusters`` of the list holding label ``-1``, or ``-1`` when no sample
    carries that label.
    """
    members_by_label = {}
    for sample_idx in range(len(output)):
        members_by_label.setdefault(output[sample_idx], []).append(sample_idx)

    clusters = list(members_by_label.values())

    negative_key = -1
    for position, label in enumerate(members_by_label):
        if label == -1:
            negative_key = position

    return clusters, negative_key


def cluster_by_hdbscan(bow, employed_metric, groups_ft):
    """Cluster the bag of words with HDBSCAN.

    HDBSCAN runs with ``min_cluster_size=30``, ``min_samples=1`` and the
    given metric. ``groups_ft`` is accepted but not used here.

    Returns
    -------
    (converted, labels) where ``labels`` is the raw ``fit_predict`` output
    and ``converted`` is the result of ``transform_sklearn_to_pyclustering``
    applied to those labels.
    """
    model = hdbscan.HDBSCAN(min_cluster_size=30, metric=employed_metric, min_samples=1)
    labels = model.fit_predict(bow)

    return transform_sklearn_to_pyclustering(labels), labels
    

def check_hdbscan_metrics(metric):
    """Return ``metric`` if it is in the HDBSCAN whitelist, otherwise ``'l2'``."""
    supported = (
        'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'dice', 'euclidean',
        'hamming', 'haversine', 'infinity', 'jaccard', 'kulsinski', 'l1', 'l2',
        'mahalanobis', 'manhattan', 'matching', 'minkowski', 'p', 'pyfunc',
        'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener',
        'sokalsneath', 'wminkowski',
    )

    # Unknown metrics fall back to l2.
    return metric if metric in supported else 'l2'


def general_clustering(bow, groups_ft, number_of_descriptions, cluster_alg, cluster_alg_metric):
    """Run the clustering algorithm named by ``cluster_alg`` on ``bow``.

    * ``'xmeans'``  - X-Means; returns ``(clusters, None, None)``.
    * ``'hdbscan'`` - HDBSCAN with ``cluster_alg_metric`` (replaced by
      ``'l2'`` when it is not in the HDBSCAN whitelist).
    * anything else - HDBSCAN with the ``'l2'`` metric.

    For HDBSCAN the return value is ``(clusters, negative_position, labels)``:
    the lists of sample indices per cluster, the position of the ``-1``
    (outlier) list inside ``clusters``, and the raw label array.

    ``groups_ft`` is only forwarded to ``cluster_by_hdbscan``.
    """
    employed_metric = check_hdbscan_metrics(cluster_alg_metric)

    if cluster_alg == 'xmeans':
        print('Clustering by X-Means.')
        return cluster_by_xmeans(bow, number_of_descriptions), None, None

    if cluster_alg == 'hdbscan':
        print('Clustering by HDBSCAN with ' + employed_metric + ' metric.')
        metric_to_use = employed_metric
    else:
        print('Option not available. Employing HDBSCAN clustering algorithm with l2 metric.')
        metric_to_use = 'l2'

    converted, labels = cluster_by_hdbscan(bow, metric_to_use, groups_ft)
    return converted[0], converted[1], labels




def translate_id_to_descriptions(ids, descriptions_ids):
    """Map positional indices to the description ids stored at those positions.

    Returns ``[descriptions_ids[i] for each i in ids]``, preserving order.
    """
    return [descriptions_ids[position] for position in ids]

def check_umap_metrics(metric):
    """Return a metric name usable by UMAP, falling back to ``'cosine'``.

    ``metric`` is returned unchanged when it is in the whitelist below. The
    misspelling ``'russelrao'`` (which this whitelist used to contain) is
    normalised to ``'russellrao'``. Any other value yields ``'cosine'``.
    """
    # NOTE(review): UMAP is believed to register this metric under the key
    # 'russellrao' (double "l"), while the single-"l" spelling appears in its
    # documentation - confirm against the installed umap version.
    available_metrics = (
        'euclidean', 'manhattan', 'chebyshev', 'minkowski', 'canberra', 'braycurtis',
        'mahalanobis', 'wminkowski', 'seuclidean', 'cosine', 'correlation', 'haversine',
        'hamming', 'jaccard', 'dice', 'russellrao', 'kulsinski', 'll_dirichlet',
        'hellinger', 'rogerstanimoto', 'sokalmichener', 'sokalsneath', 'yule',
    )

    if metric == 'russelrao':
        # Keep accepting the old spelling, but hand UMAP the valid name.
        return 'russellrao'

    # Unknown metrics fall back to cosine.
    return metric if metric in available_metrics else 'cosine'

def dimensionality_reduction(bow, dr_alg, dr_n_comp, dr_metric):
    """Optionally reduce the dimensionality of a bag-of-words matrix.

    Parameters
    ----------
    bow : sequence whose first three entries are the matrix, its number of
        rows and its number of columns (as returned by
        ``define_description_bow``).
    dr_alg : ``'umap'`` to apply UMAP; any other value leaves the matrix
        untouched.
    dr_n_comp : number of UMAP components.
    dr_metric : UMAP metric, validated through ``check_umap_metrics``.

    Returns
    -------
    (matrix, rows, cols) - the reduced matrix (or the original one when no
    reduction was applied or UMAP failed) together with its shape.
    """
    bow_matrix = bow[0]
    # Default to the raw matrix so every path below has a valid result; the
    # original left this as None for an empty BoW under 'umap' and then
    # crashed on ``.shape``.
    bow_reduced = bow_matrix

    if dr_alg == 'umap':
        if bow[1] > 0 and bow[2] > 0:
            try:
                employed_metric = check_umap_metrics(dr_metric)
                bow_reduced = umap.UMAP(n_components=dr_n_comp, metric=employed_metric, low_memory=True).fit_transform(bow_matrix)
                print('UMAP employed')
            except Exception as exc:
                # Best effort: keep the raw bag of words, but report why the
                # reduction failed instead of hiding the cause.
                print('#####Exception occurred')
                print(repr(exc))
                bow_reduced = bow_matrix
        else:
            print('Empty bag of words. Skipping dimensionality reduction.')
    else:
        print('No dimensionality reduction employed. Using the traditional bag of words.')

    rows, cols = bow_reduced.shape

    return bow_reduced, rows, cols
        


def cluster_on_first_token_groups_bow(first_token_groups, itemlist, it_thread, lower, upper, 
                                      medicines, canonical_form, word_class, tokens,
                                      cluster_alg, cluster_alg_metric, 
                                      dr_alg, dr_n_comp, dr_metric, 
                                      Return_Dict, Return_Repres_Comp):
    """Re-cluster a slice of the first-token groups using a bag of words.

    Groups at positions ``lower..upper`` (inclusive) of ``first_token_groups``
    are processed. A group with more than 30 descriptions is turned into a
    bag of words, optionally reduced (``dr_alg``) and clustered
    (``cluster_alg``); each resulting cluster is stored under the key
    ``'<group>_<position>'``, or ``'<group>_-1'`` for the HDBSCAN outlier
    list. Smaller groups, and groups whose bag of words is empty, are copied
    through unchanged under their original key.

    Parameters
    ----------
    first_token_groups : mapping from group key to the ids of its descriptions.
    itemlist, medicines, canonical_form, word_class, tokens : forwarded to
        ``define_description_bow``.
    it_thread : identifier of this worker; used in log lines and as the key
        under which the results are stored.
    lower, upper : inclusive range of group positions to process.
    cluster_alg, cluster_alg_metric : forwarded to ``general_clustering``.
    dr_alg, dr_n_comp, dr_metric : forwarded to ``dimensionality_reduction``.
    Return_Dict : output mapping; ``Return_Dict[it_thread]`` receives the
        dictionary of new groups.
    Return_Repres_Comp : output mapping; ``Return_Repres_Comp[it_thread]``
        receives an array with one row per clustered description (group key,
        description id, original text, preprocessed text, the reduced
        coordinates and the group key concatenated with the cluster label),
        or ``None`` when no group produced such rows.
    """
    print(it_thread)
    # Keys of the groups and, position by position, the description ids of
    # each group.
    groups = list(first_token_groups.keys())
    group_descriptions = list(first_token_groups.values())
    # New grouping: first token refined by the traditional clustering.
    first_token_plus_bow_traditional_clustering = {}
    df_results = None
    start_it = lower
    # Never run past the last group, even if the requested range overshoots
    # (the original raised IndexError in that case).
    last_it = min(upper, len(groups) - 1)

    for ft_it in range(lower, last_it + 1):
        print(str(it_thread) + ': ' + str(start_it) + '/' + str(ft_it) + '/' + str(upper))
        group_key = groups[ft_it]
        group_desc_ids = group_descriptions[ft_it]

        # Only groups with more than 30 descriptions are clustered again.
        if len(group_desc_ids) <= 30:
            first_token_plus_bow_traditional_clustering[group_key] = group_desc_ids
            continue

        bow_raw = define_description_bow(group_desc_ids, itemlist, medicines, canonical_form, word_class, tokens)
        bow_w_dr = dimensionality_reduction(bow_raw, dr_alg, dr_n_comp, dr_metric)

        # Without rows or columns there is nothing to cluster: keep the
        # first-token group as it is.
        if not (bow_w_dr[1] > 0 and bow_w_dr[2] > 0):
            first_token_plus_bow_traditional_clustering[group_key] = group_desc_ids
            continue

        clusters_bow, negative_index, cluster_labels = general_clustering(
            bow_w_dr[0], group_key, len(group_desc_ids), cluster_alg, cluster_alg_metric)

        # The per-description report is only assembled when the matrix has
        # exactly dr_n_comp columns.
        if bow_w_dr[2] == dr_n_comp:
            if cluster_labels is None:
                # X-Means returns no label vector; derive one from the
                # cluster membership lists so the report can still be built
                # (the original failed on the concatenation below).
                cluster_labels = np.full(bow_w_dr[1], -1, dtype=int)
                for cluster_pos, members in enumerate(clusters_bow):
                    cluster_labels[np.asarray(members, dtype=int)] = cluster_pos

            first_token = np.full((bow_w_dr[1], 1), group_key, dtype='object')
            result_descs = np.concatenate((first_token, bow_raw[3]), axis=1)
            result_descs_w_dr = np.concatenate((result_descs, bow_w_dr[0]), axis=1)
            ft_plus_clusters = np.concatenate((first_token, np.c_[cluster_labels]), axis=1)
            ft_plus_clusters_merged = np.c_[["".join(i) for i in ft_plus_clusters[:, 0:].astype(str)]]
            result_descs_w_clusters = np.concatenate((result_descs_w_dr, ft_plus_clusters_merged), axis=1)
            if df_results is None:
                df_results = result_descs_w_clusters
            else:
                df_results = np.concatenate((df_results, result_descs_w_clusters), axis=0)

        for it, c in enumerate(clusters_bow):
            # Translate positions inside the group back to description ids.
            desc_ids = translate_id_to_descriptions(c, group_desc_ids)
            # The key suffix is the position of the cluster in clusters_bow,
            # except for the outlier list, which is always suffixed with -1.
            if it != negative_index:
                new_key = group_key + '_' + str(it)
            else:
                new_key = group_key + '_-1'
            first_token_plus_bow_traditional_clustering[new_key] = desc_ids

    # Hand the results of this worker back through the shared mappings.
    Return_Repres_Comp[it_thread] = df_results
    Return_Dict[it_thread] = first_token_plus_bow_traditional_clustering


def get_ranges(group_len, n_processes):
    """Split the group positions ``0..group_len-1`` into per-process ranges.

    Parameters
    ----------
    group_len : number of groups to distribute.
    n_processes : number of worker processes (must be at least 1).

    Returns
    -------
    For ``n_processes == 1`` the pair of integers ``(0, group_len - 1)``.
    Otherwise ``(lower, upper)``, two lists of length ``n_processes`` giving
    the inclusive bounds of each process. A process whose ``lower`` exceeds
    its ``upper`` has no work.

    Raises
    ------
    ValueError if ``n_processes`` is smaller than 1.
    """
    if n_processes < 1:
        raise ValueError('n_processes must be at least 1')

    last_index = group_len - 1
    if n_processes == 1:
        return 0, last_index

    step = group_len // n_processes
    lower = [0] * n_processes
    upper = [0] * n_processes
    upper[0] = step

    for i in range(1, n_processes):
        lower[i] = upper[i - 1] + 1
        # Odd-numbered processes take one extra group.
        upper[i] = upper[i - 1] + step + (i % 2)

    # The stepping above only lands exactly on the last group for some
    # group_len / n_processes combinations. Clamp every bound and pin the
    # final one so the ranges always cover 0..group_len-1 and never run past
    # it, instead of relying on a hand-tuned "-1".
    upper = [min(bound, last_index) for bound in upper]
    upper[-1] = last_index

    return lower, upper

In [10]:
# Shared dictionaries through which the worker processes return their results.
manager_results = multiprocessing.Manager()
Return_Dict = manager_results.dict()

manager_repres = multiprocessing.Manager()
Return_Repres_Comp = manager_repres.dict()

# Bookkeeping for the launched processes.
jobs = []
n_threads = 1

# Medical terms (medicines, drugs, etc.).
medicines = get_tokens_set('../dados/palavras/medications.txt')
# Canonical forms of the words and their classes.
canonical_form, word_class = get_canonical_words()
print("Read Canonical terms.")

# Preprocessed item descriptions.
itemlist = ItemList()
itemlist.load_items_from_file('../dados/items_preprocessed_sp0_sc1.zip', original=True)
print('Read data preprocessed')

# Group the descriptions by their first token.
first_token_groups = itemlist.get_first_token_groups()
group_len = len(first_token_groups)

# Shuffle the group keys (twice, as in the original run).
keys_ft = list(first_token_groups.keys())
for _ in range(2):
    random.shuffle(keys_ft)

# Rebuild the grouping following the shuffled key order.
first_token_groups_new = {key: first_token_groups[key] for key in keys_ft}

# Ranges of group positions each process will work on.
thread_ranges = get_ranges(group_len, 10)
print('Read ranges')
print(thread_ranges)

Read Canonical terms.
Read data preprocessed
Read ranges
([0, 1381, 2762, 4142, 5523, 6903, 8284, 9664, 11045, 12425], [1380, 2761, 4141, 5522, 6902, 8283, 9663, 11044, 12424, 13804])


In [18]:
# Launch one worker over the first range of groups: BoW of medicines and
# nouns (tokens=0), HDBSCAN with the l2 metric, UMAP with 15 components and
# the cosine metric.
p = multiprocessing.Process(target=cluster_on_first_token_groups_bow, 
                            args=(first_token_groups_new, itemlist, 0, thread_ranges[0][0], 
                                  thread_ranges[1][0], medicines, canonical_form, word_class, 
                                  0, 'hdbscan', 'l2', 
                                  'umap', 15, 'cosine',
                                  Return_Dict, Return_Repres_Comp))


jobs.append(p)
p.start()

# Wait for every launched worker. Iterating over ``jobs`` (instead of the
# first ``n_threads`` entries) also waits for the process started above when
# this cell is re-run and ``jobs`` already holds earlier, finished processes.
for job in jobs:
    job.join()

0
0: 0/0/1380
0: 0/1/1380
0: 0/2/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['panonico' 12167
  'Panotico rapido, Conjunto para coloracao rapida em hematologia. O kit e composto por tres corantes: Rapido 1:agente fixador, Rapido 2: solucao corante'
  'panonico rapido conjunto para coloracao rapido hematologia kit composto tres corante 1 agente fixador 2 solucao corante'
  14.835219383239746 6.561183929443359 -12.502057075500488
  19.035358428955078 6.168514728546143 15.142071723937988
  5.546876430511475 -3.7879538536071777 1.4984290599822998
  -4.567113399505615 16.542892456054688 12.217392921447754
  1.016654133796692 11.11926555633545 2.8754892349243164 'panonico-1']
 ['panonico' 38857 'PANÓTICO - 3X500 ML' 'panonico 3 x 500 ml'
  13.857443809509277 6.243274211883545 -13.020267486572266
  18.501798629760742 6.056699752807617 14.973553657531738
  5.268158912658691 -3.941098213195801 2.226543426

  1.2143986225128174 11.43726634979248 3.2818427085876465 'panonico-1']]
0: 0/3/1380
0: 0/4/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['resultado' 30454 'RESULTADO DE EXAME C/100FLS FRENTE E VERSO 33X24CM'
  ... 4.456676006317139 9.22832202911377 'resultado-1']
 ['resultado' 54192 'RESULTADO DE EXAME DE FEZES' ... 6.941014289855957
  7.334301948547363 'resultado-1']
 ['resultado' 65879 'RESULTADO DE EXAME FESES C/100 FOLHAS' ...
  7.1768293380737305 7.482198715209961 'resultado-1']
 ...
 ['resultado' 1457491
  'RESULTADO DA MAMOGRAFIA, (PROGRAMA NACIONAL DE CONTROLE DO CANCER DO COLO DE ÚTERO E DE MAMA - COR AZUL.,BLOCO COM 100 UNIDADES'
  ... 7.16309928894043 7.148916244506836 'resultado-1']
 ['resultado' 1467742 'RESULTADO DE EXAMES LABORATOR' ...
  6.510304927825928 6.771862030029297 'resultado-1']
 ['resultado' 1479777 'RESULTADO DE EXAMES DE FEZES 01 VIA F8' ...
  6.913076877593994 7.238456

  11.865950584411621 'micro-usb-1']]
0: 0/22/1380
0: 0/23/1380
0: 0/24/1380
0: 0/25/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['cha' 4687 'Chá de Limão-100g. Caixa contendo 15(quinze) saquinhos' ...
  1.0423141717910767 1.6062290668487549 'cha6']
 ['cha' 13326 'CHÁ MATTE EM CAIXA COM 250 GRAMAS' ... 8.885299682617188
  5.892728805541992 'cha5']
 ['cha' 13779
  '(00000572)Chá de erva cidreira, cx contendo 10 saches de 20gr' ...
  0.8056888580322266 1.527966856956482 'cha2']
 ...
 ['cha' 1486147 'CHÁ DE CANELA EMBALAGEM C/20 SACHÊ' ...
  2.7389261722564697 1.7004363536834717 'cha7']
 ['cha' 1489252
  'CHA DE ERVA DOCE - CHA DE ERVA DOCE EM SACHE 1 GRAMA, CAIXA COM 10 UNIDADES'
  ... 6.220381259918213 7.134146213531494 'cha3']
 ['cha' 1489275
  'CHA MATE 100% PURO, EM SACHE DE 1,5 GRAMAS - CHA MATE 100% PURO, EM SACHE DE 1,5 GRAMAS - CAIXA COM 15 UNIDADES'
  ... 2.3784656524658203 1.475308537483215

  4.8940887451171875 'esteira-1']]
0: 0/71/1380
0: 0/72/1380
0: 0/73/1380
0: 0/74/1380
0: 0/75/1380
0: 0/76/1380
0: 0/77/1380
0: 0/78/1380
0: 0/79/1380
0: 0/80/1380
0: 0/81/1380
0: 0/82/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['esomeprazol' 2653 'ESOMEPRASOL 40MG...' ... 2.753852367401123
  6.097195625305176 'esomeprazol0']
 ['esomeprazol' 15233 'ESOMEPRAZOL MAGNESIO 40 MG - CX C/ 28 cpr.' ...
  -1.2988982200622559 8.657147407531738 'esomeprazol1']
 ['esomeprazol' 38712 'ESOMEPRAZOL 20MG C/ 14 COMP.' ...
  3.0308127403259277 6.3055739402771 'esomeprazol0']
 ...
 ['esomeprazol' 1444151 'ESOMEPRAZOL MAGNESIO 20MG - CX C/ 28 Comp.' ...
  -1.3009966611862183 8.89163875579834 'esomeprazol1']
 ['esomeprazol' 1446063 'ESOMEPRASOL 20MG' ... 2.772287607192993
  6.251501083374023 'esomeprazol0']
 ['esomeprazol' 1465945 'ESOMEPRAZOL40MG' ... 2.78525972366333
  6.4178338050842285 'esomeprazol0']]
0: 0/83/

  5.929913520812988 0.4952240586280823 -0.276028573513031 'passar-1']]
0: 0/89/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['olmesartana' 29275
  '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000990003 OLMESARTANA + ANLODIPINO 40MG + 5MG'
  ... 4.455138683319092 12.631556510925293 'olmesartana0']
 ['olmesartana' 32512
  'Olmesartana medoxomila 40 mg + Hidroclorotiazida 25 mg comp. (Ref. Olmetec)'
  ... -3.476170778274536 1.669655442237854 'olmesartana1']
 ['olmesartana' 77876 'OLMESARTANA + ANLODIPINO 40mg + 10 mg' ...
  4.474453926086426 12.651716232299805 'olmesartana0']
 ...
 ['olmesartana' 1475726 'OLMESARTANA MEDOXOMILA 40MG' ...
  4.229830741882324 12.517619132995605 'olmesartana0']
 ['olmesartana' 1482126
  'OLMESARTANA MEDOXOMILA ASSOCIADA COM HIDROCLOROTIAZIDA 40MG + 25 MG COMPRIMIDO OLMETEC HCT OU SIMILARES'
  ... -3.2357215881347656

  -0.44441285729408264 'agrofilito-1']]
0: 0/118/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['madeirit' 28216 'MADEIRIT NAO COMPENSADO 6 MM' ... 3.579251289367676
  10.994988441467285 'madeirit-1']
 ['madeirit' 70994 'MADEIRIT NAO COMPENSADO 10 MM' ... 3.498610496520996
  11.347664833068848 'madeirit-1']
 ['madeirit' 72272 '002662 MADEIRIT COLA BRANCA 14MM' ...
  4.627657413482666 -15.625581741333008 'madeirit-1']
 ...
 ['madeirit' 1395559 '013391 MADEIRIT FENOL 220 X110 X 14 MM' ...
  4.696328163146973 -15.960235595703125 'madeirit-1']
 ['madeirit' 1405399 '013211 MADEIRIT FENOL 220 X 110 X 10 MM' ...
  4.774349212646484 -16.159421920776367 'madeirit-1']
 ['madeirit' 1478092 'MADEIRIT RESINADO 5MM' ... 4.742077827453613
  -16.204051971435547 'madeirit-1']]
0: 0/119/1380
0: 0/120/1380
0: 0/121/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clusterin

failed. This is likely due to too small an eigengap. Consider
adding some noise or jitter to your data.

Falling back to random initialisation!


UMAP employed
Clustering by HDBSCAN with l2 metric.
[['fta' 16311 'FTA-ABS(IGM)' 'fta abs igm' -1.5392557382583618
  10.657992362976074 7.172056198120117 5.041105270385742
  2.119753837585449 1.672331690788269 2.5515449047088623 8.73355484008789
  4.598102569580078 7.99745512008667 5.8975605964660645
  11.070982933044434 1.7286959886550903 4.4158525466918945
  10.865522384643555 'fta-1']
 ['fta' 78924 'FTA-ABS IgG -  EXAME LABORATORIAL'
  'fta abs igg exame laboratorial' -1.5773173570632935 9.718002319335938
  7.408993244171143 5.292059898376465 1.5570615530014038
  2.105377435684204 3.146930694580078 9.098342895507812 5.487890720367432
  7.339162349700928 5.825115203857422 11.095754623413086
  1.3582719564437866 4.035058498382568 10.9534912109375 'fta-1']
 ['fta' 149066 'FTA - ABS - IGG (SIFILIS)' 'fta abs igg sifilis'
  -1.4994715452194214 9.587437629699707 7.330832004547119
  5.206493854522705 1.8819509744644165 2.146115779876709
  3.4256720542907715 8.582680702209473 5.296918392181

  10.988873481750488 'fta-1']]
0: 0/133/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['banda' 4875
  '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010450035 BANDA PRE-MOLAR No 24'
  ... 8.725753784179688 4.935906887054443 'banda3']
 ['banda' 14440 "'BANDA MATRIZ, ACO INOX" ... 9.451590538024902
  8.026189804077148 'banda1']
 ['banda' 20244
  '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010450032 BANDA BU/REPOSICAO 41 - PCT C/ 10'
  ... 8.745811462402344 5.09864616394043 'banda2']
 ...
 ['banda' 1478364 'BANDA MATRIZ DE ACO INOX 0,05 X 7 X 500mm' ...
  9.062122344970703 8.233363151550293 'banda1']
 ['banda' 1484857 'BANDA MATRIZ INOX 7MM' ... 8.978955268859863
  8.112387657165527 'banda1']
 ['banda' 1487813 'BANDA MATRIZ ACO 7MM' ... 9.05603313446045
  8.208866119384766 'banda1']]
0: 0/134/

  5.618170261383057 7.64965295791626 3.812831401824951 'metalico-1']]
0: 0/159/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['infravermelho' 7540 'INFRAVERMELHO DE PEDESTAL'
  'infravermelho pedestal' 9.780804634094238 -5.880545616149902
  8.18947982788086 -1.586172103881836 7.880838871002197 -9.31943416595459
  9.197304725646973 -1.9695608615875244 3.0837221145629883
  3.5518641471862793 2.460380792617798 2.814521551132202
  1.386799693107605 23.348934173583984 -4.794140338897705
  'infravermelho-1']
 ['infravermelho' 12719 'INFRAVERMELHO PEDESTAL BIVOLT'
  'infravermelho pedestal bivolt' 9.578084945678711 -5.812255382537842
  7.834722518920898 -1.4556138515472412 7.754228115081787
  -9.48832893371582 9.453038215637207 -2.013002872467041
  3.210726022720337 3.545138359069824 2.7153282165527344
  2.3268778324127197 1.5864620208740234 23.055278778076172
  -4.589056491851807 'infravermelho-1']
 ['inf

  -4.498820781707764 'infravermelho-1']]
0: 0/160/1380
0: 0/161/1380
0: 0/162/1380
0: 0/163/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['formoterol' 5426
  'FORMOTEROL+BUDESONIDA  12/400 MCG 60 CAPS SEM INALADOR' ...
  6.085145473480225 0.34533557295799255 'formoterol1']
 ['formoterol' 7133 'FORMOTEROL+BUDESONID' ... 1.9568595886230469
  4.030837059020996 'formoterol0']
 ['formoterol' 16383 'FORMOTEROL 6MCG+BUDESONIDA 200' ...
  1.71892511844635 3.6516644954681396 'formoterol0']
 ...
 ['formoterol' 1467748
  'FORMOTEROL FUMARATO DIIDRATADO 12 MCG + BUDESONIDA 400 MCG INALADOR COM 60 DOSES + INALADOR'
  ... 2.311051845550537 5.59241247177124 'formoterol1']
 ['formoterol' 1477977 'FORMETEROL + BUDESONIDA 12/400' ...
  1.7457025051116943 3.9693751335144043 'formoterol0']
 ['formoterol' 1479311
  'FORMOTEROL + BUDESONIDA 12/400 MCG 60 DOSES COM INALADOR ( ESTE ITEM PODERA SER TANTO A CAPSULA QUANTO O

  6.667416572570801 4.6125807762146 'rebolo-1']]
0: 0/176/1380
0: 0/177/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['bendix' 1487 'BENDIX ZEN 1022' ... -9.678337097167969
  1.2475333213806152 'bendix1']
 ['bendix' 1830 'BENDIX PARTIDA PATROL KOMATSU' ... -8.842388153076172
  1.6764802932739258 'bendix1']
 ['bendix' 3521 'Bendix motor partida zen/ kangoo 1.0 2015' ...
  -0.3269517421722412 3.498277187347412 'bendix0']
 ...
 ['bendix' 1464956 'BENDIX PA CARREGADEIRA FR 10' ... -9.042466163635254
  1.0708767175674438 'bendix1']
 ['bendix' 1468612 'BENDIX UNO MILLE FIRE 2008' ... -0.27952373027801514
  5.037013530731201 'bendix2']
 ['bendix' 1469427 'BENDIX PA CARREGADEIRA 924G' ... -8.975621223449707
  1.0839526653289795 'bendix1']]
0: 0/178/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
#####Exception occurred
Clustering by HDBSCAN with l2 metric.
0: 0/179/1380
0: 

  -5.862637519836426 'flanela0']]
0: 0/221/1380
0: 0/222/1380
0: 0/223/1380
0: 0/224/1380
0: 0/225/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['baralho' 5392 '3-baralho comum (cartas) - 11' ... 3.450188636779785
  10.075451850891113 'baralho-1']
 ['baralho' 25297 'Baralho' ... 2.2838785648345947 14.023808479309082
  'baralho-1']
 ['baralho' 45086
  'BARALHO 139 EM MATERIAL DE COMPOSICAO EM PAPEL CHOUCHE  NUMERO DE CARTAS 55  ACABAMENTO VERNIZ ATOXI EM MATERIAL DE COMPOSICAO EM PAPEL CHOUCHE  NUMERO DE CARTAS 55  ACABAMENTO VERNIZ ATOXICO  TAMANHO DO NAIPE GRANDE  TIPO DA EMBALAGEM CAIXINHA UNITARIA C/ CELOFANE E FEICHO P/ ABERTURA  ALTURA 3 1CM LARGURA 18CM COMPRIMENTO 22CM  PESO 100G ME'
  ... 3.265578031539917 10.014938354492188 'baralho-1']
 ...
 ['baralho' 1406174
  'BARALHO DOSD COMPORTAMENTOS: EFEITO BUMERANGUEAssim,instrumentaliza-se aqui o acesso aos computadores desadaptativos,muita veze

  9.631197929382324 5.811685562133789 'colar13']]
0: 0/276/1380
0: 0/277/1380
0: 0/278/1380
0: 0/279/1380
0: 0/280/1380
0: 0/281/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['dente' 1904
  'DENTES ARTIFICIAIS ANTER/SUPER - Dentes artificiais confeccionados em resina acrilica, boca anterior superior com 6 dentes, cor 66'
  ... 0.8477464318275452 3.032728433609009 'dente4']
 ['dente' 6071
  'DENTE ANTERIOR SUPERIOR 3 D- COR 62 - EMBALAGEM COM 6 DENTES - Indicado para uso em protese total ou parcial ou removivel.'
  ... 11.778725624084473 10.483426094055176 'dente0']
 ['dente' 6924 'Dente de estoque anterior superior 66' ...
  -0.3612529933452606 3.8604373931884766 'dente3']
 ...
 ['dente' 1479457 'DENTE  A25 SUPERIOR COR 62' ... 11.428205490112305
  10.49409008026123 'dente0']
 ['dente' 1484788 'DENTES POSTERIORES 32 L SUPERIOR COR 62' ...
  0.4258468747138977 3.5519511699676514 'dente4']
 ['dente' 

  5.333809852600098 2.554849624633789 15.890483856201172 'lentilha-1']]
0: 0/296/1380
0: 0/297/1380
0: 0/298/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['motocicleta' 6750
  'MOTOCICLETA: 125 CILINDRAS (MÍNIMO) CARACTERÍSTICAS MÍNIMAS: - 0 KM, - ANO/MODELO 2015, CAMBIO DE 05 VELOCIDADES, CAPACIDADE DE TANQUE DE COMBUSTÍVEL 10 LITROS (MÍNIMO), PNEU DIANTEIRO MÍNIMO 80/90 ARO MÍNIMO 19?, PNEU TRASEIRO 110/80 ARO MÍNIMO 17?, DEMAIS EQUIPAMENTOS DE SEGURANÇA EXIGIDOS POR LEI'
  ... 6.3744659423828125 5.523589611053467 'motocicleta0']
 ['motocicleta' 8080 'MOTOCICLETA 150 CILINDRADAS.' ...
  6.6266608238220215 10.645438194274902 'motocicleta1']
 ['motocicleta' 15776 'MOTOCICLETA 150 CC, O KM' ... 6.666845798492432
  10.714300155639648 'motocicleta1']
 ...
 ['motocicleta' 1433106 'MOTOCICLETAS YAMAHA VLR HORA TRABALHADA' ...
  6.591785907745361 10.013365745544434 'motocicleta1']
 ['motocicleta' 1436455

  6.919145584106445 -1.970624566078186 -0.09719434380531311 'rufo-1']]
0: 0/302/1380
0: 0/303/1380
0: 0/304/1380
0: 0/305/1380
0: 0/306/1380
0: 0/307/1380
0: 0/308/1380
0: 0/309/1380
0: 0/310/1380
0: 0/311/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['chato' 4625 'FERRO CHATO MACICO P/ SER-' ... 5.617696285247803
  2.668018341064453 'chato1']
 ['chato' 29313 'FERRO CHATO 1" X  3/16 - 6 METROS' ... 5.062911033630371
  2.429664373397827 'chato1']
 ['chato' 38124 '014456 CHITINHA DE 02 LARGURAS PECA' ...
  5.230817794799805 2.840590000152588 'chato1']
 ...
 ['chato' 1460204 'FERRO CHATO 3/4 X 3/16 (BARRA C/6M)' ...
  8.533851623535156 6.263927459716797 'chato0']
 ['chato' 1463704 'FERRO CHATO 1" X ¼ - 6 METROS' ... 5.245803356170654
  2.837477922439575 'chato1']
 ['chato' 1485769 'FERRO CHATO 1.1/2 pol. X 1/8 pol.' ...
  5.288261890411377 2.7126715183258057 'chato1']]
0: 0/312/1380
0: 0/313/1380
0: 0

  13.756173133850098 'trajeto-1']]
0: 0/323/1380
0: 0/324/1380
0: 0/325/1380
0: 0/326/1380
0: 0/327/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['regador' 4444 '(00002043)REGADOR PLÁSTICO 10L' ... 0.36205992102622986
  7.4153571128845215 'regador-1']
 ['regador' 62316 'REGADOR DE PLÁSTICO DE 12 LITROS' ...
  0.39707615971565247 7.2093729972839355 'regador-1']
 ['regador' 73774 'Regador de plástico de 10L.' ... 0.19285857677459717
  7.558195114135742 'regador-1']
 ...
 ['regador' 1473364 'Regador de plástico, 10L' ... 0.21419556438922882
  7.772477149963379 'regador-1']
 ['regador' 1473901 'REGADOR' ... 0.34029364585876465 7.794512748718262
  'regador-1']
 ['regador' 1482278 'REGADOR 10L' ... 0.48778966069221497
  7.89762020111084 'regador-1']]
0: 0/328/1380
0: 0/329/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.


  9.319713592529297 'a50-1']]
0: 0/352/1380
0: 0/353/1380
0: 0/354/1380
0: 0/355/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['ducha' 6677 'DUCHA ELETRICA' ... 12.699484825134277 19.263599395751953
  'ducha0']
 ['ducha' 7557
  'DUCHA CHUVEIRO ELETRICO, 220V, 3 TEMPERATURASFRIO QUENTE MORNO' ...
  12.059579849243164 19.082555770874023 'ducha0']
 ['ducha' 24456
  'DUCHA METALICA DE PAREDE, ARTICULAVEL, COM DESVIAD - 1300110083' ...
  4.025143146514893 13.670276641845703 'ducha2']
 ...
 ['ducha' 1484341
  'DUCHA HIGIENICA PLASTICA COM REGISTRO DE 1/4 DE VOLTA, COMPATIVEL OU SIMILAR COM FAME'
  ... 6.381009578704834 19.04549789428711 'ducha2']
 ['ducha' 1486391 '(00001780)DUCHA 5.500 wtz' ... 12.336138725280762
  19.438711166381836 'ducha0']
 ['ducha' 1489638 'DUCHA 127/4500W' ... 12.534591674804688
  19.416961669921875 'ducha0']]
0: 0/356/1380
0: 0/357/1380
BOW composed only by nouns and medicines/me

  9.750843048095703 7.39357328414917 11.959454536437988 'webcam-1']]
0: 0/389/1380
0: 0/390/1380
0: 0/391/1380
0: 0/392/1380
0: 0/393/1380
0: 0/394/1380
0: 0/395/1380
0: 0/396/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['consultorio' 1894
  'CONSULTÓRIO ODONTOLÓGICO COMPOSTO DE: Cadeira odontológica automática base com dimensão que permite aproximação do CD/ASB sistema tipo pantográfico de elevação movimentos do assento e encosto acionados por dois motoredutores elétricos articulação'
  ... 23.19935417175293 -1.1148695945739746 'consultorio-1']
 ['consultorio' 6570 'CONSULTORIO ODONTOLOGICO' ... 26.399612426757812
  -4.970032215118408 'consultorio-1']
 ['consultorio' 23472
  '79895 - CONSULTORIO ODONTOLOGICO COMPLETO - CADEIRA ODONTOLOGICA COM ESTRUTURA EM ACO, COM TRATAMENTO ANTI CORROSIVO E PROTECAO EM PVC, ARTICULACAO CENTRAL ENTRE ACENTO E ENCOSTO, COM PONTOS DE LIGACAO NAS LATERAIS DA CADEIR

  9.851049423217773 'nutridrink-1']]
0: 0/423/1380
0: 0/424/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['diva' 14344 'DIVA BAIXO TABLADO' ... 6.900716781616211
  2.0649218559265137 'diva1']
 ['diva' 18461 'DIVA TABLADO EM MADEIRA' ... 6.610283374786377
  2.1999452114105225 'diva1']
 ['diva' 21240 'DIVA BAIXO TABLADO P/FISIOTE-' ... 6.795796871185303
  2.176764488220215 'diva1']
 ...
 ['diva' 1425297 'DIVA MESA EXAME CLINICO OBESO' ... 6.072718620300293
  10.160832405090332 'diva0']
 ['diva' 1441917
  'Diva Tablado em Madeira: Ideal para Uso Fisioterapeutico, possui Alta Resistencia e Durabilidade Dimensoes aproximadas: 180.0 cm x 130.0 cm x 52.0 cm'
  ... 6.92739200592041 2.485535144805908 'diva1']
 ['diva' 1483232
  'DIVa BAIXO TABLADO EM MADEIRA PARA USO FISIOTERAPeUTICO' ...
  6.982348442077637 2.3284659385681152 'diva1']]
0: 0/425/1380
0: 0/426/1380
BOW composed only by nouns and medicines/me

  23.79026985168457 2.4571030139923096 'ativar-1']]
0: 0/448/1380
0: 0/449/1380
0: 0/450/1380
0: 0/451/1380
0: 0/452/1380
0: 0/453/1380
0: 0/454/1380
0: 0/455/1380
0: 0/456/1380
0: 0/457/1380
0: 0/458/1380
0: 0/459/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['chuveiro' 374 'CHUVEIRO MONOFASICO 110V' ... 8.299479484558105
  10.447834014892578 'chuveiro0']
 ['chuveiro' 3547
  'CHUVEIRO ELÉTRICO, MATERIAL TERMOPLÁSTICO, VARIAÇÕES TEMPERATURA DA ÁGUA 3 TEMPERATURAS, COR BRANCA, POTÊNCIA APROXIMADA 4.400, TENSÃO OPERAÇÃO 127V, CARACTERÍSTICAS ADICIONAIS CAPA ISOLANTE INTERNA.'
  ... 4.649792194366455 4.468297004699707 'chuveiro1']
 ['chuveiro' 7821 'Chuveiro, 3 Estações, 127 V.' ... 7.811121463775635
  9.905403137207031 'chuveiro0']
 ...
 ['chuveiro' 1486136 'CHUVEIRO 4T' ... 8.134888648986816
  10.436318397521973 'chuveiro0']
 ['chuveiro' 1488836 'CHUVEIRO SIMPLES DE PVC - AGUA FRIA DE 7"' ...
  4.90

  'ticlopidina-1']]
0: 0/482/1380
0: 0/483/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['fosforo' 1124 'FOSOFORO PALITO LONGO CAIXA C/ 240 PALITOS' ...
  1.3753780126571655 7.828525066375732 'fosforo3']
 ['fosforo' 1827 'FOSFORO LONGO - ME Caixa com 200 palitos.' ...
  1.359897255897522 7.844277858734131 'fosforo3']
 ['fosforo' 9328
  'FOSFORO DE MADEIRA PEQUENO Pacote com 10 caixas de 40 Palitos Especificacoes: Fosforo, clorato de potassio e aglutinantes. PRODUTOS DE BOA QUALIDADE.'
  ... 2.0569236278533936 5.513259410858154 'fosforo-1']
 ...
 ['fosforo' 1488189 'FOSFORO' ... 9.218522071838379 8.274921417236328
  'fosforo2']
 ['fosforo' 1489157 'Caixa de fósforo, 40 palitos incediáveis' ...
  -6.157278537750244 3.432060480117798 'fosforo0']
 ['fosforo' 1489595 'FOSFORO PP-COLORIMETRICO' ... 9.266356468200684
  8.193666458129883 'fosforo2']]
0: 0/484/1380
BOW composed only by nouns and medicines/m

  -0.7400968074798584 4.235823631286621 -3.2267162799835205 'doppler-1']]
0: 0/488/1380
0: 0/489/1380
0: 0/490/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['fiat' 37939 'FIAT/FIORINO AMBULÂNCIA PLACA HMM9936' ...
  -1.0129642486572266 17.848663330078125 'fiat0']
 ['fiat' 39508 'FIAT PALIO WEKEND ADV OPB-0011 FILT.AR INT' ...
  -7.049302577972412 8.967752456665039 'fiat1']
 ['fiat' 43208
  'FIAT PALIO PUQ 3841 - SEGURO FIAT PALIO PUQ 3841 - SEGURO' ...
  -6.991105556488037 9.392391204833984 'fiat1']
 ...
 ['fiat' 1465793
  'FIAT TORO FREEDOM MT D4  DIESEL 2016/2017 PLACA PYU-9271 - Casco 100% T. FIPE - Danos Materiais R$ 100.000,00 - Danos Corporais R$ 200.000,00 - Vidro VD, Farol, lanterna, retrovisor.'
  ... -3.223123788833618 6.02254581451416 'fiat1']
 ['fiat' 1476366 'FIAT FIORINO HLF-0188 FILT.AR INT' ...
  -7.22429895401001 8.777833938598633 'fiat1']
 ['fiat' 1482386 'FIAT DOBLO CARGO 1.8 FLE

  'escrivaninha-1']]
0: 0/495/1380
0: 0/496/1380
0: 0/497/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['aspersor' 17069 'Aspersor para Jardinagem/agricultura' ...
  14.588912010192871 8.686888694763184 'aspersor-1']
 ['aspersor' 45484 'Aspersor, 50 x 3/4 x 1/2' ... 15.652565956115723
  10.529877662658691 'aspersor-1']
 ['aspersor' 69368 'ASPERSOR PERERECA' ... 16.043373107910156
  10.272310256958008 'aspersor-1']
 ...
 ['aspersor' 1451176 '005802 ASPERSOR 50 CM 1/2' ... 15.64700698852539
  10.18869686126709 'aspersor-1']
 ['aspersor' 1456779 'ASPERSOR PARA JARDIM' ... 14.832365036010742
  8.822478294372559 'aspersor-1']
 ['aspersor' 1466730 'ASPERSOR JARDIM 1 MT' ... 15.023186683654785
  9.189417839050293 'aspersor-1']]
0: 0/498/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['fermento' 3016 'FERMENTO biologico 

  6.818831443786621 14.539253234863281 'zuclopentixol-1']]
0: 0/522/1380
0: 0/523/1380
0: 0/524/1380
0: 0/525/1380
0: 0/526/1380
0: 0/527/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['contratacao' 1575 'Contratação de empresa ou profissional predreiro'
  ... -1.4324767589569092 1.6268681287765503 'contratacao7']
 ['contratacao' 4569 '028272 CONTRATACAO DE EMPRESA OU PESSOA FISICA' ...
  -0.9943551421165466 1.2991816997528076 'contratacao7']
 ['contratacao' 11901
  'Contratacao de empresa para execucao de obra de construcao de viaduto de ligacao entre o Centro e o Bairro Pio XII, localizado na MG 431 Km 72 + 550m,'
  ... -0.27806925773620605 0.739691972732544 'contratacao7']
 ...
 ['contratacao' 1477070
  'Contratação de Empresa de seguros Marca: MERCEDES-BENZTipo: SPRINTER  Modelo:515-CDI 2.2 BI-TB VAN(T. Alto)  - DIESEL. 21 lugaresAno/Fab./Mod.: 2014/2014Uso:SECRETARIA MUNICIPAL DE SAÚDE Cobertur

  -6.11078405380249 'lavado-1']]
0: 0/544/1380
0: 0/545/1380
0: 0/546/1380
0: 0/547/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['impressao' 6272 'MATERIAL IMPRESSAO POLIESTER' ... 6.644828796386719
  2.8719286918640137 'impressao2']
 ['impressao' 7097
  'IMPRESSÃO EM PAPEL FOTOGRÁFICO TAMANHO 10X15CM  - ALTA QUALIDADE' ...
  -7.231242656707764 7.533799171447754 'impressao1']
 ['impressao' 7541
  'IMPRESSÃO DE CARTILHA 30X22 COM 4 PAG PAPEL COUCHE 150GR 4X4' ...
  -7.12469482421875 7.405458927154541 'impressao1']
 ...
 ['impressao' 1478612 '(00007874)IMPRESSAO DE FOLDER PAPEL COUCHE 150 G'
  ... -6.24990701675415 6.241157531738281 'impressao0']
 ['impressao' 1481492 '102655 - IMPRESSAO EM FORMATO A3 MONOCROMATICA'
  ... 4.313424587249756 -1.5061538219451904 'impressao3']
 ['impressao' 1487204
  'MATERIAL P  IMPRESSAO A BASE DE SILICONE MASSA DENSO  C  01 kg. Material de moldagem a base de silicone

  -3.4750943183898926 3.5011703968048096 2.7828898429870605 'extencao-1']]
0: 0/574/1380
0: 0/575/1380
0: 0/576/1380
0: 0/577/1380
0: 0/578/1380
0: 0/579/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['arrebite' 18547 'Arrebite, 2,5 cm' ... 0.6351219415664673
  11.303497314453125 'arrebite-1']
 ['arrebite' 27428 'ARREBITE' ... 0.9645823836326599 11.200822830200195
  'arrebite-1']
 ['arrebite' 47860 'ARREBITE 302X10' ... 0.4486851096153259
  11.218944549560547 'arrebite-1']
 ...
 ['arrebite' 1442489 'ARREBITE 4 MM' ... 1.0111488103866577
  11.050175666809082 'arrebite-1']
 ['arrebite' 1445171 'Arrebites' ... 0.8173040747642517 11.43662166595459
  'arrebite-1']
 ['arrebite' 1460553 'ARREBITE 3 MM' ... 0.6630324125289917
  11.06554889678955 'arrebite-1']]
0: 0/580/1380
0: 0/581/1380
0: 0/582/1380
0: 0/583/1380
0: 0/584/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMA

  1.1745095252990723 'reg-1']]
0: 0/586/1380
0: 0/587/1380
0: 0/588/1380
0: 0/589/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['noretisterona' 6513 'Noretisterona' ... 0.7423599362373352
  4.96770715713501 'noretisterona2']
 ['noretisterona' 12077 'NORETISTERONA 0,35MG' ... 0.389210969209671
  4.7917561531066895 'noretisterona2']
 ['noretisterona' 14590 'NORETISTERONA 0,35 MG' ... 0.9043818116188049
  5.348813533782959 'noretisterona2']
 ...
 ['noretisterona' 1483801 'Noretisterona 0,35 mg comp.' ...
  0.5616104602813721 5.158655166625977 'noretisterona2']
 ['noretisterona' 1485400
  'NORESTISTERONA + ESTRADIOL ENANTATO/VALERATO 50+5 MG/ML SOLUÇÃO INJETÁVEL'
  ... 6.152641773223877 6.525894641876221 'noretisterona3']
 ['noretisterona' 1489752
  '71730 - NORETISTERONA - APRESENTACAO: COMPRIMIDO, DOSAGEM: 0,35 MG'
  ... 6.45339298248291 6.168470859527588 'noretisterona2']]
0: 0/590/1380
0: 0/591/138

  'fentanil0']]
0: 0/652/1380
0: 0/653/1380
0: 0/654/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['jcb' 46541 'RJCB DENTE CACAMBA DIANT.RETRO'
  'jcb dente cacamba diant retro' 1.283570408821106 3.2305164337158203
  10.71780014038086 14.144868850708008 10.135632514953613
  -10.244682312011719 -4.104613304138184 -8.110041618347168
  10.8429594039917 5.241302013397217 6.164065837860107 6.724729537963867
  2.7235300540924072 10.902024269104004 13.241520881652832 'jcb-1']
 ['jcb' 93916 'RJCB JUNTA DO CABECOTE RETROES'
  'jcb junta cabecote retroar' 0.6386346817016602 3.621424674987793
  10.217567443847656 14.614892959594727 10.622179985046387
  -10.464638710021973 -4.427403926849365 -8.92702865600586
  10.971484184265137 4.887522220611572 5.5364556312561035
  6.918992042541504 1.6579313278198242 11.182734489440918
  12.865415573120117 'jcb-1']
 ['jcb' 145226 'RJCB CABO DE ACO ALAVANCA P/FR'
  'jcb cab

  12.711763381958008 'jcb-1']]
0: 0/655/1380
0: 0/656/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['vit' 35411 'VIT B + VIT A + VIT D3 + FERRO' 'vit b d3 ferro'
  3.825176477432251 4.358636856079102 -6.923260688781738
  11.25190258026123 10.526310920715332 5.183811664581299
  8.157919883728027 2.3474161624908447 9.5878324508667 12.456350326538086
  0.36239224672317505 11.222887992858887 14.953866958618164
  4.045316696166992 5.603623867034912 'vit-1']
 ['vit' 39758 'VIT.A 50000UI + VIT.D 10000UI' 'vit ui d'
  3.7796239852905273 4.26236629486084 -7.005465030670166
  11.445594787597656 10.521283149719238 4.672868251800537
  8.45250129699707 1.7852636575698853 9.181153297424316
  12.335397720336914 0.4129573404788971 11.101778030395508
  14.796489715576172 3.9133520126342773 5.511394023895264 'vit-1']
 ['vit' 65151 'VIT B12' 'vit 012' 3.8039627075195312 4.301969051361084
  -7.138981819152832 11.34206

  3.6858484745025635 5.59071683883667 'vit-1']]
0: 0/657/1380
0: 0/658/1380
0: 0/659/1380
0: 0/660/1380
0: 0/661/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['bateria' 1022
  'BATERIA 9 V - BATERIA ALCALINA NAO RECARREGAVEL DE 9 V.' ...
  4.852480411529541 4.617860794067383 'bateria0']
 ['bateria' 3466 'BATERIA ALCALINA 9 VOLTS..' ... 4.796268463134766
  4.5193095207214355 'bateria0']
 ['bateria' 3717 'Bateria' ... 4.516677379608154 -6.109343528747559
  'bateria4']
 ...
 ['bateria' 1488990 'BATERIA de lition LR 1130, tipo botão' ...
  6.488682746887207 5.157289981842041 'bateria16']
 ['bateria' 1489092 'BATERIA CR2032' ... 4.294319152832031
  -6.1698384284973145 'bateria4']
 ['bateria' 1489161 'BATERIA DE LITHIUM 3V CR 2032' ... 1.192152500152588
  11.087510108947754 'bateria5']]
0: 0/662/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by H

  9.918890953063965 'presilha-1']]
0: 0/663/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['vedacit' 45952 'VEDALIT (CAL LIQUIDO) 5 L' 'vedacit cal liquido 5 l'
  6.253180980682373 4.329177379608154 13.731195449829102
  -3.125640630722046 -2.1302623748779297 6.712472438812256
  2.854910373687744 1.505581259727478 11.184956550598145
  9.081334114074707 1.0210388898849487 3.8196158409118652
  6.716622829437256 1.0799939632415771 12.720563888549805 'vedacit-1']
 ['vedacit' 83766 'Vedacit 18 lt.' 'vedacit 18 lt' 5.887622833251953
  4.8057122230529785 13.838281631469727 -3.159825086593628
  -2.386751413345337 5.778969764709473 3.4491286277770996
  0.952598512172699 10.822139739990234 8.729548454284668
  0.4621239900588989 3.7474477291107178 6.5090651512146
  1.1981624364852905 13.095738410949707 'vedacit-1']
 ['vedacit' 119839 'VEDACIT LATA COM 3,5 LT' 'vedacit lata com 3 5 lt'
  6.414577484130859 4.4538

  1.0924910306930542 12.983257293701172 'vedacit-1']]
0: 0/664/1380
0: 0/665/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['abridor' 2408
  '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000001 ABRIDOR DE BOCA - PLASTICO TRANSPARENTE'
  ... 4.802374362945557 2.259509563446045 'abridor3']
 ['abridor' 4419
  'ABRIDOR DE LATA - TIPO: COMBINADO COM ABRIDOR DE GARRAFA, MATERIA-PRIMA: ACO TEMPERADO (32527)'
  ... 1.757275938987732 1.6112679243087769 'abridor1']
 ['abridor' 8871
  'Abridor de boca  infantil de silicone, autoclavável TAM 30 x 25 x 18 mm.'
  ... 3.6579079627990723 3.5265753269195557 'abridor7']
 ...
 ['abridor' 1486174 'ABRIDOR DE BOCA NYLON INFANTIL AUTOCLAVAVEL - 2 UN.'
  ... 4.320793151855469 7.691926956176758 'abridor0']
 ['abridor' 1487306
  'ABRIDOR DE LATAS, MANUAL, EM ACO INOX, MEDINDO APROXIMADAMENTE 15 CM.'
  ... 0.882214307

  -2.809544324874878 0.26528608798980713 'adubo-1']]
0: 0/672/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['beclometasona' 1640
  'BECLOMETASONA (DIPROPIONATO) 50MCG/DOSE SUSPENSAO SPRAY NASAL FRASCO 200 DOSES'
  ... 5.5327372550964355 6.6292219161987305 'beclometasona6']
 ['beclometasona' 2728 'BECLOMETASONA 250 MCG' ... 7.78644323348999
  11.065094947814941 'beclometasona0']
 ['beclometasona' 4588 'Beclometasona dipropionato 50mcg aerossol oral'
  ... 6.182288646697998 -0.5365667343139648 'beclometasona1']
 ...
 ['beclometasona' 1487117
  'BECLOMETASONA DIPROPIONATO 50 MCG - SUSPENSÃO NASAL' ...
  5.216655731201172 6.448011875152588 'beclometasona6']
 ['beclometasona' 1488863
  'Beclometasona dipropionato 250 mcg aerossol oral' ...
  6.208630084991455 -0.4852510392665863 'beclometasona1']
 ['beclometasona' 1489231
  'BECLOMETASONA DIPROPIONATO 250mcg 200 DOSES SPRAY ORAL' ...
  5.67009973526001 

  -0.23690463602542877 -9.480321884155273 'fsh-1']]
0: 0/701/1380
0: 0/702/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['solucao' 15 'SOLUCAO DE BICARBONATO DE SODIO 8,4% 250ML SIST.FECHADO'
  ... -6.511895656585693 10.292434692382812 'solucao-1']
 ['solucao' 565 'SOLUCAO PARA BATERIA' ... 8.695852279663086
  4.8350749015808105 'solucao43']
 ['solucao' 569 '013198 SOLUCAO DE LIMPEZA ENZIMATICA WL E-Z 100' ...
  8.105521202087402 -0.23795638978481293 'solucao38']
 ...
 ['solucao' 1488691 'Solução de schiller, uso tópico, 100mg/ml.' ...
  11.562021255493164 6.229816436767578 'solucao49']
 ['solucao' 1488856
  'SOLUÇÃO FISIOLÓGICA,CLORETO DE SÓDIO 100ML SOLUÇÃO FISIOLÓGICA, CLORETO DE SÓDIO A 0, 9% 100 ML SISTEMA FECHADO. Solução eletrolítica injetável. Via de admnistração injetável. Um Sistema Fechado é aquele que não requer de elementos externos adicionai'
  ... 5.646806716918945 3.378730297088623 

  8.582813262939453 3.179330348968506 'radiador4']]
0: 0/724/1380
0: 0/725/1380
0: 0/726/1380
0: 0/727/1380
0: 0/728/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['esculpido' 4296 'ESCULPIDOR LECRON' ... 12.753820419311523
  3.413032054901123 'esculpido1']
 ['esculpido' 7051 'ESCULPIDOR HOLL 3S' ... 12.307111740112305
  3.037994861602783 'esculpido1']
 ['esculpido' 11932 'ESCULPIDOR HOLLEMBACK 3S' ... 12.405548095703125
  3.670299768447876 'esculpido1']
 ...
 ['esculpido' 1472404 'ESCULPIDOR HOLLEMBACK 03 S' ... 12.800798416137695
  3.0330348014831543 'esculpido1']
 ['esculpido' 1485372 'ESCULPIDOR FRAN N02' ... 12.66643238067627
  3.167421817779541 'esculpido1']
 ['esculpido' 1486488
  'Esculpidores, andrews 1 em aço inoxidável, reistente a altas temperaturas, com registro na ANVISA'
  ... 12.716739654541016 5.123640537261963 'esculpido1']]
0: 0/729/1380
0: 0/730/1380
0: 0/731/1380
0: 0/732/1380
0

  -6.148012638092041 'mnc-1']]
0: 0/742/1380
0: 0/743/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['grelha' 1551 '009704 GRELHA  QUADRADA N.  20 100 MM' ...
  11.135933876037598 12.60335636138916 'grelha3']
 ['grelha' 12754 'GRELHA PARA BUEIROS' ... 5.644028663635254
  7.554527282714844 'grelha2']
 ['grelha' 19533 'GRELHA INOX GIRATORIA 150MM -ABRE/FECHA' ...
  9.982522964477539 7.303447246551514 'grelha-1']
 ...
 ['grelha' 1481739 'GRELHA INOX QUADRADA PARA CAIXA SIFONADA 150X150MM'
  ... 5.297671318054199 8.146726608276367 'grelha2']
 ['grelha' 1482148 '040940 GRELHA COM PORTA GRELHA 30x50' ...
  10.84545612335205 12.499740600585938 'grelha3']
 ['grelha' 1483472 '009705 GRELHA  QUADRADA N.  30 150 MM' ...
  11.249480247497559 12.653306007385254 'grelha3']]
0: 0/744/1380
0: 0/745/1380
0: 0/746/1380
0: 0/747/1380
0: 0/748/1380
0: 0/749/1380
0: 0/750/1380
0: 0/751/1380
0: 0/752/1380
0: 0/753/1380
0

  10.124055862426758 11.81267261505127 'campainha-1']]
0: 0/759/1380
0: 0/760/1380
0: 0/761/1380
0: 0/762/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['transp' 82770 'TRANSP. AMBULANCIA UTI KM' ... 7.142560958862305
  -1.4279166460037231 'transp-1']
 ['transp' 89054
  'TRANSP SIMPLES REMOÇÃO(OXI+TEC ENF) VRB-VIÇOS VRB para Viçosa' ...
  7.938618183135986 -1.1857030391693115 'transp-1']
 ['transp' 101034 '(00019187)29-Transp. veículo minimo 09 lugares - 4'
  ... 9.716042518615723 -1.2536239624023438 'transp-1']
 ...
 ['transp' 1464650 'TRANSP. CARATER SUPLEMENTAR DE' ... 8.937617301940918
  -1.1679946184158325 'transp-1']
 ['transp' 1468632 'TRANSP.DE ALUNOS DA ESTRADA DO' ... 4.045076370239258
  -3.890737533569336 'transp-1']
 ['transp' 1478657 'TRANSP. AMB. SIMPLES P/ MIRAI' ... 8.425477027893066
  -0.9845494031906128 'transp-1']]
0: 0/763/1380
BOW composed only by nouns and medicines/medical ter

  0.31445175409317017 6.642426013946533 'linagliptina-1']]
0: 0/775/1380
0: 0/776/1380
0: 0/777/1380
0: 0/778/1380
0: 0/779/1380
0: 0/780/1380
0: 0/781/1380
0: 0/782/1380
0: 0/783/1380
0: 0/784/1380
0: 0/785/1380
0: 0/786/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['leiteiro' 9890 'LEITEIRA EM ALUMÍNIO resistente, capacidade 5 litros.'
  ... 2.3792731761932373 6.7694902420043945 'leiteiro1']
 ['leiteiro' 18060
  '34 - LEITEIRA DE ACO INOX C/ CAPACIDADE PARA 2 LITROS' ...
  2.2902274131774902 6.657169342041016 'leiteiro1']
 ['leiteiro' 18690
  'LEITEIRA - MATERIA-PRIMA: ALUMINIO, CAPACIDADE: 5000ML, CABO: COM CABO EM MADEIRA, TAMPA: SEM TAMPA (32847)'
  ... 6.458831787109375 -12.602927207946777 'leiteiro0']
 ...
 ['leiteiro' 1453815 'LEITEIRA DE 01 LITRO EM INOX' ...
  1.8178383111953735 6.852261543273926 'leiteiro1']
 ['leiteiro' 1460357
  'LEITEIRA EM ALUMINIO COM CABO DE MADEIRA CAPACIDADE DE 0

  8.778596878051758 4.260830402374268 'lactato-1']]
0: 0/814/1380
0: 0/815/1380
0: 0/816/1380
0: 0/817/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['tramal' 592 'TRAMAL 100 MG COMPRIMIDO' 'tramal 100 mg comprimido'
  -0.2585546374320984 -0.20697547495365143 1.6131231784820557
  9.274523735046387 2.9851694107055664 5.25580358505249 7.809953212738037
  10.471646308898926 1.0775399208068848 4.6216301918029785
  -0.6473787426948547 1.096805453300476 12.166695594787598
  8.55420970916748 -0.1485721617937088 'tramal-1']
 ['tramal' 4096 'TRAMAL INJETAVEL 100MG/2ML'
  'tramal injetavel 100 mg 2 ml' 0.4333837032318115 0.2806869447231293
  1.4759379625320435 9.426969528198242 2.475410223007202
  5.233786106109619 7.8042473793029785 10.613554954528809
  1.2645225524902344 5.351992607116699 -0.8590430617332458
  1.4072426557540894 12.189425468444824 7.869252681732178
  -1.1082665920257568 'tramal-1']
 ['trama

  12.43332290649414 7.8985114097595215 -0.7358461618423462 'tramal-1']]
0: 0/818/1380
0: 0/819/1380
0: 0/820/1380
0: 0/821/1380
0: 0/822/1380
0: 0/823/1380
0: 0/824/1380
0: 0/825/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['massageador' 15219 'MASSAGEADOR ELETRICO' 'massageador eletrico'
  -2.19740891456604 18.11362648010254 14.860345840454102
  18.013561248779297 8.80429744720459 -5.667108535766602
  3.9181201457977295 1.3005726337432861 2.7828123569488525
  3.1771011352539062 2.228728771209717 9.74940013885498
  -1.123526930809021 -4.4687910079956055 2.194331645965576
  'massageador-1']
 ['massageador' 62280
  '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001810011 MASSAGEADOR MANUAL DE MADEIRA FLEXIVEL - MOLA'
  '181 0011 massageador manual madeira flexivel mola' -2.268429756164551
  18.082857131958008 14.634941101074219 18.52398109436035


  'massageador-1']]
0: 0/826/1380
0: 0/827/1380
0: 0/828/1380
0: 0/829/1380
0: 0/830/1380
0: 0/831/1380
0: 0/832/1380
0: 0/833/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['azalim' 17725 'AZULIM 01 LT CX C/ 12' ... -3.2431392669677734
  4.013357162475586 'azalim-1']
 ['azalim' 37556 'AZULIM P/ LIMPEZA PESADA - FR -C/ 01LT' ...
  -3.390139579772949 6.197341442108154 'azalim-1']
 ['azalim' 46407 'AZULIM 1L' ... -3.2393643856048584 3.8422999382019043
  'azalim-1']
 ...
 ['azalim' 1469803 'AZULIM' ... -3.7459535598754883 4.12093448638916
  'azalim-1']
 ['azalim' 1474047 'AZULIM 01LTS' ... -3.0530285835266113
  4.200901508331299 'azalim-1']
 ['azalim' 1481328 'AZULIM DE 1 LITRO 103848' ... -3.37004017829895
  4.0108466148376465 'azalim-1']]
0: 0/834/1380
0: 0/835/1380
0: 0/836/1380
0: 0/837/1380
0: 0/838/1380
0: 0/839/1380
0: 0/840/1380
BOW composed only by nouns and medicines/medical terms of the desc

  2.622551679611206 8.773313522338867 5.262814044952393 'alt-1']]
0: 0/858/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['palmeira' 5982 'PALMEIRA AZUL (BISMAKIA NOBILE, 1 /1,20 MT )'
  'palmeira azul bismaki nobile 1 20 mt' 6.352517604827881
  -0.5705050826072693 5.443376541137695 9.760313987731934
  3.579968214035034 0.21597424149513245 3.2285659313201904
  3.617865800857544 6.807349681854248 3.1749367713928223
  7.461922645568848 4.439122676849365 2.7979822158813477
  5.648744583129883 -1.5146749019622803 'palmeira-1']
 ['palmeira' 28639 'PALMEIRA IMPERIAL( 03METROS)'
  'palmeira imperial 3 metro' 6.7611846923828125 0.07866891473531723
  5.661865711212158 10.171186447143555 2.4770188331604004
  0.3613094985485077 3.72879958152771 3.594416379928589 6.954196929931641
  2.9423022270202637 7.479763984680176 4.354050636291504
  2.9417262077331543 6.212684631347656 -2.284780502319336 'palmeira-1']
 ['

  -2.004246950149536 'palmeira-1']]
0: 0/859/1380
0: 0/860/1380
0: 0/861/1380
0: 0/862/1380
0: 0/863/1380
0: 0/864/1380
0: 0/865/1380
0: 0/866/1380
0: 0/867/1380
0: 0/868/1380
0: 0/869/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['articulador' 14024 'Articulador odontológico' ... 2.74849271774292
  3.7304880619049072 'articulador0']
 ['articulador' 34669 'ARTICULADOR DA CAIXA DE DIRECAO CELTA' ...
  11.243485450744629 10.01814079284668 'articulador1']
 ['articulador' 47159 'ARTICULADOR ODONTOLOGICO' ... 2.854990005493164
  3.8509254455566406 'articulador0']
 ...
 ['articulador' 1451675 'ARTICULADOR CHARNEIRA DE PLASTICO RESISTENTE'
  ... 10.825505256652832 8.623117446899414 'articulador1']
 ['articulador' 1482199
  'ARTICULADOR ODONTOLOGICO SEMI AJUSTAVEL ARCONDISTANCIA INTERCONDILAR AJUSTAVEL EM P M G ATRAVES DE FUSO EXPANSOR GUIA CONDILICA CURVA REGULAGEM DO ANGULO DA GUIA CONDILICA REGULAGEM DO

  8.755424499511719 'hora-1']]
0: 0/913/1380
0: 0/914/1380
0: 0/915/1380
0: 0/916/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['ampicilina' 4633 'AMPICILINA 500MG CPR' ... 7.431251049041748
  3.0629711151123047 'ampicilina0']
 ['ampicilina' 7155 'AMPICILINA 500 MG AMPOLA CAIXA COM 12UN' ...
  7.222299098968506 7.097684860229492 'ampicilina3']
 ['ampicilina' 8300
  '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001540021 AMPICILINA + SULBACTAM 3 MG'
  ... 7.163698196411133 6.292003154754639 'ampicilina3']
 ...
 ['ampicilina' 1485457 'AMPICILINA 1G/ML AMP.' ... 7.418542861938477
  3.1275291442871094 'ampicilina0']
 ['ampicilina' 1488813 'AMPICILINA 500MG' ... 7.319001197814941
  3.1452560424804688 'ampicilina0']
 ['ampicilina' 1489118
  'AMPICILINA 50MG/ML Embalagem primária frasco de vidro âmbar, embalagem secundária caixa de papelão, suspensão 

  2.285181760787964 -2.255866765975952 'tarracha-1']]
0: 0/932/1380
0: 0/933/1380
0: 0/934/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['cianocobalamina' 7952 'CIANOCOBALAMINA  (COMPLEXO B)' ...
  -3.1176059246063232 4.285948276519775 'cianocobalamina-1']
 ['cianocobalamina' 10415 'Cianocobalamina 1000 Mcg Solução Injetável'
  ... 5.349672317504883 10.519658088684082 'cianocobalamina-1']
 ['cianocobalamina' 36769 'CIANOCOBALAMINA 1000 MG SOLUCAO INJETAVEL' ...
  5.3991546630859375 10.589457511901855 'cianocobalamina-1']
 ...
 ['cianocobalamina' 1407766 'IANOCOBALAMINA+MONONITRATO DE TIAMINA' ...
  6.3543925285339355 14.383136749267578 'cianocobalamina-1']
 ['cianocobalamina' 1413754 'CIANOCOCALAMINA 1000 MCG SOL.' ...
  -3.1568105220794678 3.981086492538452 'cianocobalamina-1']
 ['cianocobalamina' 1426234
  'Cianocobalamina, 1000 mcg soluCAo injetAvel (via i - 6404703422' ...
  10.510757446289062 

  5.45313835144043 4.833931922912598 'sinvastatina3']]
0: 0/988/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['botina' 3962 'BOTINA BICO DE FERRO' ... 3.0073153972625732
  15.466010093688965 'botina5']
 ['botina' 4223 'BOTINAS BORRACHAS 118659' ... 3.9276483058929443
  -6.112136363983154 'botina4']
 ['botina' 6157
  'BOTINA, BORSEGUINHO EM COURO PRETO CORRUGADO DE 1 QUALIDADE SOLADO EM PV POLIURETANO ANTI DERRAPANTE E PALMILHA ANTIMICRÓBICOS COM FECHAMENTO EM ELÁSTICO LATERAL, TAM: 40. PARA O COMBATE A DENGUE.'
  ... 3.190009832382202 12.30283260345459 'botina-1']
 ...
 ['botina' 1477055 'Botina confeccionada em couro, cano médio, nº 38.'
  ... 6.009883403778076 5.078990936279297 'botina1']
 ['botina' 1478722 'BOTINA DE SEGURANCA' ... 7.37688684463501
  3.319815158843994 'botina2']
 ['botina' 1484635 'BOTINA COURO SOLADO DE PNEU TAMANHO DIVERSOS' ...
  5.235865116119385 4.130500316619873 'botina8']

  0.5460548400878906 'cetotifeno-1']]
0: 0/1006/1380
0: 0/1007/1380
0: 0/1008/1380
0: 0/1009/1380
0: 0/1010/1380
0: 0/1011/1380
0: 0/1012/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['tapazol' 20510 'Tiamazol, 10 mg - cps' ... 0.42712539434432983
  11.314658164978027 'tapazol-1']
 ['tapazol' 28404 '002550 TIAMAZOL 10MG' ... 0.4216197729110718
  10.930122375488281 'tapazol-1']
 ['tapazol' 40428 'TIAMAZOL COMP 10MG' ... 0.8022123575210571
  10.972061157226562 'tapazol-1']
 ...
 ['tapazol' 1448883 'TAPAZOL 5MG (METIMAZOL) - CAIXA COM 100 COMPRIMIDOS'
  ... -0.1757582575082779 6.582033634185791 'tapazol-1']
 ['tapazol' 1467684 'TIAMAZOL 10MG COMPRIMIDO' ... -0.21113227307796478
  6.640603542327881 'tapazol-1']
 ['tapazol' 1489140 'TAPAZOL 50g.' ... 0.5185182690620422
  10.905594825744629 'tapazol-1']]
0: 0/1013/1380
0: 0/1014/1380
0: 0/1015/1380
BOW composed only by nouns and medicines/medical terms o

  11.554841995239258 'vale-1']]
0: 0/1041/1380
0: 0/1042/1380
0: 0/1043/1380
0: 0/1044/1380
0: 0/1045/1380
0: 0/1046/1380
0: 0/1047/1380
0: 0/1048/1380
0: 0/1049/1380
0: 0/1050/1380
0: 0/1051/1380
0: 0/1052/1380
0: 0/1053/1380
0: 0/1054/1380
0: 0/1055/1380
0: 0/1056/1380
0: 0/1057/1380
0: 0/1058/1380
0: 0/1059/1380
0: 0/1060/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['ver' 246656 'VEJA 02LTS' 'ver 2 lts' -0.5268391370773315
  8.501632690429688 0.3519003689289093 9.823759078979492
  3.848677158355713 5.931802272796631 5.12000036239624 1.211678147315979
  7.427348613739014 7.930705547332764 13.319068908691406
  8.075759887695312 10.220524787902832 7.188713550567627
  -1.449942708015442 'ver-1']
 ['ver' 264646 '2 VIAS METADE A4 FTO 18' '2 ver metade 14 fto 18'
  -0.5964981913566589 7.980230331420898 0.23326973617076874
  9.574477195739746 3.930008888244629 6.437692642211914 5.027027130126953
  0.80

  -1.4409523010253906 'ver-1']]
0: 0/1061/1380
0: 0/1062/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['azeite' 628 'AZEITE de extra virgem de 500ml' ... 9.748003959655762
  -0.7878568172454834 'azeite1']
 ['azeite' 2982 'Azeite embalagem de 200ml' ... 0.963835597038269
  1.3275736570358276 'azeite-1']
 ['azeite' 6047 'AZEITE DE OLIVA' ... 10.707221031188965 6.36728048324585
  'azeite0']
 ...
 ['azeite' 1477050 'AZEITE DE OLIVA EXTRA VIRGEM' ... 7.683071613311768
  -7.503023624420166 'azeite3']
 ['azeite' 1481925
  'AZEITE, DE OLIVA, EXTRA VIRGEM, PURO, EMBALAGEM COM 500 ML' ...
  7.823866844177246 -7.459169387817383 'azeite3']
 ['azeite' 1485311 'AZEITE DE OLIVA   LATA C/ 500ML' ...
  7.960602283477783 -6.196837902069092 'azeite4']]
0: 0/1063/1380
0: 0/1064/1380
0: 0/1065/1380
0: 0/1066/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDB

  11.418055534362793 -3.8580026626586914 'patim-1']]
0: 0/1083/1380
0: 0/1084/1380
0: 0/1085/1380
0: 0/1086/1380
0: 0/1087/1380
0: 0/1088/1380
0: 0/1089/1380
0: 0/1090/1380
0: 0/1091/1380
0: 0/1092/1380
0: 0/1093/1380
0: 0/1094/1380
0: 0/1095/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['divisorio' 8116
  'DIVISORIA PARA PAPELETA DE INTERNACAO - DIVISORIA PARA PAPELETA DE INTERNACAO, PAPEL NA GRAMATURA DE SULFITE, COR LARANJA, TAMANHO 33X22 CM, PERFURADO, IMPRESSAO PRETA.'
  ... -2.399174451828003 2.96262264251709 'divisorio0']
 ['divisorio' 13885 'DIVISORIAS' ... -3.139777660369873
  17.790241241455078 'divisorio1']
 ['divisorio' 22178
  'DIVISORIA PARA PAPELETA DE INTERNACAO - DIVISORIA PARA PAPELETA DE INTERNACAO, PAPEL NA GRAMATURA DE SULFITE, COR LARANJA, TAMANHO 33X22 CM, PERFURADO, IMPRESSAO PRETA.'
  ... -2.4006383419036865 3.0109291076660156 'divisorio0']
 ...
 ['divisorio' 1468926 'DIVIS

  -0.5752540826797485 9.88857364654541 'pramipexol-1']]
0: 0/1119/1380
0: 0/1120/1380
0: 0/1121/1380
0: 0/1122/1380
0: 0/1123/1380
0: 0/1124/1380
0: 0/1125/1380
0: 0/1126/1380
0: 0/1127/1380
0: 0/1128/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['carro' 119 'CARRO PARA TRANSPORTES DE MATERIAIS (DIVERSOS)' ...
  4.779548168182373 8.650386810302734 'carro22']
 ['carro' 463 'CARRO LIMPEZ.114X98X46 4BALDES' ... 9.022384643554688
  7.286339282989502 'carro19']
 ['carro' 1292 'CARRO PARA TRANSPORTE DE ALIMENTOS PARA USO HOSPITALAR'
  ... 4.842660427093506 8.528861045837402 'carro22']
 ...
 ['carro' 1487702
  'CARRO DE CURATIVO EM ACO  FERRO PINTADO E ACESSORIOS BALDE E BACIA'
  ... 7.6061882972717285 6.272504806518555 'carro24']
 ['carro' 1489253
  'CARRO MACA SIMPLES  -MATERIAL DE CONFECCAO ACO/FERRO PINTADO  -GRADES LATERAIS-POSSUI  -SUPORTE DE SORO-NAO POSSUI  -ACESSORIO(S)-COLCHONETE'
  ... 7.441849

  8.555736541748047 1.310956358909607 'peroxido-1']]
0: 0/1150/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['hidrocor' 10153 'Hidrocor jogo com 12 cores'
  'hidrocor jogo com 12 cor' 0.5251439809799194 3.898603916168213
  4.399829864501953 11.212834358215332 4.233162879943848
  15.912534713745117 3.349857807159424 6.175390243530273 2.62253999710083
  15.816193580627441 7.863771438598633 -0.6652859449386597
  12.414373397827148 10.766631126403809 12.18414306640625 'hidrocor-1']
 ['hidrocor' 24189 'HIDROCOR COM 12 CORES ESCOLAR'
  'hidrocor com 12 cor escolar' 0.40173113346099854 3.6796741485595703
  4.51778507232666 11.064062118530273 3.995595932006836
  16.137584686279297 3.8372018337249756 6.582075119018555
  2.7039260864257812 15.529844284057617 7.761709213256836
  -0.5726781487464905 11.918089866638184 10.239935874938965
  12.559015274047852 'hidrocor-1']
 ['hidrocor' 33734
  'Embalagem de hidr

  12.587238311767578 10.609403610229492 12.524347305297852 'hidrocor-1']]
0: 0/1151/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['diprospan' 20661
  'DIPROSPAN SOLUÇÃO INJETAVEL AMPOLA DE 1ML + SERINGA,'
  'diprospan solucao injetavel ampola 1 ml seringa' 8.102470397949219
  5.73214054107666 2.968113899230957 -0.235042005777359 3.328082323074341
  4.332021236419678 12.60276985168457 8.62563419342041 2.31108021736145
  3.1403403282165527 7.200535297393799 4.361965179443359
  5.116268634796143 8.524462699890137 0.5452456474304199 'diprospan-1']
 ['diprospan' 63646 'DISPOSPAN INJ.' 'diprospan inj' 8.472127914428711
  5.247250556945801 4.141236782073975 0.024469519034028053
  4.021656036376953 3.8203818798065186 12.062844276428223
  9.542974472045898 4.049654960632324 3.422900438308716 7.03826904296875
  4.766705513000488 5.81174898147583 8.839911460876465
  -0.02327078767120838 'diprospan-1']
 ['dipr

  5.419663906097412 8.414063453674316 0.2979278266429901 'diprospan-1']]
0: 0/1152/1380
0: 0/1153/1380
0: 0/1154/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['musculo' 14205 'MUSCULO' ... 9.128203392028809 0.417672336101532
  'musculo-1']
 ['musculo' 34675
  'MUSCULO BOVINO RESFRIADO EM PEDACO OU MOIDO,DE 1a QUALIDADE' ...
  -2.3228163719177246 15.302648544311523 'musculo-1']
 ['musculo' 45941 'MUSCULO BOVINO FRESCO' ... -2.26706862449646
  14.91273307800293 'musculo-1']
 ...
 ['musculo' 1464115 'MUSCULO BOVINO FRESCO' ... -2.068854331970215
  15.259560585021973 'musculo-1']
 ['musculo' 1468564 'MUSCULO' ... 9.169472694396973 0.756068229675293
  'musculo-1']
 ['musculo' 1479239 'Musculo Bovino kg' ... -2.1645121574401855
  15.630846977233887 'musculo-1']]
0: 0/1155/1380
0: 0/1156/1380
0: 0/1157/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clusterin

  0.7834697961807251 11.553598403930664 3.935169219970703 'mecanico-1']]
0: 0/1186/1380
0: 0/1187/1380
0: 0/1188/1380
0: 0/1189/1380
0: 0/1190/1380
0: 0/1191/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['iodopovidona' 1449 'IODOPOVIDONA DEGERMANTE 1 LITRO' ...
  -2.4929778575897217 8.640573501586914 'iodopovidona1']
 ['iodopovidona' 4141 'IODOPOVIDONA 10% TÓPICO FRASCO 1000ML' ...
  4.243735313415527 10.264220237731934 'iodopovidona0']
 ['iodopovidona' 6061 'IODOPOLIVIDONA 10% 100ML TOPIC' ...
  -2.6965394020080566 8.666790962219238 'iodopovidona1']
 ...
 ['iodopovidona' 1480011 'Iodopovidona Tópico 1000 ml' ...
  4.290593147277832 9.925826072692871 'iodopovidona0']
 ['iodopovidona' 1483995 'Iodopolividona degermante 10% 1000 ml' ...
  -2.6394448280334473 8.74925422668457 'iodopovidona1']
 ['iodopovidona' 1487598 'IODOPOLIVIDONA TOPICO 1000 ML' ...
  4.285879135131836 9.931605339050293 'iodopovido

  23.253889083862305 1.94343101978302 'velcro-1']]
0: 0/1199/1380
0: 0/1200/1380
0: 0/1201/1380
0: 0/1202/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['cefalexina' 460 'CEFALEXINA 250MG /5ML)PO SUS' ... 4.52380895614624
  6.761114120483398 'cefalexina8']
 ['cefalexina' 2301 'CEFALEXINA 250MG60ML  SUSPENSAO ORAL' ...
  3.7632298469543457 4.226369380950928 'cefalexina0']
 ['cefalexina' 2464 'CEFALEXINA 50 MG/ML PARA SUPENSAO ORAL - 1070010028'
  ... 1.2435181140899658 5.9908447265625 'cefalexina3']
 ...
 ['cefalexina' 1486149 'CEFALEXINA 250 MG SUSPENSAO' ...
  5.298791885375977 8.284998893737793 'cefalexina1']
 ['cefalexina' 1487095 'CEFALEXINA SUSPENSAO' ... 5.335000991821289
  8.271434783935547 'cefalexina1']
 ['cefalexina' 1487762 'CEFALEXINA 500MG' ... 8.195509910583496
  -2.928745985031128 'cefalexina2']]
0: 0/1203/1380
0: 0/1204/1380
BOW composed only by nouns and medicines/medical terms of t

  12.605461120605469 7.340151309967041 12.074090957641602 'manete-1']]
0: 0/1207/1380
0: 0/1208/1380
0: 0/1209/1380
0: 0/1210/1380
0: 0/1211/1380
0: 0/1212/1380
0: 0/1213/1380
0: 0/1214/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['raio' 3059 'RAIO X ABDOMEM AP-LATERAL OU' ... -14.660582542419434
  0.804051399230957 'raio0']
 ['raio' 3618 'RAIO X SEIOS DA FACE 2 INC, COM LAUDO' ...
  7.300730228424072 6.222923278808594 'raio-1']
 ['raio' 3716 'RAIO X COXO-FEMURAL 1 INC, COM LAUDO' ...
  8.976606369018555 11.018874168395996 'raio1']
 ...
 ['raio' 1480938 'RAIO X ATM 2 INC COM LAUDO' ... 8.935225486755371
  11.307412147521973 'raio1']
 ['raio' 1481721 'RAIO X MASTOIDES 2 INC, COM LAUDO' ...
  9.024740219116211 11.245462417602539 'raio1']
 ['raio' 1481808 'RAIO X TORAX PA - LAT OBLIQUAS' ... 7.805168628692627
  10.823358535766602 'raio1']]
0: 0/1215/1380
0: 0/1216/1380
0: 0/1217/1380
0: 0/1218/1380
0

  3.4149086475372314 4.192091941833496 2.9662952423095703 'retifica-1']]
0: 0/1220/1380
0: 0/1221/1380
0: 0/1222/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['veda' 2357 '004110 VEDA CALHA 280 GRS' ... 2.910470962524414
  -1.2546608448028564 'veda0']
 ['veda' 4468 'VEDA ROSCA 5MTS' ... 16.897165298461914 0.7123163342475891
  'veda1']
 ['veda' 6555 'Veda Calha 400 GRS' ... 2.873619318008423
  -1.005284070968628 'veda0']
 ...
 ['veda' 1481146 'VEDA CALHA 285 GR' ... 2.787156343460083
  -1.2277032136917114 'veda0']
 ['veda' 1489221 'VEDA ROSCA' ... 16.91891098022461 0.8491854071617126
  'veda1']
 ['veda' 1489747 'VEDA CALHA ALUMINIO 280G AMAZONAS' ... 2.68467378616333
  -1.281261682510376 'veda0']]
0: 0/1223/1380
0: 0/1224/1380
0: 0/1225/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['estampado' 30942 'PLÁSTICO ES

  9.108985900878906 16.170045852661133 'estampado-1']]
0: 0/1226/1380
0: 0/1227/1380
0: 0/1228/1380
0: 0/1229/1380
0: 0/1230/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['tatame' 30537 'TATAME EM EVA 1X1,20 CM DE' ... -9.523513793945312
  -7.402149200439453 'tatame-1']
 ['tatame' 41412 'TATAME INFANTIL COLORIDO 50X50' ... -10.446117401123047
  -7.412288665771484 'tatame-1']
 ['tatame' 163411
  'TATAME DE EVA Etileno vinil acetato fabricado com matéria prima especial para tatame superfície texturizada e siliconada tamanho: 15 mm dimensões: 100.0 cm x 100.0 cm x 1,5 cm   c x l x a . Peso: 1.5 Kg'
  ... -9.777599334716797 -6.812634468078613 'tatame-1']
 ...
 ['tatame' 1460859
  'TATAME DE ENCAIXE, MÉDIA PROJEÇÃO, MEDIDAS 1 X 1 M, ESPESSURA 40 MM, COR AZUL EM EVA EMBORRACHADO.'
  ... -9.048450469970703 -7.014768600463867 'tatame-1']
 ['tatame' 1476886
  'TATAME EM LONA VINIL  ANTIDERRAPANTE IMPERMEAVE

  3.2983853816986084 'prolactina-1']]
0: 0/1232/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['deltametrina' 55011 'DELTAMETRINA 0,02 % LOCAO'
  'deltametrina 0 2 locao' 14.025859832763672 -3.9757816791534424
  10.127604484558105 -3.4249751567840576 -13.136153221130371
  8.938143730163574 -4.32784366607666 7.454366683959961 5.101738452911377
  3.341935396194458 4.941306114196777 12.075590133666992
  11.078267097473145 0.9414127469062805 12.597394943237305
  'deltametrina-1']
 ['deltametrina' 64951 'DELTAMETRINA SHAMPOO 0,2MG/ML'
  'deltametrina shampoo 0 2 mg ml' -2.5046567916870117 2.843289375305176
  11.38552474975586 0.8659166693687439 5.892091274261475
  4.450117111206055 4.893679618835449 3.6712324619293213
  1.894673466682434 11.061408042907715 -1.1135927438735962
  11.766992568969727 6.474991321563721 1.3317897319793701
  -2.595076084136963 'deltametrina-1']
 ['deltametrina' 103620
  'Deltam

  'deltametrina-1']]
0: 0/1233/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['medalha' 10349 'MEDALHA - 200070042' ... 7.998773097991943
  10.900108337402344 'medalha0']
 ['medalha' 17361 'MEDALHA DE BRONZE  DE 6CM DE DIAMETROS E' ...
  8.15003776550293 10.682294845581055 'medalha0']
 ['medalha' 17536 'MEDALHA NO COR DOURADA' ... 8.0687255859375
  11.198003768920898 'medalha0']
 ...
 ['medalha' 1457467 'Medalha' ... 7.790523529052734 11.006830215454102
  'medalha0']
 ['medalha' 1459363
  'MEDALHAS SIMPLES MEDIA COM LOGOMARCA, MEDALHAS DE LIGA METALICA PINTADA EM FORMATO REDONDO DE 30MM A 45MM DE DIAMENTRO, COM DETALHES EM ALTO RELEVO OU ASDESIVO COM DIZERES DO EVENTO E LOGOMARCA, VERSO COM ADESIVO E FITA.'
  ... 5.64731502532959 9.937491416931152 'medalha1']
 ['medalha' 1472156
  'MEDALHA METAL EM ALTO RELEVO, 49 MM COM FITA, HONRA AO MÉRITO, BRONZEADA'
  ... 5.639486789703369 10.01337718963623 'me

failed. This is likely due to too small an eigengap. Consider
adding some noise or jitter to your data.

Falling back to random initialisation!


UMAP employed
Clustering by HDBSCAN with l2 metric.
[['artrolive' 67225 'ARTROLIVE 1,2+1,5   ( SACHE)'
  'artrolive 1 2 5 sache' 7.241791248321533 5.582345962524414
  -5.06388521194458 5.706790447235107 2.433912754058838 5.843802452087402
  5.954897880554199 2.379512310028076 4.7479376792907715
  2.5947628021240234 1.9886234998703003 7.063319683074951
  5.202756881713867 4.622992038726807 9.875839233398438 'artrolive-1']
 ['artrolive' 71686 'ARTROLIVE' 'artrolive' 7.669827461242676
  5.842057228088379 -4.421393394470215 5.563485145568848
  2.561358690261841 5.313807010650635 5.375694274902344
  2.0607693195343018 4.539839744567871 3.156459331512451
  1.862274408340454 7.435418605804443 4.7567243576049805 4.08780574798584
  10.233284950256348 'artrolive-1']
 ['artrolive' 237754 'ARTROLIVE 30 CPR' 'artrolive 30 cpr'
  7.656322479248047 5.113775253295898 -4.3475117683410645
  5.442950248718262 2.45941162109375 5.659106731414795 5.9104413986206055
  2.1553988456726074 4.2945146560668945 3.

  5.434326648712158 3.9383795261383057 10.089322090148926 'artrolive-1']]
0: 0/1254/1380
0: 0/1255/1380
0: 0/1256/1380
0: 0/1257/1380
0: 0/1258/1380
0: 0/1259/1380
0: 0/1260/1380
0: 0/1261/1380
0: 0/1262/1380
0: 0/1263/1380
0: 0/1264/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['coifa' 35402 'COIFA DA RODA PALIO' ... -1.5124694108963013
  10.782546043395996 'coifa1']
 ['coifa' 36781 'COIFA HOMOCINETICA INTERNO Sandero placa OQM - 8580.'
  ... 0.18629465997219086 7.005219459533691 'coifa2']
 ['coifa' 37137 'COIFA DO AMORTECEDOR @@@@@' ... 1.1035664081573486
  -2.5946619510650635 'coifa0']
 ...
 ['coifa' 1474729 'COIFA INTERNA HOMOCINETICA CAIXA CAMBIO FIAT UNO 2005'
  ... -2.363468885421753 12.1597900390625 'coifa1']
 ['coifa' 1478475 'COIFA DA RODA GOL.' ... -1.6086437702178955
  10.79810619354248 'coifa1']
 ['coifa' 1479305 'COIFA LADO CAMBIO SAVEIRO 2014' ...
  -0.21971091628074646 7.48674964904

  -4.245765686035156 'meperidina-1']]
0: 0/1274/1380
0: 0/1275/1380
0: 0/1276/1380
0: 0/1277/1380
0: 0/1278/1380
0: 0/1279/1380
0: 0/1280/1380
0: 0/1281/1380
0: 0/1282/1380
0: 0/1283/1380
0: 0/1284/1380
0: 0/1285/1380
0: 0/1286/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['estribo' 89982 'ESTRIBO 17X17' ... -1.5575956106185913
  6.905541896820068 'estribo-1']
 ['estribo' 90362 'ESTRIBO 17 X 17 X 17' ... -1.4330140352249146
  6.7722392082214355 'estribo-1']
 ['estribo' 96846
  'ESTRIBO 7 X 17, De aço 4.2 mm , caixa com 200 unidades.' ...
  -1.1989562511444092 6.998174667358398 'estribo-1']
 ...
 ['estribo' 1434381 'ESTRIBO VERG. 4.2MM 17 x 17' ... -1.213945746421814
  6.935492038726807 'estribo-1']
 ['estribo' 1444182 'ESTRIBO DE FERRO 7X13 3METROS' ...
  -1.2416719198226929 6.7441935539245605 'estribo-1']
 ['estribo' 1463245
  'Estribo vergalhão 4,2mm 7x17 CA-50 nervurado   CX c/200 unidades.' ...

  -0.8675082921981812 'diacereina-1']]
0: 0/1301/1380
0: 0/1302/1380
0: 0/1303/1380
0: 0/1304/1380
0: 0/1305/1380
0: 0/1306/1380
0: 0/1307/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['cafeina' 59871
  'Cafeina 30 mg + Carisoprol 125 mg + Diclofenaco 50 mg + Paracetamol 300 mg, (comprimidos)'
  'cafeina 30 mg carisoprodol 125 diclofenaco 50 paracetamol 300 comprimido'
  2.613330125808716 2.269742727279663 2.0868613719940186 8.57835578918457
  7.427464008331299 -1.8673685789108276 6.522615909576416
  6.661154270172119 8.486543655395508 0.605304479598999
  11.476841926574707 6.927743911743164 4.283863067626953
  17.388717651367188 2.890990734100342 'cafeina-1']
 ['cafeina' 70174 'CAFEINA+CARISOP+DICLOF.SODIO+'
  'cafeina arison diclo sodio' 3.981304407119751 2.198744535446167
  1.9101054668426514 8.702224731445312 7.966429233551025
  -1.79916250705719 7.23455810546875 6.725832462310791 8.27030181884

  16.873390197753906 3.067030906677246 'cafeina-1']]
0: 0/1308/1380
0: 0/1309/1380
0: 0/1310/1380
0: 0/1311/1380
0: 0/1312/1380
0: 0/1313/1380
0: 0/1314/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['tinidazol' 19544 'TINIDAZOL+NITRATO DE MICONAZOL' ...
  7.740622520446777 5.737598419189453 'tinidazol1']
 ['tinidazol' 23783 'TINIDAZOL  500MG COMP.' ... 4.339291572570801
  7.907022476196289 'tinidazol0']
 ['tinidazol' 25211 'TINIDAZOL 500 mg COMPRIMIDO' ... 4.665114402770996
  7.684550762176514 'tinidazol0']
 ...
 ['tinidazol' 1377031 'TINIDAZOL 500 MG' ... 4.518737316131592
  7.692694187164307 'tinidazol0']
 ['tinidazol' 1463481 'TINIDAZOL + NITRATO DE MICONA' ...
  7.643012046813965 5.994508266448975 'tinidazol1']
 ['tinidazol' 1479911 'TINIDAZOL+MICONAZOL 40GR POMADA' ...
  7.009921550750732 5.724215030670166 'tinidazol1']]
0: 0/1315/1380
0: 0/1316/1380
0: 0/1317/1380
0: 0/1318/1380
0: 0/1319/138

  2.8603103160858154 14.763425827026367 13.42354965209961 'cabide-1']]
0: 0/1321/1380
0: 0/1322/1380
0: 0/1323/1380
0: 0/1324/1380
0: 0/1325/1380
0: 0/1326/1380
0: 0/1327/1380
0: 0/1328/1380
BOW composed only by nouns and medicines/medical terms of the descriptions!
UMAP employed
Clustering by HDBSCAN with l2 metric.
[['paramonoclorofenol' 1263
  'Paramonoclorofenol, canforado, para desinfecção de sistema de canais radiculares. Frasco com 20 ml.'
  ... 5.707361698150635 3.887085199356079 'paramonoclorofenol2']
 ['paramonoclorofenol' 4305 'Paramonoclorofenol' ... 9.672825813293457
  6.265803337097168 'paramonoclorofenol0']
 ['paramonoclorofenol' 6305 'PARAMONOCLOROFENOL 20 ML' ...
  9.733607292175293 6.275102138519287 'paramonoclorofenol0']
 ...
 ['paramonoclorofenol' 1483542 'Paramonoclorofenol, Canforado 20 ML' ...
  -0.6273415684700012 7.902161121368408 'paramonoclorofenol1']
 ['paramonoclorofenol' 1487637 'Paramonoclorofenol, frasco 20ml' ...
  5.911252498626709 4.120553970336914 'p

  -2.625730037689209 -1.3806090354919434 'anilhar-1']]
0: 0/1380/1380


Process Process-12:
Traceback (most recent call last):
  File "/dados01/workspace/ufmg.f01dcc/buildout.python/parts/opt/lib/python3.7/multiprocessing/managers.py", line 811, in _callmethod
    conn = self._tls.connection
AttributeError: 'ForkAwareLocal' object has no attribute 'connection'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/dados01/workspace/ufmg.f01dcc/buildout.python/parts/opt/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/dados01/workspace/ufmg.f01dcc/buildout.python/parts/opt/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-17-ce7ef4e96b05>", line 429, in cluster_on_first_token_groups_bow
    Return_Repres_Comp[it_thread] = df_results
  File "<string>", line 2, in __setitem__
  File "/dados01/workspace/ufmg.f01dcc/buildout.python/parts/opt/lib/python3.7/multiprocessing/managers.py", 

In [None]:
def main(saving_dict, cluster_option, number_threads, tokens, dim_red):
    """Cluster every first-token group in parallel and pickle the merged result.

    The function loads the medical-term set, the canonical word forms and the
    preprocessed item list, groups the items by their first token, splits the
    groups among ``number_threads`` worker processes running
    ``cluster_on_first_token_groups_bow`` and merges the dictionaries that the
    workers publish into a single dictionary, which is pickled to
    ``saving_dict``.

    Parameters
    ----------
    saving_dict : path of the file where the merged dictionary is pickled.
    cluster_option : forwarded unchanged to ``cluster_on_first_token_groups_bow``.
    number_threads : number of worker processes to spawn.
    tokens : forwarded unchanged to ``cluster_on_first_token_groups_bow``.
    dim_red : forwarded unchanged to ``cluster_on_first_token_groups_bow``.

    Returns
    -------
    None. The result is written to ``saving_dict``; progress is printed.
    """
    now = datetime.now()
    print("Start Time = " + str(now))

    n_threads = number_threads

    #It loads the medical terms (medicines, drugs, etc):
    medicines = get_tokens_set('../dados/palavras/medications.txt')
    #It loads the canonical forms and their classes
    canonical_form, word_class = get_canonical_words()
    print("Read Canonical terms.")

    #It loads the items from the list:
    itemlist = ItemList()
    itemlist.load_items_from_file('../dados/items_preprocessed_sp0_sc1.zip', original=True)

    print('Read data preprocessed')
    #It gets the first tokens of each description and groups
    #based on this approach:
    first_token_groups = itemlist.get_first_token_groups()
    group_len = len(first_token_groups)

    #It shuffles the groups (by key) so that each contiguous range handed to a
    #process holds a random mix of groups; a single shuffle is already a
    #uniform permutation, so shuffling twice adds nothing:
    keys_ft = list(first_token_groups.keys())
    random.shuffle(keys_ft)
    first_token_groups_new = {k: first_token_groups[k] for k in keys_ft}

    #It defines the ranges (of the groups) the processes will work on:
    # NOTE(review): thread_ranges[0] / thread_ranges[1] are presumably the
    # per-process start / end indexes -- confirm against get_ranges.
    thread_ranges = get_ranges(group_len, n_threads)
    print('Read ranges')
    print(thread_ranges)

    dictionary_clusters = {}

    #A single manager serves both shared dictionaries; the "with" block shuts
    #its server process down once the results have been copied out.
    with multiprocessing.Manager() as manager:
        Return_Dict = manager.dict()
        Return_Repres = manager.dict()

        #It creates the processes (balanced by shuffling the keys of the dictionary):
        jobs = []
        for i in range(n_threads):
            p = multiprocessing.Process(target=cluster_on_first_token_groups_bow,
                                        args=(first_token_groups_new, itemlist, i, thread_ranges[0][i],
                                              thread_ranges[1][i], medicines, canonical_form, word_class,
                                              cluster_option, tokens, dim_red, Return_Dict, Return_Repres))
            jobs.append(p)
            p.start()

        #It waits for every process and reports the ones that crashed, since
        #their partial results will be missing from the merged dictionary:
        for i, p in enumerate(jobs):
            p.join()
            if p.exitcode != 0:
                print('Warning: process ' + str(i) + ' exited with code ' +
                      str(p.exitcode) + '; its results may be missing.')

        #It joins the results. The proxy must be read while the manager is
        #alive; items() returns a plain list copied from the manager process.
        for k, partial_clusters in Return_Dict.items():
            print('key: '+str(k))
            dictionary_clusters.update(partial_clusters)

    #It saves the dictionary in a file, which is possible to reconstruct the final dictionary:
    with open(saving_dict, "wb") as a_file:
        pickle.dump(dictionary_clusters, a_file)

    now = datetime.now()
    print("End Time = " + str(now))