# Arquivo para teste de clusters

In [20]:
#imports
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
sys.path.append('./clusters')

from clusters.clusters import *
from clusters.utils_clusters import *

# Testando os algoritmos de clustering sem redução de dimensionalidade

In [None]:
# Test parameters: choose which dataset's embeddings get clustered.
# Alternatives: 'Beauty', 'ML-1M', 'Steam', 'HomeKitchen', 'PetSupplies'
dataset = 'Tools'

# Results for the full (non-reduced) embeddings are written under this folder.
output_path = f'clusters/{dataset}/embeddings_completos_metricastrocadas'
os.makedirs(output_path, exist_ok=True)

# Alternative file name that uses a fixed 'Pet' prefix instead of the dataset name:
# embedding_path = f'../data_preprocessing/{dataset}/Pet-similarity-values-thenlper_gte-large.pt'
embedding_path = f'../data_preprocessing/{dataset}/{dataset}-similarity-values-thenlper_gte-large.pt'

# Load the embeddings and report their shape as a quick sanity check.
embeddings = load_embeddings(embedding_path)
print(f"Embeddings originais shape: {embeddings.shape}")

Embeddings originais shape: torch.Size([3416, 1000])


In [3]:
# Clustering algorithms that take the number of clusters (k) as a parameter.
# Full method list, for reference:
#   [run_kmeans, run_agnes, run_fasterpam, run_fastermsc, run_dynmsc]
methods = [run_fasterpam, run_fastermsc, run_dynmsc]

# Alternative (coarser) grid: [6, 20, 50, 100, 200, 400, 750, 1000]
k_values = [
    2, 6, 10, 20, 30, 50, 70, 100,
    150, 200, 300, 400, 500, 750, 1000,
]

# Run each method for each k on the embeddings loaded in the config cell.
run_methods_with_k(
    embeddings=embeddings,
    methods=methods,
    k_values=k_values,
    output_path=output_path,
    dataset_name=dataset,
)

Executando run_fasterpam
 - k = 2
Clusters salvos em clusters/ML-1M/embeddings_completos_metricastrocadas\run_fasterpam\ML-1M_cluster_run_fasterpam_k2.csv
 - k = 6
Clusters salvos em clusters/ML-1M/embeddings_completos_metricastrocadas\run_fasterpam\ML-1M_cluster_run_fasterpam_k6.csv
 - k = 10
Clusters salvos em clusters/ML-1M/embeddings_completos_metricastrocadas\run_fasterpam\ML-1M_cluster_run_fasterpam_k10.csv
 - k = 20
Clusters salvos em clusters/ML-1M/embeddings_completos_metricastrocadas\run_fasterpam\ML-1M_cluster_run_fasterpam_k20.csv
 - k = 30
Clusters salvos em clusters/ML-1M/embeddings_completos_metricastrocadas\run_fasterpam\ML-1M_cluster_run_fasterpam_k30.csv
 - k = 50
Clusters salvos em clusters/ML-1M/embeddings_completos_metricastrocadas\run_fasterpam\ML-1M_cluster_run_fasterpam_k50.csv
 - k = 70
Clusters salvos em clusters/ML-1M/embeddings_completos_metricastrocadas\run_fasterpam\ML-1M_cluster_run_fasterpam_k70.csv
 - k = 100
Clusters salvos em clusters/ML-1M/embeddings

In [None]:
# Spectral clustering test: grids of cluster counts and neighbour counts
# handed to run_spectral_combinations.
k_values = [10, 50, 100, 300, 500]
n_values = [20, 50, 75, 100, 150]

run_spectral_combinations(
    embeddings=embeddings,
    k_values=k_values,
    neighbor_values=n_values,
    output_path=output_path,
    dataset_name=dataset,
)


In [4]:
# DBSCAN test: grids of eps and min_samples handed to run_dbscan_combinations.
eps_values = [0.05, 0.1, 0.2, 0.5, 1.0]
min_samples_values = [10, 20, 30, 50, 100, 200]

run_dbscan_combinations(
    embeddings=embeddings,
    eps_values=eps_values,
    min_samples_values=min_samples_values,
    output_path=output_path,
    dataset_name=dataset,
)

Executando run_dbscan...
DBSCAN - eps = 0.05, min_samples = 10
DBSCAN (eps=0.05, min_samples=10) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 20
DBSCAN (eps=0.05, min_samples=20) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 30
DBSCAN (eps=0.05, min_samples=30) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 50
DBSCAN (eps=0.05, min_samples=50) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 100
DBSCAN (eps=0.05, min_samples=100) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 200
DBSCAN (eps=0.05, min_samples=200) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.1, min_samples = 10
DBSCAN (eps=0.1, min_samples=10) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.1, min_samples = 20
DBSCAN (eps=0.1, min_samples=20) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.1, min_samples = 30
DBSCAN (eps=0.1, min_samples=30) gerou 1 cluster o

In [5]:
# HDBSCAN test: grids of min_cluster_size and min_samples.
# Larger grids, for reference:
#   min_clusters_sizes = [20, 30, 100, 200]
#   min_samples_values = [10, 20, 30, 50, 100]
# NOTE(review): the output saved with this cell shows every combination failing
# with "Unrecognized metric 'cosine'"; presumably the metric is chosen inside
# run_hdbscan_combinations -- confirm and fix there before relying on results.
min_clusters_sizes = [30, 200]
min_samples_values = [20, 100]

run_hdbscan_combinations(
    embeddings=embeddings,
    min_cluster_sizes=min_clusters_sizes,
    min_samples=min_samples_values,
    output_path=output_path,
    dataset_name=dataset,
)

Executando run_hdbscan...
HDBSCAN - min_cluster_size = 30, min_samples = 20
Erro no HDBSCAN (min_cluster_size=30 e min_samples =20): Unrecognized metric 'cosine'
HDBSCAN - min_cluster_size = 30, min_samples = 100
Erro no HDBSCAN (min_cluster_size=30 e min_samples =100): Unrecognized metric 'cosine'
HDBSCAN - min_cluster_size = 200, min_samples = 20
Erro no HDBSCAN (min_cluster_size=200 e min_samples =20): Unrecognized metric 'cosine'
HDBSCAN - min_cluster_size = 200, min_samples = 100
Erro no HDBSCAN (min_cluster_size=200 e min_samples =100): Unrecognized metric 'cosine'
Clusterização concluída e resultados salvos




# Testando com redução de dimensionalidade

In [17]:
# Test parameters for the dimensionality-reduction experiments.
# Alternatives: 'ML-1M', 'Steam', 'PetSupplies', 'Tools'
dataset = 'Beauty'

# Alternative file name that uses a fixed 'Pet' prefix instead of the dataset name:
# embedding_path = f'../data_preprocessing/{dataset}/Pet-similarity-values-thenlper_gte-large.pt'
embedding_path = f'../data_preprocessing/{dataset}/{dataset}-similarity-values-thenlper_gte-large.pt'

# Load the embeddings and report their shape as a quick sanity check.
embeddings = load_embeddings(embedding_path)
print(f"Embeddings originais shape: {embeddings.shape}")

# Reduction methods to try; alternative: ['pca', 'umap']
methods_reducing = ['umap']

# Target dimensionalities; alternative grid: [8, 32, 64, 128, 256, 512]
n_components = [16, 4]


Embeddings originais shape: torch.Size([57226, 1000])


In [18]:
# k-based clustering methods applied to dimensionality-reduced embeddings.
# Full method list, for reference:
#   [run_kmeans, run_agnes, run_fasterpam, run_fastermsc]
methods_clustering = [run_kmeans, run_agnes, run_fasterpam]
k_values = [
    2, 5, 10, 30, 50, 100, 150, 200, 400, 500,
    750, 1000, 1250, 1500, 1750, 2000, 2250, 2500, 2750, 3000,
]

# Every (reduction method, target dimensionality) pair, in nested-loop order.
reduction_grid = [(red, dim) for red in methods_reducing for dim in n_components]

for method_red, n in reduction_grid:
    print(f'\n Embeddings reduzidos com {method_red} e n_components {n}')
    reduced_embeddings = dimensionality_reduction(
        embeddings=embeddings, method=method_red, n_components=n
    )
    # Convert any non-tensor result to a float32 torch tensor.
    if not isinstance(reduced_embeddings, torch.Tensor):
        reduced_embeddings = torch.tensor(reduced_embeddings, dtype=torch.float32)

    # One output folder per (dataset, reduction method, dimensionality).
    output_path = f'clusters/{dataset}/{method_red}/n_components{n}'
    os.makedirs(output_path, exist_ok=True)

    print('Metodos com numero de clusters (k) como parametro ...')
    run_methods_with_k(
        embeddings=reduced_embeddings,
        methods=methods_clustering,
        k_values=k_values,
        output_path=output_path,
        dataset_name=dataset,
    )

print("\nTodos os testes para métodos com k fixo concluídos")


 Embeddings reduzidos com umap e n_components 16
Aplicando UMAP com n_components = 16...


  warn(


Dimensionalidade reduzida de 1000 para 16
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/Beauty/umap/n_components16\run_kmeans\Beauty_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/Beauty/umap/n_components16\run_kmeans\Beauty_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/Beauty/umap/n_components16\run_kmeans\Beauty_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/Beauty/umap/n_components16\run_kmeans\Beauty_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/Beauty/umap/n_components16\run_kmeans\Beauty_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/Beauty/umap/n_components16\run_kmeans\Beauty_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/Beauty/umap/n_components16\run_kmeans\Beauty_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/Beauty/umap/n_components16\run_kmeans\Beauty_cluster_run_kmeans_k200.csv


  warn(


Dimensionalidade reduzida de 1000 para 4
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/Beauty/umap/n_components4\run_kmeans\Beauty_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/Beauty/umap/n_components4\run_kmeans\Beauty_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/Beauty/umap/n_components4\run_kmeans\Beauty_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/Beauty/umap/n_components4\run_kmeans\Beauty_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/Beauty/umap/n_components4\run_kmeans\Beauty_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/Beauty/umap/n_components4\run_kmeans\Beauty_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/Beauty/umap/n_components4\run_kmeans\Beauty_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/Beauty/umap/n_components4\run_kmeans\Beauty_cluster_run_kmeans_k200.csv
 - k = 40

In [21]:
# Run the k-based clustering methods on UMAP-reduced embeddings for several
# datasets. The pipeline is identical for every dataset, so it is written once
# and driven by a list instead of being copy-pasted per dataset.

# Settings shared by every dataset in this cell.
methods_reducing = ['umap']
n_components = [32, 64, 128, 256, 512]
k_values = [
    2, 5, 10, 30, 50, 100, 150, 200, 400,
    500, 750, 1000, 1250, 1500, 2000, 2500, 3000,
]

# (dataset name, clustering methods in the order they should run).
dataset_runs = [
    ('ML-1M', [run_kmeans, run_fasterpam, run_agnes]),
    ('Beauty', [run_kmeans, run_agnes, run_fasterpam]),
]

# The loop variables are deliberately module-level names (dataset, embeddings,
# methods_clustering, output_path, ...): later cells read them, and after this
# cell they hold the values of the last dataset processed.
for dataset, methods_clustering in dataset_runs:
    embedding_path = f'../data_preprocessing/{dataset}/{dataset}-similarity-values-thenlper_gte-large.pt'
    embeddings = load_embeddings(embedding_path)

    for method_red in methods_reducing:
        for n in n_components:
            print(f'\n Embeddings reduzidos com {method_red} e n_components {n}')
            reduced_embeddings = dimensionality_reduction(
                embeddings=embeddings, method=method_red, n_components=n
            )
            # Convert any non-tensor result to a float32 torch tensor.
            if not isinstance(reduced_embeddings, torch.Tensor):
                reduced_embeddings = torch.tensor(reduced_embeddings, dtype=torch.float32)

            # One output folder per (dataset, reduction method, dimensionality).
            output_path = f'clusters/{dataset}/{method_red}/n_components{n}'
            os.makedirs(output_path, exist_ok=True)

            print('Metodos com numero de clusters (k) como parametro ...')
            run_methods_with_k(
                embeddings=reduced_embeddings,
                methods=methods_clustering,
                k_values=k_values,
                output_path=output_path,
                dataset_name=dataset,
            )

    # Printed once per dataset.
    print("\nTodos os testes para métodos com k fixo concluídos")



 Embeddings reduzidos com umap e n_components 32
Aplicando UMAP com n_components = 32...


  warn(


Dimensionalidade reduzida de 1000 para 32
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/ML-1M/umap/n_components32\run_kmeans\ML-1M_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/ML-1M/umap/n_components32\run_kmeans\ML-1M_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/ML-1M/umap/n_components32\run_kmeans\ML-1M_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/ML-1M/umap/n_components32\run_kmeans\ML-1M_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/ML-1M/umap/n_components32\run_kmeans\ML-1M_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/ML-1M/umap/n_components32\run_kmeans\ML-1M_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/ML-1M/umap/n_components32\run_kmeans\ML-1M_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/ML-1M/umap/n_components32\run_kmeans\ML-1M_cluster_run_kmeans_k200.csv
 - k = 400
Clust

  warn(


Dimensionalidade reduzida de 1000 para 64
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/ML-1M/umap/n_components64\run_kmeans\ML-1M_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/ML-1M/umap/n_components64\run_kmeans\ML-1M_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/ML-1M/umap/n_components64\run_kmeans\ML-1M_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/ML-1M/umap/n_components64\run_kmeans\ML-1M_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/ML-1M/umap/n_components64\run_kmeans\ML-1M_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/ML-1M/umap/n_components64\run_kmeans\ML-1M_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/ML-1M/umap/n_components64\run_kmeans\ML-1M_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/ML-1M/umap/n_components64\run_kmeans\ML-1M_cluster_run_kmeans_k200.csv
 - k = 400
Clust

  warn(


Dimensionalidade reduzida de 1000 para 128
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/ML-1M/umap/n_components128\run_kmeans\ML-1M_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/ML-1M/umap/n_components128\run_kmeans\ML-1M_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/ML-1M/umap/n_components128\run_kmeans\ML-1M_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/ML-1M/umap/n_components128\run_kmeans\ML-1M_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/ML-1M/umap/n_components128\run_kmeans\ML-1M_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/ML-1M/umap/n_components128\run_kmeans\ML-1M_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/ML-1M/umap/n_components128\run_kmeans\ML-1M_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/ML-1M/umap/n_components128\run_kmeans\ML-1M_cluster_run_kmeans_k200.csv
 - k = 

  warn(


Dimensionalidade reduzida de 1000 para 256
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/ML-1M/umap/n_components256\run_kmeans\ML-1M_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/ML-1M/umap/n_components256\run_kmeans\ML-1M_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/ML-1M/umap/n_components256\run_kmeans\ML-1M_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/ML-1M/umap/n_components256\run_kmeans\ML-1M_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/ML-1M/umap/n_components256\run_kmeans\ML-1M_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/ML-1M/umap/n_components256\run_kmeans\ML-1M_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/ML-1M/umap/n_components256\run_kmeans\ML-1M_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/ML-1M/umap/n_components256\run_kmeans\ML-1M_cluster_run_kmeans_k200.csv
 - k = 

  warn(


Dimensionalidade reduzida de 1000 para 512
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/ML-1M/umap/n_components512\run_kmeans\ML-1M_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/ML-1M/umap/n_components512\run_kmeans\ML-1M_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/ML-1M/umap/n_components512\run_kmeans\ML-1M_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/ML-1M/umap/n_components512\run_kmeans\ML-1M_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/ML-1M/umap/n_components512\run_kmeans\ML-1M_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/ML-1M/umap/n_components512\run_kmeans\ML-1M_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/ML-1M/umap/n_components512\run_kmeans\ML-1M_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/ML-1M/umap/n_components512\run_kmeans\ML-1M_cluster_run_kmeans_k200.csv
 - k = 

  warn(


Dimensionalidade reduzida de 1000 para 32
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/Beauty/umap/n_components32\run_kmeans\Beauty_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/Beauty/umap/n_components32\run_kmeans\Beauty_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/Beauty/umap/n_components32\run_kmeans\Beauty_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/Beauty/umap/n_components32\run_kmeans\Beauty_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/Beauty/umap/n_components32\run_kmeans\Beauty_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/Beauty/umap/n_components32\run_kmeans\Beauty_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/Beauty/umap/n_components32\run_kmeans\Beauty_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/Beauty/umap/n_components32\run_kmeans\Beauty_cluster_run_kmeans_k200.csv


  warn(


Dimensionalidade reduzida de 1000 para 64
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/Beauty/umap/n_components64\run_kmeans\Beauty_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/Beauty/umap/n_components64\run_kmeans\Beauty_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/Beauty/umap/n_components64\run_kmeans\Beauty_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/Beauty/umap/n_components64\run_kmeans\Beauty_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/Beauty/umap/n_components64\run_kmeans\Beauty_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/Beauty/umap/n_components64\run_kmeans\Beauty_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/Beauty/umap/n_components64\run_kmeans\Beauty_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/Beauty/umap/n_components64\run_kmeans\Beauty_cluster_run_kmeans_k200.csv


  warn(


Dimensionalidade reduzida de 1000 para 128
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/Beauty/umap/n_components128\run_kmeans\Beauty_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/Beauty/umap/n_components128\run_kmeans\Beauty_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/Beauty/umap/n_components128\run_kmeans\Beauty_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/Beauty/umap/n_components128\run_kmeans\Beauty_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/Beauty/umap/n_components128\run_kmeans\Beauty_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/Beauty/umap/n_components128\run_kmeans\Beauty_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/Beauty/umap/n_components128\run_kmeans\Beauty_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/Beauty/umap/n_components128\run_kmeans\Beauty_cluster_run_kmeans_

  warn(


Dimensionalidade reduzida de 1000 para 256
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/Beauty/umap/n_components256\run_kmeans\Beauty_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/Beauty/umap/n_components256\run_kmeans\Beauty_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/Beauty/umap/n_components256\run_kmeans\Beauty_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/Beauty/umap/n_components256\run_kmeans\Beauty_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/Beauty/umap/n_components256\run_kmeans\Beauty_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/Beauty/umap/n_components256\run_kmeans\Beauty_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/Beauty/umap/n_components256\run_kmeans\Beauty_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/Beauty/umap/n_components256\run_kmeans\Beauty_cluster_run_kmeans_

  warn(


Dimensionalidade reduzida de 1000 para 512
Metodos com numero de clusters (k) como parametro ...
Executando run_kmeans
 - k = 2
Clusters salvos em clusters/Beauty/umap/n_components512\run_kmeans\Beauty_cluster_run_kmeans_k2.csv
 - k = 5
Clusters salvos em clusters/Beauty/umap/n_components512\run_kmeans\Beauty_cluster_run_kmeans_k5.csv
 - k = 10
Clusters salvos em clusters/Beauty/umap/n_components512\run_kmeans\Beauty_cluster_run_kmeans_k10.csv
 - k = 30
Clusters salvos em clusters/Beauty/umap/n_components512\run_kmeans\Beauty_cluster_run_kmeans_k30.csv
 - k = 50
Clusters salvos em clusters/Beauty/umap/n_components512\run_kmeans\Beauty_cluster_run_kmeans_k50.csv
 - k = 100
Clusters salvos em clusters/Beauty/umap/n_components512\run_kmeans\Beauty_cluster_run_kmeans_k100.csv
 - k = 150
Clusters salvos em clusters/Beauty/umap/n_components512\run_kmeans\Beauty_cluster_run_kmeans_k150.csv
 - k = 200
Clusters salvos em clusters/Beauty/umap/n_components512\run_kmeans\Beauty_cluster_run_kmeans_

In [13]:
# Spectral clustering on dimensionality-reduced embeddings: grids of cluster
# counts and neighbour counts handed to run_spectral_combinations.
k_values = [10, 20, 50, 100, 300, 450]
n_values = [30, 75, 100, 150, 200]

print('Testes para Spectral Clustering')

# Every (reduction method, target dimensionality) pair, in nested-loop order.
reduction_grid = [(red, dim) for red in methods_reducing for dim in n_components]

for method_red, n in reduction_grid:
    print(f'\n Embeddings reduzidos com {method_red} e n_components {n}')
    reduced_embeddings = dimensionality_reduction(
        embeddings=embeddings, method=method_red, n_components=n
    )
    # Convert any non-tensor result to a float32 torch tensor.
    if not isinstance(reduced_embeddings, torch.Tensor):
        reduced_embeddings = torch.tensor(reduced_embeddings, dtype=torch.float32)

    # One output folder per (dataset, reduction method, dimensionality).
    output_path = f'clusters/{dataset}/{method_red}/n_components{n}'
    os.makedirs(output_path, exist_ok=True)

    run_spectral_combinations(
        embeddings=reduced_embeddings,
        k_values=k_values,
        neighbor_values=n_values,
        output_path=output_path,
        dataset_name=dataset,
    )

print("\nTodos os testes para Spectral Clustering concluídos")

Testes para Spectral Clustering

 Embeddings reduzidos com umap e n_components 16
Aplicando UMAP com n_components = 16...


  warn(


KeyboardInterrupt: 

In [None]:
# DBSCAN on dimensionality-reduced embeddings: grids of eps and min_samples
# handed to run_dbscan_combinations.
eps_values = [0.05, 0.1, 0.2, 0.6, 1.0]
min_samples_values = [10, 20, 50, 100, 200]

print('Testes para DBSCAN Clustering')

# Every (reduction method, target dimensionality) pair, in nested-loop order.
reduction_grid = [(red, dim) for red in methods_reducing for dim in n_components]

for method_red, n in reduction_grid:
    print(f'\n Embeddings reduzidos com {method_red} e n_components {n}')
    reduced_embeddings = dimensionality_reduction(
        embeddings=embeddings, method=method_red, n_components=n
    )
    # Convert any non-tensor result to a float32 torch tensor.
    if not isinstance(reduced_embeddings, torch.Tensor):
        reduced_embeddings = torch.tensor(reduced_embeddings, dtype=torch.float32)

    # One output folder per (dataset, reduction method, dimensionality).
    output_path = f'clusters/{dataset}/{method_red}/n_components{n}'
    os.makedirs(output_path, exist_ok=True)

    run_dbscan_combinations(
        embeddings=reduced_embeddings,
        eps_values=eps_values,
        min_samples_values=min_samples_values,
        output_path=output_path,
        dataset_name=dataset,
    )

print("\nTodos os testes para DBSCAN concluídos")

Testes para DBSCAN Clustering

 Embeddings reduzidos com umap e n_components 8
Aplicando UMAP com n_components = 8...


  warn(


Dimensionalidade reduzida de 1000 para 8
Executando run_dbscan...
DBSCAN - eps = 0.05, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components8\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min10.csv
DBSCAN - eps = 0.05, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components8\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min20.csv
DBSCAN - eps = 0.05, min_samples = 50
DBSCAN (eps=0.05, min_samples=50) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 100
DBSCAN (eps=0.05, min_samples=100) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 200
DBSCAN (eps=0.05, min_samples=200) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.1, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components8\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min10.csv
DBSCAN - eps = 0.1, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components8\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min20.csv
DBSCAN - eps = 0.

  warn(


Dimensionalidade reduzida de 1000 para 32
Executando run_dbscan...
DBSCAN - eps = 0.05, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components32\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min10.csv
DBSCAN - eps = 0.05, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components32\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min20.csv
DBSCAN - eps = 0.05, min_samples = 50
DBSCAN (eps=0.05, min_samples=50) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 100
DBSCAN (eps=0.05, min_samples=100) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 200
DBSCAN (eps=0.05, min_samples=200) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.1, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components32\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min10.csv
DBSCAN - eps = 0.1, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components32\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min20.csv
DBSCAN - eps

  warn(


Dimensionalidade reduzida de 1000 para 64
Executando run_dbscan...
DBSCAN - eps = 0.05, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components64\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min10.csv
DBSCAN - eps = 0.05, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components64\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min20.csv
DBSCAN - eps = 0.05, min_samples = 50
DBSCAN (eps=0.05, min_samples=50) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 100
DBSCAN (eps=0.05, min_samples=100) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 200
DBSCAN (eps=0.05, min_samples=200) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.1, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components64\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min10.csv
DBSCAN - eps = 0.1, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components64\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min20.csv
DBSCAN - eps

  warn(


Dimensionalidade reduzida de 1000 para 128
Executando run_dbscan...
DBSCAN - eps = 0.05, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components128\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min10.csv
DBSCAN - eps = 0.05, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components128\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min20.csv
DBSCAN - eps = 0.05, min_samples = 50
DBSCAN (eps=0.05, min_samples=50) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 100
DBSCAN (eps=0.05, min_samples=100) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 200
DBSCAN (eps=0.05, min_samples=200) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.1, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components128\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min10.csv
DBSCAN - eps = 0.1, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components128\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min20.csv
DBSCAN 

  warn(


Dimensionalidade reduzida de 1000 para 256
Executando run_dbscan...
DBSCAN - eps = 0.05, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components256\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min10.csv
DBSCAN - eps = 0.05, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components256\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min20.csv
DBSCAN - eps = 0.05, min_samples = 50
DBSCAN (eps=0.05, min_samples=50) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 100
DBSCAN (eps=0.05, min_samples=100) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 200
DBSCAN (eps=0.05, min_samples=200) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.1, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components256\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min10.csv
DBSCAN - eps = 0.1, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components256\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min20.csv
DBSCAN 

  warn(


Dimensionalidade reduzida de 1000 para 512
Executando run_dbscan...
DBSCAN - eps = 0.05, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components512\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min10.csv
DBSCAN - eps = 0.05, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components512\run_dbscan/Tools_clusters_run_dbscan_eps0.05_min20.csv
DBSCAN - eps = 0.05, min_samples = 50
DBSCAN (eps=0.05, min_samples=50) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 100
DBSCAN (eps=0.05, min_samples=100) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.05, min_samples = 200
DBSCAN (eps=0.05, min_samples=200) gerou 1 cluster ou só ruído, ignorado.
DBSCAN - eps = 0.1, min_samples = 10
Clusters salvos em clusters/Tools/umap/n_components512\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min10.csv
DBSCAN - eps = 0.1, min_samples = 20
Clusters salvos em clusters/Tools/umap/n_components512\run_dbscan/Tools_clusters_run_dbscan_eps0.1_min20.csv
DBSCAN 

In [None]:
# HDBSCAN test: for every (reduction method, n_components) pair, reduce the
# embeddings and run HDBSCAN over the grid min_cluster_size x min_samples.
#
# Depends on names defined in earlier cells: `embeddings`, `dataset`,
# `methods_reducing` and `n_components`. `torch`, `dimensionality_reduction`
# and `run_hdbscan_combinations` are not imported explicitly in the import
# cell — presumably they come from the `clusters` star imports (TODO confirm).
min_clusters_sizes = [30, 50, 100, 200, 300]
min_samples_values = [10, 20, 30, 50, 100]

print('Testes para HDBSCAN Clustering')
for method_red in methods_reducing:
    for n in n_components:
        print(f'\n Embeddings reduzidos com {method_red} e n_components {n}')
        reduced_embeddings = dimensionality_reduction(embeddings=embeddings, method=method_red, n_components=n)
        # Convert only when the reducer did not already hand back a tensor.
        if not isinstance(reduced_embeddings, torch.Tensor):
            reduced_embeddings = torch.tensor(reduced_embeddings, dtype=torch.float32)

        # Keep the per-configuration directory in a cell-local name. Reassigning
        # the notebook-wide `output_path` (set in the parameter cell and used by
        # the k-based clustering cell) would make a later re-run of that cell
        # write into the last reduced-embedding directory instead.
        hdbscan_output_path = f'clusters/{dataset}/{method_red}/n_components{n}'
        os.makedirs(hdbscan_output_path, exist_ok=True)

        run_hdbscan_combinations(
            embeddings=reduced_embeddings,
            min_cluster_sizes=min_clusters_sizes,
            min_samples=min_samples_values,
            output_path=hdbscan_output_path,
            dataset_name=dataset
        )

print("\nTodos os testes para HDBSCAN concluídos")

Testes para HDBSCAN Clustering

 Embeddings reduzidos com umap e n_components 8
Aplicando UMAP com n_components = 8...


  warn(


Dimensionalidade reduzida de 1000 para 8
Executando run_hdbscan...
HDBSCAN - min_cluster_size = 30, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample10.csv
HDBSCAN - min_cluster_size = 30, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample20.csv
HDBSCAN - min_cluster_size = 30, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample30.csv
HDBSCAN - min_cluster_size = 30, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample50.csv
HDBSCAN - min_cluster_size = 30, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample100.csv
HDBSCAN - min_cluster_size = 50, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample10.csv
HDBSCAN - min_cluster_size = 50, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample20.csv
HDBSCAN - min_cluster_size = 50, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample30.csv
HDBSCAN - min_cluster_size = 50, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample50.csv
HDBSCAN - min_cluster_size = 50, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample100.csv
HDBSCAN - min_cluster_size = 100, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample10.csv
HDBSCAN - min_cluster_size = 100, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample20.csv
HDBSCAN - min_cluster_size = 100, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample30.csv
HDBSCAN - min_cluster_size = 100, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample50.csv
HDBSCAN - min_cluster_size = 100, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample100.csv
HDBSCAN - min_cluster_size = 200, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample10.csv
HDBSCAN - min_cluster_size = 200, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample20.csv
HDBSCAN - min_cluster_size = 200, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample30.csv
HDBSCAN - min_cluster_size = 200, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample50.csv
HDBSCAN - min_cluster_size = 200, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample100.csv
HDBSCAN - min_cluster_size = 300, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample10.csv
HDBSCAN - min_cluster_size = 300, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample20.csv
HDBSCAN - min_cluster_size = 300, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample30.csv
HDBSCAN - min_cluster_size = 300, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample50.csv
HDBSCAN - min_cluster_size = 300, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components8\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample100.csv
Clusterização concluída e resultados salvos

 Embeddings reduzidos com umap e n_components 32
Aplicando UMAP com n_components = 32...


  warn(


Dimensionalidade reduzida de 1000 para 32
Executando run_hdbscan...
HDBSCAN - min_cluster_size = 30, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample10.csv
HDBSCAN - min_cluster_size = 30, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample20.csv
HDBSCAN - min_cluster_size = 30, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample30.csv
HDBSCAN - min_cluster_size = 30, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample50.csv
HDBSCAN - min_cluster_size = 30, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample100.csv
HDBSCAN - min_cluster_size = 50, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample10.csv
HDBSCAN - min_cluster_size = 50, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample20.csv
HDBSCAN - min_cluster_size = 50, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample30.csv
HDBSCAN - min_cluster_size = 50, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample50.csv
HDBSCAN - min_cluster_size = 50, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample100.csv
HDBSCAN - min_cluster_size = 100, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample10.csv
HDBSCAN - min_cluster_size = 100, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample20.csv
HDBSCAN - min_cluster_size = 100, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample30.csv
HDBSCAN - min_cluster_size = 100, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample50.csv
HDBSCAN - min_cluster_size = 100, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample100.csv
HDBSCAN - min_cluster_size = 200, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample10.csv
HDBSCAN - min_cluster_size = 200, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample20.csv
HDBSCAN - min_cluster_size = 200, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample30.csv
HDBSCAN - min_cluster_size = 200, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample50.csv
HDBSCAN - min_cluster_size = 200, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample100.csv
HDBSCAN - min_cluster_size = 300, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample10.csv
HDBSCAN - min_cluster_size = 300, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample20.csv
HDBSCAN - min_cluster_size = 300, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample30.csv
HDBSCAN - min_cluster_size = 300, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample50.csv
HDBSCAN - min_cluster_size = 300, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components32\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample100.csv
Clusterização concluída e resultados salvos

 Embeddings reduzidos com umap e n_components 64
Aplicando UMAP com n_components = 64...


  warn(


Dimensionalidade reduzida de 1000 para 64
Executando run_hdbscan...
HDBSCAN - min_cluster_size = 30, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample10.csv
HDBSCAN - min_cluster_size = 30, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample20.csv
HDBSCAN - min_cluster_size = 30, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample30.csv
HDBSCAN - min_cluster_size = 30, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample50.csv
HDBSCAN - min_cluster_size = 30, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample100.csv
HDBSCAN - min_cluster_size = 50, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample10.csv
HDBSCAN - min_cluster_size = 50, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample20.csv
HDBSCAN - min_cluster_size = 50, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample30.csv
HDBSCAN - min_cluster_size = 50, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample50.csv
HDBSCAN - min_cluster_size = 50, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample100.csv
HDBSCAN - min_cluster_size = 100, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample10.csv
HDBSCAN - min_cluster_size = 100, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample20.csv
HDBSCAN - min_cluster_size = 100, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample30.csv
HDBSCAN - min_cluster_size = 100, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample50.csv
HDBSCAN - min_cluster_size = 100, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample100.csv
HDBSCAN - min_cluster_size = 200, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample10.csv
HDBSCAN - min_cluster_size = 200, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample20.csv
HDBSCAN - min_cluster_size = 200, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample30.csv
HDBSCAN - min_cluster_size = 200, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample50.csv
HDBSCAN - min_cluster_size = 200, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample100.csv
HDBSCAN - min_cluster_size = 300, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample10.csv
HDBSCAN - min_cluster_size = 300, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample20.csv
HDBSCAN - min_cluster_size = 300, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample30.csv
HDBSCAN - min_cluster_size = 300, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample50.csv
HDBSCAN - min_cluster_size = 300, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components64\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample100.csv
Clusterização concluída e resultados salvos

 Embeddings reduzidos com umap e n_components 128
Aplicando UMAP com n_components = 128...


  warn(


Dimensionalidade reduzida de 1000 para 128
Executando run_hdbscan...
HDBSCAN - min_cluster_size = 30, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample10.csv
HDBSCAN - min_cluster_size = 30, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample20.csv
HDBSCAN - min_cluster_size = 30, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample30.csv
HDBSCAN - min_cluster_size = 30, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample50.csv
HDBSCAN - min_cluster_size = 30, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample100.csv
HDBSCAN - min_cluster_size = 50, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample10.csv
HDBSCAN - min_cluster_size = 50, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample20.csv
HDBSCAN - min_cluster_size = 50, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample30.csv
HDBSCAN - min_cluster_size = 50, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample50.csv
HDBSCAN - min_cluster_size = 50, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample100.csv
HDBSCAN - min_cluster_size = 100, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample10.csv
HDBSCAN - min_cluster_size = 100, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample20.csv
HDBSCAN - min_cluster_size = 100, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample30.csv
HDBSCAN - min_cluster_size = 100, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample50.csv
HDBSCAN - min_cluster_size = 100, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample100.csv
HDBSCAN - min_cluster_size = 200, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample10.csv
HDBSCAN - min_cluster_size = 200, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample20.csv
HDBSCAN - min_cluster_size = 200, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample30.csv
HDBSCAN - min_cluster_size = 200, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample50.csv
HDBSCAN - min_cluster_size = 200, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample100.csv
HDBSCAN - min_cluster_size = 300, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample10.csv
HDBSCAN - min_cluster_size = 300, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample20.csv
HDBSCAN - min_cluster_size = 300, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample30.csv
HDBSCAN - min_cluster_size = 300, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample50.csv
HDBSCAN - min_cluster_size = 300, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components128\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample100.csv
Clusterização concluída e resultados salvos

 Embeddings reduzidos com umap e n_components 256
Aplicando UMAP com n_components = 256...


  warn(


Dimensionalidade reduzida de 1000 para 256
Executando run_hdbscan...
HDBSCAN - min_cluster_size = 30, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample10.csv
HDBSCAN - min_cluster_size = 30, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample20.csv
HDBSCAN - min_cluster_size = 30, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample30.csv
HDBSCAN - min_cluster_size = 30, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample50.csv
HDBSCAN - min_cluster_size = 30, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample100.csv
HDBSCAN - min_cluster_size = 50, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample10.csv
HDBSCAN - min_cluster_size = 50, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample20.csv
HDBSCAN - min_cluster_size = 50, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample30.csv
HDBSCAN - min_cluster_size = 50, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample50.csv
HDBSCAN - min_cluster_size = 50, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample100.csv
HDBSCAN - min_cluster_size = 100, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample10.csv
HDBSCAN - min_cluster_size = 100, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample20.csv
HDBSCAN - min_cluster_size = 100, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample30.csv
HDBSCAN - min_cluster_size = 100, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample50.csv
HDBSCAN - min_cluster_size = 100, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample100.csv
HDBSCAN - min_cluster_size = 200, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample10.csv
HDBSCAN - min_cluster_size = 200, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample20.csv
HDBSCAN - min_cluster_size = 200, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample30.csv
HDBSCAN - min_cluster_size = 200, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample50.csv
HDBSCAN - min_cluster_size = 200, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample100.csv
HDBSCAN - min_cluster_size = 300, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample10.csv
HDBSCAN - min_cluster_size = 300, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample20.csv
HDBSCAN - min_cluster_size = 300, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample30.csv
HDBSCAN - min_cluster_size = 300, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample50.csv
HDBSCAN - min_cluster_size = 300, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components256\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample100.csv
Clusterização concluída e resultados salvos

 Embeddings reduzidos com umap e n_components 512
Aplicando UMAP com n_components = 512...


  warn(


Dimensionalidade reduzida de 1000 para 512
Executando run_hdbscan...
HDBSCAN - min_cluster_size = 30, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample10.csv
HDBSCAN - min_cluster_size = 30, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample20.csv
HDBSCAN - min_cluster_size = 30, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample30.csv
HDBSCAN - min_cluster_size = 30, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample50.csv
HDBSCAN - min_cluster_size = 30, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize30_minsample100.csv
HDBSCAN - min_cluster_size = 50, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample10.csv
HDBSCAN - min_cluster_size = 50, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample20.csv
HDBSCAN - min_cluster_size = 50, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample30.csv
HDBSCAN - min_cluster_size = 50, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample50.csv
HDBSCAN - min_cluster_size = 50, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize50_minsample100.csv
HDBSCAN - min_cluster_size = 100, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample10.csv
HDBSCAN - min_cluster_size = 100, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample20.csv
HDBSCAN - min_cluster_size = 100, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample30.csv
HDBSCAN - min_cluster_size = 100, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample50.csv
HDBSCAN - min_cluster_size = 100, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize100_minsample100.csv
HDBSCAN - min_cluster_size = 200, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample10.csv
HDBSCAN - min_cluster_size = 200, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample20.csv
HDBSCAN - min_cluster_size = 200, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample30.csv
HDBSCAN - min_cluster_size = 200, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample50.csv
HDBSCAN - min_cluster_size = 200, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize200_minsample100.csv
HDBSCAN - min_cluster_size = 300, min_samples = 10




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample10.csv
HDBSCAN - min_cluster_size = 300, min_samples = 20




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample20.csv
HDBSCAN - min_cluster_size = 300, min_samples = 30




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample30.csv
HDBSCAN - min_cluster_size = 300, min_samples = 50




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample50.csv
HDBSCAN - min_cluster_size = 300, min_samples = 100




Clusters salvos em clusters/Tools/umap/n_components512\run_hdbscan/Tools_clusters_run_hdbscan_minsize300_minsample100.csv
Clusterização concluída e resultados salvos

Todos os testes para HDBSCAN concluídos
