In [None]:
!pip install umap-learn

Collecting umap-learn
  Downloading umap_learn-0.5.7-py3-none-any.whl.metadata (21 kB)
Collecting pynndescent>=0.5 (from umap-learn)
  Downloading pynndescent-0.5.13-py3-none-any.whl.metadata (6.8 kB)
Downloading umap_learn-0.5.7-py3-none-any.whl (88 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.8/88.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pynndescent-0.5.13-py3-none-any.whl (56 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.9/56.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynndescent, umap-learn
Successfully installed pynndescent-0.5.13 umap-learn-0.5.7


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def read_and_prepare_data(file_path, normalization_method='standard'):
    """
    Load a CSV, set its first column aside, and rescale the remaining columns.

    Args:
        file_path (str): Location of the CSV file to load.
        normalization_method (str): 'minmax' (MinMaxScaler) or
            'standard' (StandardScaler).

    Returns:
        tuple: (DataFrame with the scaled columns, the untouched first column).
        If anything fails, the error is printed and (None, None) is returned.
    """
    try:
        frame = pd.read_csv(file_path)

        # Column 0 is kept aside; every other column goes through the scaler.
        first_column = frame.iloc[:, 0]
        features = frame.iloc[:, 1:]

        # Choose the scaler first so the DataFrame is built in a single place.
        if normalization_method == 'minmax':
            scaler = MinMaxScaler()
        elif normalization_method == 'standard':
            from sklearn.preprocessing import StandardScaler
            scaler = StandardScaler()
        else:
            raise ValueError("Invalid normalization method")

        scaled_values = scaler.fit_transform(features)
        X_normalized = pd.DataFrame(scaled_values, columns=features.columns)
        return X_normalized, first_column

    except Exception as e:
        print(f"Error processing data: {e}")
        return None, None

In [None]:
import umap
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE
from sklearn.metrics import pairwise_distances
from itertools import product
import math

def run_umap_with_tsne(X_scaled, X_query, param_grid, first_column_doc, first_column_query,
                       output_folder="Umap"):
    """Fit UMAP for each parameter combination and save document/query embeddings as CSVs.

    Despite the function name, no t-SNE step is performed: for every combination
    in ``param_grid`` a UMAP model is fitted on ``X_scaled``, ``X_query`` is
    projected with that same fitted model, and both embeddings are written to
    ``output_folder``.

    Args:
        X_scaled: 2-D document feature table (must expose ``.shape``); UMAP is fitted on it.
        X_query: Query feature table, projected with the fitted model.
        param_grid (dict): Lists of values under the keys 'n_components',
            'n_neighbors', 'min_dist' and 'metric'.
        first_column_doc: Labels stored in the 'original_column' column, one per document row.
        first_column_query: Labels stored in the 'original_column' column, one per query row.
        output_folder (str): Directory that receives the CSV files. Defaults to
            "Umap", the folder the later notebook cells read the results from.

    Returns:
        list[dict]: One record per combination that was saved successfully, with
        the parameter values and the two CSV paths ('doc_csv', 'query_csv').
        Combinations that are skipped or fail contribute nothing.
    """
    results = []

    os.makedirs(output_folder, exist_ok=True)

    param_combinations = product(
        param_grid['n_components'],
        param_grid['n_neighbors'],
        param_grid['min_dist'],
        param_grid['metric']
    )

    for n_components, n_neighbors, min_dist, metric in param_combinations:
        combo = (f"n_components={n_components}, n_neighbors={n_neighbors}, "
                 f"min_dist={min_dist}, metric={metric}")
        try:
            # Skip combinations that the data cannot support.
            if n_components > X_scaled.shape[1]:
                print(f"Skipping n_components={n_components}: exceeds feature dimensions.")
                continue
            if n_neighbors >= X_scaled.shape[0]:
                print(f"Skipping n_neighbors={n_neighbors}: exceeds number of data points.")
                continue

            umap_model = umap.UMAP(
                n_neighbors=n_neighbors,
                n_components=n_components,
                min_dist=min_dist,
                metric=metric,
                random_state=42
            )
            umap_transformed_data = umap_model.fit_transform(X_scaled)

            # Attach labels by position: inserting a Series would align on its
            # index and silently produce NaN labels if that index is not 0..n-1.
            output_df = pd.DataFrame(umap_transformed_data)
            output_df.insert(0, 'original_column', list(first_column_doc))

            file_name = f"umap_ncomp{n_components}_nneigh{n_neighbors}_mindist{min_dist}_metric{metric}.csv"
            file_path = os.path.join(output_folder, file_name)
            output_df.to_csv(file_path, index=False)
            print(f"Saved Umap doc result to {file_path}")

            # Project the queries with the model fitted on the documents.
            umap_transformed_data_query = umap_model.transform(X_query)

            output_df_query = pd.DataFrame(umap_transformed_data_query)
            output_df_query.insert(0, 'original_column', list(first_column_query))

            file_name_query = f"umap_query_ncomp{n_components}_nneigh{n_neighbors}_mindist{min_dist}_metric{metric}.csv"
            file_path_query = os.path.join(output_folder, file_name_query)
            output_df_query.to_csv(file_path_query, index=False)
            print(f"Saved Umap query result to {file_path_query}")

            results.append({
                'n_components': n_components,
                'n_neighbors': n_neighbors,
                'min_dist': min_dist,
                'metric': metric,
                'doc_csv': file_path,
                'query_csv': file_path_query,
            })

        except ValueError as ve:
            print(f"ValueError for {combo}: {str(ve)}")
        except Exception as e:
            print(f"Unexpected error for {combo}: {str(e)}")

    return results


In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min

def cluster_documents_and_assign_queries(doc_csv, query_csv, n_clusters, output_csv):
    """
    Clusters documents using K-Means and assigns queries to the closest cluster.

    Both CSVs must contain an 'original_column' column with the item names;
    every other column of the document CSV is used as an embedding coordinate,
    and the same columns are read from the query CSV.

    Parameters:
    - doc_csv: Path to the CSV file containing reduced dimensionality of documents.
    - query_csv: Path to the CSV file containing reduced dimensionality of queries.
    - n_clusters: Number of clusters for K-Means.
    - output_csv: File path to save query-to-cluster assignment results.

    Returns:
    - None. One row per query ('Query', 'Assigned_Cluster', 'Matched_Documents')
      is written to output_csv and a confirmation line is printed.
    """
    doc_data = pd.read_csv(doc_csv)
    query_data = pd.read_csv(query_csv)

    # Use every coordinate column rather than a hard-coded ['0', '1'], so
    # embeddings with more than two components are clustered on all of them.
    feature_columns = [col for col in doc_data.columns if col != 'original_column']

    document_vectors = doc_data[feature_columns].values
    document_names = doc_data['original_column'].values

    query_vectors = query_data[feature_columns].values
    query_names = query_data['original_column'].values

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(document_vectors)

    # Group document names by cluster once instead of rescanning for each query.
    # str() keeps the later join from failing on non-string names (ids, NaN).
    docs_by_cluster = {}
    for name, label in zip(document_names, kmeans.labels_):
        docs_by_cluster.setdefault(int(label), []).append(str(name))

    # For each query, the index of the nearest cluster centre.
    query_cluster_indices, _ = pairwise_distances_argmin_min(query_vectors, kmeans.cluster_centers_)

    query_matches = [
        {
            'Query': query_name,
            'Assigned_Cluster': int(cluster_idx),
            'Matched_Documents': ', '.join(docs_by_cluster.get(int(cluster_idx), []))
        }
        for query_name, cluster_idx in zip(query_names, query_cluster_indices)
    ]

    matches_df = pd.DataFrame(query_matches)
    matches_df.to_csv(output_csv, index=False)
    print(f"Query-to-cluster assignments saved to {output_csv}")


# Best UMAP configuration for the TF-IDF test:
2 components,

20 neighbors,

min_dist 0.7,

cosine metric

## Data preparation ##

In [None]:
file_path_documents = 'tf_idf_documentos.csv'
X_doc, first_column_doc = read_and_prepare_data(file_path_documents, 'standard')

file_path_queries = 'queries_vector.csv'
X_queries, first_column_queries = read_and_prepare_data(file_path_queries, 'standard')

# read_and_prepare_data reports failures by printing and returning (None, None).
# Stop here so later cells do not fail with an unrelated error on None.
if X_doc is None or X_queries is None:
    raise RuntimeError("Could not load the document or query vectors; see the error printed above.")

# NOTE(review): documents and queries are each scaled with their own freshly
# fitted scaler, so the two tables are not on a common scale — confirm this is intended.

## Umap ##

In [None]:
# Single-point grid: the configuration listed in the markdown note above.
param_grid = {
    'n_components': [2],
    'n_neighbors': [20],
    'min_dist': [0.7],
    'metric': ['cosine']
}

# Fits UMAP on the documents, projects the queries with the fitted model and
# writes one CSV for each; the returned list is shown as the cell output.
run_umap_with_tsne(X_doc, X_queries, param_grid, first_column_doc, first_column_queries)

  warn(


Saved Umap doc result to Umap/umap_ncomp2_nneigh20_mindist0.7_metriccosine.csv
Saved Umap query result to Umap/umap_query_ncomp2_nneigh20_mindist0.7_metriccosine.csv


[]

## Clustering ##

In [None]:
# Cluster the UMAP-reduced documents and assign each query to its nearest cluster.
# NOTE(review): the absolute /content/... paths presumably assume a Colab working directory — confirm.
doc_csv = "/content/Umap/umap_ncomp2_nneigh20_mindist0.7_metriccosine.csv"   # CSV with reduced dimensions of documents
query_csv = "/content/Umap/umap_query_ncomp2_nneigh20_mindist0.7_metriccosine.csv"   # CSV with reduced dimensions of queries
output_csv = "query_cluster_matches.csv"  # Output CSV with matches
n_clusters = 6  # Number of K-Means clusters

cluster_documents_and_assign_queries(doc_csv, query_csv, n_clusters, output_csv)


Query-to-cluster assignments saved to query_cluster_matches.csv


## Similarities with clustering results ##

In [None]:
import pandas as pd
import numpy as np

# Load the UMAP-reduced query vectors
query_csv = "/content/Umap/umap_query_ncomp2_nneigh20_mindist0.7_metriccosine.csv"
query_data = pd.read_csv(query_csv)

# Load the UMAP-reduced document vectors
document_csv = "/content/Umap/umap_ncomp2_nneigh20_mindist0.7_metriccosine.csv"
document_data = pd.read_csv(document_csv)


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

def calculate_similarity_for_selected_documents(documents_df, query_vector, selected_documents):
    """Rank the selected documents by cosine similarity to a query vector.

    Args:
        documents_df: DataFrame whose first column is 'original_column' (the
            document name) and whose remaining columns are the vector coordinates.
        query_vector: 1-D array-like with one value per coordinate column.
        selected_documents: Names of the documents to compare against the query.

    Returns:
        DataFrame with columns 'document' and 'similarity', sorted by
        similarity in descending order. It is empty when none of the selected
        names occur in ``documents_df``.
    """
    # Keep only the rows whose name was requested.
    selected_df = documents_df[documents_df['original_column'].isin(selected_documents)]

    # cosine_similarity rejects an array with zero rows, so answer the
    # "nothing selected" case directly with an empty, correctly shaped result.
    if selected_df.empty:
        return pd.DataFrame({
            'document': pd.Series(dtype=object),
            'similarity': pd.Series(dtype=float)
        })

    # All columns after the first are treated as coordinates.
    selected_vectors = selected_df.iloc[:, 1:].values

    # asarray lets callers pass a list or Series as well as an ndarray.
    query_row = np.asarray(query_vector).reshape(1, -1)
    similarities = cosine_similarity(selected_vectors, query_row).flatten()

    results = pd.DataFrame({
        'document': selected_df['original_column'].values,
        'similarity': similarities
    })

    return results.sort_values(by='similarity', ascending=False).reset_index(drop=True)

In [None]:
import numpy as np

def get_similar_documents_manhattan(query_vector, document_data, selected_documents):
    """Rank the selected documents by Manhattan (L1) distance to a query vector.

    Args:
        query_vector: 1-D array-like with k coordinates.
        document_data: DataFrame with an 'original_column' name column and
            coordinate columns named '0', '1', ..., str(k - 1).
        selected_documents: Names of the documents to compare against the query.

    Returns:
        list[tuple]: (document name, distance) pairs, closest first.
    """
    query = np.asarray(query_vector).ravel()

    # Read as many coordinate columns as the query has components; for a
    # two-component query this is exactly ['0', '1'].
    feature_columns = [str(i) for i in range(query.size)]

    # Keep only the rows whose name was requested.
    filtered_data = document_data[document_data['original_column'].isin(selected_documents)]

    document_names = filtered_data['original_column'].values
    document_vectors = filtered_data[feature_columns].values

    # L1 distance: sum of absolute coordinate differences for each row.
    distances = np.abs(document_vectors - query).sum(axis=1)

    # sorted() is stable, so ties keep the row order of document_data.
    return sorted(zip(document_names, distances), key=lambda pair: pair[1])


In [None]:
import numpy as np

def get_similar_documents_pearson(query_vector, document_data, selected_documents):
    """Rank the selected documents by Pearson correlation with a query vector.

    Note: the correlation of two 2-element vectors can only be +1, -1 or NaN,
    so this ranking carries no graded information for two-component
    embeddings; it becomes meaningful with three or more components.

    Args:
        query_vector: 1-D array-like with k coordinates.
        document_data: DataFrame with an 'original_column' name column and
            coordinate columns named '0', '1', ..., str(k - 1).
        selected_documents: Names of the documents to compare against the query.

    Returns:
        list[tuple]: (document name, correlation) pairs, highest correlation
        first. Documents whose correlation is undefined (NaN, e.g. a constant
        vector) are placed at the end.
    """
    query = np.asarray(query_vector).ravel()

    # Read as many coordinate columns as the query has components; for a
    # two-component query this is exactly ['0', '1'].
    feature_columns = [str(i) for i in range(query.size)]

    # Keep only the rows whose name was requested.
    filtered_data = document_data[document_data['original_column'].isin(selected_documents)]

    document_names = filtered_data['original_column'].values
    document_vectors = filtered_data[feature_columns].values

    # np.corrcoef returns a 2x2 correlation matrix; [0, 1] is the off-diagonal entry.
    correlations = [np.corrcoef(query, doc_vector)[0, 1] for doc_vector in document_vectors]

    results = list(zip(document_names, correlations))
    # NaN compares false against everything, which would leave the order
    # undefined; sort on an explicit "is NaN" flag first so NaNs go last.
    results.sort(key=lambda item: (bool(np.isnan(item[1])), -item[1]))

    return results


### Cosine Similarity ###

In [None]:
# Cosine similarity between query 0 and a hand-picked list of documents
document_csv = "/content/Umap/umap_ncomp2_nneigh20_mindist0.7_metriccosine.csv"
query_csv = "/content/Umap/umap_query_ncomp2_nneigh20_mindist0.7_metriccosine.csv"

# Load the UMAP-reduced document and query vectors
document_data = pd.read_csv(document_csv)
query_data = pd.read_csv(query_csv)

# Use the first query vector (row 0)
query_vector = query_data[['0', '1']].iloc[0].values

# Documents to rank against this query
selected_documents = [
    "32_Emprendimiento.txt.txt",
    "65_Alta_Cocina_Mundial.txt.txt",
    "110_Creatividad_Empresarial.txt.txt",
    "63_Servicio_y_Protocolo.txt.txt",
    "67_Innovacion_Culinaria.txt.txt",
    "47_Conceptos_y_Tecnicas_1.txt.txt",
    "62_Pasteleria.txt.txt",
    "100_Principios_de_Seguros.txt.txt",
    "56_Carniceria.txt.txt",
    "64_Practica_Culinaria_2.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "54_Practica_Culinaria_1.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "69_Administracion_de_Eventos.txt.txt",
    "61_Alta_Cocina_Francesa.txt.txt",
    "97_Principios_de_Marketing.txt.txt",
    "57_Alta_Cocina_Ecuatoriana.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "113_Negocios_Internacionales.txt.txt",
    "112_Gestion_del_Talento.txt.txt",
    "111_Proyectos_Empresariales.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "49_Panaderia.txt.txt"
]
 # NOTE(review): presumably the Matched_Documents of this query's cluster in query_cluster_matches.csv — confirm

# Calculate similarity for selected documents
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                     document  similarity
0             61_Alta_Cocina_Francesa.txt.txt    0.998866
1          57_Alta_Cocina_Ecuatoriana.txt.txt    0.998648
2             54_Practica_Culinaria_1.txt.txt    0.998220
3           47_Conceptos_y_Tecnicas_1.txt.txt    0.996564
4            68_Enologia_y_Cocteleria.txt.txt    0.994971
5             63_Servicio_y_Protocolo.txt.txt    0.991857
6             64_Practica_Culinaria_2.txt.txt    0.991103
7                       62_Pasteleria.txt.txt    0.989946
8              65_Alta_Cocina_Mundial.txt.txt    0.989526
9                        49_Panaderia.txt.txt    0.989058
10     53_Conceptos_y_Tecnicas_2_+PRA.txt.txt    0.987945
11       69_Administracion_de_Eventos.txt.txt    0.984686
12                      56_Carniceria.txt.txt    0.984074
13         97_Principios_de_Marketing.txt.txt    0.983696
14       66_Reposteria_y_Chocolateria.txt.txt    0.979563
15            67_Innovacion_Culinaria.txt.txt    0.977527
16  135_Gestio

In [None]:
# Use the second query vector (row 1)
query_vector = query_data[['0', '1']].iloc[1].values

# Documents to rank against this query
selected_documents = [
    "7_Ingles_Nivel_2.txt.txt",
    "119_Composicion_Visual_1.txt.txt",
    "178_Sonido.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "123_Fundamentos_de_Escultura.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt",
    "169_Improvisacion.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "95_Fisiologia_+Lab.txt.txt",
    "173_Performance.txt.txt",
    "96_Evolucion.txt.txt",
    "172_Composicion.txt.txt",
    "122_Fotografia_1.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "170_Danza_Moderna_2.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "177_Cinematografia.txt.txt",
    "6_Ingles_Nivel_1.txt.txt",
    "179_Storytelling.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt"
]
  # NOTE(review): presumably the Matched_Documents of this query's cluster in query_cluster_matches.csv — confirm

# Calculate similarity for selected documents
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                   document  similarity
0             175_Lenguaje_del_Cine.txt.txt    0.999997
1          119_Composicion_Visual_1.txt.txt    0.999854
2                  179_Storytelling.txt.txt    0.999784
3                177_Cinematografia.txt.txt    0.999480
4            146_Juegos_y_Narrativa.txt.txt    0.999302
5                  122_Fotografia_1.txt.txt    0.998505
6     176_Lenguaje_Visual_y_Montaje.txt.txt    0.997657
7                 125_Nuevos_Medios.txt.txt    0.995628
8                        178_Sonido.txt.txt    0.995492
9                  6_Ingles_Nivel_1.txt.txt    0.993639
10                  172_Composicion.txt.txt    0.992432
11              170_Danza_Moderna_2.txt.txt    0.988183
12     117_Herramientas_Digitales_1.txt.txt    0.987764
13                169_Improvisacion.txt.txt    0.987056
14                  173_Performance.txt.txt    0.983227
15                     96_Evolucion.txt.txt    0.978430
16  14_Programacion_Avanzada_en_C++.txt.txt    0

In [None]:
# Use the third query vector (row 2)
query_vector = query_data[['0', '1']].iloc[2].values

# Documents to rank against this query (same list as for row 1)
selected_documents = [
    "7_Ingles_Nivel_2.txt.txt",
    "119_Composicion_Visual_1.txt.txt",
    "178_Sonido.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "123_Fundamentos_de_Escultura.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt",
    "169_Improvisacion.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "95_Fisiologia_+Lab.txt.txt",
    "173_Performance.txt.txt",
    "96_Evolucion.txt.txt",
    "172_Composicion.txt.txt",
    "122_Fotografia_1.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "170_Danza_Moderna_2.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "177_Cinematografia.txt.txt",
    "6_Ingles_Nivel_1.txt.txt",
    "179_Storytelling.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt"
]
# NOTE(review): presumably the Matched_Documents of this query's cluster in query_cluster_matches.csv — confirm

# Calculate similarity for selected documents
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                   document  similarity
0                  179_Storytelling.txt.txt    1.000000
1          119_Composicion_Visual_1.txt.txt    0.999996
2                177_Cinematografia.txt.txt    0.999924
3            146_Juegos_y_Narrativa.txt.txt    0.999848
4             175_Lenguaje_del_Cine.txt.txt    0.999844
5                  122_Fotografia_1.txt.txt    0.999396
6     176_Lenguaje_Visual_y_Montaje.txt.txt    0.998822
7                 125_Nuevos_Medios.txt.txt    0.997292
8                        178_Sonido.txt.txt    0.997184
9                  6_Ingles_Nivel_1.txt.txt    0.995685
10                  172_Composicion.txt.txt    0.994682
11              170_Danza_Moderna_2.txt.txt    0.991040
12     117_Herramientas_Digitales_1.txt.txt    0.990675
13                169_Improvisacion.txt.txt    0.990055
14                  173_Performance.txt.txt    0.986666
15                     96_Evolucion.txt.txt    0.982351
16  14_Programacion_Avanzada_en_C++.txt.txt    0

In [None]:
# Use the fourth query vector (row 3)
query_vector = query_data[['0', '1']].iloc[3].values

# Documents to rank against this query
selected_documents = [
    "105_Marketing_Digital.txt.txt",
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt",
    "101_Analisis_de_Datos.txt.txt",
    "183_Procesos,_Metodos_y_Estandares.txt.txt",
    "116_Analisis_Estrategico_ADM.txt.txt",
    "44_Aplicaciones_Distribuidas.txt.txt",
    "36_Aprendizaje_Automatico.txt.txt",
    "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "186_Sistemas_Lean.txt.txt",
    "55_Administracion_de_A_&_B.txt.txt",
    "42_Seguridad_Informatica.txt.txt",
    "99_Gerencia_de_Costos.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt",
    "182_Control_de_Produccion.txt.txt",
    "106_Negociacion.txt.txt",
    "109_Investigacion_de_Mercados.txt.txt",
    "21_Programacion_de_Apps.txt.txt",
    "43_Mineria_de_Datos.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt",
    "108_Economia_y_Negocios.txt.txt",
    "98_Estadistica_Empresarial_+Lab.txt.txt",
    "48_Principios_de_Administracion.txt.txt",
    "52_Contabilidad_Empresarial.txt.txt",
    "107_Operaciones_+Lab.txt.txt",
    "31_Organizacion_de_Computadores.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt",
    "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "27_Estructuras_de_Datos.txt.txt",
    "59_Introduccion_al_Marketing_HSP.txt.txt",
    "38_Sistemas_Operativos.txt.txt",
    "70_Gerencia_Financiera_HSP.txt.txt",
    "159_Combinatoria_y_Grafos.txt.txt",
    "51_Introduccion_a_la_Hospitalidad.txt.txt",
    "37_Redes_+Lab.txt.txt",
    "33_DiseÃ±o_de_Sistemas.txt.txt"
]
  # NOTE(review): presumably the Matched_Documents of this query's cluster in query_cluster_matches.csv — confirm

# Calculate similarity for selected documents
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                      document  similarity
0               38_Sistemas_Operativos.txt.txt    0.999942
1              27_Estructuras_de_Datos.txt.txt    0.999911
2                        37_Redes_+Lab.txt.txt    0.999896
3   183_Procesos,_Metodos_y_Estandares.txt.txt    0.999880
4    39_Proyectos__Gerencia_y_Analisis.txt.txt    0.999818
5        109_Investigacion_de_Mercados.txt.txt    0.999680
6                    186_Sistemas_Lean.txt.txt    0.999603
7             42_Seguridad_Informatica.txt.txt    0.999581
8   185_Ingenieria_de_la_Calidad_+_Lab.txt.txt    0.999567
9                101_Analisis_de_Datos.txt.txt    0.999559
10             21_Programacion_de_Apps.txt.txt    0.999110
11     48_Principios_de_Administracion.txt.txt    0.999039
12                     106_Negociacion.txt.txt    0.998544
13                 43_Mineria_de_Datos.txt.txt    0.998482
14          70_Gerencia_Financiera_HSP.txt.txt    0.998264
15     98_Estadistica_Empresarial_+Lab.txt.txt    0.9982

In [None]:
# Example: Use the first query vector
query_vector = query_data[['0', '1']].iloc[4].values

# Select specific document names
selected_documents = [
    "34_Inteligencia_Artificial.txt.txt",
    "3_Calculo_Diferencial_+_Ej.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "26_Electronica_Basica_+Lab.txt.txt",
    "161_Algebra_Lineal_2.txt.txt",
    "35_Base_de_Datos.txt.txt",
    "151_Ecuaciones_Diferenciales.txt.txt",
    "18_Calculo_Vectorial.txt.txt",
    "28_Probabilidad_y_Estadistica_+Ej.txt.txt",
    "58_Introduccion_a_la_Biologia_+Ej.txt.txt",
    "4_Quimica_General_1_+Lab_Ej.txt.txt",
    "162_Algebra_Abstracta_1.txt.txt",
    "17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt",
    "164_Analisis_Funcional.txt.txt",
    "153_Introduccion_a_Probabilidades.txt.txt",
    "150_Variable_Compleja.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "163_Geometria_Diferencial.txt.txt",
    "22_Algebra_Lineal_1_+Ej.txt.txt",
    "137_Matematicas_Cotidianas.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt",
    "8_Programacion_en_C++_+Ej.txt.txt",
    "160_Ecuaciones_Diferenciales_Parciales.txt.txt",
    "76_Estadistica_para_CCSS.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt",
    "166_Topologia_1.txt.txt",
    "148_Calculo_para_Ciencias_1.txt.txt",
    "147_Modelado_3D_1.txt.txt",
    "157_Analisis_Numerico.txt.txt",
    "156_Inferencia_Estadistica.txt.txt",
    "168_Analisis_Real.txt.txt",
    "167_Topologia_2.txt.txt",
    "158_Teoria_de_Numeros.txt.txt"
]
 # Replace with the actual document names

# Calculate similarity for selected documents
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                          document  similarity
0               26_Electronica_Basica_+Lab.txt.txt    0.999996
1                 15_Matematicas_Discretas.txt.txt    0.999994
2               137_Matematicas_Cotidianas.txt.txt    0.999938
3      23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt    0.999921
4      17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt    0.999846
5                         35_Base_de_Datos.txt.txt    0.999830
6         155_Logica_y_Teoria_de_Conjuntos.txt.txt    0.999700
7               156_Inferencia_Estadistica.txt.txt    0.999629
8             151_Ecuaciones_Diferenciales.txt.txt    0.999305
9                8_Programacion_en_C++_+Ej.txt.txt    0.998820
10                76_Estadistica_para_CCSS.txt.txt    0.998742
11                   150_Variable_Compleja.txt.txt    0.998578
12  160_Ecuaciones_Diferenciales_Parciales.txt.txt    0.997984
13             4_Quimica_General_1_+Lab_Ej.txt.txt    0.997720
14              3_Calculo_Diferencial_+_Ej.txt.txt    0

## Manhattan distance ##

In [None]:
# Manhattan distance between query 0 and a hand-picked list of documents
document_csv = "/content/Umap/umap_ncomp2_nneigh20_mindist0.7_metriccosine.csv"
query_csv = "/content/Umap/umap_query_ncomp2_nneigh20_mindist0.7_metriccosine.csv"

# Load the UMAP-reduced document and query vectors
document_data = pd.read_csv(document_csv)
query_data = pd.read_csv(query_csv)

# Use the first query vector (row 0)
query_vector = query_data[['0', '1']].iloc[0].values

# Documents to rank against this query
selected_documents = [
    "32_Emprendimiento.txt.txt",
    "65_Alta_Cocina_Mundial.txt.txt",
    "110_Creatividad_Empresarial.txt.txt",
    "63_Servicio_y_Protocolo.txt.txt",
    "67_Innovacion_Culinaria.txt.txt",
    "47_Conceptos_y_Tecnicas_1.txt.txt",
    "62_Pasteleria.txt.txt",
    "100_Principios_de_Seguros.txt.txt",
    "56_Carniceria.txt.txt",
    "64_Practica_Culinaria_2.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "54_Practica_Culinaria_1.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "69_Administracion_de_Eventos.txt.txt",
    "61_Alta_Cocina_Francesa.txt.txt",
    "97_Principios_de_Marketing.txt.txt",
    "57_Alta_Cocina_Ecuatoriana.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "113_Negocios_Internacionales.txt.txt",
    "112_Gestion_del_Talento.txt.txt",
    "111_Proyectos_Empresariales.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "49_Panaderia.txt.txt"
]

# Rank the selected documents by Manhattan distance to the query (closest first)
similarity_results = get_similar_documents_manhattan(query_vector, document_data, selected_documents)

# Each result is a (document name, distance) pair; unpack it directly.
for document_name, distance in similarity_results:
    print(f"Document: {document_name}, Distance: {distance}")

Document: 57_Alta_Cocina_Ecuatoriana.txt.txt, Distance: 0.87462147
Document: 64_Practica_Culinaria_2.txt.txt, Distance: 1.05253927
Document: 61_Alta_Cocina_Francesa.txt.txt, Distance: 1.07761137
Document: 65_Alta_Cocina_Mundial.txt.txt, Distance: 1.3366648699999994
Document: 54_Practica_Culinaria_1.txt.txt, Distance: 1.4258705700000007
Document: 67_Innovacion_Culinaria.txt.txt, Distance: 1.9007830700000001
Document: 53_Conceptos_y_Tecnicas_2_+PRA.txt.txt, Distance: 2.0005991699999996
Document: 47_Conceptos_y_Tecnicas_1.txt.txt, Distance: 2.2137768699999993
Document: 49_Panaderia.txt.txt, Distance: 2.299937869999999
Document: 68_Enologia_y_Cocteleria.txt.txt, Distance: 2.45302457
Document: 66_Reposteria_y_Chocolateria.txt.txt, Distance: 2.6193349699999993
Document: 110_Creatividad_Empresarial.txt.txt, Distance: 2.7220776699999996
Document: 113_Negocios_Internacionales.txt.txt, Distance: 2.856204269999999
Document: 62_Pasteleria.txt.txt, Distance: 3.032748169999999
Document: 112_Gestion_

In [None]:
# Use the second query vector (row 1)
query_vector = query_data[['0', '1']].iloc[1].values

# Documents to rank against this query
selected_documents = [
    "7_Ingles_Nivel_2.txt.txt",
    "119_Composicion_Visual_1.txt.txt",
    "178_Sonido.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "123_Fundamentos_de_Escultura.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt",
    "169_Improvisacion.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "95_Fisiologia_+Lab.txt.txt",
    "173_Performance.txt.txt",
    "96_Evolucion.txt.txt",
    "172_Composicion.txt.txt",
    "122_Fotografia_1.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "170_Danza_Moderna_2.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "177_Cinematografia.txt.txt",
    "6_Ingles_Nivel_1.txt.txt",
    "179_Storytelling.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt"
]

# Rank the selected documents by Manhattan distance to the query (closest first)
similarity_results = get_similar_documents_manhattan(query_vector, document_data, selected_documents)

# Each result is a (document name, distance) pair; unpack it directly.
for document_name, distance in similarity_results:
    print(f"Document: {document_name}, Distance: {distance}")

Document: 125_Nuevos_Medios.txt.txt, Distance: 0.9627523999999994
Document: 119_Composicion_Visual_1.txt.txt, Distance: 0.973693299999999
Document: 6_Ingles_Nivel_1.txt.txt, Distance: 1.2378856000000003
Document: 172_Composicion.txt.txt, Distance: 1.3014019999999995
Document: 177_Cinematografia.txt.txt, Distance: 1.3931197999999991
Document: 170_Danza_Moderna_2.txt.txt, Distance: 1.621471699999999
Document: 169_Improvisacion.txt.txt, Distance: 1.7324660000000005
Document: 173_Performance.txt.txt, Distance: 1.985607
Document: 175_Lenguaje_del_Cine.txt.txt, Distance: 2.3021409999999998
Document: 123_Fundamentos_de_Escultura.txt.txt, Distance: 2.3513910000000005
Document: 176_Lenguaje_Visual_y_Montaje.txt.txt, Distance: 2.3551034
Document: 178_Sonido.txt.txt, Distance: 2.3607226
Document: 7_Ingles_Nivel_2.txt.txt, Distance: 2.4015547
Document: 117_Herramientas_Digitales_1.txt.txt, Distance: 2.4037609
Document: 122_Fotografia_1.txt.txt, Distance: 2.592605099999999
Document: 95_Fisiologia_+

In [None]:
# Use the third query vector (row 2)
query_vector = query_data[['0', '1']].iloc[2].values

# Documents to rank against this query (same list as for row 1)
selected_documents = [
    "7_Ingles_Nivel_2.txt.txt",
    "119_Composicion_Visual_1.txt.txt",
    "178_Sonido.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "123_Fundamentos_de_Escultura.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt",
    "169_Improvisacion.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "95_Fisiologia_+Lab.txt.txt",
    "173_Performance.txt.txt",
    "96_Evolucion.txt.txt",
    "172_Composicion.txt.txt",
    "122_Fotografia_1.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "170_Danza_Moderna_2.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "177_Cinematografia.txt.txt",
    "6_Ingles_Nivel_1.txt.txt",
    "179_Storytelling.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt"
]

# Rank the selected documents by Manhattan distance to the query (closest first)
similarity_results = get_similar_documents_manhattan(query_vector, document_data, selected_documents)

# Each result is a (document name, distance) pair; unpack it directly.
for document_name, distance in similarity_results:
    print(f"Document: {document_name}, Distance: {distance}")

Document: 6_Ingles_Nivel_1.txt.txt, Distance: 1.3125671000000003
Document: 125_Nuevos_Medios.txt.txt, Distance: 1.9387830999999998
Document: 172_Composicion.txt.txt, Distance: 1.9810115000000006
Document: 169_Improvisacion.txt.txt, Distance: 2.1946375000000002
Document: 119_Composicion_Visual_1.txt.txt, Distance: 2.2298167999999996
Document: 177_Cinematografia.txt.txt, Distance: 2.6492432999999997
Document: 170_Danza_Moderna_2.txt.txt, Distance: 2.8388278000000002
Document: 123_Fundamentos_de_Escultura.txt.txt, Distance: 2.8413805
Document: 173_Performance.txt.txt, Distance: 2.9543864999999996
Document: 7_Ingles_Nivel_2.txt.txt, Distance: 3.1445073999999997
Document: 175_Lenguaje_del_Cine.txt.txt, Distance: 3.5582645000000004
Document: 176_Lenguaje_Visual_y_Montaje.txt.txt, Distance: 3.6112269000000006
Document: 178_Sonido.txt.txt, Distance: 3.6168461000000005
Document: 117_Herramientas_Digitales_1.txt.txt, Distance: 3.6598844000000006
Document: 122_Fotografia_1.txt.txt, Distance: 3.84

In [None]:
# Use the fourth query vector (row index 3), taken from columns '0' and '1'
query_vector = query_data[['0', '1']].iloc[3].values

# Document names to score against this query
selected_documents = [
    "105_Marketing_Digital.txt.txt",
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt",
    "101_Analisis_de_Datos.txt.txt",
    "183_Procesos,_Metodos_y_Estandares.txt.txt",
    "116_Analisis_Estrategico_ADM.txt.txt",
    "44_Aplicaciones_Distribuidas.txt.txt",
    "36_Aprendizaje_Automatico.txt.txt",
    "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "186_Sistemas_Lean.txt.txt",
    "55_Administracion_de_A_&_B.txt.txt",
    "42_Seguridad_Informatica.txt.txt",
    "99_Gerencia_de_Costos.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt",
    "182_Control_de_Produccion.txt.txt",
    "106_Negociacion.txt.txt",
    "109_Investigacion_de_Mercados.txt.txt",
    "21_Programacion_de_Apps.txt.txt",
    "43_Mineria_de_Datos.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt",
    "108_Economia_y_Negocios.txt.txt",
    "98_Estadistica_Empresarial_+Lab.txt.txt",
    "48_Principios_de_Administracion.txt.txt",
    "52_Contabilidad_Empresarial.txt.txt",
    "107_Operaciones_+Lab.txt.txt",
    "31_Organizacion_de_Computadores.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt",
    "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "27_Estructuras_de_Datos.txt.txt",
    "59_Introduccion_al_Marketing_HSP.txt.txt",
    "38_Sistemas_Operativos.txt.txt",
    "70_Gerencia_Financiera_HSP.txt.txt",
    "159_Combinatoria_y_Grafos.txt.txt",
    "51_Introduccion_a_la_Hospitalidad.txt.txt",
    "37_Redes_+Lab.txt.txt",
    "33_DiseÃ±o_de_Sistemas.txt.txt"
]

# Score the selected documents against the query with the Manhattan helper
similarity_results = get_similar_documents_manhattan(
    query_vector,
    document_data,
    selected_documents,
)

# Each result is indexable: item 0 is the document name, item 1 its distance
for result in similarity_results:
    print(f"Document: {result[0]}, Distance: {result[1]}")

Document: 186_Sistemas_Lean.txt.txt, Distance: 0.8725640999999991
Document: 107_Operaciones_+Lab.txt.txt, Distance: 1.3654202000000009
Document: 101_Analisis_de_Datos.txt.txt, Distance: 1.4854938999999998
Document: 43_Mineria_de_Datos.txt.txt, Distance: 1.492050400000001
Document: 181_Inv._de_Operaciones_1_+Lab.txt.txt, Distance: 1.5607104000000014
Document: 98_Estadistica_Empresarial_+Lab.txt.txt, Distance: 1.5637269000000007
Document: 183_Procesos,_Metodos_y_Estandares.txt.txt, Distance: 1.6096610999999994
Document: 185_Ingenieria_de_la_Calidad_+_Lab.txt.txt, Distance: 1.836352100000001
Document: 182_Control_de_Produccion.txt.txt, Distance: 1.840330100000001
Document: 159_Combinatoria_y_Grafos.txt.txt, Distance: 1.8857949999999999
Document: 37_Redes_+Lab.txt.txt, Distance: 1.9703461000000013
Document: 31_Organizacion_de_Computadores.txt.txt, Distance: 2.0867004
Document: 27_Estructuras_de_Datos.txt.txt, Distance: 2.3565159000000007
Document: 46_Matematica_Empresarial_+Ej.txt.txt, Dis

In [None]:
# Use the fifth query vector (row index 4), taken from columns '0' and '1'
query_vector = query_data[['0', '1']].iloc[4].values

# Document names to score against this query
selected_documents = [
    "34_Inteligencia_Artificial.txt.txt",
    "3_Calculo_Diferencial_+_Ej.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "26_Electronica_Basica_+Lab.txt.txt",
    "161_Algebra_Lineal_2.txt.txt",
    "35_Base_de_Datos.txt.txt",
    "151_Ecuaciones_Diferenciales.txt.txt",
    "18_Calculo_Vectorial.txt.txt",
    "28_Probabilidad_y_Estadistica_+Ej.txt.txt",
    "58_Introduccion_a_la_Biologia_+Ej.txt.txt",
    "4_Quimica_General_1_+Lab_Ej.txt.txt",
    "162_Algebra_Abstracta_1.txt.txt",
    "17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt",
    "164_Analisis_Funcional.txt.txt",
    "153_Introduccion_a_Probabilidades.txt.txt",
    "150_Variable_Compleja.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "163_Geometria_Diferencial.txt.txt",
    "22_Algebra_Lineal_1_+Ej.txt.txt",
    "137_Matematicas_Cotidianas.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt",
    "8_Programacion_en_C++_+Ej.txt.txt",
    "160_Ecuaciones_Diferenciales_Parciales.txt.txt",
    "76_Estadistica_para_CCSS.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt",
    "166_Topologia_1.txt.txt",
    "148_Calculo_para_Ciencias_1.txt.txt",
    "147_Modelado_3D_1.txt.txt",
    "157_Analisis_Numerico.txt.txt",
    "156_Inferencia_Estadistica.txt.txt",
    "168_Analisis_Real.txt.txt",
    "167_Topologia_2.txt.txt",
    "158_Teoria_de_Numeros.txt.txt"
]

# Score the selected documents against the query with the Manhattan helper
similarity_results = get_similar_documents_manhattan(
    query_vector,
    document_data,
    selected_documents,
)

# Each result is indexable: item 0 is the document name, item 1 its distance
for result in similarity_results:
    print(f"Document: {result[0]}, Distance: {result[1]}")

Document: 15_Matematicas_Discretas.txt.txt, Distance: 0.6393239999999993
Document: 155_Logica_y_Teoria_de_Conjuntos.txt.txt, Distance: 0.8156052999999996
Document: 150_Variable_Compleja.txt.txt, Distance: 1.2041576999999988
Document: 160_Ecuaciones_Diferenciales_Parciales.txt.txt, Distance: 1.4009222000000001
Document: 149_Teoria_de_Grupos.txt.txt, Distance: 1.6234986999999999
Document: 137_Matematicas_Cotidianas.txt.txt, Distance: 1.7513917
Document: 158_Teoria_de_Numeros.txt.txt, Distance: 1.7686476999999998
Document: 168_Analisis_Real.txt.txt, Distance: 1.9664013000000002
Document: 162_Algebra_Abstracta_1.txt.txt, Distance: 2.133266699999999
Document: 154_Fundamentos_de_Geometria.txt.txt, Distance: 2.137386200000001
Document: 153_Introduccion_a_Probabilidades.txt.txt, Distance: 2.184528300000001
Document: 151_Ecuaciones_Diferenciales.txt.txt, Distance: 2.2758743
Document: 17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt, Distance: 2.3988998000000006
Document: 18_Calculo_Vectorial.txt.txt

# Best UMAP parameters for the tf_idf_2 test:
2 components,

20 neighbors,

min_dist 0.7,

cosine metric

## Data preparation ##

In [None]:
# Input CSVs for the second TF-IDF experiment: the document matrix and the query vectors.
file_path_documents = 'tf_idf_documentos_2.csv'
file_path_queries = 'queries_vector_2.csv'

# Each call splits off the CSV's first column (returned separately) and
# standard-scales the remaining columns.
# NOTE(review): every call fits its own scaler, so the queries are scaled
# independently of the documents — confirm this is intended.
X_doc, first_column_doc = read_and_prepare_data(
    file_path_documents,
    'standard',
)
X_queries, first_column_queries = read_and_prepare_data(
    file_path_queries,
    'standard',
)

## Umap ##

In [None]:
# Single-point grid: one value per UMAP parameter, matching the best
# configuration recorded for the tf_idf_2 test.
param_grid = dict(
    n_components=[2],
    n_neighbors=[20],
    min_dist=[0.7],
    metric=['cosine'],
)

# Kept as the cell's last expression so the notebook shows the helper's return value.
run_umap_with_tsne(X_doc, X_queries, param_grid, first_column_doc, first_column_queries)

  warn(


Saved Umap doc result to Umap_2/umap_ncomp2_nneigh20_mindist0.7_metriccosine.csv
Saved Umap query result to Umap_2/umap_query_ncomp2_nneigh20_mindist0.7_metriccosine.csv


[]

## Clustering ##

In [None]:
# Number of clusters handed to the clustering helper.
n_clusters = 3

# Destination CSV for the query-to-cluster assignments.
output_csv = "query_cluster_matches_2.csv"

# UMAP-reduced coordinates: documents first, then queries.
doc_csv, query_csv = (
    "/content/Umap_2/umap_ncomp2_nneigh20_mindist0.7_metriccosine.csv",
    "/content/Umap_2/umap_query_ncomp2_nneigh20_mindist0.7_metriccosine.csv",
)

cluster_documents_and_assign_queries(doc_csv, query_csv, n_clusters, output_csv)

Query-to-cluster assignments saved to query_cluster_matches_2.csv


## Similarity ##

## Cosine ##

In [None]:
# UMAP output CSVs for the tf_idf_2 run: documents first, then queries.
document_csv, query_csv = (
    "/content/Umap_2/umap_ncomp2_nneigh20_mindist0.7_metriccosine.csv",
    "/content/Umap_2/umap_query_ncomp2_nneigh20_mindist0.7_metriccosine.csv",
)

# Read both files into DataFrames for the similarity cells that follow.
document_data = pd.read_csv(filepath_or_buffer=document_csv)
query_data = pd.read_csv(filepath_or_buffer=query_csv)

In [None]:
# Use the first query vector (row index 0), taken from columns '0' and '1'
query_vector = query_data[['0', '1']].iloc[0].values

# Document names to score against this query
selected_documents = [
    "105_Marketing_Digital.txt.txt",
    "32_Emprendimiento.txt.txt",
    "24_Aprendizaje_y_Servicio_PASEC.txt.txt",
    "65_Alta_Cocina_Mundial.txt.txt",
    "110_Creatividad_Empresarial.txt.txt",
    "63_Servicio_y_Protocolo.txt.txt",
    "67_Innovacion_Culinaria.txt.txt",
    "116_Analisis_Estrategico_ADM.txt.txt",
    "47_Conceptos_y_Tecnicas_1.txt.txt",
    "62_Pasteleria.txt.txt",
    "100_Principios_de_Seguros.txt.txt",
    "56_Carniceria.txt.txt",
    "102_Principios_de_Finanzas.txt.txt",
    "25_Cultura_Gastronomica.txt.txt",
    "55_Administracion_de_A_&_B.txt.txt",
    "64_Practica_Culinaria_2.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "99_Gerencia_de_Costos.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "54_Practica_Culinaria_1.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt",
    "182_Control_de_Produccion.txt.txt",
    "106_Negociacion.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "93_Practica_4.txt.txt",
    "69_Administracion_de_Eventos.txt.txt",
    "61_Alta_Cocina_Francesa.txt.txt",
    "97_Principios_de_Marketing.txt.txt",
    "109_Investigacion_de_Mercados.txt.txt",
    "57_Alta_Cocina_Ecuatoriana.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt",
    "75_Planificacion_y_Evaluacion_1.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "108_Economia_y_Negocios.txt.txt",
    "48_Principios_de_Administracion.txt.txt",
    "90_Practica_3.txt.txt",
    "52_Contabilidad_Empresarial.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "113_Negocios_Internacionales.txt.txt",
    "112_Gestion_del_Talento.txt.txt",
    "77_Neurociencia_y_Educacion.txt.txt",
    "95_Fisiologia_+Lab.txt.txt",
    "114_Innovacion_y_Sustentabilidad.txt.txt",
    "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "87_Practica_2.txt.txt",
    "96_Evolucion.txt.txt",
    "59_Introduccion_al_Marketing_HSP.txt.txt",
    "111_Proyectos_Empresariales.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt",
    "38_Sistemas_Operativos.txt.txt",
    "70_Gerencia_Financiera_HSP.txt.txt",
    "49_Panaderia.txt.txt",
    "71_Identidad_Culinaria.txt.txt",
    "51_Introduccion_a_la_Hospitalidad.txt.txt"
]

# Compute the similarity of each selected document to the query
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

# Print the resulting table (columns: document, similarity)
print(similarity_results)

                                     document  similarity
0          57_Alta_Cocina_Ecuatoriana.txt.txt    0.999265
1        66_Reposteria_y_Chocolateria.txt.txt    0.998829
2         77_Neurociencia_y_Educacion.txt.txt    0.998610
3             61_Alta_Cocina_Francesa.txt.txt    0.997772
4              71_Identidad_Culinaria.txt.txt    0.997484
5              65_Alta_Cocina_Mundial.txt.txt    0.997419
6           47_Conceptos_y_Tecnicas_1.txt.txt    0.997101
7     24_Aprendizaje_y_Servicio_PASEC.txt.txt    0.995729
8                       62_Pasteleria.txt.txt    0.995470
9                        49_Panaderia.txt.txt    0.995181
10            67_Innovacion_Culinaria.txt.txt    0.994922
11     53_Conceptos_y_Tecnicas_2_+PRA.txt.txt    0.994681
12   114_Innovacion_y_Sustentabilidad.txt.txt    0.994583
13                      87_Practica_2.txt.txt    0.993110
14    75_Planificacion_y_Evaluacion_1.txt.txt    0.992677
15            64_Practica_Culinaria_2.txt.txt    0.991894
16            

In [None]:
# Use the second query vector (row index 1), taken from columns '0' and '1'
query_vector = query_data[['0', '1']].iloc[1].values

# Document names to score against this query
selected_documents = ["184_Ergonomia.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "89_Ensenanza_Integrada_de_CITIAM.txt.txt",
    "7_Ingles_Nivel_2.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "119_Composicion_Visual_1.txt.txt",
    "79_Coloquios_EDU.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "178_Sonido.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "85_Arte_y_Educacion.txt.txt",
    "138_Enfasis_3.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "78_Planificacion_y_Evaluacion_2.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "60_Coloquios_Gastr.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "91_Inclusion_y_Diversidad.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "83_Ensenanza_de_Matematicas.txt.txt",
    "140_Arte_y_Educacion___Curaduria.txt.txt",
    "1_Escritura_Academica.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "123_Fundamentos_de_Escultura.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "86_Bilingualism.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt",
    "169_Improvisacion.txt.txt",
    "13_Ingles_Nivel_4.txt.txt",
    "142_Produccion_&_Exhibicion.txt.txt",
    "81_Practica_1.txt.txt",
    "82_Metodologias_de_Ensenanza.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "84_Ensenanza_de_Ciencias_Sociales.txt.txt",
    "132_Enfasis_1.txt.txt",
    "20_Ingles_Nivel_6.txt.txt",
    "134_Enfasis_2.txt.txt",
    "174_Proyecto_final_en_Danza.txt.txt",
    "173_Performance.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "88_Ensenanza_de_Ciencias.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "73_Teorias_del_Aprendizaje.txt.txt",
    "74_Desarrollo__NiÃ±o_y_Adolescente.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "172_Composicion.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "122_Fotografia_1.txt.txt",
    "128_Taller_de_Arte_1.txt.txt",
    "170_Danza_Moderna_2.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "19_Ingles_Nivel_5.txt.txt",
    "177_Cinematografia.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "6_Ingles_Nivel_1.txt.txt",
    "179_Storytelling.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Compute the similarity of each selected document to the query
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

# NOTE(review): the printed table is abbreviated to its first and last rows
# for a result this long, so the middle of the ranking is not shown.
print(similarity_results)

                                  document  similarity
0             128_Taller_de_Arte_1.txt.txt    1.000000
1      29_Teoria_de_la_Computacion.txt.txt    0.999998
2      131_Taller_de_Investigacion.txt.txt    0.999997
3    136_Laboratorio_de_Creacion_3.txt.txt    0.999992
4             139_Taller_de_Arte_3.txt.txt    0.999984
..                                     ...         ...
61      73_Teorias_del_Aprendizaje.txt.txt    0.979684
62     83_Ensenanza_de_Matematicas.txt.txt    0.977883
63  72_Fundamentos_de_la_Educacion.txt.txt    0.977847
64        88_Ensenanza_de_Ciencias.txt.txt    0.974614
65    82_Metodologias_de_Ensenanza.txt.txt    0.973408

[66 rows x 2 columns]


In [None]:
# Use the third query vector (row index 2), taken from columns '0' and '1'
query_vector = query_data[['0', '1']].iloc[2].values

# Document names to score against this query
selected_documents = ["184_Ergonomia.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "89_Ensenanza_Integrada_de_CITIAM.txt.txt",
    "7_Ingles_Nivel_2.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "119_Composicion_Visual_1.txt.txt",
    "79_Coloquios_EDU.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "178_Sonido.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "85_Arte_y_Educacion.txt.txt",
    "138_Enfasis_3.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "78_Planificacion_y_Evaluacion_2.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "60_Coloquios_Gastr.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "91_Inclusion_y_Diversidad.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "83_Ensenanza_de_Matematicas.txt.txt",
    "140_Arte_y_Educacion___Curaduria.txt.txt",
    "1_Escritura_Academica.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "123_Fundamentos_de_Escultura.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "86_Bilingualism.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt",
    "169_Improvisacion.txt.txt",
    "13_Ingles_Nivel_4.txt.txt",
    "142_Produccion_&_Exhibicion.txt.txt",
    "81_Practica_1.txt.txt",
    "82_Metodologias_de_Ensenanza.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "84_Ensenanza_de_Ciencias_Sociales.txt.txt",
    "132_Enfasis_1.txt.txt",
    "20_Ingles_Nivel_6.txt.txt",
    "134_Enfasis_2.txt.txt",
    "174_Proyecto_final_en_Danza.txt.txt",
    "173_Performance.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "88_Ensenanza_de_Ciencias.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "73_Teorias_del_Aprendizaje.txt.txt",
    "74_Desarrollo__NiÃ±o_y_Adolescente.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "172_Composicion.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "122_Fotografia_1.txt.txt",
    "128_Taller_de_Arte_1.txt.txt",
    "170_Danza_Moderna_2.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "19_Ingles_Nivel_5.txt.txt",
    "177_Cinematografia.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "6_Ingles_Nivel_1.txt.txt",
    "179_Storytelling.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Compute the similarity of each selected document to the query
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

# NOTE(review): the printed table is abbreviated to its first and last rows
# for a result this long, so the middle of the ranking is not shown.
print(similarity_results)

                                    document  similarity
0                   79_Coloquios_EDU.txt.txt    0.999999
1    78_Planificacion_y_Evaluacion_2.txt.txt    0.999996
2   89_Ensenanza_Integrada_de_CITIAM.txt.txt    0.999994
3           80_Ensenanza_de_Lenguaje.txt.txt    0.999990
4                    86_Bilingualism.txt.txt    0.999971
..                                       ...         ...
61                  7_Ingles_Nivel_2.txt.txt    0.974591
62     176_Lenguaje_Visual_y_Montaje.txt.txt    0.967634
63      117_Herramientas_Digitales_1.txt.txt    0.966822
64             175_Lenguaje_del_Cine.txt.txt    0.962971
65                  179_Storytelling.txt.txt    0.960581

[66 rows x 2 columns]


In [None]:
# Use the fourth query vector (row index 3), taken from columns '0' and '1'
query_vector = query_data[['0', '1']].iloc[3].values

# Document names to score against this query
selected_documents = [
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt",
    "34_Inteligencia_Artificial.txt.txt",
    "3_Calculo_Diferencial_+_Ej.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "26_Electronica_Basica_+Lab.txt.txt",
    "101_Analisis_de_Datos.txt.txt",
    "161_Algebra_Lineal_2.txt.txt",
    "183_Procesos,_Metodos_y_Estandares.txt.txt",
    "35_Base_de_Datos.txt.txt",
    "44_Aplicaciones_Distribuidas.txt.txt",
    "151_Ecuaciones_Diferenciales.txt.txt",
    "18_Calculo_Vectorial.txt.txt",
    "36_Aprendizaje_Automatico.txt.txt",
    "28_Probabilidad_y_Estadistica_+Ej.txt.txt",
    "58_Introduccion_a_la_Biologia_+Ej.txt.txt",
    "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "186_Sistemas_Lean.txt.txt",
    "4_Quimica_General_1_+Lab_Ej.txt.txt",
    "42_Seguridad_Informatica.txt.txt",
    "162_Algebra_Abstracta_1.txt.txt",
    "17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt",
    "164_Analisis_Funcional.txt.txt",
    "153_Introduccion_a_Probabilidades.txt.txt",
    "150_Variable_Compleja.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "12_Ingles_Nivel_3.txt.txt",
    "163_Geometria_Diferencial.txt.txt",
    "22_Algebra_Lineal_1_+Ej.txt.txt",
    "21_Programacion_de_Apps.txt.txt",
    "137_Matematicas_Cotidianas.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "43_Mineria_de_Datos.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt",
    "8_Programacion_en_C++_+Ej.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "98_Estadistica_Empresarial_+Lab.txt.txt",
    "160_Ecuaciones_Diferenciales_Parciales.txt.txt",
    "76_Estadistica_para_CCSS.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt",
    "107_Operaciones_+Lab.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "166_Topologia_1.txt.txt",
    "148_Calculo_para_Ciencias_1.txt.txt",
    "31_Organizacion_de_Computadores.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt",
    "147_Modelado_3D_1.txt.txt",
    "27_Estructuras_de_Datos.txt.txt",
    "157_Analisis_Numerico.txt.txt",
    "156_Inferencia_Estadistica.txt.txt",
    "168_Analisis_Real.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "159_Combinatoria_y_Grafos.txt.txt",
    "37_Redes_+Lab.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt",
    "167_Topologia_2.txt.txt",
    "33_DiseÃ±o_de_Sistemas.txt.txt",
    "158_Teoria_de_Numeros.txt.txt"
]

# Compute the similarity of each selected document to the query
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

# Print the resulting table (columns: document, similarity)
print(similarity_results)


                                          document  similarity
0                   146_Juegos_y_Narrativa.txt.txt    1.000000
1                        147_Modelado_3D_1.txt.txt    1.000000
2              4_Quimica_General_1_+Lab_Ej.txt.txt    0.999988
3                         35_Base_de_Datos.txt.txt    0.999977
4        58_Introduccion_a_la_Biologia_+Ej.txt.txt    0.999934
5            46_Matematica_Empresarial_+Ej.txt.txt    0.999917
6                  21_Programacion_de_Apps.txt.txt    0.999867
7      23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt    0.999821
8                    101_Analisis_de_Datos.txt.txt    0.999781
9          98_Estadistica_Empresarial_+Lab.txt.txt    0.999748
10              26_Electronica_Basica_+Lab.txt.txt    0.999727
11               8_Programacion_en_C++_+Ej.txt.txt    0.999715
12              34_Inteligencia_Artificial.txt.txt    0.999704
13                 27_Estructuras_de_Datos.txt.txt    0.999656
14         14_Programacion_Avanzada_en_C++.txt.txt    0

In [None]:
# Use the fifth query vector (row index 4), taken from columns '0' and '1'
query_vector = query_data[['0', '1']].iloc[4].values

# Document names to score against this query
selected_documents = ["184_Ergonomia.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "89_Ensenanza_Integrada_de_CITIAM.txt.txt",
    "7_Ingles_Nivel_2.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "119_Composicion_Visual_1.txt.txt",
    "79_Coloquios_EDU.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "178_Sonido.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "85_Arte_y_Educacion.txt.txt",
    "138_Enfasis_3.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "78_Planificacion_y_Evaluacion_2.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "60_Coloquios_Gastr.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "91_Inclusion_y_Diversidad.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "83_Ensenanza_de_Matematicas.txt.txt",
    "140_Arte_y_Educacion___Curaduria.txt.txt",
    "1_Escritura_Academica.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "123_Fundamentos_de_Escultura.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "86_Bilingualism.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt",
    "169_Improvisacion.txt.txt",
    "13_Ingles_Nivel_4.txt.txt",
    "142_Produccion_&_Exhibicion.txt.txt",
    "81_Practica_1.txt.txt",
    "82_Metodologias_de_Ensenanza.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "84_Ensenanza_de_Ciencias_Sociales.txt.txt",
    "132_Enfasis_1.txt.txt",
    "20_Ingles_Nivel_6.txt.txt",
    "134_Enfasis_2.txt.txt",
    "174_Proyecto_final_en_Danza.txt.txt",
    "173_Performance.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "88_Ensenanza_de_Ciencias.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "73_Teorias_del_Aprendizaje.txt.txt",
    "74_Desarrollo__NiÃ±o_y_Adolescente.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "172_Composicion.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "122_Fotografia_1.txt.txt",
    "128_Taller_de_Arte_1.txt.txt",
    "170_Danza_Moderna_2.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "19_Ingles_Nivel_5.txt.txt",
    "177_Cinematografia.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "6_Ingles_Nivel_1.txt.txt",
    "179_Storytelling.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Compute the similarity of each selected document to the query
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

# NOTE(review): the printed table is abbreviated to its first and last rows
# for a result this long, so the middle of the ranking is not shown.
print(similarity_results)


                                     document  similarity
0          73_Teorias_del_Aprendizaje.txt.txt    0.999998
1                 85_Arte_y_Educacion.txt.txt    0.999958
2         83_Ensenanza_de_Matematicas.txt.txt    0.999942
3      72_Fundamentos_de_la_Educacion.txt.txt    0.999940
4   84_Ensenanza_de_Ciencias_Sociales.txt.txt    0.999913
..                                        ...         ...
61                   7_Ingles_Nivel_2.txt.txt    0.960069
62      176_Lenguaje_Visual_y_Montaje.txt.txt    0.951490
63       117_Herramientas_Digitales_1.txt.txt    0.950501
64              175_Lenguaje_del_Cine.txt.txt    0.945842
65                   179_Storytelling.txt.txt    0.942972

[66 rows x 2 columns]


## Manhattan ##

In [None]:
# UMAP output CSVs for the tf_idf_2 run: documents first, then queries.
document_csv, query_csv = (
    "/content/Umap_2/umap_ncomp2_nneigh20_mindist0.7_metriccosine.csv",
    "/content/Umap_2/umap_query_ncomp2_nneigh20_mindist0.7_metriccosine.csv",
)

# Read both files into DataFrames for the Manhattan cells that follow.
document_data = pd.read_csv(filepath_or_buffer=document_csv)
query_data = pd.read_csv(filepath_or_buffer=query_csv)

In [None]:
# Use the first query vector (row index 0), taken from columns '0' and '1'
query_vector = query_data[['0', '1']].iloc[0].values

# Document names to score against this query
selected_documents = [
    "105_Marketing_Digital.txt.txt",
    "32_Emprendimiento.txt.txt",
    "24_Aprendizaje_y_Servicio_PASEC.txt.txt",
    "65_Alta_Cocina_Mundial.txt.txt",
    "110_Creatividad_Empresarial.txt.txt",
    "63_Servicio_y_Protocolo.txt.txt",
    "67_Innovacion_Culinaria.txt.txt",
    "116_Analisis_Estrategico_ADM.txt.txt",
    "47_Conceptos_y_Tecnicas_1.txt.txt",
    "62_Pasteleria.txt.txt",
    "100_Principios_de_Seguros.txt.txt",
    "56_Carniceria.txt.txt",
    "102_Principios_de_Finanzas.txt.txt",
    "25_Cultura_Gastronomica.txt.txt",
    "55_Administracion_de_A_&_B.txt.txt",
    "64_Practica_Culinaria_2.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "99_Gerencia_de_Costos.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "54_Practica_Culinaria_1.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt",
    "182_Control_de_Produccion.txt.txt",
    "106_Negociacion.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "93_Practica_4.txt.txt",
    "69_Administracion_de_Eventos.txt.txt",
    "61_Alta_Cocina_Francesa.txt.txt",
    "97_Principios_de_Marketing.txt.txt",
    "109_Investigacion_de_Mercados.txt.txt",
    "57_Alta_Cocina_Ecuatoriana.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt",
    "75_Planificacion_y_Evaluacion_1.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "108_Economia_y_Negocios.txt.txt",
    "48_Principios_de_Administracion.txt.txt",
    "90_Practica_3.txt.txt",
    "52_Contabilidad_Empresarial.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "113_Negocios_Internacionales.txt.txt",
    "112_Gestion_del_Talento.txt.txt",
    "77_Neurociencia_y_Educacion.txt.txt",
    "95_Fisiologia_+Lab.txt.txt",
    "114_Innovacion_y_Sustentabilidad.txt.txt",
    "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "87_Practica_2.txt.txt",
    "96_Evolucion.txt.txt",
    "59_Introduccion_al_Marketing_HSP.txt.txt",
    "111_Proyectos_Empresariales.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt",
    "38_Sistemas_Operativos.txt.txt",
    "70_Gerencia_Financiera_HSP.txt.txt",
    "49_Panaderia.txt.txt",
    "71_Identidad_Culinaria.txt.txt",
    "51_Introduccion_a_la_Hospitalidad.txt.txt"
]

# Score the selected documents against the query with the Manhattan helper
similarity_results = get_similar_documents_manhattan(
    query_vector,
    document_data,
    selected_documents,
)

# Each result is indexable: item 0 is the document name, item 1 its distance
for result in similarity_results:
    print(f"Document: {result[0]}, Distance: {result[1]}")

Document: 57_Alta_Cocina_Ecuatoriana.txt.txt, Distance: 0.6015510000000006
Document: 61_Alta_Cocina_Francesa.txt.txt, Distance: 1.1327442999999988
Document: 65_Alta_Cocina_Mundial.txt.txt, Distance: 1.2494280000000009
Document: 71_Identidad_Culinaria.txt.txt, Distance: 1.3709700000000016
Document: 53_Conceptos_y_Tecnicas_2_+PRA.txt.txt, Distance: 1.6911047000000012
Document: 47_Conceptos_y_Tecnicas_1.txt.txt, Distance: 1.733358
Document: 67_Innovacion_Culinaria.txt.txt, Distance: 1.7846484
Document: 49_Panaderia.txt.txt, Distance: 1.8068969999999993
Document: 114_Innovacion_y_Sustentabilidad.txt.txt, Distance: 2.014462000000001
Document: 24_Aprendizaje_y_Servicio_PASEC.txt.txt, Distance: 2.137798
Document: 68_Enologia_y_Cocteleria.txt.txt, Distance: 2.1540980000000003
Document: 64_Practica_Culinaria_2.txt.txt, Distance: 2.1562580000000002
Document: 54_Practica_Culinaria_1.txt.txt, Distance: 2.1701485000000007
Document: 66_Reposteria_y_Chocolateria.txt.txt, Distance: 2.2976031999999993


In [None]:
# Query point: positional row 1 of query_data, restricted to columns '0' and '1'
# (presumably the 2-D embedding coordinates -- confirm against the cell that builds query_data).
query_vector = query_data[['0', '1']].iloc[1].values

# Candidate documents to compare against the query point.
# NOTE(review): "DiseÃ±o" / "NiÃ±o" look like a mis-decoded "ñ"; they are kept verbatim
# because they presumably have to match the names stored in document_data -- confirm.
selected_documents = [
    "184_Ergonomia.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "89_Ensenanza_Integrada_de_CITIAM.txt.txt",
    "7_Ingles_Nivel_2.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "119_Composicion_Visual_1.txt.txt",
    "79_Coloquios_EDU.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "178_Sonido.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "85_Arte_y_Educacion.txt.txt",
    "138_Enfasis_3.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "78_Planificacion_y_Evaluacion_2.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "60_Coloquios_Gastr.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "91_Inclusion_y_Diversidad.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "83_Ensenanza_de_Matematicas.txt.txt",
    "140_Arte_y_Educacion___Curaduria.txt.txt",
    "1_Escritura_Academica.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "123_Fundamentos_de_Escultura.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "86_Bilingualism.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt",
    "169_Improvisacion.txt.txt",
    "13_Ingles_Nivel_4.txt.txt",
    "142_Produccion_&_Exhibicion.txt.txt",
    "81_Practica_1.txt.txt",
    "82_Metodologias_de_Ensenanza.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "84_Ensenanza_de_Ciencias_Sociales.txt.txt",
    "132_Enfasis_1.txt.txt",
    "20_Ingles_Nivel_6.txt.txt",
    "134_Enfasis_2.txt.txt",
    "174_Proyecto_final_en_Danza.txt.txt",
    "173_Performance.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "88_Ensenanza_de_Ciencias.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "73_Teorias_del_Aprendizaje.txt.txt",
    "74_Desarrollo__NiÃ±o_y_Adolescente.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "172_Composicion.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "122_Fotografia_1.txt.txt",
    "128_Taller_de_Arte_1.txt.txt",
    "170_Danza_Moderna_2.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "19_Ingles_Nivel_5.txt.txt",
    "177_Cinematografia.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "6_Ingles_Nivel_1.txt.txt",
    "179_Storytelling.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Score only the candidate documents against the query vector.
similarity_results = get_similar_documents_manhattan(query_vector, document_data, selected_documents)

# Each result is indexable: item 0 is printed as the document name, item 1 as its distance.
for result in similarity_results:
    print(f"Document: {result[0]}, Distance: {result[1]}")

Document: 172_Composicion.txt.txt, Distance: 0.7792937000000002
Document: 170_Danza_Moderna_2.txt.txt, Distance: 0.9084595000000011
Document: 169_Improvisacion.txt.txt, Distance: 0.9917580000000008
Document: 119_Composicion_Visual_1.txt.txt, Distance: 1.2326513000000006
Document: 173_Performance.txt.txt, Distance: 1.3686030000000002
Document: 122_Fotografia_1.txt.txt, Distance: 1.5272016000000006
Document: 126_Arte_Contemporaneo.txt.txt, Distance: 1.635685500000001
Document: 171_Barra_para_danza_contemporanea.txt.txt, Distance: 1.7619964000000001
Document: 177_Cinematografia.txt.txt, Distance: 1.8001930000000002
Document: 132_Enfasis_1.txt.txt, Distance: 1.8473630000000005
Document: 134_Enfasis_2.txt.txt, Distance: 2.1521063000000007
Document: 123_Fundamentos_de_Escultura.txt.txt, Distance: 2.4684705000000005
Document: 136_Laboratorio_de_Creacion_3.txt.txt, Distance: 2.5033059999999994
Document: 128_Taller_de_Arte_1.txt.txt, Distance: 2.5299309999999995
Document: 127_Laboratorio_de_Cre

In [None]:
# Query point: positional row 2 of query_data, restricted to columns '0' and '1'
# (presumably the 2-D embedding coordinates -- confirm against the cell that builds query_data).
query_vector = query_data[['0', '1']].iloc[2].values

# Candidate documents to compare against the query point.
# NOTE(review): "DiseÃ±o" / "NiÃ±o" look like a mis-decoded "ñ"; they are kept verbatim
# because they presumably have to match the names stored in document_data -- confirm.
selected_documents = [
    "184_Ergonomia.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "89_Ensenanza_Integrada_de_CITIAM.txt.txt",
    "7_Ingles_Nivel_2.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "119_Composicion_Visual_1.txt.txt",
    "79_Coloquios_EDU.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "178_Sonido.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "85_Arte_y_Educacion.txt.txt",
    "138_Enfasis_3.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "78_Planificacion_y_Evaluacion_2.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "60_Coloquios_Gastr.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "91_Inclusion_y_Diversidad.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "83_Ensenanza_de_Matematicas.txt.txt",
    "140_Arte_y_Educacion___Curaduria.txt.txt",
    "1_Escritura_Academica.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "123_Fundamentos_de_Escultura.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "86_Bilingualism.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt",
    "169_Improvisacion.txt.txt",
    "13_Ingles_Nivel_4.txt.txt",
    "142_Produccion_&_Exhibicion.txt.txt",
    "81_Practica_1.txt.txt",
    "82_Metodologias_de_Ensenanza.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "84_Ensenanza_de_Ciencias_Sociales.txt.txt",
    "132_Enfasis_1.txt.txt",
    "20_Ingles_Nivel_6.txt.txt",
    "134_Enfasis_2.txt.txt",
    "174_Proyecto_final_en_Danza.txt.txt",
    "173_Performance.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "88_Ensenanza_de_Ciencias.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "73_Teorias_del_Aprendizaje.txt.txt",
    "74_Desarrollo__NiÃ±o_y_Adolescente.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "172_Composicion.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "122_Fotografia_1.txt.txt",
    "128_Taller_de_Arte_1.txt.txt",
    "170_Danza_Moderna_2.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "19_Ingles_Nivel_5.txt.txt",
    "177_Cinematografia.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "6_Ingles_Nivel_1.txt.txt",
    "179_Storytelling.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Score only the candidate documents against the query vector.
similarity_results = get_similar_documents_manhattan(query_vector, document_data, selected_documents)

# Each result is indexable: item 0 is printed as the document name, item 1 as its distance.
for result in similarity_results:
    print(f"Document: {result[0]}, Distance: {result[1]}")

Document: 124_Teoria_Critica_1__Arte_&_Media.txt.txt, Distance: 0.41355399999999953
Document: 89_Ensenanza_Integrada_de_CITIAM.txt.txt, Distance: 0.8298549999999985
Document: 125_Nuevos_Medios.txt.txt, Distance: 1.0269044999999997
Document: 84_Ensenanza_de_Ciencias_Sociales.txt.txt, Distance: 1.111001
Document: 138_Enfasis_3.txt.txt, Distance: 1.1511382999999995
Document: 85_Arte_y_Educacion.txt.txt, Distance: 1.3391639999999985
Document: 134_Enfasis_2.txt.txt, Distance: 1.473509700000001
Document: 141_Temas_en_Comunicacion_y_Arte.txt.txt, Distance: 1.5068669999999997
Document: 79_Coloquios_EDU.txt.txt, Distance: 1.6370360000000002
Document: 11_Ser_y_Cosmos.txt.txt, Distance: 1.6673789999999986
Document: 132_Enfasis_1.txt.txt, Distance: 1.7782530000000012
Document: 129_Coloquios_ART.txt.txt, Distance: 1.7889147000000003
Document: 83_Ensenanza_de_Matematicas.txt.txt, Distance: 1.8311979999999988
Document: 171_Barra_para_danza_contemporanea.txt.txt, Distance: 1.8636196000000016
Document:

In [None]:
# Query point: positional row 3 of query_data, restricted to columns '0' and '1'
# (presumably the 2-D embedding coordinates -- confirm against the cell that builds query_data).
query_vector = query_data[['0', '1']].iloc[3].values

# Candidate documents to compare against the query point.
# NOTE(review): "DiseÃ±o" looks like a mis-decoded "ñ"; it is kept verbatim because the
# names presumably have to match those stored in document_data -- confirm.
selected_documents = [
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt",
    "34_Inteligencia_Artificial.txt.txt",
    "3_Calculo_Diferencial_+_Ej.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "26_Electronica_Basica_+Lab.txt.txt",
    "101_Analisis_de_Datos.txt.txt",
    "161_Algebra_Lineal_2.txt.txt",
    "183_Procesos,_Metodos_y_Estandares.txt.txt",
    "35_Base_de_Datos.txt.txt",
    "44_Aplicaciones_Distribuidas.txt.txt",
    "151_Ecuaciones_Diferenciales.txt.txt",
    "18_Calculo_Vectorial.txt.txt",
    "36_Aprendizaje_Automatico.txt.txt",
    "28_Probabilidad_y_Estadistica_+Ej.txt.txt",
    "58_Introduccion_a_la_Biologia_+Ej.txt.txt",
    "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "186_Sistemas_Lean.txt.txt",
    "4_Quimica_General_1_+Lab_Ej.txt.txt",
    "42_Seguridad_Informatica.txt.txt",
    "162_Algebra_Abstracta_1.txt.txt",
    "17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt",
    "164_Analisis_Funcional.txt.txt",
    "153_Introduccion_a_Probabilidades.txt.txt",
    "150_Variable_Compleja.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "12_Ingles_Nivel_3.txt.txt",
    "163_Geometria_Diferencial.txt.txt",
    "22_Algebra_Lineal_1_+Ej.txt.txt",
    "21_Programacion_de_Apps.txt.txt",
    "137_Matematicas_Cotidianas.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "43_Mineria_de_Datos.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt",
    "8_Programacion_en_C++_+Ej.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "98_Estadistica_Empresarial_+Lab.txt.txt",
    "160_Ecuaciones_Diferenciales_Parciales.txt.txt",
    "76_Estadistica_para_CCSS.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt",
    "107_Operaciones_+Lab.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "166_Topologia_1.txt.txt",
    "148_Calculo_para_Ciencias_1.txt.txt",
    "31_Organizacion_de_Computadores.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt",
    "147_Modelado_3D_1.txt.txt",
    "27_Estructuras_de_Datos.txt.txt",
    "157_Analisis_Numerico.txt.txt",
    "156_Inferencia_Estadistica.txt.txt",
    "168_Analisis_Real.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "159_Combinatoria_y_Grafos.txt.txt",
    "37_Redes_+Lab.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt",
    "167_Topologia_2.txt.txt",
    "33_DiseÃ±o_de_Sistemas.txt.txt",
    "158_Teoria_de_Numeros.txt.txt"
]

# Score only the candidate documents against the query vector.
similarity_results = get_similar_documents_manhattan(query_vector, document_data, selected_documents)

# Each result is indexable: item 0 is printed as the document name, item 1 as its distance.
for result in similarity_results:
    print(f"Document: {result[0]}, Distance: {result[1]}")

Document: 98_Estadistica_Empresarial_+Lab.txt.txt, Distance: 0.5394824000000011
Document: 46_Matematica_Empresarial_+Ej.txt.txt, Distance: 1.1340205000000005
Document: 101_Analisis_de_Datos.txt.txt, Distance: 1.2301814000000002
Document: 43_Mineria_de_Datos.txt.txt, Distance: 1.4340764000000004
Document: 42_Seguridad_Informatica.txt.txt, Distance: 1.7113304
Document: 4_Quimica_General_1_+Lab_Ej.txt.txt, Distance: 1.818687100000001
Document: 76_Estadistica_para_CCSS.txt.txt, Distance: 1.865070400000001
Document: 28_Probabilidad_y_Estadistica_+Ej.txt.txt, Distance: 2.0952680000000004
Document: 17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt, Distance: 2.168942200000001
Document: 8_Programacion_en_C++_+Ej.txt.txt, Distance: 2.1859443
Document: 156_Inferencia_Estadistica.txt.txt, Distance: 2.403042300000001
Document: 185_Ingenieria_de_la_Calidad_+_Lab.txt.txt, Distance: 2.403571600000001
Document: 58_Introduccion_a_la_Biologia_+Ej.txt.txt, Distance: 2.4612204000000006
Document: 159_Combinatori

In [None]:
# Query point: positional row 4 of query_data, restricted to columns '0' and '1'
# (presumably the 2-D embedding coordinates -- confirm against the cell that builds query_data).
query_vector = query_data[['0', '1']].iloc[4].values

# Candidate documents to compare against the query point.
# NOTE(review): "DiseÃ±o" / "NiÃ±o" look like a mis-decoded "ñ"; they are kept verbatim
# because they presumably have to match the names stored in document_data -- confirm.
selected_documents = [
    "184_Ergonomia.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "89_Ensenanza_Integrada_de_CITIAM.txt.txt",
    "7_Ingles_Nivel_2.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "119_Composicion_Visual_1.txt.txt",
    "79_Coloquios_EDU.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "178_Sonido.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "85_Arte_y_Educacion.txt.txt",
    "138_Enfasis_3.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "78_Planificacion_y_Evaluacion_2.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "60_Coloquios_Gastr.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "91_Inclusion_y_Diversidad.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "83_Ensenanza_de_Matematicas.txt.txt",
    "140_Arte_y_Educacion___Curaduria.txt.txt",
    "1_Escritura_Academica.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "123_Fundamentos_de_Escultura.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "86_Bilingualism.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt",
    "169_Improvisacion.txt.txt",
    "13_Ingles_Nivel_4.txt.txt",
    "142_Produccion_&_Exhibicion.txt.txt",
    "81_Practica_1.txt.txt",
    "82_Metodologias_de_Ensenanza.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "84_Ensenanza_de_Ciencias_Sociales.txt.txt",
    "132_Enfasis_1.txt.txt",
    "20_Ingles_Nivel_6.txt.txt",
    "134_Enfasis_2.txt.txt",
    "174_Proyecto_final_en_Danza.txt.txt",
    "173_Performance.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "88_Ensenanza_de_Ciencias.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "73_Teorias_del_Aprendizaje.txt.txt",
    "74_Desarrollo__NiÃ±o_y_Adolescente.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "172_Composicion.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "122_Fotografia_1.txt.txt",
    "128_Taller_de_Arte_1.txt.txt",
    "170_Danza_Moderna_2.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "19_Ingles_Nivel_5.txt.txt",
    "177_Cinematografia.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "6_Ingles_Nivel_1.txt.txt",
    "179_Storytelling.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Score only the candidate documents against the query vector.
similarity_results = get_similar_documents_manhattan(query_vector, document_data, selected_documents)

# Each result is indexable: item 0 is printed as the document name, item 1 as its distance.
for result in similarity_results:
    print(f"Document: {result[0]}, Distance: {result[1]}")

Document: 84_Ensenanza_de_Ciencias_Sociales.txt.txt, Distance: 0.6477695000000008
Document: 88_Ensenanza_de_Ciencias.txt.txt, Distance: 1.1826355
Document: 89_Ensenanza_Integrada_de_CITIAM.txt.txt, Distance: 1.1890685000000012
Document: 85_Arte_y_Educacion.txt.txt, Distance: 1.400510500000001
Document: 124_Teoria_Critica_1__Arte_&_Media.txt.txt, Distance: 1.7132755
Document: 83_Ensenanza_de_Matematicas.txt.txt, Distance: 1.7480585000000008
Document: 79_Coloquios_EDU.txt.txt, Distance: 1.8844185000000007
Document: 11_Ser_y_Cosmos.txt.txt, Distance: 1.9147614999999991
Document: 129_Coloquios_ART.txt.txt, Distance: 2.085398200000001
Document: 73_Teorias_del_Aprendizaje.txt.txt, Distance: 2.1263845000000003
Document: 80_Ensenanza_de_Lenguaje.txt.txt, Distance: 2.2494914999999995
Document: 138_Enfasis_3.txt.txt, Distance: 2.2552638000000016
Document: 140_Arte_y_Educacion___Curaduria.txt.txt, Distance: 2.2827504999999997
Document: 141_Temas_en_Comunicacion_y_Arte.txt.txt, Distance: 2.2864405