In [None]:
!pip install umap-learn

Collecting umap-learn
  Downloading umap_learn-0.5.7-py3-none-any.whl.metadata (21 kB)
Collecting pynndescent>=0.5 (from umap-learn)
  Downloading pynndescent-0.5.13-py3-none-any.whl.metadata (6.8 kB)
Downloading umap_learn-0.5.7-py3-none-any.whl (88 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.8/88.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pynndescent-0.5.13-py3-none-any.whl (56 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.9/56.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynndescent, umap-learn
Successfully installed pynndescent-0.5.13 umap-learn-0.5.7


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def read_and_prepare_data(file_path, normalization_method='standard'):
    """
    Load a CSV, split off its first column, and scale the remaining columns.

    Args:
        file_path (str): Location of the CSV file to read.
        normalization_method (str): 'minmax' for MinMaxScaler or
            'standard' for StandardScaler.

    Returns:
        tuple: (scaled DataFrame, first column of the CSV). If anything goes
        wrong (unreadable file, unknown method, scaling error) the problem is
        printed and (None, None) is returned instead.
    """
    try:
        frame = pd.read_csv(file_path)

        # Column 0 is kept aside; everything after it is scaled.
        labels = frame.iloc[:, 0]
        features = frame.iloc[:, 1:]

        # Choose the scaler first, then scale in a single place.
        if normalization_method == 'minmax':
            scaler = MinMaxScaler()
        elif normalization_method == 'standard':
            from sklearn.preprocessing import StandardScaler
            scaler = StandardScaler()
        else:
            raise ValueError("Invalid normalization method")

        scaled_values = scaler.fit_transform(features)
        scaled_frame = pd.DataFrame(scaled_values, columns=features.columns)
        return scaled_frame, labels

    except Exception as e:
        print(f"Error processing data: {e}")
        return None, None

In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.manifold import Isomap
from itertools import product

def run_isomap_with_tsne(X_scaled, X_query, param_grid, first_column_doc, first_column_query):
    """Fit Isomap for every parameter combination and save the reduced spaces.

    For each (n_components, n_neighbors) pair taken from ``param_grid`` an
    Isomap model is fitted on ``X_scaled``. The document embedding and the
    embedding of ``X_query`` (projected with that same fitted model) are
    written as CSV files into the ``Isomap_Results`` folder. Despite its name,
    this function does not run t-SNE.

    Args:
        X_scaled: Document feature matrix (DataFrame or array), one row per document.
        X_query: Query feature matrix (DataFrame or array) to project with the fitted model.
        param_grid (dict): Lists of values under the keys 'n_components' and 'n_neighbors'.
        first_column_doc: Labels written as 'original_column' in the document CSV.
        first_column_query: Labels written as 'original_column' in the query CSV.

    Returns:
        list[dict]: One entry per combination that was fitted and saved, with
        the keys 'n_components', 'n_neighbors', 'reconstruction_error',
        'doc_path' and 'query_path'. Combinations that are skipped or that
        fail are reported on stdout and left out of the list.
    """
    results = []

    # Folder for saving CSVs
    output_folder = "Isomap_Results"
    os.makedirs(output_folder, exist_ok=True)

    # Work on plain arrays so fit and transform see the same kind of input.
    if isinstance(X_scaled, pd.DataFrame):
        X_scaled = X_scaled.to_numpy()
    if isinstance(X_query, pd.DataFrame):
        X_query = X_query.to_numpy()

    param_combinations = product(param_grid['n_components'], param_grid['n_neighbors'])

    for n_components, n_neighbors in param_combinations:
        try:
            # Validate parameters against the shape of the document matrix.
            if n_components > X_scaled.shape[1]:
                print(f"Skipping n_components={n_components}: exceeds feature dimensions.")
                continue
            if n_neighbors >= X_scaled.shape[0]:
                print(f"Skipping n_neighbors={n_neighbors}: exceeds number of data points.")
                continue

            isomap_model = Isomap(
                n_neighbors=n_neighbors,
                n_components=n_components
            )
            isomap_transformed_data = isomap_model.fit_transform(X_scaled)

            # Document embedding, with the labels as the leading column.
            output_df = pd.DataFrame(isomap_transformed_data)
            output_df.insert(0, 'original_column', first_column_doc)

            file_name = f"isomap_ncomp{n_components}_nneigh{n_neighbors}.csv"
            file_path = os.path.join(output_folder, file_name)
            output_df.to_csv(file_path, index=False)
            print(f"Saved Isomap doc result to {file_path}")

            # Queries are projected with the model fitted on the documents.
            isomap_transformed_data_query = isomap_model.transform(X_query)

            output_df_query = pd.DataFrame(isomap_transformed_data_query)
            output_df_query.insert(0, 'original_column', first_column_query)

            file_name_query = f"isomap_query_ncomp{n_components}_nneigh{n_neighbors}.csv"
            file_path_query = os.path.join(output_folder, file_name_query)
            output_df_query.to_csv(file_path_query, index=False)
            print(f"Saved Isomap query result to {file_path_query}")

            # Record what was produced so the caller can compare combinations;
            # this runs only after both files have been written.
            results.append({
                'n_components': n_components,
                'n_neighbors': n_neighbors,
                'reconstruction_error': isomap_model.reconstruction_error(),
                'doc_path': file_path,
                'query_path': file_path_query,
            })

        except ValueError as ve:
            print(f"ValueError for n_components={n_components}, n_neighbors={n_neighbors}: {str(ve)}")
        except Exception as e:
            print(f"Unexpected error for n_components={n_components}, n_neighbors={n_neighbors}: {str(e)}")

    return results


In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from sklearn.preprocessing import StandardScaler

def cluster_documents_and_assign_queries(doc_csv, query_csv, n_clusters, output_csv):
    """
    Clusters documents using K-Means and assigns queries to the closest cluster.

    Every column other than 'original_column' is treated as a feature, so the
    function works for any number of reduced dimensions as long as both CSV
    files share the same feature columns.

    Parameters:
    - doc_csv: Path to the CSV file containing reduced dimensionality of documents.
    - query_csv: Path to the CSV file containing reduced dimensionality of queries.
    - n_clusters: Number of clusters for K-Means.
    - output_csv: File path to save query-to-cluster assignment results.

    Returns:
    - None. One row per query (columns 'Query', 'Assigned_Cluster',
      'Matched_Documents') is written to output_csv.
    """
    # Load the reduced dimensions for documents and queries
    doc_data = pd.read_csv(doc_csv)
    query_data = pd.read_csv(query_csv)

    # Feature columns are taken from the document file and reused for the
    # queries, so both matrices have the same column order.
    feature_columns = [col for col in doc_data.columns if col != 'original_column']

    document_vectors = doc_data[feature_columns].values
    document_names = doc_data['original_column'].values  # Document names

    query_vectors = query_data[feature_columns].values
    query_names = query_data['original_column'].values  # Query names

    # Apply K-Means clustering to documents
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(document_vectors)

    # Group document names by cluster once instead of rescanning all
    # documents for every query.
    docs_by_cluster = {}
    for doc_name, cluster in zip(document_names, kmeans.labels_):
        docs_by_cluster.setdefault(int(cluster), []).append(str(doc_name))

    # Assign queries to the closest cluster centre
    query_cluster_indices, _ = pairwise_distances_argmin_min(query_vectors, kmeans.cluster_centers_)

    # Prepare query-cluster matches
    query_matches = [
        {
            'Query': query_name,
            'Assigned_Cluster': int(query_cluster_idx),
            'Matched_Documents': ', '.join(docs_by_cluster.get(int(query_cluster_idx), []))
        }
        for query_name, query_cluster_idx in zip(query_names, query_cluster_indices)
    ]

    # Save results to a CSV; the explicit column list keeps the header even
    # when there are no queries.
    matches_df = pd.DataFrame(query_matches, columns=['Query', 'Assigned_Cluster', 'Matched_Documents'])
    matches_df.to_csv(output_csv, index=False)
    print(f"Query-to-cluster assignments saved to {output_csv}")


# Best Isomap parameters for the TF-IDF test:
20 components,

40 neighbors.

## Data preparation ##

In [None]:
# Documents: the first CSV column is split off as the row label, the rest is standardized.
file_path_documents = 'tf_idf_documentos.csv'
X_doc, first_column_doc = read_and_prepare_data(file_path_documents, 'standard')

# Queries: same treatment.
# NOTE(review): each call fits its own StandardScaler, so the queries are
# standardized with their own statistics rather than the documents' - confirm
# that this is intended.
file_path_queries = 'queries_vector.csv'
X_queries, first_column_queries = read_and_prepare_data(file_path_queries, 'standard')

# read_and_prepare_data reports failures by printing and returning (None, None);
# stop here rather than letting later cells fail with less obvious errors.
if X_doc is None or X_queries is None:
    raise RuntimeError(
        f"Could not prepare {file_path_documents!r} and/or {file_path_queries!r}; "
        "see the error printed above."
    )

## Isomap ##

In [None]:
# Single combination: 20 components, 40 neighbors.
param_grid = dict(n_components=[20], n_neighbors=[40])

run_isomap_with_tsne(X_doc, X_queries, param_grid, first_column_doc, first_column_queries)

Saved Isomap doc result to Isomap_Results/isomap_ncomp20_nneigh40.csv
Saved Isomap query result to Isomap_Results/isomap_query_ncomp20_nneigh40.csv


[]

## Clustering ##

In [None]:
# Example usage
# Paths are relative to the working directory, matching the Isomap_Results
# folder that run_isomap_with_tsne writes to.
doc_csv = "Isomap_Results/isomap_ncomp20_nneigh40.csv"   # CSV with reduced dimensions of documents
query_csv = "Isomap_Results/isomap_query_ncomp20_nneigh40.csv"   # CSV with reduced dimensions of queries
output_csv = "query_cluster_matches_isomap.csv"  # Output CSV with matches
n_clusters = 22  # Number of K-Means clusters

cluster_documents_and_assign_queries(doc_csv, query_csv, n_clusters, output_csv)


Query-to-cluster assignments saved to query_cluster_matches_isomap.csv


## Similarities with clustering results ##

In [None]:
import pandas as pd
import numpy as np

# Load the Isomap-reduced documents and queries.
# The file with "query" in its name is the query embedding; the other one is
# the document embedding. Paths are relative, matching the Isomap_Results
# folder that run_isomap_with_tsne writes to.
document_csv = "Isomap_Results/isomap_ncomp20_nneigh40.csv"
document_data = pd.read_csv(document_csv)

query_csv = "Isomap_Results/isomap_query_ncomp20_nneigh40.csv"
query_data = pd.read_csv(query_csv)


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

def calculate_similarity_for_selected_documents(documents_df, query_vector, selected_documents):
    """Rank a subset of documents by cosine similarity to one query vector.

    Args:
        documents_df (pd.DataFrame): One row per document, with the document
            name in 'original_column' and every other column a feature.
        query_vector: 1-D sequence or array with one value per feature column.
        selected_documents: Names (values of 'original_column') to compare.

    Returns:
        pd.DataFrame: Columns 'document' and 'similarity', sorted by similarity
        in descending order. Empty (same columns) when none of the selected
        names occurs in ``documents_df``.
    """
    # Filter the DataFrame to include only the selected documents
    selected_df = documents_df[documents_df['original_column'].isin(selected_documents)]

    # Nothing matched: return an empty result instead of handing an empty
    # matrix to cosine_similarity.
    if selected_df.empty:
        return pd.DataFrame({
            'document': pd.Series(dtype=object),
            'similarity': pd.Series(dtype=float)
        })

    # Drop the label column by name so its position in the frame does not matter.
    selected_vectors = selected_df.drop(columns=['original_column']).values

    # Accept lists as well as arrays; cosine_similarity expects a 2-D row.
    query_row = np.asarray(query_vector).reshape(1, -1)
    similarities = cosine_similarity(selected_vectors, query_row).flatten()

    # Create a result DataFrame with document names and their similarity scores
    results = pd.DataFrame({
        'document': selected_df['original_column'].values,
        'similarity': similarities
    })

    # Sort the results by similarity in descending order
    results = results.sort_values(by='similarity', ascending=False).reset_index(drop=True)

    return results

In [None]:
import numpy as np

def get_similar_documents_manhattan(query_vector, document_data, selected_documents):
    """Rank a subset of documents by Manhattan (L1) distance to a query vector.

    Every column other than 'original_column' is treated as a feature, so the
    number of reduced dimensions is not fixed.

    Args:
        query_vector: 1-D sequence or array with one value per feature column.
        document_data (pd.DataFrame): One row per document, with the document
            name in 'original_column'.
        selected_documents: Names (values of 'original_column') to compare.

    Returns:
        list[tuple]: (document name, distance) pairs sorted by ascending
        distance; empty when none of the selected names is present.
    """
    # Filter the document data to include only the selected documents
    filtered_data = document_data[document_data['original_column'].isin(selected_documents)]

    # Extract the document names and their corresponding vectors
    document_names = filtered_data['original_column'].values
    feature_columns = [col for col in filtered_data.columns if col != 'original_column']
    document_vectors = filtered_data[feature_columns].values

    # Manhattan distance: sum of absolute coordinate differences per document.
    distances = np.abs(document_vectors - np.asarray(query_vector)).sum(axis=1)

    # sorted() is stable, so documents at equal distance keep their frame order.
    return sorted(zip(document_names, distances), key=lambda pair: pair[1])


### Cosine Similarity ###

In [None]:
# Example usage
# Paths are relative, matching the Isomap_Results folder that
# run_isomap_with_tsne writes to.
document_csv = "Isomap_Results/isomap_ncomp20_nneigh40.csv"
query_csv = "Isomap_Results/isomap_query_ncomp20_nneigh40.csv"

# Load data
document_data = pd.read_csv(document_csv)
query_data = pd.read_csv(query_csv)

# First query (row index 0): every column except the 'original_column' label,
# so the number of Isomap components is not hard-coded here.
query_vector = query_data.drop(columns=['original_column']).iloc[0].values

# Candidate documents to compare against the query: names must equal the
# values stored in the 'original_column' column of document_data.
# NOTE(review): the three "Programacion_Para_DiseÃ±o_*" entries look like a
# mis-decoded "Diseño". The list has 86 names but the recorded output shows
# 83 rows, so some names did not match - verify these spellings against the CSV.
selected_documents = [
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt",
    "24_Aprendizaje_y_Servicio_PASEC.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "35_Base_de_Datos.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "44_Aplicaciones_Distribuidas.txt.txt",
    "62_Pasteleria.txt.txt",
    "151_Ecuaciones_Diferenciales.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "100_Principios_de_Seguros.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "102_Principios_de_Finanzas.txt.txt",
    "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "4_Quimica_General_1_+Lab_Ej.txt.txt",
    "138_Enfasis_3.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "99_Gerencia_de_Costos.txt.txt",
    "17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt",
    "182_Control_de_Produccion.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "12_Ingles_Nivel_3.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "21_Programacion_de_Apps.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "108_Economia_y_Negocios.txt.txt",
    "98_Estadistica_Empresarial_+Lab.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt",
    "107_Operaciones_+Lab.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "112_Gestion_del_Talento.txt.txt",
    "77_Neurociencia_y_Educacion.txt.txt",
    "166_Topologia_1.txt.txt",
    "148_Calculo_para_Ciencias_1.txt.txt",
    "31_Organizacion_de_Computadores.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt",
    "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "27_Estructuras_de_Datos.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "111_Proyectos_Empresariales.txt.txt",
    "157_Analisis_Numerico.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "70_Gerencia_Financiera_HSP.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "159_Combinatoria_y_Grafos.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "37_Redes_+Lab.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt",
    "167_Topologia_2.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Replace the list above with the actual document names to compare.

# Rank the selected documents by cosine similarity to query_vector
# (result is sorted with the most similar document first).
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                     document  similarity
0   40_Practica_Pre-Profesional_PASEM.txt.txt    0.730726
1                       62_Pasteleria.txt.txt    0.721608
2     115_Tributacion_y_Entorno_Legal.txt.txt    0.708558
3        66_Reposteria_y_Chocolateria.txt.txt    0.615532
4     24_Aprendizaje_y_Servicio_PASEC.txt.txt    0.588786
..                                        ...         ...
78   30_Programacion_Avanzada_de_Apps.txt.txt   -0.305380
79       151_Ecuaciones_Diferenciales.txt.txt   -0.326050
80           15_Matematicas_Discretas.txt.txt   -0.326085
81            27_Estructuras_de_Datos.txt.txt   -0.340267
82      46_Matematica_Empresarial_+Ej.txt.txt   -0.374975

[83 rows x 2 columns]


In [None]:
# Second query (row index 1): every column except the 'original_column' label.
query_vector = query_data.drop(columns=['original_column']).iloc[1].values

# The candidate list that was repeated here was a verbatim copy of the
# `selected_documents` list defined in the first cosine-similarity cell, so
# that list is reused instead of being declared again.

# Calculate similarity for selected documents
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                       document  similarity
0         176_Lenguaje_Visual_y_Montaje.txt.txt    0.963064
1              68_Enologia_y_Cocteleria.txt.txt    0.899014
2         16_Introduccion_a_la_Economia.txt.txt    0.591522
3   23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt    0.502837
4            70_Gerencia_Financiera_HSP.txt.txt    0.502753
..                                          ...         ...
78  17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt   -0.368171
79        46_Matematica_Empresarial_+Ej.txt.txt   -0.402494
80          148_Calculo_para_Ciencias_1.txt.txt   -0.417601
81              9_Calculo_Integral_+_Ej.txt.txt   -0.460875
82         151_Ecuaciones_Diferenciales.txt.txt   -0.604394

[83 rows x 2 columns]


In [None]:
# Third query (row index 2): every column except the 'original_column' label.
query_vector = query_data.drop(columns=['original_column']).iloc[2].values

# The candidate list that was repeated here was a verbatim copy of the
# `selected_documents` list defined in the first cosine-similarity cell, so
# that list is reused instead of being declared again.

# Calculate similarity for selected documents
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                       document  similarity
0         176_Lenguaje_Visual_y_Montaje.txt.txt    0.961646
1              68_Enologia_y_Cocteleria.txt.txt    0.896495
2         16_Introduccion_a_la_Economia.txt.txt    0.601362
3            70_Gerencia_Financiera_HSP.txt.txt    0.510397
4   23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt    0.498132
..                                          ...         ...
78  17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt   -0.366285
79        46_Matematica_Empresarial_+Ej.txt.txt   -0.398824
80          148_Calculo_para_Ciencias_1.txt.txt   -0.416983
81              9_Calculo_Integral_+_Ej.txt.txt   -0.461238
82         151_Ecuaciones_Diferenciales.txt.txt   -0.595339

[83 rows x 2 columns]


In [None]:
# Fourth query (row index 3): every column except the 'original_column' label.
query_vector = query_data.drop(columns=['original_column']).iloc[3].values

# The candidate list that was repeated here was a verbatim copy of the
# `selected_documents` list defined in the first cosine-similarity cell, so
# that list is reused instead of being declared again.

# Calculate similarity for selected documents
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                       document  similarity
0       115_Tributacion_y_Entorno_Legal.txt.txt    0.725218
1                         37_Redes_+Lab.txt.txt    0.624569
2                      35_Base_de_Datos.txt.txt    0.547567
3       31_Organizacion_de_Computadores.txt.txt    0.467280
4   17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt    0.461078
..                                          ...         ...
78         154_Fundamentos_de_Geometria.txt.txt   -0.398298
79           70_Gerencia_Financiera_HSP.txt.txt   -0.412113
80    39_Proyectos__Gerencia_y_Analisis.txt.txt   -0.434631
81                 149_Teoria_de_Grupos.txt.txt   -0.528700
82                175_Lenguaje_del_Cine.txt.txt   -0.627896

[83 rows x 2 columns]


In [None]:
# Query under test: row 4 of query_data (the fifth query), restricted to the
# 20 embedding columns labelled '0' .. '19'.
query_vector = query_data[[str(dim) for dim in range(20)]].iloc[4].values

# Documents to score against the query.
# NOTE(review): 86 names are listed but the recorded output has 83 rows; the
# three "DiseÃ±o" names look mis-encoded ("Ã±" instead of "ñ") -- confirm they
# match the names stored in document_data.
selected_documents = [
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt",
    "24_Aprendizaje_y_Servicio_PASEC.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "35_Base_de_Datos.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "44_Aplicaciones_Distribuidas.txt.txt",
    "62_Pasteleria.txt.txt",
    "151_Ecuaciones_Diferenciales.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "100_Principios_de_Seguros.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "102_Principios_de_Finanzas.txt.txt",
    "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "4_Quimica_General_1_+Lab_Ej.txt.txt",
    "138_Enfasis_3.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "99_Gerencia_de_Costos.txt.txt",
    "17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt",
    "182_Control_de_Produccion.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "12_Ingles_Nivel_3.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "21_Programacion_de_Apps.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "108_Economia_y_Negocios.txt.txt",
    "98_Estadistica_Empresarial_+Lab.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt",
    "107_Operaciones_+Lab.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "112_Gestion_del_Talento.txt.txt",
    "77_Neurociencia_y_Educacion.txt.txt",
    "166_Topologia_1.txt.txt",
    "148_Calculo_para_Ciencias_1.txt.txt",
    "31_Organizacion_de_Computadores.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt",
    "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "27_Estructuras_de_Datos.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "111_Proyectos_Empresariales.txt.txt",
    "157_Analisis_Numerico.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "70_Gerencia_Financiera_HSP.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "159_Combinatoria_y_Grafos.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "37_Redes_+Lab.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt",
    "167_Topologia_2.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Score the selected documents against the query and show the resulting table.
similarity_results = calculate_similarity_for_selected_documents(
    document_data,
    query_vector,
    selected_documents,
)

print(similarity_results)

                                       document  similarity
0         176_Lenguaje_Visual_y_Montaje.txt.txt    0.966404
1              68_Enologia_y_Cocteleria.txt.txt    0.905029
2         16_Introduccion_a_la_Economia.txt.txt    0.618343
3            70_Gerencia_Financiera_HSP.txt.txt    0.524619
4   23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt    0.490882
..                                          ...         ...
78  17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt   -0.368558
79        46_Matematica_Empresarial_+Ej.txt.txt   -0.394822
80          148_Calculo_para_Ciencias_1.txt.txt   -0.431029
81              9_Calculo_Integral_+_Ej.txt.txt   -0.465632
82         151_Ecuaciones_Diferenciales.txt.txt   -0.591854

[83 rows x 2 columns]


## Manhattan ##

In [None]:
# Input CSVs (file names indicate Isomap results with ncomp20 / nneigh40 for
# the documents and for the queries).
document_csv = "/content/Isomap_Results/isomap_ncomp20_nneigh40.csv"
query_csv = "/content/Isomap_Results/isomap_query_ncomp20_nneigh40.csv"

document_data = pd.read_csv(document_csv)
query_data = pd.read_csv(query_csv)

# Query under test: row 0 of query_data (the first query), restricted to the
# 20 embedding columns labelled '0' .. '19'.
query_vector = query_data[[str(dim) for dim in range(20)]].iloc[0].values

# Documents to rank against the query.
# NOTE(review): the three "DiseÃ±o" names look mis-encoded ("Ã±" instead of
# "ñ") -- confirm they match the names stored in document_data.
selected_documents = [
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt",
    "24_Aprendizaje_y_Servicio_PASEC.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "35_Base_de_Datos.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "44_Aplicaciones_Distribuidas.txt.txt",
    "62_Pasteleria.txt.txt",
    "151_Ecuaciones_Diferenciales.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "100_Principios_de_Seguros.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "102_Principios_de_Finanzas.txt.txt",
    "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "4_Quimica_General_1_+Lab_Ej.txt.txt",
    "138_Enfasis_3.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "99_Gerencia_de_Costos.txt.txt",
    "17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt",
    "182_Control_de_Produccion.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "12_Ingles_Nivel_3.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "21_Programacion_de_Apps.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "108_Economia_y_Negocios.txt.txt",
    "98_Estadistica_Empresarial_+Lab.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt",
    "107_Operaciones_+Lab.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "112_Gestion_del_Talento.txt.txt",
    "77_Neurociencia_y_Educacion.txt.txt",
    "166_Topologia_1.txt.txt",
    "148_Calculo_para_Ciencias_1.txt.txt",
    "31_Organizacion_de_Computadores.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt",
    "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "27_Estructuras_de_Datos.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "111_Proyectos_Empresariales.txt.txt",
    "157_Analisis_Numerico.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "70_Gerencia_Financiera_HSP.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "159_Combinatoria_y_Grafos.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "37_Redes_+Lab.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt",
    "167_Topologia_2.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Rank the selected documents by Manhattan distance to the query.
similarity_results = get_similar_documents_manhattan(
    query_vector,
    document_data,
    selected_documents,
)

# Print each entry (name at index 0, distance at index 1) in the order the
# helper returned them.
for entry in similarity_results:
    print(f"Document: {entry[0]}, Distance: {entry[1]}")

Document: 45_Proyecto_Integrador_CMP.txt.txt, Distance: 28.61984854940128
Document: 133_Taller_de_Arte_2.txt.txt, Distance: 29.055564225606734
Document: 139_Taller_de_Arte_3.txt.txt, Distance: 30.259776550839817
Document: 111_Proyectos_Empresariales.txt.txt, Distance: 30.51488693826722
Document: 131_Taller_de_Investigacion.txt.txt, Distance: 30.52534842184309
Document: 130_Arte_y_Contexto_Social.txt.txt, Distance: 30.573478030897686
Document: 14_Programacion_Avanzada_en_C++.txt.txt, Distance: 30.57354188291964
Document: 185_Ingenieria_de_la_Calidad_+_Lab.txt.txt, Distance: 30.731494027337682
Document: 148_Calculo_para_Ciencias_1.txt.txt, Distance: 30.999008357391013
Document: 127_Laboratorio_de_Creacion_2.txt.txt, Distance: 31.130665587726902
Document: 182_Control_de_Produccion.txt.txt, Distance: 31.30099935581531
Document: 135_Gestion_y_Produccion_Cultural.txt.txt, Distance: 31.345631417785242
Document: 102_Principios_de_Finanzas.txt.txt, Distance: 31.944836667660688
Document: 124_Teo

In [None]:
# Query under test: row 1 of query_data (the second query), restricted to the
# 20 embedding columns labelled '0' .. '19'.
query_vector = query_data[[str(dim) for dim in range(20)]].iloc[1].values

# Documents to rank against the query.
# NOTE(review): the three "DiseÃ±o" names look mis-encoded ("Ã±" instead of
# "ñ") -- confirm they match the names stored in document_data.
selected_documents = [
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt",
    "24_Aprendizaje_y_Servicio_PASEC.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "35_Base_de_Datos.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "44_Aplicaciones_Distribuidas.txt.txt",
    "62_Pasteleria.txt.txt",
    "151_Ecuaciones_Diferenciales.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "100_Principios_de_Seguros.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "102_Principios_de_Finanzas.txt.txt",
    "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "4_Quimica_General_1_+Lab_Ej.txt.txt",
    "138_Enfasis_3.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "99_Gerencia_de_Costos.txt.txt",
    "17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt",
    "182_Control_de_Produccion.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "12_Ingles_Nivel_3.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "21_Programacion_de_Apps.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "108_Economia_y_Negocios.txt.txt",
    "98_Estadistica_Empresarial_+Lab.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt",
    "107_Operaciones_+Lab.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "112_Gestion_del_Talento.txt.txt",
    "77_Neurociencia_y_Educacion.txt.txt",
    "166_Topologia_1.txt.txt",
    "148_Calculo_para_Ciencias_1.txt.txt",
    "31_Organizacion_de_Computadores.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt",
    "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "27_Estructuras_de_Datos.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "111_Proyectos_Empresariales.txt.txt",
    "157_Analisis_Numerico.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "70_Gerencia_Financiera_HSP.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "159_Combinatoria_y_Grafos.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "37_Redes_+Lab.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt",
    "167_Topologia_2.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Rank the selected documents by Manhattan distance to the query.
similarity_results = get_similar_documents_manhattan(
    query_vector,
    document_data,
    selected_documents,
)

# Print each entry (name at index 0, distance at index 1) in the order the
# helper returned them.
for entry in similarity_results:
    print(f"Document: {entry[0]}, Distance: {entry[1]}")

Document: 45_Proyecto_Integrador_CMP.txt.txt, Distance: 27.678502556181382
Document: 131_Taller_de_Investigacion.txt.txt, Distance: 27.942420305467405
Document: 139_Taller_de_Arte_3.txt.txt, Distance: 28.104180866393172
Document: 133_Taller_de_Arte_2.txt.txt, Distance: 28.45511766178793
Document: 182_Control_de_Produccion.txt.txt, Distance: 28.699320011017228
Document: 185_Ingenieria_de_la_Calidad_+_Lab.txt.txt, Distance: 28.72839556646399
Document: 127_Laboratorio_de_Creacion_2.txt.txt, Distance: 29.109499703600726
Document: 117_Herramientas_Digitales_1.txt.txt, Distance: 29.511434263588797
Document: 102_Principios_de_Finanzas.txt.txt, Distance: 29.542075408142285
Document: 135_Gestion_y_Produccion_Cultural.txt.txt, Distance: 29.715776869754706
Document: 111_Proyectos_Empresariales.txt.txt, Distance: 30.00270141073925
Document: 176_Lenguaje_Visual_y_Montaje.txt.txt, Distance: 30.0313369934732
Document: 99_Gerencia_de_Costos.txt.txt, Distance: 30.130180489269126
Document: 136_Laborator

In [None]:
# Query under test: row 2 of query_data (the third query), restricted to the
# 20 embedding columns labelled '0' .. '19'.
query_vector = query_data[[str(dim) for dim in range(20)]].iloc[2].values

# Documents to rank against the query.
# NOTE(review): the three "DiseÃ±o" names look mis-encoded ("Ã±" instead of
# "ñ") -- confirm they match the names stored in document_data.
selected_documents = [
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt",
    "24_Aprendizaje_y_Servicio_PASEC.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "35_Base_de_Datos.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "44_Aplicaciones_Distribuidas.txt.txt",
    "62_Pasteleria.txt.txt",
    "151_Ecuaciones_Diferenciales.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "100_Principios_de_Seguros.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "102_Principios_de_Finanzas.txt.txt",
    "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "4_Quimica_General_1_+Lab_Ej.txt.txt",
    "138_Enfasis_3.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "99_Gerencia_de_Costos.txt.txt",
    "17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt",
    "182_Control_de_Produccion.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "12_Ingles_Nivel_3.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "21_Programacion_de_Apps.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "108_Economia_y_Negocios.txt.txt",
    "98_Estadistica_Empresarial_+Lab.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt",
    "107_Operaciones_+Lab.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "112_Gestion_del_Talento.txt.txt",
    "77_Neurociencia_y_Educacion.txt.txt",
    "166_Topologia_1.txt.txt",
    "148_Calculo_para_Ciencias_1.txt.txt",
    "31_Organizacion_de_Computadores.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt",
    "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "27_Estructuras_de_Datos.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "111_Proyectos_Empresariales.txt.txt",
    "157_Analisis_Numerico.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "70_Gerencia_Financiera_HSP.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "159_Combinatoria_y_Grafos.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "37_Redes_+Lab.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt",
    "167_Topologia_2.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Rank the selected documents by Manhattan distance to the query.
similarity_results = get_similar_documents_manhattan(
    query_vector,
    document_data,
    selected_documents,
)

# Print each entry (name at index 0, distance at index 1) in the order the
# helper returned them.
for entry in similarity_results:
    print(f"Document: {entry[0]}, Distance: {entry[1]}")

Document: 45_Proyecto_Integrador_CMP.txt.txt, Distance: 27.630208986228084
Document: 131_Taller_de_Investigacion.txt.txt, Distance: 27.868032376908047
Document: 139_Taller_de_Arte_3.txt.txt, Distance: 27.917742983540215
Document: 133_Taller_de_Arte_2.txt.txt, Distance: 28.22453862316029
Document: 185_Ingenieria_de_la_Calidad_+_Lab.txt.txt, Distance: 28.441672876896728
Document: 182_Control_de_Produccion.txt.txt, Distance: 28.518986316136573
Document: 127_Laboratorio_de_Creacion_2.txt.txt, Distance: 28.830415955849315
Document: 102_Principios_de_Finanzas.txt.txt, Distance: 29.255352718575015
Document: 135_Gestion_y_Produccion_Cultural.txt.txt, Distance: 29.323108413866148
Document: 117_Herramientas_Digitales_1.txt.txt, Distance: 29.34130241229846
Document: 111_Proyectos_Empresariales.txt.txt, Distance: 29.715978721171982
Document: 99_Gerencia_de_Costos.txt.txt, Distance: 29.76773602378484
Document: 14_Programacion_Avanzada_en_C++.txt.txt, Distance: 30.069566604625635
Document: 124_Teori

In [None]:
# Query under test: row 3 of query_data (the fourth query), restricted to the
# 20 embedding columns labelled '0' .. '19'.
query_vector = query_data[[str(dim) for dim in range(20)]].iloc[3].values

# Documents to rank against the query.
# NOTE(review): the three "DiseÃ±o" names look mis-encoded ("Ã±" instead of
# "ñ") -- confirm they match the names stored in document_data.
selected_documents = [
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt",
    "24_Aprendizaje_y_Servicio_PASEC.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "35_Base_de_Datos.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "44_Aplicaciones_Distribuidas.txt.txt",
    "62_Pasteleria.txt.txt",
    "151_Ecuaciones_Diferenciales.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "100_Principios_de_Seguros.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "102_Principios_de_Finanzas.txt.txt",
    "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "4_Quimica_General_1_+Lab_Ej.txt.txt",
    "138_Enfasis_3.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "99_Gerencia_de_Costos.txt.txt",
    "17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt",
    "182_Control_de_Produccion.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "12_Ingles_Nivel_3.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "21_Programacion_de_Apps.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "108_Economia_y_Negocios.txt.txt",
    "98_Estadistica_Empresarial_+Lab.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt",
    "107_Operaciones_+Lab.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "112_Gestion_del_Talento.txt.txt",
    "77_Neurociencia_y_Educacion.txt.txt",
    "166_Topologia_1.txt.txt",
    "148_Calculo_para_Ciencias_1.txt.txt",
    "31_Organizacion_de_Computadores.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt",
    "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "27_Estructuras_de_Datos.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "111_Proyectos_Empresariales.txt.txt",
    "157_Analisis_Numerico.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "70_Gerencia_Financiera_HSP.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "159_Combinatoria_y_Grafos.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "37_Redes_+Lab.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt",
    "167_Topologia_2.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Rank the selected documents by Manhattan distance to the query.
similarity_results = get_similar_documents_manhattan(
    query_vector,
    document_data,
    selected_documents,
)

# Print each entry (name at index 0, distance at index 1) in the order the
# helper returned them.
for entry in similarity_results:
    print(f"Document: {entry[0]}, Distance: {entry[1]}")

Document: 131_Taller_de_Investigacion.txt.txt, Distance: 33.99120048523996
Document: 14_Programacion_Avanzada_en_C++.txt.txt, Distance: 34.09665376799487
Document: 133_Taller_de_Arte_2.txt.txt, Distance: 34.96208804938308
Document: 185_Ingenieria_de_la_Calidad_+_Lab.txt.txt, Distance: 35.07928391402657
Document: 102_Principios_de_Finanzas.txt.txt, Distance: 35.22498278005551
Document: 127_Laboratorio_de_Creacion_2.txt.txt, Distance: 35.33555709681799
Document: 148_Calculo_para_Ciencias_1.txt.txt, Distance: 35.40242448508953
Document: 139_Taller_de_Arte_3.txt.txt, Distance: 35.42845334538103
Document: 9_Calculo_Integral_+_Ej.txt.txt, Distance: 35.57213560183388
Document: 135_Gestion_y_Produccion_Cultural.txt.txt, Distance: 35.61097251534226
Document: 111_Proyectos_Empresariales.txt.txt, Distance: 35.69050341785657
Document: 130_Arte_y_Contexto_Social.txt.txt, Distance: 35.715762836479506
Document: 118_Fundamentos_de_las_Artes.txt.txt, Distance: 35.77888453665886
Document: 45_Proyecto_In

In [None]:
# Query under test: row 4 of query_data (the fifth query), restricted to the
# 20 embedding columns labelled '0' .. '19'.
query_vector = query_data[[str(dim) for dim in range(20)]].iloc[4].values

# Documents to rank against the query.
# NOTE(review): the three "DiseÃ±o" names look mis-encoded ("Ã±" instead of
# "ñ") -- confirm they match the names stored in document_data.
selected_documents = [
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt",
    "24_Aprendizaje_y_Servicio_PASEC.txt.txt",
    "11_Ser_y_Cosmos.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "35_Base_de_Datos.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt",
    "44_Aplicaciones_Distribuidas.txt.txt",
    "62_Pasteleria.txt.txt",
    "151_Ecuaciones_Diferenciales.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "100_Principios_de_Seguros.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt",
    "102_Principios_de_Finanzas.txt.txt",
    "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "4_Quimica_General_1_+Lab_Ej.txt.txt",
    "138_Enfasis_3.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "99_Gerencia_de_Costos.txt.txt",
    "17_Fisica_para_Ingenieria_1_+Lab_Ej.txt.txt",
    "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "80_Ensenanza_de_Lenguaje.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt",
    "182_Control_de_Produccion.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "12_Ingles_Nivel_3.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "21_Programacion_de_Apps.txt.txt",
    "14_Programacion_Avanzada_en_C++.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "108_Economia_y_Negocios.txt.txt",
    "98_Estadistica_Empresarial_+Lab.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "131_Taller_de_Investigacion.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "23_Fisica_para_Ingenieria_2_+Lab_Ej.txt.txt",
    "107_Operaciones_+Lab.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "112_Gestion_del_Talento.txt.txt",
    "77_Neurociencia_y_Educacion.txt.txt",
    "166_Topologia_1.txt.txt",
    "148_Calculo_para_Ciencias_1.txt.txt",
    "31_Organizacion_de_Computadores.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt",
    "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "27_Estructuras_de_Datos.txt.txt",
    "41_Coloquios_ING.txt.txt",
    "111_Proyectos_Empresariales.txt.txt",
    "157_Analisis_Numerico.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "70_Gerencia_Financiera_HSP.txt.txt",
    "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt",
    "159_Combinatoria_y_Grafos.txt.txt",
    "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "133_Taller_de_Arte_2.txt.txt",
    "37_Redes_+Lab.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt",
    "167_Topologia_2.txt.txt",
    "139_Taller_de_Arte_3.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt"
]

# Rank the selected documents by Manhattan distance to the query.
similarity_results = get_similar_documents_manhattan(
    query_vector,
    document_data,
    selected_documents,
)

# Print each entry (name at index 0, distance at index 1) in the order the
# helper returned them.
for entry in similarity_results:
    print(f"Document: {entry[0]}, Distance: {entry[1]}")

Document: 45_Proyecto_Integrador_CMP.txt.txt, Distance: 27.57433640373703
Document: 131_Taller_de_Investigacion.txt.txt, Distance: 28.00597215749878
Document: 139_Taller_de_Arte_3.txt.txt, Distance: 28.055682764130943
Document: 185_Ingenieria_de_la_Calidad_+_Lab.txt.txt, Distance: 28.31177512752643
Document: 133_Taller_de_Arte_2.txt.txt, Distance: 28.362478403751016
Document: 182_Control_de_Produccion.txt.txt, Distance: 28.848497183041104
Document: 102_Principios_de_Finanzas.txt.txt, Distance: 28.983214654698727
Document: 135_Gestion_y_Produccion_Cultural.txt.txt, Distance: 29.083544833509038
Document: 127_Laboratorio_de_Creacion_2.txt.txt, Distance: 29.157269596570423
Document: 99_Gerencia_de_Costos.txt.txt, Distance: 29.396541173969283
Document: 117_Herramientas_Digitales_1.txt.txt, Distance: 29.502736332909418
Document: 111_Proyectos_Empresariales.txt.txt, Distance: 29.652710924116715
Document: 124_Teoria_Critica_1__Arte_&_Media.txt.txt, Distance: 29.658153270646306
Document: 141_Te

# Best UMAP settings for the tf_idf_2 test:
2 components,

20 neighbors,

min_dist 0.7,

cosine metric

## Data preparation ##

In [None]:
# Input CSVs for the second TF-IDF experiment: document matrix and query matrix.
file_path_documents = 'tf_idf_documentos_2.csv'
file_path_queries = 'queries_vector_2.csv'

# Each call drops the first CSV column (returned separately) and standardizes
# the remaining columns.
# NOTE(review): read_and_prepare_data fits a fresh StandardScaler on every
# call, so the queries are scaled with their own statistics rather than the
# documents' -- confirm this is intended.
X_doc, first_column_doc = read_and_prepare_data(
    file_path_documents, normalization_method='standard'
)
X_queries, first_column_queries = read_and_prepare_data(
    file_path_queries, normalization_method='standard'
)

## Isomap ##

In [None]:
# Single-point Isomap grid: 30 components, 60 neighbours.
param_grid = dict(n_components=[30], n_neighbors=[60])

# Kept as the cell's last expression so the notebook still displays the return
# value; the recorded run saved one document CSV and one query CSV under
# Isomap_Results/ and displayed [].
run_isomap_with_tsne(
    X_doc,
    X_queries,
    param_grid,
    first_column_doc,
    first_column_queries,
)

Saved Isomap doc result to Isomap_Results/isomap_ncomp30_nneigh60.csv
Saved Isomap query result to Isomap_Results/isomap_query_ncomp30_nneigh60.csv


[]

## Clustering ##

In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from sklearn.preprocessing import StandardScaler

def cluster_documents_and_assign_queries(doc_csv, query_csv, n_clusters, output_csv,
                                         feature_columns=None):
    """
    Clusters documents using K-Means and assigns queries to the closest cluster.

    Each query is assigned to the cluster whose centroid is nearest to it
    (pairwise_distances_argmin_min with its default metric), and is then
    matched with every document that K-Means placed in that cluster.

    Parameters:
    - doc_csv: Path to the CSV file containing reduced dimensionality of documents.
    - query_csv: Path to the CSV file containing reduced dimensionality of queries.
    - n_clusters: Number of clusters for K-Means.
    - output_csv: File path to save query-to-cluster assignment results.
    - feature_columns: Optional list of column names holding the feature
      vectors in both CSVs. Defaults to '0'..'29' (30 components), which is
      what this function used before the parameter existed.

    Both CSVs must also contain an 'original_column' column with the
    document/query names.

    Returns:
    - None. The result is written to `output_csv` with the columns
      'Query', 'Assigned_Cluster' and 'Matched_Documents' (comma-separated names).

    Raises:
    - KeyError: If a required column is missing from either CSV.
    """
    if feature_columns is None:
        # Backward-compatible default: 30 components stored under string names.
        feature_columns = [str(i) for i in range(30)]
    else:
        feature_columns = list(feature_columns)

    # Load the reduced dimensions for documents and queries
    doc_data = pd.read_csv(doc_csv)
    query_data = pd.read_csv(query_csv)

    # Fail early with a message that says which file and which columns are wrong.
    required_columns = feature_columns + ['original_column']
    for label, frame in (('document', doc_data), ('query', query_data)):
        missing = [col for col in required_columns if col not in frame.columns]
        if missing:
            raise KeyError(f"{label} CSV is missing expected columns: {missing}")

    # Extract feature vectors and document/query names
    document_vectors = doc_data[feature_columns].values
    document_names = doc_data['original_column'].values
    query_vectors = query_data[feature_columns].values
    query_names = query_data['original_column'].values

    # Apply K-Means clustering to documents (fixed seed for reproducibility)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(document_vectors)

    # Group document names by cluster once, instead of rescanning every
    # document for every query. Names are cast to str so the join below
    # cannot fail on non-string names (e.g. numeric ids).
    docs_by_cluster = {}
    for doc_name, cluster in zip(document_names, kmeans.labels_):
        docs_by_cluster.setdefault(int(cluster), []).append(str(doc_name))

    # Assign queries to the closest cluster centroid
    query_cluster_indices, _ = pairwise_distances_argmin_min(query_vectors, kmeans.cluster_centers_)

    # Prepare query-cluster matches
    query_matches = [
        {
            'Query': query_name,
            'Assigned_Cluster': int(cluster_idx),
            'Matched_Documents': ', '.join(docs_by_cluster.get(int(cluster_idx), [])),
        }
        for query_name, cluster_idx in zip(query_names, query_cluster_indices)
    ]

    # Save results to a CSV; explicit columns keep the header even with no queries.
    matches_df = pd.DataFrame(
        query_matches, columns=['Query', 'Assigned_Cluster', 'Matched_Documents']
    )
    matches_df.to_csv(output_csv, index=False)
    print(f"Query-to-cluster assignments saved to {output_csv}")


In [None]:
# Example usage: cluster the Isomap document embeddings and assign each query to a cluster
doc_csv = "/content/Isomap_Results/isomap_ncomp30_nneigh60.csv"   # CSV with reduced dimensions of documents
query_csv = "/content/Isomap_Results/isomap_query_ncomp30_nneigh60.csv"   # CSV with reduced dimensions of queries
output_csv = "query_cluster_matches_2.csv"  # Output CSV with matches
n_clusters = 22  # Number of K-Means clusters (not KNN neighbours)

cluster_documents_and_assign_queries(doc_csv, query_csv, n_clusters, output_csv)

Query-to-cluster assignments saved to query_cluster_matches_2.csv


## Similarity ##

## Cosine ##

In [None]:
import numpy as np

def get_similar_documents_manhattan(query_vector, document_data, selected_documents,
                                    feature_columns=None):
    """
    Rank a subset of documents by Manhattan (L1) distance to a query vector.

    Parameters:
    - query_vector: 1-D array-like with one value per feature column.
    - document_data: DataFrame with an 'original_column' column (document
      names) plus the feature columns.
    - selected_documents: List-like of document names; only rows whose
      'original_column' value is in this collection are ranked.
    - feature_columns: Optional list of feature column names. Defaults to
      '0'..'29' (30 components), which is what this function used before the
      parameter existed.

    Returns:
    - List of (document_name, distance) tuples sorted by ascending distance.
      Documents at equal distance keep their row order from `document_data`.
      The list is empty when no selected document is present.

    Raises:
    - ValueError: If `query_vector` does not have one value per feature column.
    """
    if feature_columns is None:
        # Backward-compatible default: 30 components stored under string names.
        feature_columns = [str(i) for i in range(30)]
    else:
        feature_columns = list(feature_columns)

    query = np.asarray(query_vector, dtype=float).ravel()
    if query.shape[0] != len(feature_columns):
        raise ValueError(
            f"query_vector has {query.shape[0]} values but "
            f"{len(feature_columns)} feature columns were requested"
        )

    # Filter the document data to include only the selected documents
    filtered_data = document_data[document_data['original_column'].isin(selected_documents)]

    # Extract the document names and their corresponding vectors
    document_names = filtered_data['original_column'].values
    document_vectors = filtered_data[feature_columns].to_numpy(dtype=float)

    # Manhattan distance: sum of absolute coordinate differences per document
    distances = np.sum(np.abs(document_vectors - query), axis=1)

    # Stable sort so that ties keep the original row order
    order = np.argsort(distances, kind="stable")
    return [(document_names[i], distances[i]) for i in order]


In [None]:
# Paths of the Isomap embeddings (30 components, 60 neighbours) for documents and queries.
# NOTE(review): these are absolute /content/... paths — presumably a Colab
# working directory; adjust when running elsewhere.
document_csv = "/content/Isomap_Results/isomap_ncomp30_nneigh60.csv"
query_csv = "/content/Isomap_Results/isomap_query_ncomp30_nneigh60.csv"

# Load both embeddings into DataFrames; later cells read them as
# `document_data` and `query_data`.
document_data = pd.read_csv(document_csv)
query_data = pd.read_csv(query_csv)

In [None]:
# Use the first query vector: row 0 of query_data, feature columns '0'..'29'.
query_vector = query_data[['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10',
                                 '11', '12', '13', '14', '15', '16', '17', '18', '19',
                                '20', '21', '22', '23', '24', '25', '26', '27', '28', '29']].iloc[0].values

# Candidate documents to rank against the query (119 rows in the recorded output).
# NOTE(review): presumably the documents of the cluster this query was assigned
# to in query_cluster_matches_2.csv — confirm.
# NOTE(review): several names contain mojibake ("DiseÃ±o"); presumably they
# match the names stored in the CSV byte-for-byte — verify before "fixing" them.
selected_documents = [
    "184_Ergonomia.txt.txt", "105_Marketing_Digital.txt.txt", "32_Emprendimiento.txt.txt",
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt", "24_Aprendizaje_y_Servicio_PASEC.txt.txt",
    "3_Calculo_Diferencial_+_Ej.txt.txt", "11_Ser_y_Cosmos.txt.txt",
    "89_Ensenanza_Integrada_de_CITIAM.txt.txt", "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "26_Electronica_Basica_+Lab.txt.txt", "7_Ingles_Nivel_2.txt.txt",
    "65_Alta_Cocina_Mundial.txt.txt", "110_Creatividad_Empresarial.txt.txt",
    "161_Algebra_Lineal_2.txt.txt", "183_Procesos,_Metodos_y_Estandares.txt.txt",
    "129_Coloquios_ART.txt.txt", "116_Analisis_Estrategico_ADM.txt.txt",
    "10_Autoconocimiento.txt.txt", "47_Conceptos_y_Tecnicas_1.txt.txt", "35_Base_de_Datos.txt.txt",
    "119_Composicion_Visual_1.txt.txt", "79_Coloquios_EDU.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt", "44_Aplicaciones_Distribuidas.txt.txt",
    "62_Pasteleria.txt.txt", "151_Ecuaciones_Diferenciales.txt.txt", "18_Calculo_Vectorial.txt.txt",
    "125_Nuevos_Medios.txt.txt", "100_Principios_de_Seguros.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt", "58_Introduccion_a_la_Biologia_+Ej.txt.txt",
    "102_Principios_de_Finanzas.txt.txt", "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "186_Sistemas_Lean.txt.txt", "4_Quimica_General_1_+Lab_Ej.txt.txt", "138_Enfasis_3.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt", "162_Algebra_Abstracta_1.txt.txt",
    "99_Gerencia_de_Costos.txt.txt", "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt", "66_Reposteria_y_Chocolateria.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt", "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt", "182_Control_de_Produccion.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt", "164_Analisis_Funcional.txt.txt",
    "153_Introduccion_a_Probabilidades.txt.txt", "117_Herramientas_Digitales_1.txt.txt",
    "150_Variable_Compleja.txt.txt", "83_Ensenanza_de_Matematicas.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt", "140_Arte_y_Educacion___Curaduria.txt.txt",
    "1_Escritura_Academica.txt.txt", "15_Matematicas_Discretas.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt", "163_Geometria_Diferencial.txt.txt",
    "22_Algebra_Lineal_1_+Ej.txt.txt", "123_Fundamentos_de_Escultura.txt.txt",
    "126_Arte_Contemporaneo.txt.txt", "86_Bilingualism.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt", "97_Principios_de_Marketing.txt.txt",
    "109_Investigacion_de_Mercados.txt.txt", "21_Programacion_de_Apps.txt.txt",
    "137_Matematicas_Cotidianas.txt.txt", "14_Programacion_Avanzada_en_C++.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt", "13_Ingles_Nivel_4.txt.txt",
    "149_Teoria_de_Grupos.txt.txt", "142_Produccion_&_Exhibicion.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt", "81_Practica_1.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt", "75_Planificacion_y_Evaluacion_1.txt.txt",
    "8_Programacion_en_C++_+Ej.txt.txt", "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "108_Economia_y_Negocios.txt.txt", "98_Estadistica_Empresarial_+Lab.txt.txt",
    "82_Metodologias_de_Ensenanza.txt.txt", "160_Ecuaciones_Diferenciales_Parciales.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt", "131_Taller_de_Investigacion.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt", "132_Enfasis_1.txt.txt", "107_Operaciones_+Lab.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt", "113_Negocios_Internacionales.txt.txt",
    "112_Gestion_del_Talento.txt.txt", "77_Neurociencia_y_Educacion.txt.txt",
    "166_Topologia_1.txt.txt", "134_Enfasis_2.txt.txt", "148_Calculo_para_Ciencias_1.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt", "147_Modelado_3D_1.txt.txt",
    "114_Innovacion_y_Sustentabilidad.txt.txt", "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "27_Estructuras_de_Datos.txt.txt", "174_Proyecto_final_en_Danza.txt.txt",
    "173_Performance.txt.txt", "111_Proyectos_Empresariales.txt.txt", "157_Analisis_Numerico.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt", "168_Analisis_Real.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt", "38_Sistemas_Operativos.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt", "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt", "128_Taller_de_Arte_1.txt.txt", "49_Panaderia.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt", "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt", "175_Lenguaje_del_Cine.txt.txt",
    "133_Taller_de_Arte_2.txt.txt", "146_Juegos_y_Narrativa.txt.txt",
    "167_Topologia_2.txt.txt", "139_Taller_de_Arte_3.txt.txt",
    "33_DiseÃ±o_de_Sistemas.txt.txt", "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt", "158_Teoria_de_Numeros.txt.txt"
]

# Calculate similarity for selected documents.
# NOTE(review): calculate_similarity_for_selected_documents is not defined in
# this section; it must already exist in the kernel from an earlier cell. The
# recorded output is a table with 'document' and 'similarity' columns, sorted
# by descending similarity.
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                document  similarity
0               35_Base_de_Datos.txt.txt    0.891697
1                173_Performance.txt.txt    0.465911
2              147_Modelado_3D_1.txt.txt    0.465778
3      163_Geometria_Diferencial.txt.txt    0.433202
4          105_Marketing_Digital.txt.txt    0.411399
..                                   ...         ...
114  110_Creatividad_Empresarial.txt.txt   -0.323733
115      162_Algebra_Abstracta_1.txt.txt   -0.327588
116  142_Produccion_&_Exhibicion.txt.txt   -0.339769
117  29_Teoria_de_la_Computacion.txt.txt   -0.444330
118             79_Coloquios_EDU.txt.txt   -0.465267

[119 rows x 2 columns]


In [None]:
# Use the second query vector: row 1 of query_data, feature columns '0'..'29'.
# (The previous comment said "first query vector", which did not match iloc[1].)
query_vector = query_data[[str(i) for i in range(30)]].iloc[1].values

# `selected_documents` is the 119-name candidate list defined in the first
# similarity cell above. The same list used to be pasted verbatim into this
# cell; it is reused here so there is only one copy to maintain.
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                document  similarity
0               35_Base_de_Datos.txt.txt    0.893749
1                173_Performance.txt.txt    0.473995
2              147_Modelado_3D_1.txt.txt    0.461875
3      163_Geometria_Diferencial.txt.txt    0.430295
4          105_Marketing_Digital.txt.txt    0.423445
..                                   ...         ...
114         139_Taller_de_Arte_3.txt.txt   -0.328842
115      162_Algebra_Abstracta_1.txt.txt   -0.329418
116  142_Produccion_&_Exhibicion.txt.txt   -0.353557
117  29_Teoria_de_la_Computacion.txt.txt   -0.453476
118             79_Coloquios_EDU.txt.txt   -0.461198

[119 rows x 2 columns]


In [None]:
# Use the third query vector: row 2 of query_data, feature columns '0'..'29'.
# (The previous comment said "first query vector", which did not match iloc[2].)
query_vector = query_data[[str(i) for i in range(30)]].iloc[2].values

# `selected_documents` is the 119-name candidate list defined in the first
# similarity cell above. The same list used to be pasted verbatim into this
# cell; it is reused here so there is only one copy to maintain.
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                     document  similarity
0                    35_Base_de_Datos.txt.txt    0.904918
1                     173_Performance.txt.txt    0.471540
2                   147_Modelado_3D_1.txt.txt    0.445354
3               105_Marketing_Digital.txt.txt    0.424314
4           163_Geometria_Diferencial.txt.txt    0.420373
..                                        ...         ...
114  140_Arte_y_Educacion___Curaduria.txt.txt   -0.345099
115           162_Algebra_Abstracta_1.txt.txt   -0.346395
116       142_Produccion_&_Exhibicion.txt.txt   -0.363616
117                  79_Coloquios_EDU.txt.txt   -0.412112
118       29_Teoria_de_la_Computacion.txt.txt   -0.464562

[119 rows x 2 columns]


In [None]:
# Use the fourth query vector: row 3 of query_data, feature columns '0'..'29'.
# (The previous comment said "first query vector", which did not match iloc[3].)
query_vector = query_data[[str(i) for i in range(30)]].iloc[3].values

# `selected_documents` is the 119-name candidate list defined in the first
# similarity cell above. The same list used to be pasted verbatim into this
# cell; it is reused here so there is only one copy to maintain.
similarity_results = calculate_similarity_for_selected_documents(document_data, query_vector, selected_documents)

print(similarity_results)

                                document  similarity
0               35_Base_de_Datos.txt.txt    0.878489
1              147_Modelado_3D_1.txt.txt    0.452091
2                173_Performance.txt.txt    0.446584
3          105_Marketing_Digital.txt.txt    0.424455
4      163_Geometria_Diferencial.txt.txt    0.415937
..                                   ...         ...
114  110_Creatividad_Empresarial.txt.txt   -0.321059
115            129_Coloquios_ART.txt.txt   -0.323833
116  142_Produccion_&_Exhibicion.txt.txt   -0.338475
117  29_Teoria_de_la_Computacion.txt.txt   -0.449818
118             79_Coloquios_EDU.txt.txt   -0.480538

[119 rows x 2 columns]


In [None]:
# Example: Use the first query vector
query_vector = query_data[['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10',
                                 '11', '12', '13', '14', '15', '16', '17', '18', '19',
                                '20', '21', '22', '23', '24', '25', '26', '27', '28', '29']].iloc[4].values

selected_documents = [
    "184_Ergonomia.txt.txt", "105_Marketing_Digital.txt.txt", "32_Emprendimiento.txt.txt",
    "185_Ingenieria_de_la_Calidad_+_Lab.txt.txt", "24_Aprendizaje_y_Servicio_PASEC.txt.txt",
    "3_Calculo_Diferencial_+_Ej.txt.txt", "11_Ser_y_Cosmos.txt.txt",
    "89_Ensenanza_Integrada_de_CITIAM.txt.txt", "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "26_Electronica_Basica_+Lab.txt.txt", "7_Ingles_Nivel_2.txt.txt",
    "65_Alta_Cocina_Mundial.txt.txt", "110_Creatividad_Empresarial.txt.txt",
    "161_Algebra_Lineal_2.txt.txt", "183_Procesos,_Metodos_y_Estandares.txt.txt",
    "129_Coloquios_ART.txt.txt", "116_Analisis_Estrategico_ADM.txt.txt",
    "10_Autoconocimiento.txt.txt", "47_Conceptos_y_Tecnicas_1.txt.txt", "35_Base_de_Datos.txt.txt",
    "119_Composicion_Visual_1.txt.txt", "79_Coloquios_EDU.txt.txt",
    "130_Arte_y_Contexto_Social.txt.txt", "44_Aplicaciones_Distribuidas.txt.txt",
    "62_Pasteleria.txt.txt", "151_Ecuaciones_Diferenciales.txt.txt", "18_Calculo_Vectorial.txt.txt",
    "125_Nuevos_Medios.txt.txt", "100_Principios_de_Seguros.txt.txt",
    "72_Fundamentos_de_la_Educacion.txt.txt", "58_Introduccion_a_la_Biologia_+Ej.txt.txt",
    "102_Principios_de_Finanzas.txt.txt", "181_Inv._de_Operaciones_1_+Lab.txt.txt",
    "186_Sistemas_Lean.txt.txt", "4_Quimica_General_1_+Lab_Ej.txt.txt", "138_Enfasis_3.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt", "162_Algebra_Abstracta_1.txt.txt",
    "99_Gerencia_de_Costos.txt.txt", "127_Laboratorio_de_Creacion_2.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt", "66_Reposteria_y_Chocolateria.txt.txt",
    "30_Programacion_Avanzada_de_Apps.txt.txt", "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "50_Nutricion_Humana_+Lab.txt.txt", "182_Control_de_Produccion.txt.txt",
    "118_Fundamentos_de_las_Artes.txt.txt", "164_Analisis_Funcional.txt.txt",
    "153_Introduccion_a_Probabilidades.txt.txt", "117_Herramientas_Digitales_1.txt.txt",
    "150_Variable_Compleja.txt.txt", "83_Ensenanza_de_Matematicas.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt", "140_Arte_y_Educacion___Curaduria.txt.txt",
    "1_Escritura_Academica.txt.txt", "15_Matematicas_Discretas.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt", "163_Geometria_Diferencial.txt.txt",
    "22_Algebra_Lineal_1_+Ej.txt.txt", "123_Fundamentos_de_Escultura.txt.txt",
    "126_Arte_Contemporaneo.txt.txt", "86_Bilingualism.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt", "97_Principios_de_Marketing.txt.txt",
    "109_Investigacion_de_Mercados.txt.txt", "21_Programacion_de_Apps.txt.txt",
    "137_Matematicas_Cotidianas.txt.txt", "14_Programacion_Avanzada_en_C++.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt", "13_Ingles_Nivel_4.txt.txt",
    "149_Teoria_de_Grupos.txt.txt", "142_Produccion_&_Exhibicion.txt.txt",
    "152_Calculo_para_Ciencias_2.txt.txt", "81_Practica_1.txt.txt",
    "16_Introduccion_a_la_Economia.txt.txt", "75_Planificacion_y_Evaluacion_1.txt.txt",
    "8_Programacion_en_C++_+Ej.txt.txt", "143_Programacion_Para_DiseÃ±o_1.txt.txt",
    "108_Economia_y_Negocios.txt.txt", "98_Estadistica_Empresarial_+Lab.txt.txt",
    "82_Metodologias_de_Ensenanza.txt.txt", "160_Ecuaciones_Diferenciales_Parciales.txt.txt",
    "121_Dibujo_para_Arte_y_Diseno.txt.txt", "131_Taller_de_Investigacion.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt", "132_Enfasis_1.txt.txt", "107_Operaciones_+Lab.txt.txt",
    "145_Programacion_Para_DiseÃ±o_3.txt.txt", "113_Negocios_Internacionales.txt.txt",
    "112_Gestion_del_Talento.txt.txt", "77_Neurociencia_y_Educacion.txt.txt",
    "166_Topologia_1.txt.txt", "134_Enfasis_2.txt.txt", "148_Calculo_para_Ciencias_1.txt.txt",
    "46_Matematica_Empresarial_+Ej.txt.txt", "147_Modelado_3D_1.txt.txt",
    "114_Innovacion_y_Sustentabilidad.txt.txt", "39_Proyectos__Gerencia_y_Analisis.txt.txt",
    "27_Estructuras_de_Datos.txt.txt", "174_Proyecto_final_en_Danza.txt.txt",
    "173_Performance.txt.txt", "111_Proyectos_Empresariales.txt.txt", "157_Analisis_Numerico.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt", "168_Analisis_Real.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt", "38_Sistemas_Operativos.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt", "136_Laboratorio_de_Creacion_3.txt.txt",
    "5_Cosmos.txt.txt", "128_Taller_de_Arte_1.txt.txt", "49_Panaderia.txt.txt",
    "144_Programacion_Para_DiseÃ±o_2.txt.txt", "103_Coloquios_adm.txt.txt",
    "120_Laboratorio_de_Creacion_1.txt.txt", "175_Lenguaje_del_Cine.txt.txt",
    "133_Taller_de_Arte_2.txt.txt", "146_Juegos_y_Narrativa.txt.txt",
    "167_Topologia_2.txt.txt", "139_Taller_de_Arte_3.txt.txt",
    "33_DiseÃ±o_de_Sistemas.txt.txt", "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt", "158_Teoria_de_Numeros.txt.txt"
]
 # Replace with the actual document names

# Score only the documents named in selected_documents against the query vector.
similarity_results = calculate_similarity_for_selected_documents(
    document_data,
    query_vector,
    selected_documents,
)

# Plain-text dump of the result table (document name and similarity columns).
print(similarity_results)

                                    document  similarity
0               149_Teoria_de_Grupos.txt.txt    0.614751
1              158_Teoria_de_Numeros.txt.txt    0.554971
2         3_Calculo_Diferencial_+_Ej.txt.txt    0.408417
3            162_Algebra_Abstracta_1.txt.txt    0.399886
4        77_Neurociencia_y_Educacion.txt.txt    0.398040
..                                       ...         ...
114        47_Conceptos_y_Tecnicas_1.txt.txt   -0.212004
115      174_Proyecto_final_en_Danza.txt.txt   -0.241216
116                 7_Ingles_Nivel_2.txt.txt   -0.247637
117  75_Planificacion_y_Evaluacion_1.txt.txt   -0.319870
118  115_Tributacion_y_Entorno_Legal.txt.txt   -0.399241

[119 rows x 2 columns]


## Manhattan ##

In [None]:
# Locations of the Isomap output for the document corpus and for the queries.
# The file names suggest n_components=30 and n_neighbors=60 — TODO confirm.
document_csv, query_csv = (
    "/content/Isomap_Results/isomap_ncomp30_nneigh60.csv",
    "/content/Isomap_Results/isomap_query_ncomp30_nneigh60.csv",
)

# Read both tables; the cells below select columns '0' and '1' from query_data.
document_data = pd.read_csv(document_csv)
query_data = pd.read_csv(query_csv)

In [None]:
# Query 0: row 0 of query_data, restricted to columns '0' and '1'.
# NOTE(review): the CSV file name says ncomp30, yet only two columns are used
# here — confirm this matches what get_similar_documents_manhattan compares.
query_vector = query_data[['0', '1']].iloc[0].values

# Documents to rank against this query.
# NOTE(review): "180_DiseÃ±o_de_Produccion.txt.txt" carries a mis-encoded "ñ".
# The recorded run returned 10 of these 11 names and this was the missing one.
# The literal is left untouched because the spelling stored in the CSV is not
# visible from this cell; the check below reports any such drop explicitly.
selected_documents = [
    "119_Composicion_Visual_1.txt.txt",
    "178_Sonido.txt.txt",
    "176_Lenguaje_Visual_y_Montaje.txt.txt",
    "117_Herramientas_Digitales_1.txt.txt",
    "123_Fundamentos_de_Escultura.txt.txt",
    "180_DiseÃ±o_de_Produccion.txt.txt",
    "122_Fotografia_1.txt.txt",
    "175_Lenguaje_del_Cine.txt.txt",
    "177_Cinematografia.txt.txt",
    "179_Storytelling.txt.txt",
    "146_Juegos_y_Narrativa.txt.txt",
]

# Manhattan distance from the query to each selected document.
similarity_results = get_similar_documents_manhattan(query_vector, document_data, selected_documents)

# Each result entry is indexed as (document name, distance).
for row in similarity_results:
    print(f"Document: {row[0]}, Distance: {row[1]}")

# Report requested names that did not come back instead of dropping them silently.
returned_names = {row[0] for row in similarity_results}
missing_documents = [name for name in selected_documents if name not in returned_names]
if missing_documents:
    print(f"Warning: {len(missing_documents)} selected document(s) missing from the results: {missing_documents}")

Document: 146_Juegos_y_Narrativa.txt.txt, Distance: 0.45240840000000127
Document: 117_Herramientas_Digitales_1.txt.txt, Distance: 1.2186301999999998
Document: 179_Storytelling.txt.txt, Distance: 1.4422106000000001
Document: 176_Lenguaje_Visual_y_Montaje.txt.txt, Distance: 1.489178100000001
Document: 175_Lenguaje_del_Cine.txt.txt, Distance: 1.4893066000000008
Document: 178_Sonido.txt.txt, Distance: 1.8220969
Document: 123_Fundamentos_de_Escultura.txt.txt, Distance: 2.162317099999999
Document: 177_Cinematografia.txt.txt, Distance: 2.385340600000001
Document: 122_Fotografia_1.txt.txt, Distance: 2.6583320000000006
Document: 119_Composicion_Visual_1.txt.txt, Distance: 2.9528823000000006


In [None]:
# Query 1: row 1 of query_data, restricted to columns '0' and '1'.
query_vector = query_data[['0', '1']].iloc[1].values

# Documents to rank against this query.
# NOTE(review): "74_Desarrollo__NiÃ±o_y_Adolescente.txt.txt" carries a
# mis-encoded "ñ"; if the CSV stores a different spelling it will not be
# matched. The literal is left untouched; the check below reports any drop.
selected_documents = [
    "184_Ergonomia.txt.txt",
    "105_Marketing_Digital.txt.txt",
    "10_Autoconocimiento.txt.txt",
    "102_Principios_de_Finanzas.txt.txt",
    "135_Gestion_y_Produccion_Cultural.txt.txt",
    "60_Coloquios_Gastr.txt.txt",
    "91_Inclusion_y_Diversidad.txt.txt",
    "2_Taller_de_Ing._Cs._Computacion.txt.txt",
    "93_Practica_4.txt.txt",
    "12_Ingles_Nivel_3.txt.txt",
    "45_Proyecto_Integrador_CMP.txt.txt",
    "94_Zoologia_+Lab.txt.txt",
    "95_Fisiologia_+Lab.txt.txt",
    "174_Proyecto_final_en_Danza.txt.txt",
    "29_Teoria_de_la_Computacion.txt.txt",
    "74_Desarrollo__NiÃ±o_y_Adolescente.txt.txt",
    "92_Proyecto_Integrador_EDU.txt.txt",
    "40_Practica_Pre-Profesional_PASEM.txt.txt",
    "103_Coloquios_adm.txt.txt",
]

# Manhattan distance from the query to each selected document.
similarity_results = get_similar_documents_manhattan(query_vector, document_data, selected_documents)

# Each result entry is indexed as (document name, distance).
for row in similarity_results:
    print(f"Document: {row[0]}, Distance: {row[1]}")

# Report requested names that did not come back instead of dropping them silently.
returned_names = {row[0] for row in similarity_results}
missing_documents = [name for name in selected_documents if name not in returned_names]
if missing_documents:
    print(f"Warning: {len(missing_documents)} selected document(s) missing from the results: {missing_documents}")

Document: 40_Practica_Pre-Profesional_PASEM.txt.txt, Distance: 0.4184138999999991
Document: 92_Proyecto_Integrador_EDU.txt.txt, Distance: 0.593443999999999
Document: 60_Coloquios_Gastr.txt.txt, Distance: 0.6148730000000002
Document: 91_Inclusion_y_Diversidad.txt.txt, Distance: 0.6762636999999998
Document: 174_Proyecto_final_en_Danza.txt.txt, Distance: 0.8033939999999999
Document: 45_Proyecto_Integrador_CMP.txt.txt, Distance: 0.9017907000000003
Document: 93_Practica_4.txt.txt, Distance: 0.9466757000000001
Document: 103_Coloquios_adm.txt.txt, Distance: 1.0314983999999994
Document: 29_Teoria_de_la_Computacion.txt.txt, Distance: 1.2279879999999999
Document: 102_Principios_de_Finanzas.txt.txt, Distance: 1.236044699999999
Document: 10_Autoconocimiento.txt.txt, Distance: 1.403194
Document: 105_Marketing_Digital.txt.txt, Distance: 1.8352795999999998
Document: 184_Ergonomia.txt.txt, Distance: 1.9760065999999998
Document: 95_Fisiologia_+Lab.txt.txt, Distance: 2.0317824999999985
Document: 2_Talle

In [None]:
# Query 2: row 2 of query_data, restricted to columns '0' and '1'.
query_vector = query_data.loc[:, ['0', '1']].iloc[2].values

# Documents to rank against this query.
selected_documents = [
    "65_Alta_Cocina_Mundial.txt.txt",
    "110_Creatividad_Empresarial.txt.txt",
    "67_Innovacion_Culinaria.txt.txt",
    "47_Conceptos_y_Tecnicas_1.txt.txt",
    "62_Pasteleria.txt.txt",
    "25_Cultura_Gastronomica.txt.txt",
    "64_Practica_Culinaria_2.txt.txt",
    "66_Reposteria_y_Chocolateria.txt.txt",
    "53_Conceptos_y_Tecnicas_2_+PRA.txt.txt",
    "54_Practica_Culinaria_1.txt.txt",
    "115_Tributacion_y_Entorno_Legal.txt.txt",
    "61_Alta_Cocina_Francesa.txt.txt",
    "57_Alta_Cocina_Ecuatoriana.txt.txt",
    "68_Enologia_y_Cocteleria.txt.txt",
    "113_Negocios_Internacionales.txt.txt",
    "114_Innovacion_y_Sustentabilidad.txt.txt",
    "104_Finanzas_Corporativas.txt.txt",
    "49_Panaderia.txt.txt",
    "71_Identidad_Culinaria.txt.txt",
]

# Manhattan distance from the query to each selected document.
similarity_results = get_similar_documents_manhattan(
    query_vector,
    document_data,
    selected_documents,
)

# Each result entry is indexed as (document name, distance).
for entry in similarity_results:
    print(f"Document: {entry[0]}, Distance: {entry[1]}")

Document: 57_Alta_Cocina_Ecuatoriana.txt.txt, Distance: 0.7091524999999992
Document: 61_Alta_Cocina_Francesa.txt.txt, Distance: 1.1071017999999997
Document: 65_Alta_Cocina_Mundial.txt.txt, Distance: 1.3570294999999994
Document: 71_Identidad_Culinaria.txt.txt, Distance: 1.4785715000000001
Document: 47_Conceptos_y_Tecnicas_1.txt.txt, Distance: 1.5584915000000006
Document: 49_Panaderia.txt.txt, Distance: 1.6320305
Document: 53_Conceptos_y_Tecnicas_2_+PRA.txt.txt, Distance: 1.7987061999999998
Document: 67_Innovacion_Culinaria.txt.txt, Distance: 1.8922498999999986
Document: 114_Innovacion_y_Sustentabilidad.txt.txt, Distance: 2.1220634999999994
Document: 66_Reposteria_y_Chocolateria.txt.txt, Distance: 2.1227367
Document: 68_Enologia_y_Cocteleria.txt.txt, Distance: 2.261699499999999
Document: 64_Practica_Culinaria_2.txt.txt, Distance: 2.263859499999999
Document: 54_Practica_Culinaria_1.txt.txt, Distance: 2.2777499999999993
Document: 62_Pasteleria.txt.txt, Distance: 2.5125789000000003
Document

In [None]:
# Query 3: row 3 of query_data, restricted to columns '0' and '1'.
query_vector = query_data.loc[:, ['0', '1']].iloc[3].values

# Documents to rank against this query.
selected_documents = [
    "171_Barra_para_danza_contemporanea.txt.txt",
    "129_Coloquios_ART.txt.txt",
    "125_Nuevos_Medios.txt.txt",
    "138_Enfasis_3.txt.txt",
    "126_Arte_Contemporaneo.txt.txt",
    "169_Improvisacion.txt.txt",
    "132_Enfasis_1.txt.txt",
    "134_Enfasis_2.txt.txt",
    "173_Performance.txt.txt",
    "172_Composicion.txt.txt",
    "170_Danza_Moderna_2.txt.txt",
    "141_Temas_en_Comunicacion_y_Arte.txt.txt",
    "124_Teoria_Critica_1__Arte_&_Media.txt.txt",
]

# Manhattan distance from the query to each selected document.
similarity_results = get_similar_documents_manhattan(
    query_vector,
    document_data,
    selected_documents,
)

# Each result entry is indexed as (document name, distance).
for entry in similarity_results:
    print(f"Document: {entry[0]}, Distance: {entry[1]}")

Document: 124_Teoria_Critica_1__Arte_&_Media.txt.txt, Distance: 0.956863499999999
Document: 125_Nuevos_Medios.txt.txt, Distance: 1.0140289999999998
Document: 171_Barra_para_danza_contemporanea.txt.txt, Distance: 1.4878191000000012
Document: 138_Enfasis_3.txt.txt, Distance: 1.7025657999999986
Document: 132_Enfasis_1.txt.txt, Distance: 1.7663184999999988
Document: 126_Arte_Contemporaneo.txt.txt, Distance: 1.8422349999999996
Document: 134_Enfasis_2.txt.txt, Distance: 2.023867199999998
Document: 141_Temas_en_Comunicacion_y_Arte.txt.txt, Distance: 2.058294499999999
Document: 173_Performance.txt.txt, Distance: 2.2474324999999995
Document: 169_Improvisacion.txt.txt, Distance: 2.2580575000000005
Document: 129_Coloquios_ART.txt.txt, Distance: 2.3403421999999994
Document: 170_Danza_Moderna_2.txt.txt, Distance: 2.341356
Document: 172_Composicion.txt.txt, Distance: 3.2253032


In [None]:
# Query 4: row 4 of query_data, restricted to columns '0' and '1'.
query_vector = query_data.loc[:, ['0', '1']].iloc[4].values

# Documents to rank against this query.
selected_documents = [
    "3_Calculo_Diferencial_+_Ej.txt.txt",
    "155_Logica_y_Teoria_de_Conjuntos.txt.txt",
    "161_Algebra_Lineal_2.txt.txt",
    "18_Calculo_Vectorial.txt.txt",
    "162_Algebra_Abstracta_1.txt.txt",
    "164_Analisis_Funcional.txt.txt",
    "153_Introduccion_a_Probabilidades.txt.txt",
    "150_Variable_Compleja.txt.txt",
    "165_Algebra_Abstracta_2.txt.txt",
    "15_Matematicas_Discretas.txt.txt",
    "163_Geometria_Diferencial.txt.txt",
    "22_Algebra_Lineal_1_+Ej.txt.txt",
    "137_Matematicas_Cotidianas.txt.txt",
    "154_Fundamentos_de_Geometria.txt.txt",
    "149_Teoria_de_Grupos.txt.txt",
    "160_Ecuaciones_Diferenciales_Parciales.txt.txt",
    "9_Calculo_Integral_+_Ej.txt.txt",
    "166_Topologia_1.txt.txt",
    "168_Analisis_Real.txt.txt",
    "167_Topologia_2.txt.txt",
    "158_Teoria_de_Numeros.txt.txt",
]

# Manhattan distance from the query to each selected document.
similarity_results = get_similar_documents_manhattan(
    query_vector,
    document_data,
    selected_documents,
)

# Each result entry is indexed as (document name, distance).
for entry in similarity_results:
    print(f"Document: {entry[0]}, Distance: {entry[1]}")

Document: 168_Analisis_Real.txt.txt, Distance: 1.1237294
Document: 165_Algebra_Abstracta_2.txt.txt, Distance: 1.152350200000001
Document: 166_Topologia_1.txt.txt, Distance: 1.2636655000000008
Document: 164_Analisis_Funcional.txt.txt, Distance: 1.3497130999999998
Document: 149_Teoria_de_Grupos.txt.txt, Distance: 1.490054860000001
Document: 167_Topologia_2.txt.txt, Distance: 1.5399034000000011
Document: 154_Fundamentos_de_Geometria.txt.txt, Distance: 1.6287046
Document: 162_Algebra_Abstracta_1.txt.txt, Distance: 1.6528060000000004
Document: 161_Algebra_Lineal_2.txt.txt, Distance: 1.7140755999999988
Document: 160_Ecuaciones_Diferenciales_Parciales.txt.txt, Distance: 1.757041
Document: 22_Algebra_Lineal_1_+Ej.txt.txt, Distance: 1.8207981000000002
Document: 18_Calculo_Vectorial.txt.txt, Distance: 1.9915599999999998
Document: 163_Geometria_Diferencial.txt.txt, Distance: 2.009983
Document: 158_Teoria_de_Numeros.txt.txt, Distance: 2.2741416
Document: 9_Calculo_Integral_+_Ej.txt.txt, Distance: 