In [1]:
import pandas as pd
import scipy.io
import numpy as np
import scipy.sparse
import os
import src.preprocessing.data_cleaning as data_cleaning
import src.preprocessing.normalization as normalization
import src.preprocessing.feature_selection as feature_selection
import src.preprocessing.dim_reduction as dim_reduction
import src.analysis.clustering as clustering
import src.analysis.cell_identification as cell_identification
import src.evaluation.evaluation as evaluation

def load_results(results_path, tissue):
    """
    Load the saved results CSV for a tissue into a DataFrame.

    The file is looked up at
    ``{results_path}/{tissue}_results/{tissue}_all_results.csv``.

    Parameters:
    -----------
    results_path : str
        The base directory where results are stored.
    tissue : str
        The tissue type for which results are to be loaded.

    Returns:
    --------
    pd.DataFrame or None
        The loaded results dataframe, or None (after printing a
        "File not found" message) when no regular file exists at the
        expected path.
    """
    results_file_path = f'{results_path}/{tissue}_results/{tissue}_all_results.csv'

    # isfile (rather than exists) so that a directory sitting at the target
    # path is reported as missing instead of failing inside pd.read_csv.
    if not os.path.isfile(results_file_path):
        print(f"File not found: {results_file_path}")
        return None

    return pd.read_csv(results_file_path)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the saved results table for the "Tumor" tissue from ./results.
results_df = load_results("./results", "Tumor")
# load_results returns None (and prints its own "File not found" message)
# when the CSV is missing, so only report success when a frame came back.
if results_df is not None:
    print("Results loaded")

Results loaded


In [7]:
# Print the last row of the results table as a Series (one line per column).
print(results_df.iloc[-1])

pipeline_id                                        FHMC_NB_SHVG_UMAP_MMC_MBA
barcodes                   CID3586_AAACCTGAGAGTAATC,CID3586_AAACCTGAGGCTC...
clusters                   1,1,1,1,3,1,3,8,1,1,0,1,1,0,1,3,1,1,1,1,0,1,1,...
cell_types                 T-cells,T-cells,T-cells,T-cells,Myeloid,T-cell...
Silhouette_Score                                                    0.472135
Davies_Bouldin_Index                                                1.011322
Calinski_Harabasz_Score                                         35643.218968
ARI                                                                      1.0
NMI                                                                      1.0
V_measure                                                                1.0
Accuracy                                                            0.513576
Precision                                                           0.669535
Recall                                                              0.513576

In [4]:
# Preview the first rows of the 'clusters' column.
print(results_df['clusters'].head())

0    6,5,8,1,5,0,3,7,6,2,2,2,8,6,2,2,2,4,1,6,3,0,8,...
1    6,5,8,1,5,0,3,7,6,2,2,2,8,6,2,2,2,4,1,6,3,0,8,...
2    6,5,8,1,5,0,3,7,6,2,2,2,8,6,2,2,2,4,1,6,3,0,8,...
3    0,1,5,2,3,4,5,7,5,8,5,6,5,10,7,7,8,10,1,4,4,4,...
4    0,1,5,2,3,4,5,7,5,8,5,6,5,10,7,7,8,10,1,4,4,4,...
Name: clusters, dtype: object


In [5]:
# Convert each comma-separated label string in 'clusters' into a list of ints.
# Only string entries are parsed, so re-running this cell (when the column
# already holds lists) leaves the values unchanged instead of raising.
results_df["clusters"] = results_df["clusters"].apply(
    lambda raw: [int(label) for label in raw.split(",")] if isinstance(raw, str) else raw
)
print(results_df['clusters'].head())

0    [6, 5, 8, 1, 5, 0, 3, 7, 6, 2, 2, 2, 8, 6, 2, ...
1    [6, 5, 8, 1, 5, 0, 3, 7, 6, 2, 2, 2, 8, 6, 2, ...
2    [6, 5, 8, 1, 5, 0, 3, 7, 6, 2, 2, 2, 8, 6, 2, ...
3    [0, 1, 5, 2, 3, 4, 5, 7, 5, 8, 5, 6, 5, 10, 7,...
4    [0, 1, 5, 2, 3, 4, 5, 7, 5, 8, 5, 6, 5, 10, 7,...
Name: clusters, dtype: object
