In [None]:
#done 23 Aug
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import json
import openai
from dotenv import load_dotenv
import matplotlib.pyplot as plt
import scanpy as sc
import pandas as pd
import pickle
from datetime import datetime, timedelta
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage, ChatMessage
import scvi
from matplotlib.pyplot import rc_context
sc.set_figure_params(dpi=100)
scvi._settings.seed = 0
from scvi.model import SCVI
import plotly.express as px
import plotly.graph_objects as go
import ast
import matplotlib
import warnings
import numpy as np
# Suppress all warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

openai.api_key = 'sk-xxx'
openai.base_url= "xxx/v1/"

# Founction tools of scChat

In [45]:
current_adata = None
base_annotated_adata = None
adata = None
resolution = 0.5
sample_mapping = None
SGP = None
global function_flag
function_flag = False
global display_flag
display_flag = False


In [47]:
def find_and_load_sample_mapping(directory):
    global sample_mapping

    for root, dirs, files in os.walk(directory):
        for file in files:
            if file == 'sample_mapping.json':
                file_path = os.path.join(root, file)
                
                with open(file_path, 'r') as f:
                    sample_mapping = json.load(f)
                
                print(f"'sample_mapping.json' found and loaded from {file_path}")
                return sample_mapping
    
    # If the file wasn't found
    return None

#tag True for RAG, False for marker genes list
def get_rag_and_markers(tag, specification):
    # specification = None
    # # file_path = "../media/specification.json"
    # file_path = "media/specification.json"
    # if os.path.exists(file_path):
    #     with open(file_path, 'r') as file:
    #         specification = json.load(file)
    #         # print("BASE:", specification)
    # else:
    #     print ("specification not found")
    #     return "-"

    base_file_path = os.path.join("schatbot/scChat_RAG", specification['marker'].lower())
    file_paths = []
    
    for tissue in specification['tissue']:
        file_path = os.path.join(base_file_path, tissue.lower(), specification['condition'] + '.json')
        file_paths.append(file_path)
    
    print("Constructed file paths:", file_paths)

    for file_path in file_paths:
        if os.path.exists(file_path):
            print(f"File found: {file_path}")
            with open(file_path, 'r') as file:
                data = json.load(file)
                # print(data)
        else:
            print(f"File not found: {file_path}")
            continue
    
    combined_data = {}

    # Iterate through the file paths
    for file_path in file_paths:
        if os.path.exists(file_path):
            print(f"File found: {file_path}")
            with open(file_path, 'r') as file:
                data = json.load(file)

                if tag:  # If tag is true, combine all data from the files
                    for cell_type, cell_data in data.items():
                        if cell_type not in combined_data:
                            combined_data[cell_type] = cell_data
                        else:
                            combined_data[cell_type]['markers'].extend(cell_data['markers'])

                else:  # If tag is false, retrieve only marker name + list of genes
                    for cell_type, cell_data in data.items():
                        if cell_type not in combined_data:
                            combined_data[cell_type] = {'genes': []}
                        combined_data[cell_type]['genes'].extend([marker['gene'] for marker in cell_data['markers']])
        
        else:
            print(f"File not found: {file_path}")

    fptr = open("testop.txt", "w")
    fptr.write(json.dumps(combined_data, indent=4))
    # print("Combined data:", json.dumps(combined_data, indent=4))
    return combined_data

def filter_existing_genes(adata, gene_list):
    existing_genes = [gene for gene in gene_list if gene in adata.raw.var_names]
    return existing_genes

def extract_top_genes_stats(adata, groupby='leiden', n_genes=25):
    result = adata.uns['rank_genes_groups']
    gene_names = result['names']
    pvals = result['pvals']
    pvals_adj = result['pvals_adj']
    logfoldchanges = result['logfoldchanges']
    
    top_genes_stats = {group: {} for group in gene_names.dtype.names}
    
    for group in gene_names.dtype.names:
        top_genes_stats[group]['gene'] = gene_names[group][:n_genes]
        top_genes_stats[group]['pval'] = pvals[group][:n_genes]
        top_genes_stats[group]['pval_adj'] = pvals_adj[group][:n_genes]
        top_genes_stats[group]['logfoldchange'] = logfoldchanges[group][:n_genes]
    
    top_genes_stats_df = pd.concat({group: pd.DataFrame(top_genes_stats[group])
                                    for group in top_genes_stats}, axis=0)

    top_genes_stats_df = top_genes_stats_df.reset_index()
    
    top_genes_stats_df = top_genes_stats_df.rename(columns={'level_0': 'cluster', 'level_1': 'index'})
    return top_genes_stats_df

# Define the statistical extraction and API interaction functions
def calculate_cluster_statistics(adata, category, n_genes=25, specification=None):
    #adding
    global sample_mapping
    base_markers = get_rag_and_markers(False,specification)
    markers = []
    for cell_type, cell_data in base_markers.items():
        # print (cell_type)
        # print ('--')
        # print (cell_data)
        markers += cell_data['genes']
    
    # print ("MARKERS BEFORE FILTER2 ", markers)
    markers = filter_existing_genes(adata, markers)
    # print ("MARKERS FINAL2 ", markers)
    markers = list(set(markers))

    sc.tl.rank_genes_groups(adata, 'leiden', method='wilcoxon', n_genes=n_genes)
    top_genes_df = extract_top_genes_stats(adata, groupby='leiden', n_genes=25)
    second_dataset = True

    if sample_mapping:
        sc.tl.dendrogram(adata, groupby='leiden', use_rep='X_scVI')
        # sc.tl.dendrogram(adata, groupby='leiden')
    else:
        sc.tl.dendrogram(adata, groupby='leiden')

    marker_expression = sc.get.obs_df(adata, keys=['leiden'] + markers, use_raw=True)
    marker_expression.set_index('leiden', inplace=True)
    # Calculating mean and proportion of expression per cluster
    mean_expression = marker_expression.groupby('leiden').mean()
    expression_proportion = marker_expression.gt(0).groupby('leiden').mean()
    global global_top_genes_df, global_mean_expression, global_expression_proportion
    global_top_genes_df = top_genes_df
    global_mean_expression = mean_expression
    global_expression_proportion = expression_proportion
    return global_top_genes_df, global_mean_expression, global_expression_proportion

def retreive_stats(specification):
    with open("basic_data/mean_expression.json", 'r') as file:
        mean_expression = json.load(file)
    with open("basic_data/expression_proportion.json", 'r') as file:
        expression_proportion = json.load(file)
    global adata
    global_top_genes_df, global_mean_expression, global_expression_proportion = calculate_cluster_statistics(adata, 'overall', specification=specification)
    # myeloid_markers = get_myeloid_markers()
    # t_cell_markers = get_t_markers()
    # overall_markers = get_overall_markers()    
    markers = get_rag_and_markers(False, specification)
    markers = ', '.join(markers)
    explanation = "Please analyze the clustering statistics and classify each cluster based on the following data: Top Genes:Mean Expression: Expression Proportion: , based on statistical data: 1. top_genes_df: 25 top genes expression within each clusters, with it's p_val, p_val_adj, and logfoldchange; 2. mean_expression of the marker genes: specific marker genes mean expression within each cluster; 3. expression_proportion of the marker genes: every cluster each gene expression fraction within each cluster, and give back the mapping dictionary in the format like this group_to_cell_type = {'0': 'Myeloid cells','1': 'T cells','2': 'Myeloid cells','3': 'Myeloid cells','4': 'T cells'} without further explanation or comment.  I only want the summary map in the response, do not give me any explanation or comment or repeat my input, i dont want any redundant information other than the summary map"
    top_genes_summary = []
    mean_expression_str = ", ".join([f"{k}: {v}" for k, v in mean_expression.items()])
    expression_proportion_str = ", ".join([f"{k}: {v}" for k, v in expression_proportion.items()])
    # myeloid_markers_str = ", ".join(myeloid_markers)
    # t_cell_markers_str = ", ".join(t_cell_markers)
    # overall_markers_str = ", ".join(overall_markers)
    summary = (
        f"Explanation: {explanation}. "
        f"Mean expression data: {mean_expression_str}. "
        f"Expression proportion data: {expression_proportion_str}. "
        f"Top genes details: {global_top_genes_df}. "
        f"markers: {markers}. "
    )
    return summary


In [None]:
globalGroupToCellType = None
def generate_umap(path_row):
    global sample_mapping
    second_dataset = True
    matplotlib.use('Agg')
    global adata
    global resolution
    path = path_row #find_file_with_extension("media", ".h5ad")
    if not path:
        return ".h5ad file isn't given, unable to generate UMAP."
    adata = sc.read_h5ad(path)
    organ = path.split('/')[-1].split('.')[0]

    global current_adata
    current_adata = adata

    # Data preprocessing
    sc.pp.filter_cells(adata, min_genes=100)
    sc.pp.filter_genes(adata, min_cells=3)
    adata.var['mt'] = adata.var_names.str.startswith('MT-')
    sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
    adata = adata[adata.obs.pct_counts_mt < 20]
    adata.layers['counts'] = adata.X.copy()  # used by scVI-tools

    # Normalization
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    adata.raw = adata

    # find_and_load_sample_mapping("media")
    # Variable genes
    if sample_mapping:
        sc.pp.highly_variable_genes(adata, n_top_genes=3000, subset=True, layer='counts', flavor="seurat_v3", batch_key="Sample")
    else:
        sc.pp.highly_variable_genes(adata, n_top_genes=3000, subset=True, layer='counts', flavor="seurat_v3")

    # Setup and load scVI model
    if sample_mapping:
        SCVI.setup_anndata(adata, layer="counts", categorical_covariate_keys=["Sample"], continuous_covariate_keys=['pct_counts_mt', 'total_counts'])
        model = SCVI.load(dir_path="schatbot/glioma_scvi_model", adata=adata)
        latent = model.get_latent_representation()
        adata.obsm['X_scVI'] = latent
        adata.layers['scvi_normalized'] = model.get_normalized_expression(library_size=1e4)
    
    # Clustering and UMAP
    if sample_mapping:
        sc.pp.neighbors(adata, use_rep='X_scVI')
    else:
        sc.pp.neighbors(adata)
    sc.tl.umap(adata)
    sc.tl.leiden(adata, resolution=resolution)


    if sample_mapping:
        adata.obs['patient_name'] = adata.obs['Sample'].map(sample_mapping)

    # UMAP plot
    umap_df = adata.obsm['X_umap']
    adata.obs['UMAP_1'] = umap_df[:, 0]
    adata.obs['UMAP_2'] = umap_df[:, 1]

    #take this from MARK
    # markers = get_overall_markers()
    specification = {'marker':'human','tissue':[organ.lower()],'condition':'normal'}
    base_markers = get_rag_and_markers(False, specification)
    markers = []
    for cell_type, cell_data in base_markers.items():
        print (cell_type)
        print ('--')
        print (cell_data)
        markers += cell_data['genes']
    
    # print ("MARKERS BEFORE FILTER ", markers)
    markers = filter_existing_genes(adata, markers)
    # print ("MARKERS FINAL ", markers)
    markers = list(set(markers))
    # Calculate statistics to feed into GPT
    statistic_data = sc.get.obs_df(adata, keys=['leiden'] + markers, use_raw=True)
    statistic_data.set_index('leiden', inplace=True)
    mean_expression = statistic_data.groupby('leiden').mean()
    pd_mean_expression = pd.DataFrame(mean_expression)
    pd_mean_expression.to_csv("basic_data/mean_expression.csv")
    pd_mean_expression.to_json("basic_data/mean_expression.json")

    expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
    pd_expression_proportion = pd.DataFrame(expression_proportion)
    pd_expression_proportion.to_csv("basic_data/expression_proportion.csv")
    pd_expression_proportion.to_json("basic_data/expression_proportion.json")
    # Dot plot
    if sample_mapping == None:
        sc.tl.dendrogram(adata, groupby='leiden')
        # with rc_context({'figure.figsize': (10, 10)}):
        #     sc.pl.dotplot(adata, markers, groupby='leiden', swap_axes=True, use_raw=True, standard_scale='var', dendrogram=True, color_map="Blues", save="dotplot.png")
        #     plt.close()
    else:
        sc.tl.dendrogram(adata, groupby='leiden', use_rep='X_scVI')
        # with rc_context({'figure.figsize': (10, 10)}):
        #     sc.pl.dotplot(adata, markers, groupby='leiden', swap_axes=True, use_raw=True, standard_scale='var', dendrogram=True, color_map="Blues", save="dotplot.png")
        #     plt.close()
    
    adata.obs['cell_type'] = 'Unknown'
    # adata.obs[['UMAP_1', 'UMAP_2', 'leiden', 'patient_name']].to_csv("basic_data/Overall cells_umap_data.csv", index=False)
    # adata.obs[['UMAP_1', 'UMAP_2', 'leiden', 'patient_name']].to_csv("process_cell_data/Overall cells_umap_data.csv", index=False)

    # Save dot plot data for Plotly
    dot_plot_data = statistic_data.reset_index().melt(id_vars='leiden', var_name='gene', value_name='expression')
    dot_plot_data.to_csv("basic_data/dot_plot_data.csv", index=False)

    # Save dendrogram data for Plotly
    dendrogram_data = adata.uns['dendrogram_leiden']
    pd_dendrogram_linkage = pd.DataFrame(dendrogram_data['linkage'], columns=['source', 'target', 'distance', 'count'])
    pd_dendrogram_linkage.to_csv("basic_data/dendrogram_data.csv", index=False)
    # rag_data = load_RAG("Overall cells")
    rag_data = get_rag_and_markers(True, specification)
    rag_data_str = ', '.join(rag_data)
                #   f"RAG Data : {', '.join(rag_data_str)}. " \

    summary = f"UMAP analysis completed. Data summary: {adata}, " \
                f"RAG Data : {str(rag_data)}. " \
              f"Cell counts details are provided. " \
              f"Additional data file generated: preface.txt."
    retrieve_stats_summary = retreive_stats(specification)
    final_summary = f"{summary} {retrieve_stats_summary}"
    current_adata = adata

    return final_summary

In [52]:
def label_clusters(cell_type, last_message):
    global adata
    global annotated_adata
    global conversation_history2
    global current_adata
    global base_annotated_adata
    adata2 = adata.copy()
    standardized_cell_type3 = cell_type.split()[0].capitalize()     
    standardized_cell_type2 = cell_type.split()[0].capitalize() + " cell"        
    standardized_cell_type = cell_type.split()[0].capitalize() + " cells"        
    # last_message = conversation_history2[-2]['content']
    try:
        start_idx = last_message.find("{")
        end_idx = last_message.rfind("}") + 1
        str_map = last_message[start_idx:end_idx]        
        map2 = ast.literal_eval(str_map)
        map2 = {str(key): value for key, value in map2.items()}        
        if standardized_cell_type == "Overall cells" or standardized_cell_type2 == "Overall cell":
            adata2.obs['cell_type_scChat'] = 'Unknown'
            for group, cell_type in map2.items():
                adata2.obs.loc[adata2.obs['leiden'] == group, 'cell_type_scChat'] = cell_type
            # adata2.obs[['UMAP_1', 'UMAP_2', 'leiden', 'patient_name', 'cell_type_scChat']].to_csv(f'scChat/umaps/{standardized_cell_type}_annotated_umap_data.csv', index=False)
            annotated_adata = adata2.copy()
            fname = f'annotated_adata/Overall cells_annotated_adata.pkl'
            with open(fname, "wb") as file:
                pickle.dump(annotated_adata, file)
            base_annotated_adata = adata2
        else:
            adata3 = base_annotated_adata.copy()
            specific_cells = adata3[adata3.obs['cell_type_scChat'].isin([standardized_cell_type])].copy()
            if specific_cells.shape[0] == 0:
                specific_cells = adata3[adata3.obs['cell_type_scChat'].isin([standardized_cell_type2])].copy()
            if specific_cells.shape[0] == 0:
                specific_cells = adata3[adata3.obs['cell_type_scChat'].isin([standardized_cell_type3])].copy()
            sc.tl.pca(specific_cells, svd_solver='arpack')
            sc.pp.neighbors(specific_cells)
            sc.tl.umap(specific_cells)
            sc.tl.leiden(specific_cells, resolution=resolution)

            specific_cells.obs['cell_type_scChat'] = 'Unknown'
            for group, cell_type in map2.items():
                specific_cells.obs.loc[specific_cells.obs['leiden'] == group, 'cell_type_scChat'] = cell_type
            
            # specific_cells.obs[['UMAP_1', 'UMAP_2', 'leiden', 'patient_name', 'cell_type_scChat']].to_csv(f'scChat/umaps/{standardized_cell_type}_annotated_umap_data.csv', index=False)
            
            annotated_adata = specific_cells.copy()
            fname = f'annotated_adata/{standardized_cell_type}_annotated_adata.pkl'
            with open(fname, "wb") as file:
                pickle.dump(annotated_adata, file)

    
    except (SyntaxError, ValueError) as e:
        print(f"Error in parsing the map: {e}")
        # Handle the error or return an appropriate response

    return "Repeat 'Annotation of clusters is complete'"


# Annotate

In [59]:
# root = '/stor/lep/workspace/AISbenchmark/AISbench'
root = '/stor/lep/workspace/AISbenchmark/Task1_data'
# organ_list = os.listdir(root)
organ_list = [file for file in os.listdir(root) if file.endswith('raw.h5ad')]

In [60]:
overlap_organ = []
for organ in organ_list:

    # if organ.lower() not in os.listdir('scChat/schatbot/scChat_RAG/human'):
    #     continue
    print(f'-------------------{organ}---------------------')
    overlap_organ.append(organ.split('.')[0])
    # study = os.listdir(os.path.join(root,organ))[0]
    # data_path = os.path.join(root,organ,study,'row_data.h5ad')
    data_path = os.path.join(root,organ)
    summary = generate_umap(data_path)

    conversation_history = [
        {'role': 'user', 'content': ' You are a chatbot for helping in Single Cell RNA Analysis, you can call the functions generate_umap, process_cells, label_clusters, display_umap, display_processed_umap and more multiple times. DO NOT FORGET THIS. respond with a greeting.'},
        {'role': 'user', 'content': 'generate umap'},
        {'role': 'user', 'content': summary},
        {'role': 'user', 'content': 'annotate the cell type of each cluster based on the UMAP analysis results'}
    ]
    # break

    new_response = openai.chat.completions.create(
                        model="gpt-4.1",
                        messages=conversation_history,
                        temperature=0.2,
                        top_p=0.4
                        # max_tokens=300
                    )
    last_message = new_response.choices[0].message.content
    label_clusters('overall cells', last_message)
    file_name = organ.replace('_raw','')
    annotated_adata.write_h5ad(f'/stor/lep/workspace/AISbenchmark/scChat/annotated_adata/{file_name}')

-------------------Bone_marrow_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/bone_marrow_raw/normal.json']
File not found: schatbot/scChat_RAG/human/bone_marrow_raw/normal.json
File not found: schatbot/scChat_RAG/human/bone_marrow_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/bone_marrow_raw/normal.json']
File not found: schatbot/scChat_RAG/human/bone_marrow_raw/normal.json
File not found: schatbot/scChat_RAG/human/bone_marrow_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/bone_marrow_raw/normal.json']
File not found: schatbot/scChat_RAG/human/bone_marrow_raw/normal.json
File not found: schatbot/scChat_RAG/human/bone_marrow_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/bone_marrow_raw/normal.json']
File not found: schatbot/scChat_RAG/human/bone_marrow_raw/normal.json
File not found: schatbot/scChat_RAG/human/bone_marrow_raw/normal.json
-------------------Kidney_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/kidney_raw/normal.json']
File not found: schatbot/scChat_RAG/human/kidney_raw/normal.json
File not found: schatbot/scChat_RAG/human/kidney_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/kidney_raw/normal.json']
File not found: schatbot/scChat_RAG/human/kidney_raw/normal.json
File not found: schatbot/scChat_RAG/human/kidney_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/kidney_raw/normal.json']
File not found: schatbot/scChat_RAG/human/kidney_raw/normal.json
File not found: schatbot/scChat_RAG/human/kidney_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] =

Constructed file paths: ['schatbot/scChat_RAG/human/kidney_raw/normal.json']
File not found: schatbot/scChat_RAG/human/kidney_raw/normal.json
File not found: schatbot/scChat_RAG/human/kidney_raw/normal.json
-------------------Eye_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/eye_raw/normal.json']
File not found: schatbot/scChat_RAG/human/eye_raw/normal.json
File not found: schatbot/scChat_RAG/human/eye_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/eye_raw/normal.json']
File not found: schatbot/scChat_RAG/human/eye_raw/normal.json
File not found: schatbot/scChat_RAG/human/eye_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/eye_raw/normal.json']
File not found: schatbot/scChat_RAG/human/eye_raw/normal.json
File not found: schatbot/scChat_RAG/human/eye_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/eye_raw/normal.json']
File not found: schatbot/scChat_RAG/human/eye_raw/normal.json
File not found: schatbot/scChat_RAG/human/eye_raw/normal.json
-------------------Blood_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/blood_raw/normal.json']
File not found: schatbot/scChat_RAG/human/blood_raw/normal.json
File not found: schatbot/scChat_RAG/human/blood_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/blood_raw/normal.json']
File not found: schatbot/scChat_RAG/human/blood_raw/normal.json
File not found: schatbot/scChat_RAG/human/blood_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/blood_raw/normal.json']
File not found: schatbot/scChat_RAG/human/blood_raw/normal.json
File not found: schatbot/scChat_RAG/human/blood_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/blood_raw/normal.json']
File not found: schatbot/scChat_RAG/human/blood_raw/normal.json
File not found: schatbot/scChat_RAG/human/blood_raw/normal.json
-------------------Heart_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/heart_raw/normal.json']
File not found: schatbot/scChat_RAG/human/heart_raw/normal.json
File not found: schatbot/scChat_RAG/human/heart_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/heart_raw/normal.json']
File not found: schatbot/scChat_RAG/human/heart_raw/normal.json
File not found: schatbot/scChat_RAG/human/heart_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/heart_raw/normal.json']
File not found: schatbot/scChat_RAG/human/heart_raw/normal.json
File not found: schatbot/scChat_RAG/human/heart_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/heart_raw/normal.json']
File not found: schatbot/scChat_RAG/human/heart_raw/normal.json
File not found: schatbot/scChat_RAG/human/heart_raw/normal.json
-------------------Trachea_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/trachea_raw/normal.json']
File not found: schatbot/scChat_RAG/human/trachea_raw/normal.json
File not found: schatbot/scChat_RAG/human/trachea_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/trachea_raw/normal.json']
File not found: schatbot/scChat_RAG/human/trachea_raw/normal.json
File not found: schatbot/scChat_RAG/human/trachea_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/trachea_raw/normal.json']
File not found: schatbot/scChat_RAG/human/trachea_raw/normal.json
File not found: schatbot/scChat_RAG/human/trachea_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] =

Constructed file paths: ['schatbot/scChat_RAG/human/trachea_raw/normal.json']
File not found: schatbot/scChat_RAG/human/trachea_raw/normal.json
File not found: schatbot/scChat_RAG/human/trachea_raw/normal.json
-------------------Testis_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/testis_raw/normal.json']
File not found: schatbot/scChat_RAG/human/testis_raw/normal.json
File not found: schatbot/scChat_RAG/human/testis_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/testis_raw/normal.json']
File not found: schatbot/scChat_RAG/human/testis_raw/normal.json
File not found: schatbot/scChat_RAG/human/testis_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/testis_raw/normal.json']
File not found: schatbot/scChat_RAG/human/testis_raw/normal.json
File not found: schatbot/scChat_RAG/human/testis_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/testis_raw/normal.json']
File not found: schatbot/scChat_RAG/human/testis_raw/normal.json
File not found: schatbot/scChat_RAG/human/testis_raw/normal.json
-------------------Pancreas_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/pancreas_raw/normal.json']
File not found: schatbot/scChat_RAG/human/pancreas_raw/normal.json
File not found: schatbot/scChat_RAG/human/pancreas_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/pancreas_raw/normal.json']
File not found: schatbot/scChat_RAG/human/pancreas_raw/normal.json
File not found: schatbot/scChat_RAG/human/pancreas_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/pancreas_raw/normal.json']
File not found: schatbot/scChat_RAG/human/pancreas_raw/normal.json
File not found: schatbot/scChat_RAG/human/pancreas_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/pancreas_raw/normal.json']
File not found: schatbot/scChat_RAG/human/pancreas_raw/normal.json
File not found: schatbot/scChat_RAG/human/pancreas_raw/normal.json
-------------------Adipose_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/adipose_raw/normal.json']
File not found: schatbot/scChat_RAG/human/adipose_raw/normal.json
File not found: schatbot/scChat_RAG/human/adipose_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/adipose_raw/normal.json']
File not found: schatbot/scChat_RAG/human/adipose_raw/normal.json
File not found: schatbot/scChat_RAG/human/adipose_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/adipose_raw/normal.json']
File not found: schatbot/scChat_RAG/human/adipose_raw/normal.json
File not found: schatbot/scChat_RAG/human/adipose_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/adipose_raw/normal.json']
File not found: schatbot/scChat_RAG/human/adipose_raw/normal.json
File not found: schatbot/scChat_RAG/human/adipose_raw/normal.json
-------------------Liver_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/liver_raw/normal.json']
File not found: schatbot/scChat_RAG/human/liver_raw/normal.json
File not found: schatbot/scChat_RAG/human/liver_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/liver_raw/normal.json']
File not found: schatbot/scChat_RAG/human/liver_raw/normal.json
File not found: schatbot/scChat_RAG/human/liver_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/liver_raw/normal.json']
File not found: schatbot/scChat_RAG/human/liver_raw/normal.json
File not found: schatbot/scChat_RAG/human/liver_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/liver_raw/normal.json']
File not found: schatbot/scChat_RAG/human/liver_raw/normal.json
File not found: schatbot/scChat_RAG/human/liver_raw/normal.json
-------------------Breast_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/breast_raw/normal.json']
File not found: schatbot/scChat_RAG/human/breast_raw/normal.json
File not found: schatbot/scChat_RAG/human/breast_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/breast_raw/normal.json']
File not found: schatbot/scChat_RAG/human/breast_raw/normal.json
File not found: schatbot/scChat_RAG/human/breast_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/breast_raw/normal.json']
File not found: schatbot/scChat_RAG/human/breast_raw/normal.json
File not found: schatbot/scChat_RAG/human/breast_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/breast_raw/normal.json']
File not found: schatbot/scChat_RAG/human/breast_raw/normal.json
File not found: schatbot/scChat_RAG/human/breast_raw/normal.json
-------------------Salivary_gland_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/salivary_gland_raw/normal.json']
File not found: schatbot/scChat_RAG/human/salivary_gland_raw/normal.json
File not found: schatbot/scChat_RAG/human/salivary_gland_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/salivary_gland_raw/normal.json']
File not found: schatbot/scChat_RAG/human/salivary_gland_raw/normal.json
File not found: schatbot/scChat_RAG/human/salivary_gland_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/salivary_gland_raw/normal.json']
File not found: schatbot/scChat_RAG/human/salivary_gland_raw/normal.json
File not found: schatbot/scChat_RAG/human/salivary_gland_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/salivary_gland_raw/normal.json']
File not found: schatbot/scChat_RAG/human/salivary_gland_raw/normal.json
File not found: schatbot/scChat_RAG/human/salivary_gland_raw/normal.json
-------------------Skin_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/skin_raw/normal.json']
File not found: schatbot/scChat_RAG/human/skin_raw/normal.json
File not found: schatbot/scChat_RAG/human/skin_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/skin_raw/normal.json']
File not found: schatbot/scChat_RAG/human/skin_raw/normal.json
File not found: schatbot/scChat_RAG/human/skin_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/skin_raw/normal.json']
File not found: schatbot/scChat_RAG/human/skin_raw/normal.json
File not found: schatbot/scChat_RAG/human/skin_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/skin_raw/normal.json']
File not found: schatbot/scChat_RAG/human/skin_raw/normal.json
File not found: schatbot/scChat_RAG/human/skin_raw/normal.json
-------------------Intestine_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/intestine_raw/normal.json']
File not found: schatbot/scChat_RAG/human/intestine_raw/normal.json
File not found: schatbot/scChat_RAG/human/intestine_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/intestine_raw/normal.json']
File not found: schatbot/scChat_RAG/human/intestine_raw/normal.json
File not found: schatbot/scChat_RAG/human/intestine_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/intestine_raw/normal.json']
File not found: schatbot/scChat_RAG/human/intestine_raw/normal.json
File not found: schatbot/scChat_RAG/human/intestine_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/intestine_raw/normal.json']
File not found: schatbot/scChat_RAG/human/intestine_raw/normal.json
File not found: schatbot/scChat_RAG/human/intestine_raw/normal.json
-------------------Ovary_raw.h5ad---------------------
         Falling back to preprocessing with `sc.pp.pca` and default params.




Constructed file paths: ['schatbot/scChat_RAG/human/ovary_raw/normal.json']
File not found: schatbot/scChat_RAG/human/ovary_raw/normal.json
File not found: schatbot/scChat_RAG/human/ovary_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/ovary_raw/normal.json']
File not found: schatbot/scChat_RAG/human/ovary_raw/normal.json
File not found: schatbot/scChat_RAG/human/ovary_raw/normal.json
Constructed file paths: ['schatbot/scChat_RAG/human/ovary_raw/normal.json']
File not found: schatbot/scChat_RAG/human/ovary_raw/normal.json
File not found: schatbot/scChat_RAG/human/ovary_raw/normal.json


  mean_expression = statistic_data.groupby('leiden').mean()
  expression_proportion = statistic_data.gt(0).groupby('leiden').mean()
  mean_df = rep_df.groupby(level=0).mean()
  return reduction(axis=axis, out=out, **passkwargs)
  mean_df = rep_df.groupby(level=0).mean()
  mean_expression = marker_expression.groupby('leiden').mean()
  expression_proportion = marker_expression.gt(0).groupby('leiden').mean()


Constructed file paths: ['schatbot/scChat_RAG/human/ovary_raw/normal.json']
File not found: schatbot/scChat_RAG/human/ovary_raw/normal.json
File not found: schatbot/scChat_RAG/human/ovary_raw/normal.json
