This is the script for Task 1 of BAISBench. We use CellTypist as an example; you can replace its results with the output of your own AI scientist.

In [None]:
# to run this script with CellTypist, you need to first install it.
# !pip install celltypist

import celltypist
from celltypist import models
import scanpy as sc
import os
import openai
import re
import numpy as np

# download task1 dataset

There are 31 organs in total:

['Skin', 'Spleen', 'Bladder', 'Blood', 'Breast', 'Intestine', 'Bone_marrow', 'Liver', 'Pancreas', 'Oesophagus', 'Stomach', 'Testis', 'Oral_cavity', 'Heart', 'Thymus', 'Salivary_gland', 'Eye', 'Adipose', 'Lymph_node', 'Adrenal_gland', 'Nose', 'Gallbladder', 'Muscle', 'Uterine_tube', 'Femur', 'Kidney', 'Ovary', 'Prostate', 'Uterus', 'Trachea', 'Spinal_cord']

The subset supported by scChat:

['Skin','Blood','Breast','Intestine','Bone_marrow','Liver','Pancreas','Testis','Heart','Salivary_gland','Eye','Adipose','Kidney','Ovary','Trachea']

In [None]:
from huggingface_hub import hf_hub_download

# Organs whose task1 file should be fetched. Each file in the dataset repo is
# named task1_<Organ>.h5ad; add more organ names here to download more files.
organs_to_download = ['Skin']  # take skin as example

for organ_name in organs_to_download:
    file_path = hf_hub_download(
        repo_id="EperLuo/BAISBench",
        filename=f"task1_{organ_name}.h5ad",
        repo_type="dataset",
        local_dir="Task1_data",
        local_dir_use_symlinks=False,
    )

    # Show where the file was stored locally.
    print(file_path)


# load model

In [None]:
# Show all available models that can be downloaded and used.
# The description is printed explicitly: a bare expression that is not the
# last statement of a cell (or that runs in a plain script) is never displayed.
print(models.models_description())

# Download all the available models.
models.download_models(force_update=True)

# uHAF

In [None]:
# The package is imported under the alias `uhaflib` because the name `uhaf`
# is rebound just below to the object returned by `build_uhaf`.
import uhaf as uhaflib

# `uhaf.df_uhafs` is looked up by organ name later in this script to obtain
# the cell-type tree used for label alignment and scoring.
uhaf = uhaflib.build_uhaf(latest=True)
# Print how many entries `df_uhafs` contains.
print(len(uhaf.df_uhafs))

In [None]:
# set OpenAI API key
# Prefer the OPENAI_API_KEY environment variable so that a real key never has
# to be written into this file; the placeholder is kept as a fallback and must
# be replaced if the environment variable is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'sk-your-key')

def clean_markdown_code_block(raw: str) -> str:
    """Strip a surrounding Markdown code fence from *raw*.

    Removes an opening fence line (three backticks plus an optional language
    tag) and a closing fence line, then trims surrounding whitespace. Text
    without a fence is returned with only its outer whitespace trimmed.

    Args:
        raw: Text that may be wrapped in a fenced code block.

    Returns:
        The text with the surrounding fence removed.
    """
    # Trim first: both patterns are anchored to the very start / end of the
    # string, so leading or trailing whitespace around the fence would
    # otherwise stop them from matching.
    text = raw.strip()
    text = re.sub(r'^```.*\n', '', text)
    text = re.sub(r'\n```$', '', text)
    return text.strip()

def map_cell_types(reference_cell_types, user_input):
    """Ask an OpenAI chat model to map cell-type names onto reference names.

    Args:
        reference_cell_types: Iterable of reference cell-type name strings;
            they are joined with ", " into the prompt.
        user_input: The cell-type names to map; formatted into the prompt
            with ``str.format``.

    Returns:
        A dict mapping input names to reference names, as parsed from the
        model's reply. An empty dict is returned (after printing the problem
        and the raw reply) when the reply cannot be parsed as a Python dict
        literal.
    """
    import ast  # local import: only needed to parse the model's reply

    prompt = (
        "Given the reference cell types: {reference}, "
        "please map the user input cell types: {user_input} "
        "to the reference cell types (find the most related one and map to it). Return the result as a dictionary. "
        "For example, if user input is 'naive t' and reference includes 'Naive T cell', "
        "the output should be something like {{'naive t': 'Naive T cell'}}. "
        "if user input is 'Intestinal epithelial stem cell' and reference includes 'Epithelial cell',"
        "the output should be something like {{'Intestinal epithelial stem cell': 'Epithelial cell'}}. "
        "Note that the item in the dictionary should be strictly included in the reference cell types and the user input cell types,"
        "do not invent anything by yourself. If there is organ name (such as Intestinal) in the user input cell type, just ignore it."
        "If there is unknown cell type, labeled it as Unclassified."
        "Please output the result strictly as a valid Python dictionary without any additional explanation, formatting, or markdown code block."
    ).format(reference=", ".join(reference_cell_types), user_input=user_input)

    # OpenAI API
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}]
    )

    # Guard against a reply without text content so parsing fails cleanly.
    output = response.choices[0].message.content or ""

    # transform the returned str to dict
    try:
        clean_str = clean_markdown_code_block(output)
        # SECURITY: the reply is untrusted text. The original used eval(),
        # which would execute arbitrary code contained in the reply;
        # literal_eval only accepts Python literals.
        result = ast.literal_eval(clean_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        print("Error parsing the output:", e)
        print(output)
        return {}

    # A syntactically valid non-dict literal (e.g. a list) cannot be used as
    # a mapping by callers, so treat it as a parse failure too.
    if not isinstance(result, dict):
        print("Error parsing the output:", f"expected a dict, got {type(result).__name__}")
        print(output)
        return {}

    return result

def get_second_level_children(tree, node):
    """Return the grandchildren of *node* in a father/child table.

    Args:
        tree: Table with 'father' and 'child' columns, one row per edge.
        node: Value in the 'father' column whose grandchildren are wanted.

    Returns:
        A list with the children of every direct child of *node*, grouped in
        the order the direct children appear in *tree*.
    """
    # Direct children: rows whose father is the requested node.
    direct_children = tree[tree['father'] == node]['child'].tolist()
    # Grandchildren: the children of each direct child, flattened.
    return [
        grandchild
        for parent in direct_children
        for grandchild in tree[tree['father'] == parent]['child'].tolist()
    ]

def get_all_descendants(tree, node):
    """Return every descendant of *node* in a father/child table.

    The table is walked breadth-first, so descendants are listed level by
    level. Each descendant is reported once, and *node* itself is never
    included, which also guarantees termination if the table contains a
    self-parent row or a cycle.

    Args:
        tree: Table with 'father' and 'child' columns, one row per edge.
        node: Value in the 'father' column to start from.

    Returns:
        A list of descendant values in breadth-first order; empty when
        *node* has no children.
    """
    from collections import deque  # O(1) pops from the left end of the queue

    descendants = []
    seen = {node}
    queue = deque([node])

    while queue:
        current = queue.popleft()
        for child in tree[tree['father'] == current]['child'].tolist():
            # Skip anything already visited so cyclic data cannot loop forever.
            if child in seen:
                continue
            seen.add(child)
            descendants.append(child)
            queue.append(child)

    return descendants

# Use CellTypist to annotate the cells

In [None]:
# Per-organ accuracy scores, appended in the order the organs are processed.
scores = []
# hf_hub_download(..., local_dir="Task1_data") places each downloaded file
# directly under Task1_data/. The nested .cache/huggingface/ folder only holds
# download bookkeeping (lock/metadata files), so it must not be used as the
# data directory.
root = 'Task1_data'
# Keep only the .h5ad data files (this skips the hidden .cache folder) and
# sort them so organs are processed in a reproducible order.
organ_list = sorted(name for name in os.listdir(root) if name.endswith('.h5ad'))

In [None]:
# CellTypist model used for each organ. Organs that are not listed fall back
# to DEFAULT_CELLTYPIST_MODEL.
# ('Breast' -> 'Cells_Adult_Breast.pkl' is intentionally left disabled.)
ORGAN_CELLTYPIST_MODELS = {
    'Adrenal_gland': 'Fetal_Human_AdrenalGlands.pkl',
    'Blood': 'Healthy_COVID19_PBMC.pkl',
    'Heart': 'Healthy_Adult_Heart.pkl',
    'Intestine': 'Cells_Intestinal_Tract.pkl',
    'Liver': 'Healthy_Human_Liver.pkl',
    'Pancreas': 'Fetal_Human_Pancreas.pkl',
    'Skin': 'Adult_Human_Skin.pkl',
    'Thymus': 'Developing_Human_Thymus.pkl',
    'Eye': 'Fetal_Human_Retina.pkl',
    'Testis': 'Developing_Human_Gonads.pkl',
    'Trachea': 'Human_Lung_Atlas.pkl',
}
DEFAULT_CELLTYPIST_MODEL = 'Immune_All_Low.pkl'

for organ in organ_list:
    data_path = os.path.join(root, organ)
    adata = sc.read(data_path)

    # Turn the file name (task1_<Organ>.h5ad) into the bare organ name.
    organ = organ.replace('task1_', '').replace('.h5ad', '')

    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)

    # Annotate with the organ-specific model (or the default one).
    model_name = ORGAN_CELLTYPIST_MODELS.get(organ, DEFAULT_CELLTYPIST_MODEL)
    predictions = celltypist.annotate(adata, model=model_name, majority_voting=True)

    adata.obs['celltypist_type'] = predictions.predicted_labels.majority_voting.values

    # align the cell type name with the uHAF system
    uhaf_tree = uhaf.df_uhafs[organ.replace('_', ' ')]
    reference_cell_types = uhaf_tree.index.values
    user_input = np.unique(adata.obs['celltypist_type'])

    cell_mapping = map_cell_types(reference_cell_types, user_input)
    # Predictions the mapping does not cover are labelled 'unclassified'.
    adata.obs['celltypist_type'] = [
        cell_mapping.get(pred, 'unclassified')
        for pred in adata.obs['celltypist_type'].values
    ]

    # calculate annotation score
    # Work on plain arrays: integer indexing such as obs['cell_type'][i] is a
    # deprecated positional lookup on a label-indexed Series.
    true_labels = adata.obs['cell_type'].to_numpy()
    pred_labels = adata.obs['celltypist_type'].to_numpy()

    # The credit depends only on the (true, predicted) pair, so compute it
    # once per distinct pair instead of walking the tree for every cell.
    pair_credit = {}
    score = 0
    unclass = 0
    for truth, pred in zip(true_labels, pred_labels):
        if truth == 'Unclassified':  # remove the unclassified cell in the original dataset
            unclass += 1
            continue

        pair = (truth, pred)
        if pair not in pair_credit:
            if pred == truth:
                # Exact match.
                credit = 1
            elif pred in get_all_descendants(uhaf_tree, truth):
                # Prediction is a descendant of the true label: full credit.
                credit = 1
            else:
                father = uhaf_tree.loc[truth]['father']
                if pred == father:
                    # Prediction is the father of the true label.
                    credit = 0.5
                elif father != 'Cell' and pred == uhaf_tree.loc[father]['father']:
                    # Prediction is the grandfather of the true label.
                    credit = 0.2
                else:
                    credit = 0
            pair_credit[pair] = credit
        score += pair_credit[pair]

    n_scored = adata.shape[0] - unclass
    if n_scored == 0:
        # Every cell is 'Unclassified' in the ground truth (or the dataset is
        # empty): there is nothing to score, and dividing would raise.
        print(f'No classified cells for {organ}; accuracy score skipped.')
        continue

    print(f'The accuracy score for {organ} is: ', score/n_scored)
    scores.append(score/n_scored)