# Generate Exhaustive Cell ID List

In [1]:
# Import Packages
import obonet
import pandas as pd

In [2]:
# Fetch the Cell Ontology OBO file and parse it into a graph with obonet.
url = 'http://purl.obolibrary.org/obo/cl.obo'
print('Loading Cell Ontology...')
graph = obonet.read_obo(url)
print('Ontology loaded.')

# Build one (ontology_id, label) record per primary name and per synonym.
records = []
for term_id, attrs in graph.nodes(data=True):
    # Primary label: skipped when the node has no (or an empty) 'name'.
    primary_name = attrs.get('name')
    if primary_name:
        records.append({'ontology_id': term_id, 'label': primary_name})

    # Each raw synonym entry is split on double quotes and the text between
    # the first pair of quotes is used as the label.
    for raw_synonym in attrs.get('synonym', []):
        pieces = raw_synonym.split('"')
        if len(pieces) < 2:
            # No double quote in the entry, so there is nothing to extract.
            continue
        records.append({'ontology_id': term_id, 'label': pieces[1]})


Loading Cell Ontology...
Ontology loaded.


In [4]:
# Tabulate the collected (ontology_id, label) records, then drop rows that are
# exact duplicates, keeping the first occurrence and the original row index.
all_label_rows = pd.DataFrame(data=records)
df = all_label_rows.drop_duplicates(keep='first')
df

Unnamed: 0,ontology_id,label
0,CL:0000000,cell
1,CL:0000001,primary cultured cell
2,CL:0000001,primary cell culture cell
3,CL:0000001,primary cell line cell
4,CL:0000001,unpassaged cultured cell
...,...,...
61623,http://identifiers.org/ensembl/ENSG00000206073,SERPINB4
61624,http://identifiers.org/ensembl/ENSG00000215182,MUC5AC
61625,http://identifiers.org/ensembl/ENSG00000235665,LINC00298
61626,http://identifiers.org/ensembl/ENSG00000242265,PEG10


In [20]:
# Save to CSV
# Keep only Cell Ontology terms, selected by their 'CL:' ID prefix, instead of
# the previous positional slice `df.iloc[:6156,]`. That hard-coded row count goes
# stale whenever the ontology file changes: the slice would then silently drop
# CL rows or let non-CL rows (e.g. the Ensembl identifiers at the end of `df`)
# into the output.
# NOTE(review): assumes the 6156-row cut-off was meant to mark the end of the
# CL-prefixed rows — confirm against the intended contents of the CSV.
is_cl_term = df['ontology_id'].str.startswith('CL:', na=False)

# NOTE(review): machine-specific absolute path; adjust for your environment.
df[is_cl_term].to_csv("/Users/alexantill/Göran_Karlsson_Lab/benchLLM/cell_to_cell_ontology.csv", index=False)