In [1]:
import ontolopy as opy
import pandas as pd 
from myst_nb import glue
import time

# Input locations
# ---------------
# Every input is addressed relative to this notebook, under the c08-combining data folder.
uberon_obo_file = '../c08-combining/data/uberon_ext_210321.obo'
fantom_obo_file = '../c08-combining/data/experiments/fantom/ff-phase2-170801.obo.txt'
fantom_samples_info_file = '../c08-combining/data/experiments/fantom/fantom_humanSamples2.0.csv'

# Ontologies
# ----------
# NOTE(review): `ont_ids` presumably limits which ontology prefixes are loaded
# from each OBO file - confirm against the ontolopy documentation.
uberon_obo = opy.load_obo(file_loc=uberon_obo_file, ont_ids=['GO', 'UBERON', 'CL'])
fantom_obo = opy.load_obo(file_loc=fantom_obo_file, ont_ids=['CL', 'FF', 'GO', 'UBERON', 'DOID'])

# Sample metadata
# ---------------
# The second CSV column (position 1) becomes the index; in the rendered excerpt
# it holds the "FF:..." sample identifiers that later cells pass on as sources.
fantom_samples_info = pd.read_csv(fantom_samples_info_file, index_col=1)

# Excerpt for the thesis text
# ---------------------------
# Show the excerpt in the notebook, then register the same rows with glue
# (display=False so the table is not rendered a second time).
indices = [1, 9, 11]  # choose rows for variety
display(fantom_samples_info.iloc[indices])
glue("fantom-samples-info-excerpt", fantom_samples_info.iloc[indices], display=False)

Unnamed: 0_level_0,Source Name,Charateristics [description],Characteristics [catalog_id],Characteristics [Category],Chracteristics [Species],Characteristics [Sex],Characteristics [Age],Characteristics [Developmental stage],Characteristics[Tissue],Characteristics [Cell lot],Characteristics [Cell type],Characteristics [Catalogue ID],Characteristics [Collaboration],Characteristics [Provider],Protocol REF,Extract Name,Material Type
Charateristics [ff_ontology],Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
FF:10002-101A5,10002-101A5,SABiosciences XpressRef Human Universal Total ...,B208251,tissues,Human (Homo sapiens),mixed,,UNDEFINED,unclassifiable,,CELL MIXTURE - tissue sample,,FANTOM5 OSC CORE (contact: Al Forrest),SABiosciences,OP-RNA-extraction-totalRNA-TRIzol-isopropanol-...,10002-101A5,Total RNA
FF:10016-101C7,10016-101C7,"heart, adult, pool1",0910061 -7,tissues,Human (Homo sapiens),mixed,,"70,73,74 years old adult",heart,,CELL MIXTURE - tissue sample,,FANTOM5 OSC CORE (contact: Al Forrest),Ambion,OP-RNA-extraction-totalRNA-ToTALLY-RNA-v1.0,10016-101C7,Total RNA
FF:10018-101C9,10018-101C9,"liver, adult, pool1",0910061 -9,tissues,Human (Homo sapiens),mixed,,"64,69,70 years old adult",liver,,CELL MIXTURE - tissue sample,,FANTOM5 OSC CORE (contact: Al Forrest),Ambion,OP-RNA-extraction-totalRNA-ToTALLY-RNA-v1.0,10018-101C9,Total RNA


In [2]:
def get_disease_related_samples(samples, ont):
    """Look up relations linking the given samples to disease (DOID) terms.

    Parameters
    ----------
    samples : iterable
        Sample identifiers to use as relation sources; materialised with
        ``list()`` before being handed to ``opy.Relations``.
    ont
        Ontology object passed through unchanged as ``ont``.

    Returns
    -------
    The object built by ``opy.Relations`` for the ``is_a`` and
    ``is_model_for`` relations with ``'DOID'`` as the only target. Nothing is
    filtered here; the caller in this notebook drops rows whose ``to`` value
    is missing.
    """
    return opy.Relations(
        allowed_relations=['is_a', 'is_model_for'],
        sources=list(samples),
        targets=['DOID'],
        ont=ont,
    )

# Time the disease-relation lookup over every FANTOM sample ID.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() is wall-clock time, which can be coarse or jump
# if the system clock is adjusted, making sub-second timings unreliable.
start = time.perf_counter()
# Keep only the rows where a target was found (non-missing 'to').
disease_related = get_disease_related_samples(fantom_samples_info.index, ont=fantom_obo).dropna(subset=['to'])
time_taken = time.perf_counter() - start
print(f"Finds {disease_related.shape[0]} disease relations in {time_taken:.3f} seconds")

Finds 566 disease relations in 0.057 seconds


In [3]:
# Styling table for thesis
# ========================
# Wrap the long text columns on a *copy* of the five displayed rows only.
# Writing the wrapped strings back into `disease_related` would insert line
# breaks into its relation paths/texts for every later use of the frame, and
# would wrap every row just to show five. `.assign` returns a new frame, so
# the analysis data stays untouched and this cell is safe to re-run.
to_display = (
    disease_related
    .head(5)
    .assign(
        relation_text=lambda rows: rows['relation_text'].str.wrap(50),
        relation_path=lambda rows: rows['relation_path'].str.wrap(48),
    )
    # 'pre-wrap' makes the rendered table honour the inserted line breaks.
    .style.set_properties(**{
        'white-space': 'pre-wrap',
    })
)
display(to_display)
glue("disease-relations-found", to_display, display=False)

Unnamed: 0_level_0,relation_path,relation_text,to
from,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
FF:10050-101G5,FF:10050-101G5.is_model_for~DOID:5844,"heart, adult, diseased post-infarction, donor1 is model for myocardial infarction",DOID:5844
FF:10051-101G6,FF:10051-101G6.is_model_for~DOID:114,"heart, adult, diseased, donor1 is model for heart disease",DOID:114
FF:10399-106A3,FF:10399-106A3.is_a~FF:0101883.is_a~FF:0100740.i s_model_for~DOID:8692,"acute myeloid leukemia (FAB M5) cell line:THP-1, rep3 (fresh) is a acute myeloid leukemia cell line sample is a myeloid leukemia cell line sample is model for myeloid leukemia",DOID:8692
FF:10400-106A4,FF:10400-106A4.is_a~FF:0101883.is_a~FF:0100740.i s_model_for~DOID:8692,"acute myeloid leukemia (FAB M5) cell line:THP-1, rep1 (revived) is a acute myeloid leukemia cell line sample is a myeloid leukemia cell line sample is model for myeloid leukemia",DOID:8692
FF:10405-106A9,FF:10405-106A9.is_a~FF:0101883.is_a~FF:0100740.i s_model_for~DOID:8692,"acute myeloid leukemia (FAB M5) cell line:THP-1, rep3 (thawed) is a acute myeloid leukemia cell line sample is a myeloid leukemia cell line sample is model for myeloid leukemia",DOID:8692


In [4]:
def get_differentiable_samples(samples, ont):
    """Find relations from in-vivo samples to the cell differentiation term.

    Runs two chained ``opy.Relations`` queries:

    1. ``is_a`` relations from ``samples`` to ``FF:0000002`` (the in-vivo
       term); only sources with a non-missing ``to`` are kept.
    2. ``is_a`` / ``derives_from`` / ``capable_of`` relations from those
       remaining sources to ``GO:0030154`` (cell differentiation).

    Parameters
    ----------
    samples : iterable
        Sample identifiers for the first query; materialised with ``list()``.
    ont
        Ontology object passed unchanged to both queries.

    Returns
    -------
    The result of the second ``opy.Relations`` query, unfiltered; the caller
    in this notebook drops rows whose ``to`` value is missing.
    """
    in_vivo_term = 'FF:0000002'
    differentiation_term = 'GO:0030154'

    # Step 1: which samples are in-vivo samples?
    in_vivo_hits = opy.Relations(
        allowed_relations=['is_a'],
        sources=list(samples),
        targets=[in_vivo_term],
        ont=ont,
    )
    # The index of the filtered result supplies the sources for step 2.
    in_vivo_ids = list(in_vivo_hits.dropna(subset=['to']).index)

    # Step 2: which of those can be linked to cell differentiation?
    return opy.Relations(
        allowed_relations=['is_a', 'derives_from', 'capable_of'],
        sources=in_vivo_ids,
        targets=[differentiation_term],
        ont=ont,
    )

# merge ontology:
# The differentiation query is run against the combination of the Uberon and
# FANTOM ontologies rather than the FANTOM ontology alone.
merged = uberon_obo.merge(fantom_obo)

# get differentiable cell samples:
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() is wall-clock time, which can be coarse or jump
# if the system clock is adjusted, making sub-second timings unreliable.
start = time.perf_counter()
# Keep only the rows where a target was found (non-missing 'to').
differentiable_samples = get_differentiable_samples(fantom_samples_info.index, ont=merged).dropna(subset=['to'])
time_taken = time.perf_counter() - start
print(f"Finds {differentiable_samples.shape[0]} relations to cell differentiation in {time_taken:.3f} seconds")

Finds 254 relations to cell differentiation in 0.108 seconds


In [5]:
# Styling table for thesis
# ========================
# Wrap the long text columns on a *copy* of the displayed rows only.
# Writing the wrapped strings back into `differentiable_samples` would insert
# line breaks into its relation paths/texts for every later use of the frame,
# and would wrap every row just to show five. `.assign` returns a new frame,
# so the analysis data stays untouched and this cell is safe to re-run.
to_display = (
    differentiable_samples
    .iloc[[0, 4, 5, 7, 8]]  # hand-picked rows for the thesis table
    .assign(
        relation_text=lambda rows: rows['relation_text'].str.wrap(50),
        relation_path=lambda rows: rows['relation_path'].str.wrap(48),
    )
    # 'pre-wrap' makes the rendered table honour the inserted line breaks.
    .style.set_properties(**{
        'white-space': 'pre-wrap',
    })
)
display(to_display)
glue("differentiable-relations-found", to_display, display=False)

Unnamed: 0_level_0,relation_path,relation_text,to
from,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
FF:11214-116A8,FF:11214-116A8.is_a~FF:0000094.derives_from~CL:0 002569.is_a~CL:0000134.is_a~CL:0000048.is_a~CL:0 000034.is_a~CL:0011115.capable_of~GO:0030154,"Mesenchymal stem cell - umbilical, donor0 is a human mesenchymal stem cell of umbilical cord- Sciencell sample derives from mesenchymal stem cell of umbilical cord is a mesenchymal stem cell is a multi fate stem cell is a stem cell is a precursor cell capable of cell differentiation",GO:0030154
FF:11224-116B9,FF:11224-116B9.is_a~FF:0000024.derives_from~CL:0 000576.is_a~CL:0011026.is_a~CL:0011115.capable_o f~GO:0030154,"CD14-positive Monocytes, donor1 is a human CD14-positive monocyte sample derives from monocyte is a progenitor cell is a precursor cell capable of cell differentiation",GO:0030154
FF:11227-116C3,FF:11227-116C3.is_a~FF:0000044.derives_from~CL:0 000576.is_a~CL:0011026.is_a~CL:0011115.capable_o f~GO:0030154,"Dendritic Cells - monocyte immature derived, donor1, rep1 is a human monocyte immature derived dendritic cell sample derives from monocyte is a progenitor cell is a precursor cell capable of cell differentiation",GO:0030154
FF:11229-116C5,FF:11229-116C5.derives_from~CL:0000576.is_a~CL:0 011026.is_a~CL:0011115.capable_of~GO:0030154,"CD14+ monocyte derived endothelial progenitor cells, donor1 derives from monocyte is a progenitor cell is a precursor cell capable of cell differentiation",GO:0030154
FF:11240-116D7,FF:11240-116D7.is_a~FF:0000165.derives_from~CL:0 000594.is_a~CL:0000680.is_a~CL:0000055.is_a~CL:0 011115.capable_of~GO:0030154,"Skeletal Muscle Satellite Cells, donor1 is a human skeletal muscle satellite cell sample derives from skeletal muscle satellite cell is a muscle precursor cell is a non-terminally differentiated cell is a precursor cell capable of cell differentiation",GO:0030154
