# Example: RDF workflow

This notebook demonstrates how an RDF graph is generated from the property table using the [rdf.py](../src/pyBiodatafuse/graph/rdf.py) module.

In [1]:
import os

# Switch the working directory to the sibling `src` folder before importing;
# presumably this is what lets the `pyBiodatafuse` package below be found
# without installing it. Re-running the cell is safe: from inside `src`,
# `../src` resolves to the same directory.
os.chdir(os.path.abspath(os.path.join(os.pardir, 'src')))

import pandas as pd
from pyBiodatafuse.graph import rdf
from pyBiodatafuse import constants

  from .autonotebook import tqdm as notebook_tqdm


### Load the sample property table

In [2]:
# Load the pickled sample property table. The path is relative to the current
# working directory (the setup cell changes it to `src`).
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a trusted source.
property_table_path = '../combined_df.pkl'
data = pd.read_pickle(property_table_path)

# Preview the first three rows.
data.head(n=3)

Unnamed: 0,identifier,identifier.source,target,target.source,Bgee_expression_levels,MINERVA,WikiPathways,OpenTargets_reactome,OpenTargets_go,DISGENET_diseases,OpenTargets_diseases,OpenTargets_compounds,PubChem_assays,StringDB_ppi
0,AHR,HGNC,ENSG00000106546,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': 953.0, 'pathway_label': 'Kynur...","[{'pathway_id': 'WP5044', 'pathway_label': 'Ky...","[{'pathway_label': 'Endogenous sterols', 'path...","[{'go_id': 'GO:0005667', 'go_name': 'transcrip...","[{'disease_name': 'Mammary Neoplasms', 'HPO': ...",[{'disease_name': 'acute respiratory distress ...,"[{'chembl_id': 'CHEMBL259571', 'drugbank_id': ...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...","[{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS..."
1,CHRNG,HGNC,ENSG00000196811,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...",[{'pathway_label': 'Highly sodium permeable po...,"[{'go_id': 'GO:0015464', 'go_name': 'acetylcho...",[{'disease_name': 'Multiple pterygium syndrome...,[{'disease_name': 'acute respiratory distress ...,"[{'chembl_id': 'CHEMBL1200641', 'drugbank_id':...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...","[{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS..."
2,DMD,HGNC,ENSG00000198947,Ensembl,"[{'anatomical_entity_id': 'UBERON_0000178', 'a...","[{'pathway_id': nan, 'pathway_label': nan, 'pa...","[{'pathway_id': 'WP2858', 'pathway_label': 'Ec...",[{'pathway_label': 'Striated Muscle Contractio...,"[{'go_id': 'GO:0016010', 'go_name': 'dystrophi...","[{'disease_name': 'Muscular Dystrophy, Duchenn...",[{'disease_name': 'acute respiratory distress ...,"[{'chembl_id': 'CHEMBL2108278', 'drugbank_id':...","[{'pubchem_assay_id': nan, 'assay_type': nan, ...","[{'stringdb_link_to': nan, 'Ensembl': nan, 'sc..."


In [3]:
# Show the DISGENET disease annotations stored for the second row of the table
# (position 1); in the displayed output this is a list of per-disease dicts.
data['DISGENET_diseases'].iloc[1]

[{'disease_name': 'Multiple pterygium syndrome',
  'HPO': '',
  'NCI': 'NCI_C101039',
  'OMIM': 'OMIM_265000, OMIM_100730, OMIM_163950',
  'MONDO': 'MONDO_0009926, MONDO_0020746, MONDO_0017415',
  'ORDO': 'ORDO_2990, ORDO_294060',
  'EFO': '',
  'DO': 'DO_0080110, DO_0081322',
  'MESH': 'MESH_C537377',
  'UMLS': 'UMLS_C0265261',
  'disease_type': 'disease',
  'disease_umlscui': 'C0265261',
  'score': 1.0,
  'ei': 1.0,
  'el': None},
 {'disease_name': 'MULTIPLE PTERYGIUM SYNDROME, LETHAL TYPE',
  'HPO': '',
  'NCI': 'NCI_C101038',
  'OMIM': 'OMIM_253290, OMIM_100730, OMIM_100690, OMIM_100720',
  'MONDO': 'MONDO_0009668, MONDO_0017415',
  'ORDO': 'ORDO_294060, ORDO_33108',
  'EFO': '',
  'DO': 'DO_0080110',
  'MESH': 'MESH_C537377',
  'UMLS': 'UMLS_C1854678',
  'disease_type': 'disease',
  'disease_umlscui': 'C1854678',
  'score': 0.95,
  'ei': 1.0,
  'el': None},
 {'disease_name': 'Arthrogryposis multiplex congenita',
  'HPO': 'HPO_HP:0002804',
  'NCI': '',
  'OMIM': '',
  'MONDO': 'MON

### Generating RDF from table

In [4]:
# Build the RDF graph from the property table. The call prints one
# "Binding <prefix> to <namespace>" line per namespace it registers.
g = rdf.generate_rdf(data)

Binding sio to http://semanticscience.org/resource/
Binding hgnc to http://bio2rdf.org/hgnc:
Binding obo to http://purl.obolibrary.org/obo/
Binding umls to https://uts-ws.nlm.nih.gov/rest/semantic-network/2015AB/CUI/
Binding ensembl to https://identifiers.org/ensembl:
Binding dcat to http://www.w3.org/ns/dcat#
Binding biodatafuse to https://biodatafuse.org/
Binding foaf to http://xmlns.com/foaf/0.1/
Binding skos to http://www.w3.org/2004/02/skos/core#
Binding owl to http://www.w3.org/2002/07/owl#
Binding rdf to http://www.w3.org/1999/02/22-rdf-syntax-ns#
Binding rdfs to http://www.w3.org/2000/01/rdf-schema#
Binding xsd to http://www.w3.org/2001/XMLSchema#


### Print out result

In [5]:
# Serialize the graph to Turtle and print it; `print` keeps the multi-line
# text readable, whereas a bare expression would show an escaped repr.
turtle_text = g.serialize(format="turtle")
print(turtle_text)

@prefix life_cycle_base_node: <https://biodatafuse.org/rdf/life_cycle> .
@prefix obo: <http://purl.obolibrary.org/obo/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sio: <http://semanticscience.org/resource/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<https://biodatafuse.org/rdf/experimental_process/000000/AHR> a sio:SIO_001077 ;
    sio:SIO_000229 <https://biodatafuse.org/rdf/gene_expression_value/000000/AHR_UBERON_0000178>,
        <https://biodatafuse.org/rdf/gene_expression_value/000000/AHR_UBERON_0000310>,
        <https://biodatafuse.org/rdf/gene_expression_value/000000/AHR_UBERON_0000948>,
        <https://biodatafuse.org/rdf/gene_expression_value/000000/AHR_UBERON_0000955>,
        <https://biodatafuse.org/rdf/gene_expression_value/000000/AHR_UBERON_0000990>,
        <https://biodatafuse.org/rdf/gene_expression_value/000000/AHR_UBERON_0001004>,
        <https://biodatafuse.org/rdf/gene_expre