In [1]:
#We have one mutation of interest (rs4784227-> chr16:52565276) and we want to study its possible implications in the regulation of gene expression. 
#First we will find the enhancers that are located on these coords with the function getCRMs_by_coord

In [10]:
from PyBioGateway import getCRMs_by_coord, crm2phen, getPhenotype, crm2gene, gene2protein, prot2bp, gene2crm, crm2tfac

In [7]:
mutation_position = 52565276

# We define a range around the mutation position (e.g., +/- 1000 bases)
range_start = mutation_position - 12500
range_end = mutation_position + 12500

#Now we use the function to get CRMs in the defined range
crms = getCRMs_by_coord("chr-16", range_start, range_end)

# We will count the number of entries in the list
num_entries = len(crms) if isinstance(crms, list) else 0

print(f"Number of CRMs in the specified range: {num_entries}")


Number of CRMs in the specified range: 485


In [4]:
#We will select some crm for continue the study, and we will see if the crm is related with any disease using crm2phen function

In [4]:
for crm in crms:
    crm_name = crm['crm_name']
    phen_results = crm2phen(crm_name)
    if phen_results != ("No data available for the introduced crm or you may have introduced an instance that is not a crm. Check your data type with type_data function."):    
        print(f"CRM: {crm_name}")
        print(f"Phenotypes: {phen_results}\n")
#We get the crm that are related to a phenotype, in this case only crm/CRMHS00000005858 is related to a disease, which is Breast cancer.

CRM: crm/CRMHS00000005858
Phenotypes: [{'phen_id': 'OMIM/114480', 'database': 'http://biocc.hrbmu.edu.cn/DiseaseEnhancer/; http://health.tsinghua.edu.cn/jianglab/endisease/', 'articles': 'pubmed/23001124'}, {'phen_id': 'MESH/D001943', 'database': 'http://biocc.hrbmu.edu.cn/DiseaseEnhancer/; http://health.tsinghua.edu.cn/jianglab/endisease/', 'articles': 'pubmed/23001124'}, {'phen_id': 'DOID/DOID_1612', 'database': 'http://biocc.hrbmu.edu.cn/DiseaseEnhancer/; http://health.tsinghua.edu.cn/jianglab/endisease/', 'articles': 'pubmed/23001124'}]



In [5]:
print(getPhenotype("114480"))

[{'phen_label': 'BREAST CANCER'}]


In [6]:
# We are going now to search if our enhancer have any target gene using crm2gene function
genes=crm2gene("crm/CRMHS00000005858")
print(genes)

[{'gene_name': 'TOX3', 'database': 'http://biocc.hrbmu.edu.cn/DiseaseEnhancer/; http://health.tsinghua.edu.cn/jianglab/endisease/', 'articles': 'pubmed/23001124'}]


In [7]:
#The gene TOX3 is related to our crm, now we can find which proteins are encoded by this gene in Homo sapiens with gene2protein function
protein=gene2protein("TOX3","Homo sapiens")
print(protein)

[{'prot_name': 'H3BTZ9_HUMAN'}, {'prot_name': 'J3QQQ6_HUMAN'}, {'prot_name': 'TOX3_HUMAN'}]


In [8]:
#We want to know in which biological process are involved these proteins, we will use prot2bp function
for prot in protein:
    prot_name=prot['prot_name']
    bp_results=prot2bp(prot_name)
    print(f"Protein {prot_name}")
    print(f"Biological process: {bp_results}\n")

Protein H3BTZ9_HUMAN
Biological process: No data available for the introduced protein or you may have introduced an instance that is not a protein. Check your data type with type_data function.

Protein J3QQQ6_HUMAN
Biological process: No data available for the introduced protein or you may have introduced an instance that is not a protein. Check your data type with type_data function.

Protein TOX3_HUMAN
Biological process: [{'bp_id': 'GO:0006357', 'bp_label': 'regulation of transcription by RNA polymerase II', 'relation_label': 'O15405--GO:0006357', 'database': 'goa/', 'articles': 'pubmed/21873635'}, {'bp_id': 'GO:0042981', 'bp_label': 'regulation of apoptotic process', 'relation_label': 'O15405--GO:0042981', 'database': 'goa/', 'articles': 'pubmed/21172805'}, {'bp_id': 'GO:0043524', 'bp_label': 'negative regulation of neuron apoptotic process', 'relation_label': 'O15405--GO:0043524', 'database': 'goa/', 'articles': 'pubmed/21172805'}, {'bp_id': 'GO:0045944', 'bp_label': 'positive re

In [None]:
#We find that TOX3_HUMAN protein is related with regulation of apoptotic process, regulation of transcription by RNA polymerase II, 
#negative regulation of neuron apoptotic process and positive regulation of transcription by RNA polymerase II.

In [5]:
#As multiple enhancers can regulate the same gene, we are going to find with the function gene2crm which others enhancers can regulate our target gene:
len(gene2crm("TOX3"))
#There is a total of 393 enhancers related with our target gene.

393

In [15]:
#Now we will find if there are any transcription factor related with our crm.
for crm in crms:
    crm_name = crm['crm_name']
    if crm_name=="crm/CRMHS00000032653":
        print("yes")
    else:
        print("no")


no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
n