In [2]:
import dgipy
import pandas as pd

# Basic Functions for Grabbing Clinically Relevant Data from DGIdb using DGIpy
DGIpy is a python-wrapper that sends pre-written queries to the DGIdb server to retrieve interaction and annotation data for drugs or genes of interest. A search can begin by calling the get_interactions() function within DGIpy on a list of genes of interest. These genes can be clinically significant variation-containing genes from a patient or genes of interest for experimental or drug discovery pipelines. The results from each search come prepared to wrap in the user's favorite data analysis package (e.g. Polars, Pandas).
  
Once a preliminary search has been done, drugs from interaction results can be fed into additional search methods within DGIpy to retrieve active market ANDA/NDA data from openFDA for approved therapeutics as well as active clinical trial data from ClinicalTrials.gov.

## Search for Interactions using Commonly Mutated Genes in Cancer

In [3]:
# Search for interaction data for genes of interest
genes = ['BRAF','PDGFRA','PDGFRB']
df = pd.DataFrame(dgipy.get_interactions(genes))

In [4]:
# Inspect the data
df[0:5]

Unnamed: 0,gene_name,gene_concept_id,gene_long_name,drug_name,drug_concept_id,drug_approved,interaction_score,interaction_attributes,interaction_sources,interaction_pmids
0,PDGFRB,hgnc:8804,platelet derived growth factor receptor beta,CEP-2563,chembl:CHEMBL3545332,False,0.038609,"{'Cancer Type': None, 'Pathway': None, 'Varian...",[ChEMBL],[]
1,PDGFRB,hgnc:8804,platelet derived growth factor receptor beta,ROMIPLOSTIM,rxcui:805452,True,0.082043,"{'Cancer Type': None, 'Pathway': None, 'Varian...",[TTD],[]
2,PDGFRB,hgnc:8804,platelet derived growth factor receptor beta,LENVATINIB,rxcui:1603296,True,0.036464,"{'Cancer Type': None, 'Pathway': None, 'Varian...",[TdgClinicalTrial],[]
3,PDGFRB,hgnc:8804,platelet derived growth factor receptor beta,ANLOTINIB,ncit:C138997,False,0.093764,"{'Cancer Type': None, 'Pathway': None, 'Varian...",[ChEMBL],[]
4,PDGFRB,hgnc:8804,platelet derived growth factor receptor beta,MOTESANIB,ncit:C71896,False,0.238671,"{'Cancer Type': None, 'Pathway': None, 'Varian...","[ChEMBL, MyCancerGenome, TALC, TdgClinicalTrial]",[]


In [5]:
# See the most frequent drug interactions
df['drug_name'].value_counts()

drug_name
AXITINIB               3
LENVATINIB             3
REGORAFENIB            3
CEDIRANIB              3
DASATINIB ANHYDROUS    3
                      ..
IPILIMUMAB             1
OXALIPLATIN            1
LIFIRAFENIB            1
PYRVINIUM              1
ALECTINIB              1
Name: count, Length: 264, dtype: int64

In [7]:
# Filter the Data for only Approved Therapeutics
filtered_df = df[df['drug_approved']==True].reset_index(drop=True)

# Grab active market drugs for FDA approved therapeutics
openfda_data = pd.DataFrame(dgipy.get_drug_applications(list(filtered_df['drug_name'])))
openfda_data


HTTP status error for Drugs@FDA lookup ANDA078803 from drug rxcui:32592: OXALIPLATIN
HTTP status error for Drugs@FDA lookup ANDA090610 from drug rxcui:20863: CHLOROQUINE PHOSPHATE
HTTP status error for Drugs@FDA lookup NDA213973 from drug rxcui:2369389: RIPRETINIB
Unable to enumify value 'ORAL SUSPENSION' into enum '<enum 'ProductRoute'>'
HTTP status error for Drugs@FDA lookup ANDA207228 from drug rxcui:142433: DOXORUBICIN HYDROCHLORIDE
Unable to enumify value 'CAPSULE, PELLETS' into enum '<enum 'ProductDosageForm'>'
Unable to enumify value 'CAPSULE, PELLETS' into enum '<enum 'ProductDosageForm'>'
Unable to enumify value 'CAPSULE, PELLETS' into enum '<enum 'ProductDosageForm'>'
HTTP status error for Drugs@FDA lookup ANDA071198 from drug rxcui:225852: LEUCOVORIN CALCIUM
HTTP status error for Drugs@FDA lookup ANDA071199 from drug rxcui:225852: LEUCOVORIN CALCIUM
HTTP status error for Drugs@FDA lookup ANDA076898 from drug rxcui:140587: CELECOXIB
HTTP status error for Drugs@FDA lookup ANDA

Unnamed: 0,drug_name,drug_concept_id,drug_product_application,drug_brand_name,drug_marketing_status,drug_dosage_form,drug_dosage_strength
0,OSIMERTINIB,rxcui:1721560,NDA208065,TAGRISSO,ProductMarketingStatus.PRESCRIPTION,ProductDosageForm.TABLET,EQ 40MG BASE
1,OSIMERTINIB,rxcui:1721560,NDA208065,TAGRISSO,ProductMarketingStatus.PRESCRIPTION,ProductDosageForm.TABLET,EQ 80MG BASE
2,PAZOPANIB,rxcui:714438,ANDA215837,PAZOPANIB HYDROCHLORIDE,ProductMarketingStatus.PRESCRIPTION,ProductDosageForm.TABLET,EQ 200MG BASE
3,PAZOPANIB,rxcui:714438,ANDA217517,PAZOPANIB HYDROCHLORIDE,ProductMarketingStatus.PRESCRIPTION,ProductDosageForm.TABLET,EQ 200MG BASE
4,PAZOPANIB,rxcui:714438,ANDA217713,PAZOPANIB HYDROCHLORIDE,ProductMarketingStatus.PRESCRIPTION,ProductDosageForm.TABLET,EQ 200MG BASE
...,...,...,...,...,...,...,...
591,CAPECITABINE,rxcui:194000,ANDA211724,CAPECITABINE,ProductMarketingStatus.PRESCRIPTION,ProductDosageForm.TABLET,500MG
592,CAPECITABINE,rxcui:194000,ANDA217237,CAPECITABINE,ProductMarketingStatus.PRESCRIPTION,ProductDosageForm.TABLET,150MG
593,CAPECITABINE,rxcui:194000,ANDA217237,CAPECITABINE,ProductMarketingStatus.PRESCRIPTION,ProductDosageForm.TABLET,500MG
594,CAPECITABINE,rxcui:194000,NDA020896,XELODA,ProductMarketingStatus.PRESCRIPTION,ProductDosageForm.TABLET,150MG


In [8]:
# Grab active clinical trials for FDA Approved Therapeutics
clinical_trial_data = dgipy.get_clinical_trials(list(filtered_df['drug_name']))
clinical_trial_df = pd.DataFrame(clinical_trial_data)
clinical_trial_df

Unnamed: 0,search_term,trial_id,brief,study_type,min_age,age_groups,pediatric,conditions,interventions
0,ROMIPLOSTIM,NCT00418665,A Safety and Efficacy Study to Evaluate AMG 53...,INTERVENTIONAL,18 Years,"[ADULT, OLDER_ADULT]",False,"[Myelodysplastic Syndromes, Thrombocytopenia]","{'armGroups': [{'label': '750 mcg AMG 531', 't..."
1,ROMIPLOSTIM,NCT03957694,Study of AMG531(Romiplostim) in Patients With ...,INTERVENTIONAL,20 Years,"[ADULT, OLDER_ADULT]",False,[Aplastic Anemia],"{'armGroups': [{'label': 'AMG531', 'type': 'EX..."
2,ROMIPLOSTIM,NCT00299182,Study of AMG 531 to Evaluate the Safety & Effi...,INTERVENTIONAL,18 Years,"[ADULT, OLDER_ADULT]",False,[Lymphoma],{'armGroups': [{'label': '1 mcg/ kg AMG531 Pre...
3,ROMIPLOSTIM,NCT03622931,Patients With Relapsed Ovarian Cancer (2nd and...,INTERVENTIONAL,18 Years,"[ADULT, OLDER_ADULT]",False,[Ovarian Cancer],{'armGroups': [{'label': 'the experimental arm...
4,ROMIPLOSTIM,NCT02052882,Study of Romiplostim for Chemotherapy Induced ...,INTERVENTIONAL,18 Years,"[ADULT, OLDER_ADULT]",False,[Isolated Chemotherapy-induced Thrombocytopenia],"{'armGroups': [{'label': 'Romiplostim', 'type'..."
...,...,...,...,...,...,...,...,...,...
1117,ALECTINIB,NCT02023125,A Study Investigating the Effect of Food and E...,INTERVENTIONAL,18 Years,[ADULT],False,[Healthy Volunteer],{'armGroups': [{'label': 'Group 1: Treatment A...
1118,ALECTINIB,NCT05081674,Brazilian Lung Immunotherapy Study,INTERVENTIONAL,18 Years,"[ADULT, OLDER_ADULT]",False,[Lung Neoplasm],"{'armGroups': [{'label': 'ALK-translocated', '..."
1119,ALECTINIB,NCT02706626,Trial of Brigatinib After Treatment With Next-...,INTERVENTIONAL,18 Years,"[ADULT, OLDER_ADULT]",False,[Non-Small Cell Lung Cancer],{'armGroups': [{'label': 'Cohort A: Disease pr...
1120,ALECTINIB,NCT03596866,A Study of Brigatinib Compared to Alectinib in...,INTERVENTIONAL,18 Years,"[ADULT, OLDER_ADULT]",False,[ALK+ Advanced NSCLC],"{'armGroups': [{'label': 'Brigatinib', 'type':..."


In [46]:
# Filter for just Clinical Trials for Pediatric patients
clinical_trial_df = clinical_trial_df[clinical_trial_df['pediatric']==True].reset_index(drop=True)
clinical_trial_df

Unnamed: 0,search_term,trial_id,brief,study_type,min_age,age_groups,pediatric,conditions,interventions
0,ROMIPLOSTIM,NCT04478227,TPO-Mimetic Use in Children for Hematopoietic ...,INTERVENTIONAL,0 Years,"[CHILD, ADULT]",True,"[Bone Marrow Failure Disorders, Aplastic Anemi...","{'armGroups': [{'label': 'Arm A', 'type': 'EXP..."
1,ROMIPLOSTIM,NCT04588194,"Romiplostim, Rituximab and Dexamethasone as Fr...",INTERVENTIONAL,16 Years,"[CHILD, ADULT, OLDER_ADULT]",True,"[Immune Thrombocytopenia, Thrombotic Thrombocy...","{'armGroups': [{'label': 'Romiplostim, Rituxim..."
2,LENVATINIB,NCT02432274,Study of Lenvatinib in Children and Adolescent...,INTERVENTIONAL,2 Years,"[CHILD, ADULT]",True,"[Tumors, Solid Malignant Tumors, Osteosarcoma,...",{'armGroups': [{'label': 'Cohort 1: Single-Age...
3,DASATINIB ANHYDROUS,NCT01467986,Multimodal Molecular Targeted Therapy to Treat...,INTERVENTIONAL,,"[CHILD, ADULT]",True,[Neuroblastoma Recurrent],"{'armGroups': [{'label': 'Irinotecan, Temozolo..."
4,DASATINIB ANHYDROUS,NCT02596828,Prospective Pilot Trial to Assess a Multimodal...,INTERVENTIONAL,0 Years,"[CHILD, ADULT]",True,[Pineoblastoma],"{'armGroups': [{'label': 'RIST', 'type': 'EXPE..."
...,...,...,...,...,...,...,...,...,...
94,DASATINIB ANHYDROUS,NCT02596828,Prospective Pilot Trial to Assess a Multimodal...,INTERVENTIONAL,0 Years,"[CHILD, ADULT]",True,[Pineoblastoma],"{'armGroups': [{'label': 'RIST', 'type': 'EXPE..."
95,DOXORUBICIN LIPOSOME,NCT00003776,Combination Chemotherapy and Surgery in Treati...,INTERVENTIONAL,,"[CHILD, ADULT]",True,[Sarcoma],"{'interventions': [{'type': 'BIOLOGICAL', 'nam..."
96,DOXORUBICIN LIPOSOME,NCT02792491,Phase II Prospective Trial of Addition of Ritu...,INTERVENTIONAL,,"[CHILD, ADULT, OLDER_ADULT]",True,[Diffuse Large B-cell Lymphoma],{'armGroups': [{'label': 'Reduced dose R-CHOP'...
97,DABRAFENIB,NCT01619774,An Open-Label Phase II Study of the Combinatio...,INTERVENTIONAL,16 Years,"[CHILD, ADULT, OLDER_ADULT]",True,[Melanoma],{'armGroups': [{'label': 'GSK2118436 + GSK1120...


In [48]:
# Quantify spread of conditions covered by retrieved clinical trials
clinical_trial_df['conditions'].explode().value_counts()

conditions
Acute Lymphoblastic Leukemia                                             5
Chronic Myeloid Leukemia                                                 5
Sarcoma                                                                  4
Neuroblastoma Recurrent                                                  3
Liver Neoplasms                                                          3
                                                                        ..
Locally Advanced and/or Metastatic Inflammatory Myofibroblastic Tumor    1
Inflammatory Myofibroblastic Tumor                                       1
Breast Cancer                                                            1
Epilepsy                                                                 1
Melanoma                                                                 1
Name: count, Length: 138, dtype: int64

In [53]:
# Quantify number of trials admitting specific patient age group
clinical_trial_df['age_groups'].explode().value_counts()

age_groups
CHILD          99
ADULT          88
OLDER_ADULT    48
Name: count, dtype: int64

## Get category annotations for patient genes of interest

In [10]:
# Search for interaction data for genes of interest from a patient sample
genes = ['BRAF','PDGFRA','PDGFRB','BCL2','GATA1','CBL','CEBPA','CXCR4','EZH2','FBXW7','GATA2']
categories = pd.DataFrame(dgipy.get_categories(genes))
categories

Unnamed: 0,gene_name,gene_concept_id,gene_full_name,gene_category,gene_category_sources
0,EZH2,hgnc:3527,enhancer of zeste 2 polycomb repressive comple...,CLINICALLY ACTIONABLE,"[CIViC, CarisMolecularIntelligence, Foundation..."
1,EZH2,hgnc:3527,enhancer of zeste 2 polycomb repressive comple...,ENZYME,"[GuideToPharmacology, HumanProteinAtlas]"
2,EZH2,hgnc:3527,enhancer of zeste 2 polycomb repressive comple...,NUCLEAR HORMONE RECEPTOR,[Pharos]
3,EZH2,hgnc:3527,enhancer of zeste 2 polycomb repressive comple...,TRANSCRIPTION FACTOR,[Pharos]
4,GATA2,hgnc:4171,GATA binding protein 2,CLINICALLY ACTIONABLE,"[CarisMolecularIntelligence, FoundationOneGene..."
5,GATA2,hgnc:4171,GATA binding protein 2,TRANSCRIPTION FACTOR,[Pharos]
6,GATA2,hgnc:4171,GATA binding protein 2,TRANSCRIPTION FACTOR COMPLEX,[GO]
7,PDGFRB,hgnc:8804,platelet derived growth factor receptor beta,CLINICALLY ACTIONABLE,"[CIViC, CarisMolecularIntelligence, Foundation..."
8,PDGFRB,hgnc:8804,platelet derived growth factor receptor beta,DRUGGABLE GENOME,"[HingoraniCasas, HopkinsGroom, RussLampel]"
9,PDGFRB,hgnc:8804,platelet derived growth factor receptor beta,KINASE,"[HopkinsGroom, Pharos, dGene]"


In [7]:
# Quantify results
categories['gene_category'].value_counts()

gene_category
CLINICALLY ACTIONABLE               11
DRUGGABLE GENOME                     5
TRANSCRIPTION FACTOR                 4
KINASE                               4
DRUG RESISTANCE                      4
ENZYME                               3
TRANSCRIPTION FACTOR COMPLEX         3
TYROSINE KINASE                      2
EXTERNAL SIDE OF PLASMA MEMBRANE     2
NUCLEAR HORMONE RECEPTOR             1
TUMOR SUPPRESSOR                     1
SERINE THREONINE KINASE              1
DNA REPAIR                           1
CELL SURFACE                         1
G PROTEIN COUPLED RECEPTOR           1
Name: count, dtype: int64

In [9]:
# Filter for just KINASE genes
filtered_categories = categories[categories['gene_category'].str.contains('KINASE')].reset_index(drop=True)
filtered_categories

Unnamed: 0,gene_name,gene_concept_id,gene_full_name,gene_category,gene_category_sources
0,PDGFRB,hgnc:8804,platelet derived growth factor receptor beta,KINASE,"[HopkinsGroom, Pharos, dGene]"
1,PDGFRB,hgnc:8804,platelet derived growth factor receptor beta,TYROSINE KINASE,"[GO, dGene]"
2,PDGFRA,hgnc:8803,platelet derived growth factor receptor alpha,KINASE,"[HopkinsGroom, Pharos, dGene]"
3,PDGFRA,hgnc:8803,platelet derived growth factor receptor alpha,TYROSINE KINASE,[dGene]
4,CBL,hgnc:1541,Cbl proto-oncogene,KINASE,[Pharos]
5,BRAF,hgnc:1097,"B-Raf proto-oncogene, serine/threonine kinase",KINASE,"[HopkinsGroom, Pharos, dGene]"
6,BRAF,hgnc:1097,"B-Raf proto-oncogene, serine/threonine kinase",SERINE THREONINE KINASE,"[GO, dGene]"


In [11]:
# Filter for just DRUGGABLE GENOME genes
filtered_categories = categories[categories['gene_category'].str.contains('DRUGGABLE GENOME')].reset_index(drop=True)
filtered_categories

Unnamed: 0,gene_name,gene_concept_id,gene_full_name,gene_category,gene_category_sources
0,PDGFRB,hgnc:8804,platelet derived growth factor receptor beta,DRUGGABLE GENOME,"[HingoraniCasas, HopkinsGroom, RussLampel]"
1,PDGFRA,hgnc:8803,platelet derived growth factor receptor alpha,DRUGGABLE GENOME,"[HingoraniCasas, HopkinsGroom, RussLampel]"
2,BRAF,hgnc:1097,"B-Raf proto-oncogene, serine/threonine kinase",DRUGGABLE GENOME,"[HingoraniCasas, HopkinsGroom, RussLampel]"
3,BCL2,hgnc:990,BCL2 apoptosis regulator,DRUGGABLE GENOME,"[HingoraniCasas, HopkinsGroom, RussLampel]"
4,CXCR4,hgnc:2561,C-X-C motif chemokine receptor 4,DRUGGABLE GENOME,"[HingoraniCasas, HopkinsGroom, RussLampel]"
