## The FDA pharmacogenomics biomarkers source table
* Obtained from https://www.fda.gov/drugs/science-and-research-drugs/table-pharmacogenomic-biomarkers-drug-labeling
* date: 06/25/2025

In [23]:
## Load necessary packages
import os
import pandas as pd
import glob
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

## Release identifiers: the ISO deployment date is the single source,
## and the version tag is the same date re-ordered as MM_DD_YYYY
deployment_date = "2025-07-01"
_year, _month, _day = deployment_date.split("-")
version_number = "_".join((_month, _day, _year))

## Load files and convert them into separate node & edge files
* Check the structure of every imported file

In [24]:
## NOTE: these are machine-specific locations -- point `_files_root` at your own copy before running
_files_root = '/Users/Weiqi0/ISB_working/Ilya_lab/Translator/Pharmagenomics_KG/files/'

## Folder holding the raw FDA csv download
raw_files_path = _files_root + 'FDA_Pharmacogenomic_biomarkers_in_Drug_labeling/'

## Destination of the formatted node & edge files (tagged with the version number)
_parsed_prefix = _files_root + 'parsed/FDA_pharmacogenomics_biomarkers_parsed_'
download_path_node_file = f'{_parsed_prefix}node_{version_number}.tsv'
download_path_edge_file = f'{_parsed_prefix}edge_{version_number}.tsv'

In [25]:
## Show every csv file found in the raw-files folder
csv_file_names = [name for name in os.listdir(raw_files_path) if name.endswith('.csv')]
for csv_name in csv_file_names:
    print(csv_name)

Table_of_Pharmacogenomic_Biomarkers_in_Drug_Labeling_FDA.csv


In [26]:
## Load the FDA biomarker table from the raw-files folder and preview the first rows
fda_table_csv = os.path.join(raw_files_path, 'Table_of_Pharmacogenomic_Biomarkers_in_Drug_Labeling_FDA.csv')
FDA_biomarkers_df = pd.read_csv(fda_table_csv)

FDA_biomarkers_df.head(5)

Unnamed: 0,Drug,Therapeutic Area*,Biomarker†,Labeling Sections
0,Articaine and Epinephrine (1),Anesthesiology,G6PD,Warnings and Precautions
1,Articaine and Epinephrine (2),Anesthesiology,Nonspecific (Congenital Methemoglobinemia),Warnings and Precautions
2,Bupivacaine (1),Anesthesiology,G6PD,Warnings
3,Bupivacaine (2),Anesthesiology,Nonspecific (Congenital Methemoglobinemia),Warnings
4,Chloroprocaine (1),Anesthesiology,G6PD,Warnings


In [34]:
## Number of rows in the FDA biomarker table
print(FDA_biomarkers_df.shape[0])

608


## Notice: Multiple types of labeling sections
* The full list of distinct values is printed by the next cell.

In [27]:
## Distinct values of the 'Labeling Sections' column (each value may combine several sections)
labeling_sections = FDA_biomarkers_df['Labeling Sections']
unique_labeling_values = labeling_sections.unique()

print("All possible labeling are here: ", unique_labeling_values)
print("Full length of the list: ", len(unique_labeling_values))

 'Use in Specific Populations'
 'Use in Specific Populations, Clinical Pharmacology'
 'Drug Interactions, Clinical Pharmacology'
 'Clinical Pharmacology' 'Overdosage'
 'Dosage and Administration, Clinical Pharmacology'
 'Use in Specific Populations, Clinical Pharmacology, Clinical Studies'
 'Adverse Reactions, Clinical Pharmacology'
 'Precautions' 'Clinical Studies'
 'Clinical Pharmacology, Clinical Studies'
 'Dosage and Administration, Use in Specific Populations, Clinical Pharmacology'
 'Indications and Usage, Adverse Reactions, Clinical Studies'
 'Indications and Usage, Dosage and Administration, Adverse Reactions, Use in Specific Populations, Clinical Pharmacology, Clinical Studies'
 'Indications and Usage, Clinical Studies' 'Contraindications'
 'Drug Interactions'
 'Indications and Usage, Adverse Reactions, Use in Specific Populations, Clinical Studies'
 'Indications and Usage, Dosage and Administration, Adverse Reactions, Use in Specific Populations, Clinical Studies'
 'Indicatio

## Helper function to obtain all unique labels from the full list
* Information based on Figure A: Highlights of prescribing information
* Sections on figure:
    * Warning: Title of warning
    * Recent major changes
    * Indications and usage
    * Dosage and administration
    * Dosage forms and strengths
    * Contraindications
    * Warnings and precautions
    * Adverse reactions
    * Drug interactions
    * Use in specific populations
* https://www.fda.gov/drugs/fdas-labeling-resources-human-prescription-drugs/frequently-asked-questions-about-labeling-prescription-medicines
* 

In [28]:
def find_unique_words(string_list):
    """Collect the distinct comma-separated tokens found across several strings.

    Every string is split on ',', each piece is stripped of surrounding
    whitespace, and empty pieces are discarded.

    Parameters
    ----------
    string_list : iterable
        Values to split. Entries that are not ``str`` (for example the NaN
        that pandas ``Series.unique()`` keeps for missing cells, or ``None``)
        are skipped rather than raising ``AttributeError``.

    Returns
    -------
    set of str
        The unique, whitespace-trimmed tokens.
    """
    unique_words = set()
    for entry in string_list:
        # Missing values have no `.split`; ignore them instead of crashing
        if not isinstance(entry, str):
            continue
        for piece in entry.split(','):
            token = piece.strip()
            if token:
                unique_words.add(token)
    return unique_words

In [29]:
## Break the combined labeling strings into their individual section labels
unique_labels = find_unique_words(unique_labeling_values)
print("All possible unique labels: ", unique_labels)

## How many distinct section labels there are
n_unique_labels = len(unique_labels)
print(n_unique_labels)

18


In [30]:
import pandas as pd

## Lookup table: labeling section -> type -> severity / priority level
## Information based on Figure A: Highlights of prescribing information
## Written one row per label so the three attributes cannot drift out of alignment.
## NOTE(review): "Dosage and Administrations" and "Patient Counseling Info" differ from the
## section strings that appear in the FDA table ("Dosage and Administration",
## "Patient Counseling Information") -- confirm any later matching accounts for this.
_label_rows = [
    # (Label,                        Type,                Severity/Priority Level)
    ("Boxed Warning",               "Safety (critical)", "Highest"),
    ("Contraindications",           "Safety (critical)", "High"),
    ("Warnings",                    "Safety (critical)", "High"),
    ("Warnings and Precautions",    "Safety",            "Medium-High"),  # group 'Warning and Precautions' here
    ("Precautions",                 "Safety",            "Medium-High"),
    ("Overdosage",                  "Safety",            "Medium-High"),
    ("Adverse Reactions",           "Safety",            "Medium"),
    ("Adverse Interactions",        "Safety",            "Medium"),
    ("Drug Interactions",           "Safety",            "Medium"),
    ("Use in Specific Populations", "Guidance",          "Medium"),
    ("Patient Counseling Info",     "Communication",     "Medium"),
    ("Indications and Usage",       "Usage Guidance",    "Low"),          # group 'Indication and Usage' here
    ("Dosage and Administrations",  "Usage Guidance",    "Low"),          # group 'Dosage and Administration' here
    ("Clinical Pharmacology",       "Informational",     "Low"),
    ("Clinical Studies",            "Informational",     "Low"),
]

# Transpose the rows back into the column-oriented dict
_labels, _types, _levels = (list(column) for column in zip(*_label_rows))
data = {
    "Label": _labels,
    "Type": _types,
    "Severity/Priority Level": _levels,
}

# Build the lookup frame and show it
labeling_type_and_severity_df = pd.DataFrame(data)
print(labeling_type_and_severity_df)

                          Label               Type Severity/Priority Level
1             Contraindications  Safety (critical)                    High
4                   Precautions             Safety             Medium-High
5                    Overdosage             Safety             Medium-High
6             Adverse Reactions             Safety                  Medium
7          Adverse Interactions             Safety                  Medium
8             Drug Interactions             Safety                  Medium
9   Use in Specific Populations           Guidance                  Medium
10      Patient Counseling Info      Communication                  Medium
11        Indications and Usage     Usage Guidance                     Low
12   Dosage and Administrations     Usage Guidance                     Low
13        Clinical Pharmacology      Informational                     Low
14             Clinical Studies      Informational                     Low


## Examples to discuss when deciding which biomarker subtype takes priority
* example 1: multiple high severity classes
* 'Boxed Warning, Contraindications, Warnings and Precautions, Adverse Reactions, Patient Counseling Information'
* example 2: a mix of high, medium, and low classes
* 'Warnings and Precautions, Adverse Reactions, Patient Counseling Information'

In [32]:
# Filter rows
filtered_df = FDA_biomarkers_df[FDA_biomarkers_df['Labeling Sections'] 
    == "Boxed Warning, Contraindications, Warnings and Precautions, Adverse Reactions, Patient Counseling Information"]

print(filtered_df)

            Drug Therapeutic Area* Biomarker†  \
595  Pegloticase      Rheumatology       G6PD   

                                     Labeling Sections  


In [33]:
# Filter rows
filtered_df = FDA_biomarkers_df[FDA_biomarkers_df['Labeling Sections'] 
    == "Warnings and Precautions, Adverse Reactions, Patient Counseling Information"]

print(filtered_df)

               Drug Therapeutic Area*  \
49      Dapsone (2)       Dermatology   
319  Dabrafenib (2)          Oncology   

                                     Biomarker†  \
49   Nonspecific (Congenital Methemoglobinemia)   
319                                        G6PD   

                                     Labeling Sections  


In [None]:
## Drugbank result
## https://go.drugbank.com/pharmaco/genomics/DBSNPE001209?utm_source=chatgpt.com

## G6PD deficiency -> Increased risk of hemolytic anemia.