In [8]:
import json
import pandas as pd
from tqdm import tqdm

### Extract Headers
Tally the top-level keys of every record in the openFDA drug-label files to identify which section headers might be relevant to our experimental design.

In [9]:
def extract_keys(data, one_giant_list):
    """Append the top-level keys of every record in ``data['results']``.

    Parameters
    ----------
    data : dict
        Parsed JSON payload holding an iterable of dict records under
        the ``'results'`` key.
    one_giant_list : list
        Accumulator that is extended in place, one item per key per record
        (duplicates are kept so the keys can be counted afterwards).

    Returns
    -------
    list
        The same ``one_giant_list`` object that was passed in.
    """
    for record in data['results']:
        # Keys are added in record order, preserving each record's key order.
        one_giant_list.extend(record.keys())
    return one_giant_list


In [17]:
# Collect every top-level key seen in every record of the 12 label files.
file_numbers = ['0001','0002','0003','0004','0005','0006','0007','0008','0009','0010','0011','0012']
keys_list = []

for number in tqdm(file_numbers):
    # `with` closes each file handle as soon as it is parsed (the previous
    # version left all 12 handles open). The encoding is pinned to UTF-8,
    # which the JSON spec requires, so the read does not depend on the
    # platform's default locale.
    with open(f'../data/drug-label-{number}-of-0012.json', 'r', encoding='utf-8') as f:
        data = json.load(f)
    keys_list = extract_keys(data, keys_list)

# One row per (record, key) occurrence; duplicates are kept for counting.
keys_df = pd.DataFrame({'keys' : keys_list})


100%|██████████| 12/12 [00:12<00:00,  1.03s/it]


In [18]:
# Frequency of each top-level key across all records, computed once and
# reused below (sorted most-frequent first by value_counts).
key_counts = keys_df['keys'].value_counts()

# Number of distinct keys.
print(len(key_counts))

# Persist the complete tally for offline review.
key_counts.reset_index().to_csv('keys.csv')

# Show the 50 most frequent keys. This must be the last expression in the
# cell: previously the slice sat in the middle of the cell, so its value was
# discarded and never displayed.
key_counts.head(50)

159


### Extract Data
Using the openFDA JSON set, extract the relevant text fields from each application, where they exist. Collect this data into a single DataFrame and then save the DataFrame to disk.

In [12]:
# Fields pulled from each openFDA drug-label record, in output-column order.
HEADERS = [
    # Read from the record's nested 'openfda' dict by extract_data_point.
    'brand_name',
    'application_number',
    # Read from the top level of the record.
    'adverse_reactions',
    'clinical_studies',
    'indications_and_usage',
    'contraindications',
    'warnings_and_cautions',
    'warnings',
    'precautions',
    'pharmacokinetics',
    'purpose',
    'clinical_pharmacology',
    'active_ingredient',
    'stop_use',
    'boxed_warning',
    'pharmacodynamics',
    'pharmacogenomics',
]


In [13]:
# Headers whose values live under the record's nested 'openfda' dict rather
# than at the top level of the record.
_OPENFDA_HEADERS = ('brand_name', 'application_number')


def _first_value(entry, header):
    """Return the first element of the field ``header`` in ``entry``, or None.

    'brand_name' and 'application_number' are looked up under
    ``entry['openfda']``; every other header is looked up directly on
    ``entry``. A missing key, an empty list, or a non-subscriptable value
    yields None.
    """
    # TODO: Handle broken brand names / application numbers
    try:
        source = entry['openfda'] if header in _OPENFDA_HEADERS else entry
        return source[header][0]
    except (KeyError, IndexError, TypeError):
        # Only "field absent / empty / wrong shape" is treated as missing;
        # unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed.
        return None


def extract_data_point(data, headers=None):
    """Build a DataFrame with one row per record in ``data['results']``.

    Parameters
    ----------
    data : dict
        Parsed JSON payload holding an iterable of dict records under
        the ``'results'`` key.
    headers : sequence of str, optional
        Fields to extract, in output-column order. Defaults to the
        module-level ``HEADERS`` list when omitted, which preserves the
        original single-argument call.

    Returns
    -------
    pandas.DataFrame
        Columns are ``headers``; the index runs 0..n-1. Each cell holds the
        first element of the corresponding field, or None where the field is
        absent or empty.
    """
    if headers is None:
        headers = HEADERS
    headers = list(headers)

    # Collect plain dicts and build the frame once. Concatenating a one-row
    # frame per record re-copies the accumulated frame every iteration, which
    # is quadratic in the number of records.
    records = [
        {header: _first_value(entry, header) for header in headers}
        for entry in data['results']
    ]

    # Passing columns= keeps the column order (and the columns themselves)
    # even when there are no records.
    return pd.DataFrame(records, columns=headers)

# "openfda": {
        # "application_number": [

In [19]:
# Extract the selected label fields from all 12 openFDA files into one frame.
file_numbers = ['0001','0002','0003','0004','0005','0006','0007','0008','0009','0010','0011','0012']
label_frames = []

for number in tqdm(file_numbers):
    # `with` closes each file handle once it is parsed (handles were
    # previously left open). The encoding is pinned to UTF-8 so the read
    # does not depend on the platform's default locale.
    with open(f'../data/drug-label-{number}-of-0012.json', 'r', encoding='utf-8') as f:
        data = json.load(f)
    label_frames.append(extract_data_point(data))

# Concatenate once at the end instead of re-copying the accumulated frame on
# every iteration, then renumber rows 0..n-1 across all files.
fda_df = pd.concat(label_frames, axis=0).reset_index(drop=True)

fda_df

100%|██████████| 12/12 [07:50<00:00, 39.17s/it]


Unnamed: 0,brand_name,application_number,adverse_reactions,clinical_studies,indications_and_usage,contraindications,warnings_and_cautions,warnings,precautions,pharmacokinetics,purpose,clinical_pharmacology,active_ingredient,stop_use,boxed_warning,pharmacodynamics,pharmacogenomics
0,ACETAMINOPHEN AND CODEINE PHOSPHATE,ANDA040419,ADVERSE REACTIONS The following serious advers...,,INDICATIONS AND USAGE Acetaminophen and codein...,CONTRAINDICATIONS Acetaminophen and codeine ph...,,"WARNINGS Addiction, Abuse, and Misuse Acetamin...",PRECAUTIONS Risks of Driving and Operating Mac...,,,CLINICAL PHARMACOLOGY Mechanism of Action Code...,,,WARNING: SERIOUS AND LIFE-THREATENING RISKS FR...,,
1,Magnesium Sulfate in Water,ANDA209642,ADVERSE REACTIONS The adverse effects of paren...,,INDICATIONS AND USAGE Magnesium Sulfate in Wat...,CONTRAINDICATIONS Intravenous magnesium should...,,WARNINGS FETAL HARM: Continuous administration...,PRECAUTIONS Because magnesium is removed from ...,Pharmacokinetics Absorption: Intravenously adm...,,CLINICAL PHARMACOLOGY Magnesium (Mg ++ ) is an...,,,,,
2,omeprazole,ANDA216096,,,Use • treats frequent heartburn (occurs 2 or m...,,,Warnings Allergy alert: • do not use if you ar...,,,Purpose Acid reducer,,Active ingredient (in each capsule) Omeprazole...,Stop use and ask a doctor if: • your heartburn...,,,
3,,,,,"Uses When using this product, avoid contact wi...",,,Warnings For external use only . When using th...,,,Purpose Antimicrobial,,Active ingredient Benzalkonium Chloride 0.1% P...,Stop use and ask a doctor if irritation or red...,,,
4,Zenpep,BLA022210,6 ADVERSE REACTIONS The following serious or o...,14 CLINICAL STUDIES Adult and Pediatric Patien...,1 INDICATIONS AND USAGE ZENPEP ® is indicated ...,4 CONTRAINDICATIONS None None ( 4 ),5 WARNINGS AND PRECAUTIONS • Fibrosing Colonop...,,,12.3 Pharmacokinetics Following oral administr...,,12 CLINICAL PHARMACOLOGY 12.1 Mechanism of Act...,,,,12.2 Pharmacodynamics For patients consuming a...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223999,Dacarbazine,ANDA075259,"ADVERSE REACTIONS Symptoms of anorexia, nausea...",,INDICATIONS AND USAGE Dacarbazine for Injectio...,CONTRAINDICATIONS Dacarbazine for injection is...,,WARNINGS Hemopoietic depression is the most co...,PRECAUTIONS Hospitalization is not always nece...,,,CLINICAL PHARMACOLOGY After intravenous admini...,,,WARNING It is recommended that dacarbazine for...,,
224000,Articadent,NDA020971,6 ADVERSE REACTIONS Reactions to articaine are...,6.1 Clinical Studies Experience Because clinic...,1 INDICATIONS AND USAGE ARTICADENT is indicate...,4 CONTRAINDICATIONS ARTICADENT is contraindica...,5 WARNINGS AND PRECAUTIONS Accidental Intravas...,,,12.3 Pharmacokinetics Absorption: Following de...,,12 CLINICAL PHARMACOLOGY 12.1 Mechanism of Act...,,,,"12.2 Pharmacodynamics Clinically, the order of...",
224001,Carvedilol,ANDA077614,6 ADVERSE REACTIONS Most common adverse events...,14 CLINICAL STUDIES 14.1 Heart Failure A total...,1 INDICATIONS AND USAGE Carvedilol tablets are...,4 CONTRAINDICATIONS Bronchial asthma or relate...,5 WARNINGS AND PRECAUTIONS Acute exacerbation ...,,,12.3 Pharmacokinetics Carvedilol tablets are r...,,12 CLINICAL PHARMACOLOGY 12.1 Mechanism of Act...,,,,12.2 Pharmacodynamics Heart Failure The basis ...,
224002,hydrocodone bitartrate and acetaminophen,ANDA207509,ADVERSE REACTIONS The following adverse reacti...,,INDICATIONS AND USAGE Hydrocodone bitartrate a...,CONTRAINDICATIONS Hydrocodone Bitartrate and A...,,"WARNINGS Addiction, Abuse, and Misuse Hydrocod...",PRECAUTIONS Risks of Driving and Operating Mac...,Pharmacokinetics The behavior of the individua...,,CLINICAL PHARMACOLOGY Mechanism of Action Hydr...,,,"WARNING: ADDICTION, ABUSE, AND MISUSE; RISK EV...",Pharmacodynamics Effects on the Central Nervou...,


In [21]:
# Save the combined label table to an Excel workbook, on a sheet named for the
# extraction date.
# NOTE(review): Excel caps a cell at 32,767 characters, so very long label
# sections may not survive the round trip intact — confirm, or consider
# CSV/Parquet for the full text.
fda_df.to_excel('../data/openfda-04232024.xlsx',sheet_name='04232024')

### Scratch

In [None]:
# Scratch: earlier version of the extraction loop, kept for reference.
# NOTE(review): `extract_data` is not defined in any cell visible in this
# notebook, so this cell cannot run on a fresh kernel as written — confirm
# whether it can be deleted.
# NOTE(review): this first copy lists 12 file numbers but the path pattern
# below ends in `-of-0011`; the second copy further down lists 11 numbers.
file_numbers = ['0001','0002','0003','0004','0005','0006','0007','0008','0009','0010','0011','0012']
fda_df = pd.DataFrame(columns=['id','adverse_reactions','indications_and_usage','contraindications','warnings_and_cautions','brand_name'])

for number in tqdm(file_numbers):
    f = open(f'../data/drug-label-{number}-of-0011.json','r')  # handle is never closed
    data = json.load(f)
    fda_df = extract_data(data,fda_df)



# Second copy of the same loop with 11 file numbers, matching the `-of-0011`
# path pattern. It rebinds `file_numbers` and `fda_df`, discarding whatever
# the loop above produced.
file_numbers = ['0001','0002','0003','0004','0005','0006','0007','0008','0009','0010','0011']
fda_df = pd.DataFrame(columns=['id','adverse_reactions','indications_and_usage','contraindications','warnings_and_cautions','brand_name'])

for number in tqdm(file_numbers):
    f = open(f'../data/drug-label-{number}-of-0011.json','r')  # handle is never closed
    data = json.load(f)
    fda_df = extract_data(data,fda_df)



In [None]:
# Scratch: list the top-level keys of the first record in `data`
# (`data` is whichever file was parsed most recently).
data['results'][0].keys()

# Additional fields of potential interest: nonclinical_toxicology, clinical_pharmacology?



In [None]:
# Scratch: show the nested 'openfda' dict of the first record in `data`.
data['results'][0]['openfda'] # fields of interest here: brand name, rxcui

{'application_number': ['ANDA065117'],
 'brand_name': ['AMOXICILLIN AND CLAVULANATE POTASSIUM'],
 'generic_name': ['AMOXICILLIN AND CLAVULANATE POTASSIUM'],
 'manufacturer_name': ['DIRECT RX'],
 'product_ndc': ['61919-019', '61919-401'],
 'product_type': ['HUMAN PRESCRIPTION DRUG'],
 'route': ['ORAL'],
 'substance_name': ['AMOXICILLIN', 'CLAVULANATE POTASSIUM'],
 'rxcui': ['308189', '617296'],
 'spl_id': ['b96b2e24-4192-31e2-e053-2a95a90a2356'],
 'spl_set_id': ['0173e9de-a995-4386-bb65-8fc2bbf347f9'],
 'package_ndc': ['61919-401-32', '61919-019-20'],
 'original_packager_product_ndc': ['65862-071', '66685-1002'],
 'unii': ['804826J2HU', 'Q42OMW3AT8']}