In [1]:
import json
import pandas as pd
from tqdm import tqdm

### Extract Headers
Identify which section headers in the openFDA drug-label files might be relevant to our experimental design.

In [2]:
def extract_keys(data, one_giant_list):
    """Collect the field names of every record in ``data['results']``.

    Parameters
    ----------
    data : dict
        Parsed JSON payload; must contain a ``'results'`` iterable of dicts.
    one_giant_list : list
        Accumulator that receives one item per (record, key) pair. It is
        extended in place, so duplicates across records are kept.

    Returns
    -------
    list
        The same ``one_giant_list`` object that was passed in.
    """
    for record in data['results']:
        # Keys are added in the record's own order, one record after another.
        one_giant_list.extend(record.keys())

    return one_giant_list


In [3]:
# Walk every drug-label JSON file and record each field name that appears
# in each record, so the field frequencies can be tabulated afterwards.
file_numbers = ['0001','0002','0003','0004','0005','0006','0007','0008','0009','0010','0011']
keys_list = []

for number in tqdm(file_numbers):
    # The context manager closes each file as soon as it has been parsed,
    # instead of leaving one open handle per file for the kernel's lifetime.
    with open(f'../data/drug-label-{number}-of-0011.json','r') as f:
        data = json.load(f)
    keys_list = extract_keys(data, keys_list)

# One row per (record, field name) occurrence.
keys_df = pd.DataFrame({'keys' : keys_list})


100%|██████████| 11/11 [00:31<00:00,  2.86s/it]


In [4]:
# Tally how often each field name occurs; computed once and reused below.
key_counts = keys_df['keys'].value_counts()

# Number of distinct field names seen across all files.
print(len(key_counts))

# Persist the full frequency table for offline review.
key_counts.reset_index().to_csv('keys.csv')

# Preview the 50 most frequent field names. This is kept as the cell's last
# expression so that it is actually rendered (a bare expression in the middle
# of a cell is evaluated and then discarded).
key_counts.head(50)

159


### Extract Data
Using the openFDA JSON data set, extract the relevant text fields from each application, where they exist. Accumulate the extracted rows in a dataframe, then save the dataframe.

In [5]:
# Columns of the extracted dataset, in output order.
HEADERS = [
    # Identifiers (extract_data_point reads these from the record's nested 'openfda' object).
    'brand_name',
    'application_number',
    # Label text sections (read from the top level of each record).
    'adverse_reactions',
    'indications_and_usage',
    'contraindications',
    'warnings_and_cautions',
    'warnings',
    'precautions',
    'pharmacokinetics',
    'purpose',
    'clinical_pharmacology',
    'active_ingredient',
    'stop_use',
    'boxed_warning',
    'pharmacodynamics',
    'pharmacogenomics',
]


In [6]:
def extract_data_point(data, headers=None):
    """Build a DataFrame with one row per record in ``data['results']``.

    For every requested header the first element of the record's value is
    stored; when the field is missing, empty, or not indexable the cell is
    set to ``None``.

    Parameters
    ----------
    data : dict
        Parsed JSON payload containing a ``'results'`` iterable of records.
    headers : sequence of str, optional
        Fields to extract, in column order. Defaults to the module-level
        ``HEADERS`` list.

    Returns
    -------
    pandas.DataFrame
        Frame whose columns are ``headers`` and whose index is 0..n-1.
    """
    if headers is None:
        headers = HEADERS

    # These two fields are stored under the record's nested 'openfda' object;
    # every other field is read from the top level of the record.
    openfda_fields = ('brand_name', 'application_number')

    records = []
    for entry in data['results']:
        record = {}
        for header in headers:
            try:
                container = entry['openfda'] if header in openfda_fields else entry
                record[header] = container[header][0]
            except (KeyError, IndexError, TypeError):
                # Missing field, empty list, or non-indexable value.
                record[header] = None
        records.append(record)

    # Build the frame once from all rows; concatenating a one-row frame per
    # record re-copies the whole frame each time and scales quadratically.
    return pd.DataFrame(records, columns=list(headers))

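# Reference: brand_name and application_number are nested under each record's
# "openfda" object, e.g. "openfda": {"application_number": [...], ...}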

In [7]:
# Extract the selected label fields from every drug-label JSON file and
# combine the per-file results into a single frame.
file_numbers = ['0001','0002','0003','0004','0005','0006','0007','0008','0009','0010','0011']
label_frames = []

for number in tqdm(file_numbers):
    # The context manager closes each file right after parsing, so no file
    # handles are left open once the loop finishes.
    with open(f'../data/drug-label-{number}-of-0011.json','r') as f:
        data = json.load(f)
    label_frames.append(extract_data_point(data))

# Concatenate once at the end instead of re-copying the growing frame on
# every iteration; the index is renumbered 0..n-1 across all files.
fda_df = pd.concat(label_frames, axis=0).reset_index(drop=True)

fda_df

100%|██████████| 11/11 [26:09<00:00, 142.71s/it]


Unnamed: 0,brand_name,application_number,adverse_reactions,indications_and_usage,contraindications,warnings_and_cautions,warnings,precautions,pharmacokinetics,purpose,clinical_pharmacology,active_ingredient,stop_use,boxed_warning,pharmacodynamics,pharmacogenomics
0,AMOXICILLIN AND CLAVULANATE POTASSIUM,ANDA065117,ADVERSE REACTIONS SECTION The following are di...,INDICATIONS & USAGE SECTION To reduce the deve...,CONTRAINDICATIONS SECTION Amoxicillinfor oral ...,WARNINGS AND PRECAUTIONS SECTION 5.1 Anaphylac...,,,,,CLINICAL PHARMACOLOGY SECTION 12.1 Mechanism o...,,,,,
1,UNDA 312,,,Uses For the relief of symptoms associated wit...,,,Warnings Sore throat warning: Severe or persis...,,,Uses For the relief of symptoms associated wit...,,Active ingredients Each drop contains: Angelic...,Stop use and ask a doctor if Cough persists fo...,,,
2,SUN PROTECT LIP BALM SPF 30,part352,,Uses Helps prevent sunburn. If used as directe...,,,Warnings For external use only. Do not use on ...,,,Purpose Sunscreen,,Drug Facts Active ingredients Non Nano Zinc Ox...,,,,
3,LOSARTAN POTASSIUM AND HYDROCHLOROTHIAZIDE,ANDA078385,,,,,,,,,,,,,,
4,Potassium Phosphates,NDA212832,6 ADVERSE REACTIONS The following clinically s...,1 INDICATIONS AND USAGE Potassium Phosphates I...,4 CONTRAINDICATIONS Potassium Phosphates Injec...,5 WARNINGS AND PRECAUTIONS Serious Cardiac Adv...,,,12.3 Pharmacokinetics Distribution Approximate...,,12 CLINICAL PHARMACOLOGY 12.1 Mechanism of Act...,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215910,Carbidopa and Levodopa,ANDA214092,ADVERSE REACTIONS The most common adverse reac...,INDICATIONS AND USAGE Carbidopa and levodopa t...,CONTRAINDICATIONS Nonselective monoamine oxida...,,WARNINGS When carbidopa and levodopa tablets a...,"PRECAUTIONS General As with levodopa, periodic...",Pharmacokinetics Carbidopa reduces the amount ...,,CLINICAL PHARMACOLOGY Mechanism of Action Park...,,,,Pharmacodynamics When levodopa is administered...,
215911,REFRESH Optive Mega-3,part349,,"Uses For the temporary relief of burning, irri...",,,Warnings For external use only. To avoid conta...,,,Purpose Eye lubricant Eye lubricant Eye lubricant,,Active ingredients Carboxymethylcellulose sodi...,Stop use and ask a doctor if you experience ey...,,,
215912,Creon,BLA020725,6 ADVERSE REACTIONS The most serious adverse r...,1 INDICATIONS AND USAGE CREON ® is indicated f...,4 CONTRAINDICATIONS None. None ( 4 ),5 WARNINGS AND PRECAUTIONS Fibrosing colonopat...,,,12.3 Pharmacokinetics The pancreatic enzymes i...,,12 CLINICAL PHARMACOLOGY 12.1 Mechanism of Act...,,,,,
215913,Losartan Potassium and Hydrochlorothiazide,ANDA078245,6 ADVERSE REACTIONS Most common adverse reacti...,1 INDICATIONS AND USAGE Losartan potassium and...,4 CONTRAINDICATIONS Losartan potassium and hyd...,5 WARNINGS AND PRECAUTIONS Hypotension: Correc...,,,12.3 Pharmacokinetics Losartan Potassium Absor...,,12 CLINICAL PHARMACOLOGY 12.1 Mechanism of Act...,,,WARNING: FETAL TOXICITY When pregnancy is dete...,12.2 Pharmacodynamics Losartan Potassium Losar...,


In [8]:
# fda_df.to_excel('../data/openfda-2.xlsx',sheet_name='02052024')

### Scratch

In [3]:
# Scratch: earlier extraction attempt that threads the accumulator frame
# through `extract_data`.
# NOTE(review): `extract_data` is not defined anywhere in the visible notebook,
# so this cell fails on a fresh kernel unless that helper is restored.
# The original cell contained this exact loop twice; the second copy reset
# `fda_df` and repeated the same work, so a single pass is kept here
# (assumes `extract_data` has no side effects beyond its return value — confirm).
file_numbers = ['0001','0002','0003','0004','0005','0006','0007','0008','0009','0010','0011']
fda_df = pd.DataFrame(columns=['id','adverse_reactions','indications_and_usage','contraindications','warnings_and_cautions','brand_name'])

for number in tqdm(file_numbers):
    # Close each file as soon as it has been parsed.
    with open(f'../data/drug-label-{number}-of-0011.json','r') as f:
        data = json.load(f)
    fda_df = extract_data(data,fda_df)



In [6]:
# List the top-level field names of the first record in the most recently
# loaded file. Relies on `data` left in the kernel by an earlier loading loop.
data['results'][0].keys()

# Additional fields of potential interest: nonclinical_toxicology, clinical pharmacology?



In [12]:
# Show the nested 'openfda' object of the first record; it holds identifiers
# such as brand name and rxcui. Relies on `data` from an earlier loading loop.
data['results'][0]['openfda'] # brand name, rxcui

{'application_number': ['ANDA065117'],
 'brand_name': ['AMOXICILLIN AND CLAVULANATE POTASSIUM'],
 'generic_name': ['AMOXICILLIN AND CLAVULANATE POTASSIUM'],
 'manufacturer_name': ['DIRECT RX'],
 'product_ndc': ['61919-019', '61919-401'],
 'product_type': ['HUMAN PRESCRIPTION DRUG'],
 'route': ['ORAL'],
 'substance_name': ['AMOXICILLIN', 'CLAVULANATE POTASSIUM'],
 'rxcui': ['308189', '617296'],
 'spl_id': ['b96b2e24-4192-31e2-e053-2a95a90a2356'],
 'spl_set_id': ['0173e9de-a995-4386-bb65-8fc2bbf347f9'],
 'package_ndc': ['61919-401-32', '61919-019-20'],
 'original_packager_product_ndc': ['65862-071', '66685-1002'],
 'unii': ['804826J2HU', 'Q42OMW3AT8']}