This notebook processes the extracted JSON files downloaded from: https://open.fda.gov/apis/drug/label/download/

This script assumes the .json files are placed under `data/OpenFDAJson`. It walks that directory and, for each `.json` file, maps each drug label record {SPL ID, SET ID} -> NDA application numbers, writing one row per mapping to `spl_id_label_nda.csv`.

In [1]:
import csv
import json
import os

# Directory (searched recursively) that holds the extracted openFDA drug-label .json files.
DATA_DIR = 'data/OpenFDAJson'

In [2]:
# Column names of the output CSV; process_data() writes its row values in this same order.
HEADER_ROW = ['effective_time', 'spl_id', 'spl_set_id', 'generic_name', 'brand_name', 'manufacturer_name', 'nda']

def process_data(csvwriter, file_path):
    """Write one CSV row per (drug label, NDA application number) pair.

    Reads the JSON document at ``file_path``, iterates over its ``results``
    list and, for every record whose ``openfda.application_number`` list
    contains entries starting with ``'NDA'``, writes a row of
    effective_time, id, set_id, generic names, brand names, manufacturer
    names and the NDA number. Multi-valued name fields are joined with ``'|'``.

    Records without an ``openfda`` section, without application numbers, or
    with only non-NDA numbers (e.g. ``ANDA...``) produce no rows. A name
    field that is absent from ``openfda`` yields an empty cell instead of
    aborting the export.

    Args:
        csvwriter: Object with a ``writerow(list)`` method, e.g. ``csv.writer``.
        file_path: Path to a JSON file with a top-level ``results`` list.

    Raises:
        KeyError: If the document has no ``results`` key, or a record with an
            NDA number lacks ``effective_time``, ``id`` or ``set_id``.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's encoding.
    with open(file_path, encoding='utf-8') as f:
        recs = json.load(f)

    for rec in recs['results']:
        openfda = rec.get('openfda', {})
        ndas = [appln for appln in openfda.get('application_number', [])
                if appln.startswith('NDA')]
        if not ndas:
            continue

        # Columns shared by every NDA row of this record; build them once.
        common = [rec['effective_time'],
                  rec['id'],
                  rec['set_id'],
                  '|'.join(openfda.get('generic_name', [])),
                  '|'.join(openfda.get('brand_name', [])),
                  '|'.join(openfda.get('manufacturer_name', []))]
        for nda in ndas:
            csvwriter.writerow(common + [nda])

In [3]:
# Build spl_id_label_nda.csv from every .json file found under DATA_DIR.
# newline='' is required by the csv module so that it alone controls line
# endings (otherwise Windows output gets an extra '\r' per row); the file is
# only written, so plain 'w' is enough; UTF-8 keeps non-ASCII names writable
# regardless of the platform's locale encoding.
with open('spl_id_label_nda.csv', 'w', newline='', encoding='utf-8') as f:
    csvwriter = csv.writer(f, delimiter=',')
    # Write header row
    csvwriter.writerow(HEADER_ROW)
    # Process each file and write data
    for dir_name, _, file_names in os.walk(DATA_DIR):
        for file_name in file_names:
            if file_name.endswith('.json'):
                print('Processing: ', file_name)
                process_data(csvwriter, os.path.join(dir_name, file_name))

Processing:  drug-label-0009-of-0010.json
Processing:  drug-label-0008-of-0010.json
Processing:  drug-label-0010-of-0010.json
Processing:  drug-label-0005-of-0010.json
Processing:  drug-label-0002-of-0010.json
Processing:  drug-label-0003-of-0010.json
Processing:  drug-label-0004-of-0010.json
Processing:  drug-label-0006-of-0010.json
Processing:  drug-label-0001-of-0010.json
Processing:  drug-label-0007-of-0010.json
