In [1]:
import requests

import numpy as np
import pandas as pd

import datetime as dt

import matplotlib.pyplot as plt
import seaborn as sns

from ast import literal_eval

pd.set_option('display.max_columns', None) # display all columns in DF
pd.set_option('display.max_rows', None) # display all columns in DF

# Data from openFDA NDC

Load data

In [12]:
df_openFDA_NDC = pd.read_csv('raw_openFDA_NDC_data.csv', sep = '~')

In [13]:
df_openFDA_NDC.head(2)

Unnamed: 0,is_original_packager,name,strength,product_ndc,generic_name,labeler_name,brand_name,finished,packaging,listing_expiration_date,openfda,marketing_category,dosage_form,spl_id,product_type,marketing_start_date,product_id,application_number,brand_name_base,marketing_end_date,active_ingredients,route,pharm_class,brand_name_suffix,dea_schedule,labeler_code,product_code,unique_id
0,True,,,72835-601,Mometasone Furoate and Dimethicone,"V2 Pharma, LLC",Quinixil,True,"[{'package_ndc': '72835-601-02', 'description'...",20231231.0,"{'manufacturer_name': ['V2 Pharma, LLC'], 'rxc...",ANDA,KIT,d371387b-e6d5-4e49-a489-57ad96af442a,HUMAN PRESCRIPTION DRUG,20190603,72835-601_d371387b-e6d5-4e49-a489-57ad96af442a,ANDA076216,Quinixil,,,,,,,72835,601,mometasone furoate and dimethicone-kit-nan-nan
1,True,DESONIDE,.5 mg/g,0472-0804,desonide,"Actavis Pharma, Inc.",Desonide,True,"[{'package_ndc': '0472-0804-15', 'description'...",20221231.0,"{'manufacturer_name': ['Actavis Pharma, Inc.']...",NDA AUTHORIZED GENERIC,CREAM,8d3633e6-80d9-4779-862a-b608180e202a,HUMAN PRESCRIPTION DRUG,20130801,0472-0804_8d3633e6-80d9-4779-862a-b608180e202a,NDA019048,Desonide,,"[{'name': 'DESONIDE', 'strength': '.5 mg/g'}]",TOPICAL,['Corticosteroid Hormone Receptor Agonists [Mo...,,,472,804,desonide-cream-topical-.5 mg/g


Clear data and remove duplicates

In [14]:
df_openFDA_NDC = df_openFDA_NDC['name'].str.lower()
df_openFDA_NDC = pd.DataFrame(df_openFDA_NDC.drop_duplicates())
df_openFDA_NDC = df_openFDA_NDC.dropna()

In [15]:
print(len(df_openFDA_NDC))

1891


# Data from NIH

In [16]:
count = 1

for index, row in df_openFDA_NDC.iterrows(): # iterating through drugs
    
    drug_name = row[0]
    
    try:
        start_url = 'https://rxnav.nlm.nih.gov/REST/rxclass/class/byDrugName.json?drugName='
        middle_url = str(drug_name)

        r = requests.get(start_url + middle_url, timeout = 20) # requesting meta data
        data_raw = r.json()
        
        data = data_raw['rxclassDrugInfoList']

        df_temp = pd.json_normalize(data['rxclassDrugInfo'])
        
        if count == 1:
            df_NIH = df_temp
        else:
            df_NIH = pd.concat([df_NIH, df_temp])
        
    except:
        continue

# Downloading Data

In [17]:
df_NIH.to_csv('nih_data_name.csv', sep = '~', index = False)

In [18]:
df_NIH

Unnamed: 0,rela,relaSource,minConcept.rxcui,minConcept.name,minConcept.tty,rxclassMinConceptItem.classId,rxclassMinConceptItem.className,rxclassMinConceptItem.classType,rxclassMinConceptItem.classUrl
0,isa_structure,SNOMEDCT,1865953,crisaborole,IN,1149498006,Ether structure-containing product,STRUCT,http://snomed.info/id/1149498006
1,isa_disposition,SNOMEDCT,1865953,crisaborole,IN,407314008,Enzyme inhibitor-containing product,DISPOS,http://snomed.info/id/407314008
2,isa_disposition,SNOMEDCT,1865953,crisaborole,IN,714682003,Phosphodiesterase 4 inhibitor-containing product,DISPOS,http://snomed.info/id/714682003
3,isa_structure,SNOMEDCT,1865953,crisaborole,IN,768700009,Nitrogen and/or nitrogen compound-containing p...,STRUCT,http://snomed.info/id/768700009
4,has_ingredient,MEDRT,1865953,crisaborole,IN,D001896,Boron Compounds,CHEM,
5,may_treat,MEDRT,1865953,crisaborole,IN,D003876,"Dermatitis, Atopic",DISEASE,
6,ci_with,MEDRT,1865953,crisaborole,IN,D004342,Drug Hypersensitivity,DISEASE,
7,has_ingredient,MEDRT,1865953,crisaborole,IN,D019086,"Bridged Bicyclo Compounds, Heterocyclic",CHEM,
8,,ATC,1865953,crisaborole,IN,D11AH,"Agents for dermatitis, excluding corticosteroids",ATC1-4,
9,has_pe,MEDRT,1865953,crisaborole,IN,N0000009925,Unknown Physiological Effect,PE,


# Data Overview

Groupping by disease and drug class

In [50]:
len(df_NIH)

17

In [None]:
df_NIH_gro_class = pd.DataFrame(df_NIH.groupby(['rxclassMinConceptItem.classType', 'rxclassMinConceptItem.className'])['minConcept.name'].count())
df_NIH_gro_class = df_NIH_gro_class.sort_values(by = ['minConcept.name'], ascending=False)
df_NIH_gro_class.head(20)

# Selecting a specific class (NOT IN USE)

In [None]:
count = 0

for index, row in df_openFDA_NDC.iterrows(): # iterating through drugs
    
    try:
        count = count + 1
        
        drug_name = row[0]

        start_url = 'https://rxnav.nlm.nih.gov/REST/rxclass/class/byDrugName.json?drugName='
        middle_url = str(drug_name)

        r = requests.get(start_url + middle_url, timeout = 20) # requesting meta data
        data_raw = r.json()
        
        data = data_raw['rxclassDrugInfoList']

        df_temp = pd.json_normalize(data['rxclassDrugInfo'])

        df_temp_epc = df_temp.loc[(df_temp['rela'] == 'has_epc') & (df_temp['relaSource'] == 'DAILYMED')]
        df_temp_dis = df_temp.loc[(df_temp['rela'] == 'may_treat') & (df_temp['relaSource'] == 'MEDRT')]
        
        df_temp = pd.concat([df_temp_epc, df_temp_dis], ignore_index=True)
        
        if count == 1:
            df_NIH = df_temp
        else:
            df_NIH = pd.concat([df_NIH, df_temp])
        
    except:
        continue