In [1]:
# Import dependencies

import requests
import json
import pandas as pd
from pprint import pprint

In [2]:
# Open the tab-separated MedicationRequest.keep.code-class text file for reading.
# The handle is kept in `file` because a later cell reads the lines from it.

file = open("MedicationRequest.keep.code-class", mode="r")

In [3]:
# Accumulator for the rows parsed from the input file (filled by a later cell)
rxcodes = list()

In [4]:
# Read every line of the input file into memory, then close the handle.
# No later cell in this notebook reads from `file`, so it is released here
# (even if reading fails) instead of staying open for the life of the kernel.

try:
    lines = file.readlines()
finally:
    file.close()

In [6]:
# Split every line on tabs and collect the resulting field lists in rxcodes

rxcodes.extend(line.split('\t') for line in lines)

In [7]:
# Remove the first 'not found' code
# list.pop(0) drops the first row in place and returns it, so the removed row is
# what the cell displays. Re-running this cell removes one more row each time.

rxcodes.pop(0)

['100272', 'not-found\n']

In [8]:
# Remove drug class from the list
# rxcodes has only drug codes
# Truncating with `del r[1:]` (instead of `r.pop(1)`) keeps this cell safe to
# re-run and does not raise IndexError on a row that has no second field.

for r in rxcodes:
    del r[1:]

In [10]:
# Build a DataFrame from the rxcodes rows (one DataFrame row per list entry)

df = pd.DataFrame(data=rxcodes)

In [12]:
# Keep each distinct value of column 0 (the rxcui codes) exactly once

query_code = df.loc[:, 0].unique()

In [13]:
# Number of distinct codes held in query_code
len(query_code)

712

In [14]:
# Base query URL
# The rxcui code is appended directly after the trailing "rxcui=" when each
# request URL is built.

url_rxclass = "https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.json?rxcui="

In [27]:
# Query the RxClass endpoint once per unique rxcui code and collect:
#   d          -- {concept name: concept rxcui} for every minConcept returned
#   drug_class -- first word of the first returned concept name, one entry per code
#   med_parent -- {queried rxcui code: that first word}
d = {}
drug_class = []
med_parent = {}
for code in query_code:
    # Complete query URL
    query_url = url_rxclass + code
    # A timeout keeps one stalled connection from hanging the whole loop.
    response = requests.get(query_url, timeout=30)
    data = response.json()
    try:
        drug_info = data['rxclassDrugInfoList']['rxclassDrugInfo']

        for position, entry in enumerate(drug_info):
            concept = entry['minConcept']
            d[concept['name']] = concept['rxcui']
            if position == 0:
                # The parent drug comes from the first entry only, so it is
                # recorded once per code rather than once per entry; the cell
                # that consumes drug_class keeps unique values only.
                parent_drug = concept['name'].split()
                drug_class.append(parent_drug[0])
                med_parent[code] = parent_drug[0]  # format {'drug rxcui code': 'drug'}
    except (KeyError, IndexError, TypeError):
        # Best effort: a response without the expected class information is
        # skipped. Any other kind of error now surfaces instead of being hidden.
        continue

In [16]:
# Dictionary containing drug name and rxcui code
# (keys are concept names, values are rxcui strings; shown in full as cell output)

d

{'Calcium Chloride': '1901',
 'Sodium Acetate': '56443',
 'sodium citrate': '56466',
 'Magnesium Chloride': '6579',
 'Potassium Chloride': '8591',
 'Sodium Chloride': '9863',
 'Epoprostenol 1.5 MG Injection [Veletri]': '1009216',
 'Epoprostenol 1.5 MG Injection': '562502',
 'Epoprostenol': '8814',
 'Lidocaine Hydrochloride 10 MG/ML Injectable Solution': '1010033',
 'Lidocaine Hydrochloride 10 MG/ML Injectable Solution [Xylocaine]': '1010035',
 'Lidocaine Hydrochloride': '142440',
 'Lidocaine': '6387',
 'Lidocaine Hydrochloride 20 MG/ML Injectable Solution': '1010671',
 'Lidocaine Hydrochloride 20 MG/ML Injectable Solution [Xylocaine]': '1010673',
 'Lidocaine Hydrochloride 20 MG/ML Mucous Membrane Topical Solution': '1010739',
 'Epinephrine 0.01 MG/ML / Lidocaine Hydrochloride 10 MG/ML Injectable Solution': '1010751',
 'Epinephrine 0.01 MG/ML / Lidocaine Hydrochloride 10 MG/ML Injectable Solution [Xylocaine with Epinephrine]': '1010755',
 'Epinephrine': '3992',
 'Lidocaine Hydrochloride

In [17]:
# Turn the name -> rxcui dictionary into a two-column frame:
# 'index' holds the dictionary keys and 'rxcui' holds the values

drug_class_code = pd.Series(d).to_frame('rxcui').reset_index()

drug_class_code.head()

Unnamed: 0,index,rxcui
0,Calcium Chloride,1901
1,Sodium Acetate,56443
2,sodium citrate,56466
3,Magnesium Chloride,6579
4,Potassium Chloride,8591


In [18]:
# Build a one-column frame ('class') that lists each distinct entry of drug_class once

drug_class_list = pd.DataFrame(drug_class)

drug_class_unique = pd.DataFrame(drug_class_list[0].unique()).rename(columns={0: 'class'})

drug_class_unique.head()

Unnamed: 0,class
0,Calcium
1,Epoprostenol
2,Lidocaine
3,Epinephrine
4,Triamcinolone


In [19]:
# Print "<class> <rxcui>" for every drug_class_code row whose name equals a parent drug class

for drug_name, rxcui in zip(drug_class_code['index'], drug_class_code['rxcui']):
    for class_name in drug_class_unique['class']:
        if drug_name == class_name:
            print(f"{class_name} {rxcui}")
    

Epoprostenol 8814
Lidocaine 6387
Epinephrine 3992
Triamcinolone 10759
Diphenhydramine 3498
Famotidine 4278
Acetaminophen 161
Doxazosin 49276
Lisinopril 29046
Simvastatin 36567
Amiodarone 703
Ondansetron 26225
Oxycodone 7804
gabapentin 25480
Oxymetazoline 7812
Mupirocin 42372
Bupivacaine 1815
Glucose 4850
Hydrocodone 5489
valacyclovir 73645
Roflumilast 1091836
Phenazopyridine 8120
Linagliptin 1100699
rivaroxaban 1114195
Docusate 82003
levothyroxine 10582
Ticagrelor 1116632
Cefuroxime 2194
Bacitracin 1291
Levetiracetam 114477
COLLAGENASE 58939
Povidone-Iodine 8611
Albuterol 435
Ipratropium 7213
Atropine 1223
Naloxone 7242
Labetalol 6185
Phenylephrine 8163
Meperidine 6754
formoterol 25255
Lactulose 6218
Hydrocortisone 5492
Cyclopentolate 3001
mirabegron 1300786
Thiamine 10454
Cholecalciferol 2418
heparin 5224
apixaban 1364430
Alprazolam 596
Promethazine 8745
vilanterol 1424884
dorzolamide 60207
Carboplatin 40048
meloxicam 41493
ropinirole 72302
remifentanil 73032
glimepiride 25789
quetiap

In [24]:
# Collect, for every name in drug_class_code that is itself a parent drug class,
# the class name (med_class) and its rxcui code (superclass_code), in row order.
# A vectorised membership test replaces the nested iterrows() loops; the result is
# the same because drug_class_unique['class'] lists each class only once.
is_parent_class = drug_class_code['index'].isin(drug_class_unique['class'])
med_class = drug_class_code.loc[is_parent_class, 'index'].tolist()  # drug super class
superclass_code = drug_class_code.loc[is_parent_class, 'rxcui'].tolist()  # drug super class rxcui code

In [29]:
# Turn the rxcui -> parent drug dictionary into a two-column frame:
# 'index' holds the queried rxcui codes and 'class' holds the parent drug

med_parent_df = pd.Series(med_parent).to_frame('class').reset_index()

med_parent_df.head()

Unnamed: 0,index,class
0,1007530,Calcium
1,1009216,Epoprostenol
2,1010035,Lidocaine
3,1010673,Lidocaine
4,1010739,Lidocaine


In [36]:
# Pair each drug super class with its rxcui code: {'class name': 'class rxcui'}.
# zip() walks both lists in step, replacing the hand-maintained index counter.
medcode_complete = dict(zip(med_class, superclass_code))

In [37]:
# Show the super class name -> rxcui code mapping
medcode_complete

{'Epoprostenol': '8814',
 'Lidocaine': '6387',
 'Epinephrine': '3992',
 'Triamcinolone': '10759',
 'Diphenhydramine': '3498',
 'Famotidine': '4278',
 'Acetaminophen': '161',
 'Doxazosin': '49276',
 'Lisinopril': '29046',
 'Simvastatin': '36567',
 'Amiodarone': '703',
 'Ondansetron': '26225',
 'Oxycodone': '7804',
 'gabapentin': '25480',
 'Oxymetazoline': '7812',
 'Mupirocin': '42372',
 'Bupivacaine': '1815',
 'Glucose': '4850',
 'Hydrocodone': '5489',
 'valacyclovir': '73645',
 'Roflumilast': '1091836',
 'Phenazopyridine': '8120',
 'Linagliptin': '1100699',
 'rivaroxaban': '1114195',
 'Docusate': '82003',
 'levothyroxine': '10582',
 'Ticagrelor': '1116632',
 'Cefuroxime': '2194',
 'Bacitracin': '1291',
 'Levetiracetam': '114477',
 'COLLAGENASE': '58939',
 'Povidone-Iodine': '8611',
 'Albuterol': '435',
 'Ipratropium': '7213',
 'Atropine': '1223',
 'Naloxone': '7242',
 'Labetalol': '6185',
 'Phenylephrine': '8163',
 'Meperidine': '6754',
 'formoterol': '25255',
 'Lactulose': '6218',
 'H

In [69]:
# Turn the class -> class rxcui dictionary into a two-column frame:
# 'index' holds the class names and 'classcode' holds their rxcui codes

medcode_complete_df = pd.Series(medcode_complete).to_frame('classcode').reset_index()

medcode_complete_df.head()

Unnamed: 0,index,classcode
0,Epoprostenol,8814
1,Lidocaine,6387
2,Epinephrine,3992
3,Triamcinolone,10759
4,Diphenhydramine,3498


In [80]:
# Print "<drug rxcui> <parent drug> <parent rxcui>" for each drug whose parent drug has a class code
for rxcui, parent in zip(med_parent_df['index'], med_parent_df['class']):
    for class_name, classcode in zip(medcode_complete_df['index'], medcode_complete_df['classcode']):
        if parent == class_name:
            print(f"{rxcui} {parent} {classcode}")

1009216 Epoprostenol 8814
1010035 Lidocaine 6387
1010673 Lidocaine 6387
1010739 Lidocaine 6387
1010755 Epinephrine 3992
1011703 Lidocaine 6387
1011705 Lidocaine 6387
1011852 Lidocaine 6387
1014314 Triamcinolone 10759
1020477 Diphenhydramine 3498
104095 Famotidine 4278
104369 Doxazosin 49276
104378 Lisinopril 29046
104490 Simvastatin 36567
104491 Simvastatin 36567
104895 Ondansetron 26225
104896 Ondansetron 26225
104897 Ondansetron 26225
1049214 Acetaminophen 161
1049221 Acetaminophen 161
1049225 Acetaminophen 161
1049613 Oxycodone 7804
1049621 Oxycodone 7804
1049909 Diphenhydramine 3498
105028 gabapentin 25480
105029 gabapentin 25480
105030 gabapentin 25480
106101 Oxymetazoline 7812
106346 Mupirocin 42372
107606 Bupivacaine 1815
10761 Triamcinolone 10759
1085636 Triamcinolone 10759
108780 valacyclovir 73645
1091843 Roflumilast 1091836
1094104 Phenazopyridine 8120
1094107 Phenazopyridine 8120
1100706 Linagliptin 1100699
1114202 rivaroxaban 1114195
1115005 Docusate 82003
1115267 levothyr

801731 alvimopan 480639
808921 Fosfomycin 4550
8164 Dextromethorphan 3289
82063 Acetaminophen 161
824194 Amoxicillin 723
824586 Psyllium 8928
82808 cefepime 20481
828322 cyclobenzaprine 21949
828350 cyclobenzaprine 21949
831103 Diltiazem 3443
833219 Diltiazem 3443
833528 Amiodarone 703
834023 Methylprednisolone 6902
834239 febuxostat 73689
834350 Amiodarone 703
835603 Tramadol 10689
836284 Menthol 6750
836368 Ipratropium 7213
8410 alteplase 8410
847627 Lactate 114202
847628 Lactate 114202
847630 Lactate 114202
84990 gadobutrol 84990
854232 Enoxaparin 67108
854236 Enoxaparin 67108
854239 Enoxaparin 67108
854242 Enoxaparin 67108
854247 Enoxaparin 67108
854249 Enoxaparin 67108
854253 Enoxaparin 67108
854859 dronedarone 233698
855288 Warfarin 11289
855296 Warfarin 11289
855302 Warfarin 11289
855312 Warfarin 11289
855318 Warfarin 11289
855324 Warfarin 11289
855332 Warfarin 11289
855344 Warfarin 11289
855635 Diclofenac 3355
856666 rifaximin 35619
856834 Amitriptyline 704
856845 Amitriptyline

In [103]:
# Build [drug rxcui, parent drug, parent rxcui] rows for every drug whose parent
# drug has a code in medcode_complete_df.
# A one-off lookup table replaces the nested iterrows() scan. Rows keep the order
# of med_parent_df, and a class listed several times would still yield one row
# per listing, exactly as the nested loops did.
codes_by_class = {}
for class_name, classcode in zip(medcode_complete_df['index'], medcode_complete_df['classcode']):
    codes_by_class.setdefault(class_name, []).append(classcode)

n_list = [
    [rxcui, parent, classcode]
    for rxcui, parent in zip(med_parent_df['index'], med_parent_df['class'])
    for classcode in codes_by_class.get(parent, ())
]

In [104]:
# Show the [drug rxcui, parent drug, parent rxcui] rows
n_list

[['1009216', 'Epoprostenol', '8814'],
 ['1010035', 'Lidocaine', '6387'],
 ['1010673', 'Lidocaine', '6387'],
 ['1010739', 'Lidocaine', '6387'],
 ['1010755', 'Epinephrine', '3992'],
 ['1011703', 'Lidocaine', '6387'],
 ['1011705', 'Lidocaine', '6387'],
 ['1011852', 'Lidocaine', '6387'],
 ['1014314', 'Triamcinolone', '10759'],
 ['1020477', 'Diphenhydramine', '3498'],
 ['104095', 'Famotidine', '4278'],
 ['104369', 'Doxazosin', '49276'],
 ['104378', 'Lisinopril', '29046'],
 ['104490', 'Simvastatin', '36567'],
 ['104491', 'Simvastatin', '36567'],
 ['104895', 'Ondansetron', '26225'],
 ['104896', 'Ondansetron', '26225'],
 ['104897', 'Ondansetron', '26225'],
 ['1049214', 'Acetaminophen', '161'],
 ['1049221', 'Acetaminophen', '161'],
 ['1049225', 'Acetaminophen', '161'],
 ['1049613', 'Oxycodone', '7804'],
 ['1049621', 'Oxycodone', '7804'],
 ['1049909', 'Diphenhydramine', '3498'],
 ['105028', 'gabapentin', '25480'],
 ['105029', 'gabapentin', '25480'],
 ['105030', 'gabapentin', '25480'],
 ['106101'

In [89]:
# Re-parse the raw lines: drop the newline characters, then split on tabs
main_file = [line.strip('\n').split('\t') for line in lines]

In [90]:
# Show the parsed [rxcui, class] rows
main_file

[['100272', 'not-found'],
 ['1007530', 'Calcium'],
 ['1007530', 'Electrolyte solutions'],
 ['1007530', 'Acidifiers'],
 ['1007530', 'Electrolyte solutions'],
 ['1007530', 'Anticoagulants'],
 ['1007530', 'Salt solutions'],
 ['1007530', 'Magnesium'],
 ['1007530', 'Electrolyte solutions'],
 ['1007530', 'Potassium'],
 ['1007530', 'Electrolyte solutions'],
 ['1007530', 'Sodium'],
 ['1007530', 'Salt solutions'],
 ['1007530', 'Electrolyte solutions'],
 ['1009216', 'Antihypertensive Agents'],
 ['1009216', 'Platelet Aggregation Inhibitors'],
 ['1009216', 'Platelet aggregation inhibitors excl. heparin'],
 ['1010035', 'Anesthetics, Local'],
 ['1010035', 'Anti-Arrhythmia Agents'],
 ['1010035', 'Voltage-Gated Sodium Channel Blockers'],
 ['1010035', 'Anesthetics, Local'],
 ['1010035', 'Anti-Arrhythmia Agents'],
 ['1010035', 'Voltage-Gated Sodium Channel Blockers'],
 ['1010035', 'Antiarrhythmics, class Ib'],
 ['1010035', 'Local anesthetics'],
 ['1010035', 'Anesthetics for topical use'],
 ['1010035', '

In [91]:
# Remove the first 'not found' code
# list.pop(0) drops the first row in place and returns it, so the removed row is
# what the cell displays. Re-running this cell removes one more row each time.

main_file.pop(0)

['100272', 'not-found']

In [106]:
# Join every [rxcui, class] row of main_file with its n_list entry to produce
# [drug rxcui, parent rxcui, parent drug, class] rows.
# n_list is indexed by rxcui once, so each main_file row is matched by lookup
# instead of by rescanning n_list; row order and content are unchanged.
entries_by_rxcui = {}
for n in n_list:
    entries_by_rxcui.setdefault(n[0], []).append(n)

new_list = [
    [m[0], n[2], n[1], m[1]]
    for m in main_file
    for n in entries_by_rxcui.get(m[0], ())
]

In [107]:
# Show the joined [drug rxcui, parent rxcui, parent drug, class] rows
new_list

[['1009216', '8814', 'Epoprostenol', 'Antihypertensive Agents'],
 ['1009216', '8814', 'Epoprostenol', 'Platelet Aggregation Inhibitors'],
 ['1009216',
  '8814',
  'Epoprostenol',
  'Platelet aggregation inhibitors excl. heparin'],
 ['1010035', '6387', 'Lidocaine', 'Anesthetics, Local'],
 ['1010035', '6387', 'Lidocaine', 'Anti-Arrhythmia Agents'],
 ['1010035', '6387', 'Lidocaine', 'Voltage-Gated Sodium Channel Blockers'],
 ['1010035', '6387', 'Lidocaine', 'Anesthetics, Local'],
 ['1010035', '6387', 'Lidocaine', 'Anti-Arrhythmia Agents'],
 ['1010035', '6387', 'Lidocaine', 'Voltage-Gated Sodium Channel Blockers'],
 ['1010035', '6387', 'Lidocaine', 'Antiarrhythmics, class Ib'],
 ['1010035', '6387', 'Lidocaine', 'Local anesthetics'],
 ['1010035', '6387', 'Lidocaine', 'Anesthetics for topical use'],
 ['1010035', '6387', 'Lidocaine', 'Amides'],
 ['1010035', '6387', 'Lidocaine', 'Anesthetics, local'],
 ['1010035', '6387', 'Lidocaine', 'Local anesthetics'],
 ['1010035', '6387', 'Lidocaine', 'An

In [111]:
 # Final table built from new_list, one row per entry, with named columns.
 # NOTE(review): this line starts with a stray leading space; presumably the
 # notebook tolerates it, but it would be an IndentationError in a plain Python
 # script -- confirm and remove.
 medRequest_code = pd.DataFrame(new_list,columns=['drug rxcui code','class code','drug','class'])

In [112]:
# Preview the first rows of the final table
medRequest_code.head()

Unnamed: 0,drug rxcui code,class code,drug,class
0,1009216,8814,Epoprostenol,Antihypertensive Agents
1,1009216,8814,Epoprostenol,Platelet Aggregation Inhibitors
2,1009216,8814,Epoprostenol,Platelet aggregation inhibitors excl. heparin
3,1010035,6387,Lidocaine,"Anesthetics, Local"
4,1010035,6387,Lidocaine,Anti-Arrhythmia Agents


In [113]:
# Write the full table (duplicates included) to CSV without the row index
medRequest_code.to_csv(path_or_buf='medRequest.csv', index=False)

In [114]:
# Drop rows that repeat an earlier row exactly, keeping the first occurrence
medRequest_code_noduplicates = medRequest_code.drop_duplicates(keep='first')

In [115]:
# Write the de-duplicated table to CSV without the row index
medRequest_code_noduplicates.to_csv(path_or_buf='medRequest-noduplicates.csv', index=False)