In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing as pp
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Shell escape: report the versions of the installed Jupyter components so the
# environment this notebook was executed in is recorded alongside the results.
!jupyter --version

jupyter core     : 4.7.1
jupyter-notebook : 6.4.3
qtconsole        : 5.1.0
ipython          : 7.26.0
ipykernel        : 6.2.0
jupyter client   : 6.1.12
jupyter lab      : not installed
nbconvert        : 6.1.0
ipywidgets       : 7.6.3
nbformat         : 5.1.3
traitlets        : 5.0.5


In [3]:
# Report the versions of the libraries this analysis depends on.
# importlib.metadata is the standard-library replacement for the deprecated
# pkg_resources API and reads the same installed-distribution metadata.
import sys
from importlib.metadata import version

# (label shown in the output, distribution name as installed)
for label, distribution in (
    ("Pandas", "pandas"),
    ("NumPy", "numpy"),
    ("Scikit-Learn", "scikit-learn"),
    ("MLXtend", "mlxtend"),
):
    print(label + ": " + version(distribution))
print("Python: " + sys.version)

Pandas: 1.3.2
NumPy: 1.20.3
Scikit-Learn: 0.24.2
MLXtend: 0.19.0
Python: 3.8.11 (default, Aug  6 2021, 09:57:55) [MSC v.1916 64 bit (AMD64)]


In [4]:
# Load the raw prescription table from the CSV file next to the notebook.
data = pd.read_csv(filepath_or_buffer='medical_market_basket.csv')

In [5]:
# Preview the first five rows of the raw table.
data.head(n=5)

Unnamed: 0,Presc01,Presc02,Presc03,Presc04,Presc05,Presc06,Presc07,Presc08,Presc09,Presc10,Presc11,Presc12,Presc13,Presc14,Presc15,Presc16,Presc17,Presc18,Presc19,Presc20
0,,,,,,,,,,,,,,,,,,,,
1,amlodipine,albuterol aerosol,allopurinol,pantoprazole,lorazepam,omeprazole,mometasone,fluconozole,gabapentin,pravastatin,cialis,losartan,metoprolol succinate XL,sulfamethoxazole,abilify,spironolactone,albuterol HFA,levofloxacin,promethazine,glipizide
2,,,,,,,,,,,,,,,,,,,,
3,citalopram,benicar,amphetamine salt combo xr,,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,,,,


In [6]:
# Keep only rows with fewer than 19 missing cells, then renumber the index.
# NOTE(review): the table shows 20 prescription columns (Presc01..Presc20), so
# this removes the fully blank rows (20 missing) AND rows holding a single
# prescription (19 missing). Confirm that dropping one-item rows is intended,
# since it changes the number of transactions that support is computed over.
missing_per_row = data.isna().sum(axis=1)
data = data.loc[missing_per_row < 19].reset_index(drop=True)
data

Unnamed: 0,Presc01,Presc02,Presc03,Presc04,Presc05,Presc06,Presc07,Presc08,Presc09,Presc10,Presc11,Presc12,Presc13,Presc14,Presc15,Presc16,Presc17,Presc18,Presc19,Presc20
0,amlodipine,albuterol aerosol,allopurinol,pantoprazole,lorazepam,omeprazole,mometasone,fluconozole,gabapentin,pravastatin,cialis,losartan,metoprolol succinate XL,sulfamethoxazole,abilify,spironolactone,albuterol HFA,levofloxacin,promethazine,glipizide
1,citalopram,benicar,amphetamine salt combo xr,,,,,,,,,,,,,,,,,
2,paroxetine,allopurinol,,,,,,,,,,,,,,,,,,
3,abilify,atorvastatin,folic acid,naproxen,losartan,,,,,,,,,,,,,,,
4,hydrochlorothiazide,glyburide,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5742,doxycycline hyclate,clotrimazole,,,,,,,,,,,,,,,,,,
5743,amphetamine,clotrimazole,lantus,,,,,,,,,,,,,,,,,
5744,citalopram,metoprolol,amphetamine salt combo xr,glyburide,celebrex,losartan,,,,,,,,,,,,,,
5745,alprazolam,losartan,,,,,,,,,,,,,,,,,,


In [8]:
# Collect every distinct prescription name, in first-seen order when the table
# is scanned column by column (first column top to bottom, then the second, ...).
#
# Flattening the values in column-major ("F") order reproduces that scan without
# a per-cell Python loop or the chained positional lookup `data.iloc[j][i]`, and
# drop_duplicates keeps the first occurrence of each name, so the resulting
# order matches the loop-based version.
column_major_values = pd.Series(data.to_numpy().ravel(order="F"))
prescriptions = (
    column_major_values
    .dropna()            # empty cells are read in as NaN
    .astype(str)         # names are compared and used as column labels as strings
    .drop_duplicates()
    .tolist()
)

print(prescriptions)

['amlodipine', 'citalopram', 'paroxetine', 'abilify', 'hydrochlorothiazide', 'metformin', 'metoprolol', 'amphetamine salt combo xr', 'carvedilol', 'benicar', 'tamsulosin', 'trimethoprim DS', 'trazodone HCI', 'lisinopril', 'methylprednisone', 'topiramate', 'folic acid', 'isosorbide mononitrate', 'allopurinol', 'glyburide', 'potassium Chloride', 'diazepam', 'furosemide', 'alendronate', 'Premarin', 'meloxicam', 'venlafaxine XR', 'ibuprophen', 'clopidogrel', 'amphetamine salt combo', 'codeine', 'mometasone', 'amphetamine', 'triamcinolone Ace topical', 'doxycycline hyclate', 'fenofibrate', 'atorvastatin', 'cephalexin', 'glipizide', 'dextroamphetamine XR', 'alprazolam', 'simvastatin', 'prednisone', 'losartan', 'naproxen', 'albuterol HFA', 'metoprolol succinate XL', 'levofloxacin', 'spironolactone', 'celebrex', 'escitalopram', 'ciprofloxacin', 'ezetimibe', 'lorazepam', 'pantoprazole', 'cefdinir', 'albuterol aerosol', 'valsartan', 'diclofenac sodium', 'viagra', 'quetiapine', 'clonidine HCI', '

In [9]:
# Empty transaction-by-prescription frame: one row per row of `data`, one column
# per distinct prescription name. Cells start out as missing values.
n_transactions = data.shape[0]
data_counts = pd.DataFrame(columns=prescriptions, index=range(n_transactions))

In [10]:
# One-hot encode the baskets: data_counts[r, p] becomes 1 when prescription p
# appears anywhere in row r of `data`, and 0 otherwise.
#
# This is done with array indexing instead of `data_counts[col][row] = 1`:
# that chained assignment is not guaranteed to write into the frame (it is a
# no-op under pandas Copy-on-Write) and needed a Python loop over every
# (row, column, prescription) triple.
basket = data.to_numpy()
present = pd.notna(basket)

# Row position of every filled cell and the matching data_counts column position.
# Both are produced in the same row-major order, so they line up element-wise.
row_pos = np.nonzero(present)[0]
col_pos = data_counts.columns.get_indexer(basket[present])

# get_indexer returns -1 for a value that has no column; skip those, which is
# what the equality test in the loop-based version effectively did.
known = col_pos >= 0

onehot = np.zeros(data_counts.shape, dtype=np.int64)
onehot[row_pos[known], col_pos[known]] = 1

data_counts = pd.DataFrame(onehot, index=data_counts.index, columns=data_counts.columns)
data_counts

Unnamed: 0,amlodipine,citalopram,paroxetine,abilify,hydrochlorothiazide,metformin,metoprolol,amphetamine salt combo xr,carvedilol,benicar,...,fluoxetine HCI,hydrocortisone 2.5% cream,boniva,bupropion sr,flovent hfa 110mcg inhaler,levothyroxine sodium,pioglitazone,crestor,clonazepam,finasteride
0,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5742,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5743,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5744,0,1,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
5745,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
# Mine frequent itemsets: every set of prescriptions that occurs in at least 3%
# of the transactions (min_support=0.03), listed from most to least frequent.
# apriori is given a boolean frame, which is the input type mlxtend documents;
# the 0/1 values map to False/True, so the supports are unchanged.
freq_items = (
    apriori(data_counts.astype(bool), min_support=0.03, use_colnames=True)
    .sort_values('support', ascending=False)
    .reset_index(drop=True)
)

# Number of prescriptions in each itemset.
freq_items['length'] = freq_items['itemsets'].apply(len)
freq_items

Unnamed: 0,support,itemsets,length
0,0.294936,(abilify),1
1,0.218897,(amphetamine salt combo xr),1
2,0.218201,(carvedilol),1
3,0.201148,(diazepam),1
4,0.200104,(glyburide),1
...,...,...,...
77,0.030799,"(atorvastatin, metoprolol)",2
78,0.030799,"(abilify, amlodipine)",2
79,0.030625,"(losartan, diazepam)",2
80,0.030103,"(abilify, metformin)",2


In [12]:
# Build association rules from the frequent itemsets, keeping only rules whose
# lift meets the threshold of 1, and rank them from highest to lowest lift.
ass_rule = (
    association_rules(freq_items, metric='lift', min_threshold=1)
    .sort_values('lift', ascending=False)
    .reset_index(drop=True)
)
ass_rule

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(carvedilol),(lisinopril),0.218201,0.124587,0.051157,0.23445,1.88182,0.023972,1.143509
1,(lisinopril),(carvedilol),0.124587,0.218201,0.051157,0.410615,1.88182,0.023972,1.326465
2,(metformin),(abilify),0.064555,0.294936,0.030103,0.466307,1.581043,0.011063,1.321104
3,(abilify),(metformin),0.294936,0.064555,0.030103,0.102065,1.581043,0.011063,1.041773
4,(metoprolol),(atorvastatin),0.121107,0.16339,0.030799,0.25431,1.556466,0.011011,1.121928
5,(atorvastatin),(metoprolol),0.16339,0.121107,0.030799,0.188498,1.556466,0.011011,1.083046
6,(amphetamine salt combo xr),(citalopram),0.218897,0.110318,0.037585,0.171701,1.556414,0.013436,1.074107
7,(citalopram),(amphetamine salt combo xr),0.110318,0.218897,0.037585,0.340694,1.556414,0.013436,1.184735
8,(abilify),(glipizide),0.294936,0.081956,0.036019,0.122124,1.490119,0.011847,1.045756
9,(glipizide),(abilify),0.081956,0.294936,0.036019,0.43949,1.490119,0.011847,1.257897


In [13]:
# Same rule generation without a lift >= 1 cut-off, ranked by lift, so the
# low-lift end of the list can be inspected (rows 52 onward are shown).
# NOTE(review): 0.8 is mlxtend's default min_threshold, written out here for
# visibility. Rules with lift below 0.8 are still excluded, so this frame is
# only "complete" if no such rules exist; confirm or lower the threshold.
ass_rule_complete = (
    association_rules(freq_items, metric='lift', min_threshold=0.8)
    .sort_values('lift', ascending=False)
    .reset_index(drop=True)
)
ass_rule_complete.iloc[52:70]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
52,(amphetamine salt combo xr),(carvedilol),0.218897,0.218201,0.047677,0.217806,0.998191,-8.6e-05,0.999495
53,(carvedilol),(amphetamine salt combo xr),0.218201,0.218897,0.047677,0.218501,0.998191,-8.6e-05,0.999493
54,(carvedilol),(losartan),0.218201,0.15991,0.034627,0.158692,0.992387,-0.000266,0.998553
55,(losartan),(carvedilol),0.15991,0.218201,0.034627,0.21654,0.992387,-0.000266,0.99788
56,(amphetamine salt combo xr),(diazepam),0.218897,0.201148,0.043327,0.197933,0.984016,-0.000704,0.995991
57,(diazepam),(amphetamine salt combo xr),0.201148,0.218897,0.043327,0.215398,0.984016,-0.000704,0.995541
58,(citalopram),(abilify),0.110318,0.294936,0.031843,0.288644,0.978663,-0.000694,0.991154
59,(abilify),(citalopram),0.294936,0.110318,0.031843,0.107965,0.978663,-0.000694,0.997361
60,(losartan),(diazepam),0.15991,0.201148,0.030625,0.191513,0.952096,-0.001541,0.988082
61,(diazepam),(losartan),0.201148,0.15991,0.030625,0.152249,0.952096,-0.001541,0.990964


In [14]:
# Save the one-hot encoded transaction table; the row index is written as the
# first column because to_csv's default index=True is left in place.
data_counts.to_csv(path_or_buf='OFM3_basket_clean.csv')