In [10]:
# Necessary libs
import requests # download data files from data.world
import os # delete downloaded files after parsing/loading
import feather # import data from file
import pandas as pd, numpy as np # data structures and analysis

import matplotlib.pyplot as plt # graphical plotting
plt.style.use('ggplot') # Pretty style

In [11]:
# Load up drug names data
def download_drugnames():
    filename = 'drugnames.feather'
    resp = requests.get('https://query.data.world/s/a0rj42sc17vhn7cyejxtyy8kv')
    resp.raise_for_status()

    with open(filename, 'wb') as handle:
        for block in resp.iter_content(1024):
            handle.write(block)

    out = feather.read_dataframe(filename)
    os.remove(filename)
    return out
    
df_drugnames = download_drugnames()
df_drugnames

Unnamed: 0,drugname_brand,drugname_generic
0,10 WASH,SULFACETAMIDE SODIUM
1,1ST TIER UNIFINE PENTIPS,"PEN NEEDLE, DIABETIC"
2,1ST TIER UNIFINE PENTIPS PLUS,"PEN NEEDLE, DIABETIC"
3,60PSE-400GFN-20DM,GUAIFENESIN/DM/PSEUDOEPHEDRINE
4,8-MOP,METHOXSALEN
5,A-B OTIC,ANTIPYRINE/BENZOCAINE
6,ABACAVIR,ABACAVIR SULFATE
7,ABACAVIR-LAMIVUDINE-ZIDOVUDINE,ABACAVIR/LAMIVUDINE/ZIDOVUDINE
8,ABELCET,AMPHOTERICIN B LIPID COMPLEX
9,ABILIFY,ARIPIPRAZOLE


In [12]:
# Load up spending data for each available year
def download_year_data():
    year_url_map = {
        2011: 'https://query.data.world/s/1mjkt1yjuug5hj4edxhgwhnom',
        2012: 'https://query.data.world/s/ee3n1cpxfupn33ozq515iifij',
        2013: 'https://query.data.world/s/1igu5omsev4g97xiowwpj3igr',
        2014: 'https://query.data.world/s/86zkfbit8wd3argjlnmh63daw',
        2015: 'https://query.data.world/s/2aohoqfccunny99afpktzudla'
    }
    
    year_data = {}

    for year in year_url_map:
        resp = requests.get(year_url_map[year])
        resp.raise_for_status()
        
        filename = 'spending_{0}.feather'.format(year)
        with open(filename, 'wb') as handle:
            for block in resp.iter_content(1024):
                handle.write(block)
                
        year_data[year] = feather.read_dataframe(filename)
        os.remove(filename)
        
    return year_data

df_years = download_year_data()
df_years

{2011:                        drugname_brand                 drugname_generic  \
 0                            10 WASH             SULFACETAMIDE SODIUM    
 1           1ST TIER UNIFINE PENTIPS             PEN NEEDLE, DIABETIC    
 2                  60PSE-400GFN-20DM   GUAIFENESIN/DM/PSEUDOEPHEDRINE    
 3                              8-MOP                      METHOXSALEN    
 4                           A-B OTIC            ANTIPYRINE/BENZOCAINE    
 5                            ABELCET     AMPHOTERICIN B LIPID COMPLEX    
 6                            ABILIFY                     ARIPIPRAZOLE    
 7                   ABILIFY DISCMELT                     ARIPIPRAZOLE    
 8                           ABRAXANE         PACLITAXEL PROTEIN-BOUND    
 9                            ABSTRAL                 FENTANYL CITRATE    
 10                            ACANYA   CLINDAMYCIN PHOS/BENZOYL PEROX    
 11                          ACARBOSE                         ACARBOSE    
 12                

In [13]:
# Set the index of each DataFrame to be drug names, then remove those columns
for year in df_years:
    index_values = df_years[year].iloc[:, :2].values.T
    df_years[year].index = pd.MultiIndex.from_arrays(index_values)
    df_years[year] = df_years[year].iloc[:, 2:]

In [14]:
# Create a Panel from all the individual year data
drug_spending = pd.Panel(df_years)
drug_spending

<class 'pandas.core.panel.Panel'>
Dimensions: 5 (items) x 4498 (major_axis) x 10 (minor_axis)
Items axis: 2011 to 2015
Major_axis axis: (10 WASH , SULFACETAMIDE SODIUM ) to (ZYVOX , LINEZOLID )
Minor_axis axis: claim_count to out_of_pocket_avg_lowincome

In [15]:
# Top 10 drugs by total cumulative dollars spent, 2011-2015, descending (in billions of dollars)
(drug_spending.loc[:, :, 'total_spending'].sum().dropna().sort_values(ascending=False) / 1000000000) \
    .head(10) \
    .map(lambda x: '$ ' + ('%.1fB' % x).rjust(5, ' '))

LANTUS/LANTUS SOLOSTAR   INSULIN GLARGINE,HUM.REC.ANLOG     $ 14.2B
NEXIUM                   ESOMEPRAZOLE MAGNESIUM             $ 11.3B
CRESTOR                  ROSUVASTATIN CALCIUM               $ 10.8B
ADVAIR DISKUS            FLUTICASONE/SALMETEROL             $ 10.4B
ABILIFY                  ARIPIPRAZOLE                       $  9.4B
SPIRIVA                  TIOTROPIUM BROMIDE                 $  9.2B
HARVONI                  LEDIPASVIR/SOFOSBUVIR              $  7.7B
JANUVIA                  SITAGLIPTIN PHOSPHATE              $  7.3B
REVLIMID                 LENALIDOMIDE                       $  6.9B
NAMENDA                  MEMANTINE HCL                      $  6.0B
dtype: object

In [16]:
# Top 10 drugs by total cumulative claims, 2011-2015, descending (in millions of claims)
(drug_spending.loc[:, :, 'claim_count'].sum().dropna().sort_values(ascending=False) / 1000000) \
    .head(10) \
    .map(lambda x: '%.1fM' % x)

SIMVASTATIN                 SIMVASTATIN                   181.3M
LISINOPRIL                  LISINOPRIL                    181.1M
LEVOTHYROXINE SODIUM        LEVOTHYROXINE SODIUM          169.9M
AMLODIPINE BESYLATE         AMLODIPINE BESYLATE           167.6M
HYDROCODONE-ACETAMINOPHEN   HYDROCODONE/ACETAMINOPHEN     162.0M
OMEPRAZOLE                  OMEPRAZOLE                    155.9M
FUROSEMIDE                  FUROSEMIDE                    129.2M
ATORVASTATIN CALCIUM        ATORVASTATIN CALCIUM          114.8M
METFORMIN HCL               METFORMIN HCL                 109.0M
METOPROLOL TARTRATE         METOPROLOL TARTRATE           102.1M
dtype: object

In [17]:
# Find claim counts and total spending in 2011 for all forms of omeprazole (Nexium)
all_omeprazoles = ['OMEPRAZOLE' in generic for (brand, generic) in drug_spending[2011].index.values]
drug_spending[2011][all_omeprazoles][['claim_count', 'total_spending']].dropna().sort_values(by='claim_count', ascending=False)

Unnamed: 0,Unnamed: 1,claim_count,total_spending
OMEPRAZOLE,OMEPRAZOLE,27360685.0,676847500.0
NEXIUM,ESOMEPRAZOLE MAGNESIUM,8224024.0,1970138000.0
OMEPRAZOLE-SODIUM BICARBONATE,OMEPRAZOLE/SODIUM BICARBONATE,71956.0,15475950.0
VIMOVO,NAPROXEN/ESOMEPRAZOLE MAG,34216.0,3201527.0
ZEGERID,OMEPRAZOLE/SODIUM BICARBONATE,25275.0,7420869.0
PRILOSEC,OMEPRAZOLE,10950.0,4080108.0
PRILOSEC,OMEPRAZOLE MAGNESIUM,2056.0,626024.7
NEXIUM I.V.,ESOMEPRAZOLE SODIUM,704.0,216451.5
