In [13]:
# Necessary libs
import feather # reader for the .feather data files (feather.read_dataframe)
import pandas as pd, numpy as np # data structures and analysis

# Graphical plotting
import matplotlib
matplotlib.style.use('ggplot') # apply the 'ggplot' style sheet to the plots
import matplotlib.pyplot as plt # actual plotting module

In [2]:
# Read the drug-name table from its feather file
drugnames_file = 'drugnames.feather'
df_drugnames = feather.read_dataframe(drugnames_file)

In [5]:
# One feather file per year, named 'spending-<year>.feather', for 2011 through 2015
year_file_map = {
    spending_year: 'spending-' + str(spending_year) + '.feather'
    for spending_year in range(2011, 2016)
}

# Read each year's file into its own DataFrame, keyed by year
df_years = {
    spending_year: feather.read_dataframe(spending_file)
    for spending_year, spending_file in year_file_map.items()
}

In [9]:
# Build a two-level row index from the first two columns of the drug-name table;
# the level names are taken from that table's column labels
name_levels = df_drugnames.iloc[:, [0, 1]].values.T
idx = pd.MultiIndex.from_arrays(name_levels, names=df_drugnames.columns.values)

# Give every year's frame that same row index.
# assumes each year's rows are in the same order as df_drugnames — TODO confirm
for year, spending_frame in df_years.items():
    spending_frame.index = idx

In [10]:
# Combine the per-year frames into a single Panel, one item per year.
# NOTE(review): pd.Panel was deprecated in pandas 0.20 and removed in 0.25,
# so this cell needs an older pandas — confirm the pinned version.
drug_spending = pd.Panel(data=df_years)
drug_spending

<class 'pandas.core.panel.Panel'>
Dimensions: 5 (items) x 4498 (major_axis) x 10 (minor_axis)
Items axis: 2011 to 2015
Major_axis axis: (10 WASH , SULFACETAMIDE SODIUM ) to (ZYVOX , LINEZOLID )
Minor_axis axis: claim_count to out_of_pocket_avg_lowincome

In [72]:
# Ten drugs with the highest cumulative spending over 2011-2015,
# largest first, shown in billions of dollars
def _format_billions(amount):
    """Render a value already expressed in billions as e.g. '$ 14.2B' (number part right-aligned to 5 chars)."""
    return '$ ' + ('%.1fB' % amount).rjust(5, ' ')

dollars_per_billion = 1000000000

# Sum 'total_spending' across the years and drop drugs whose total is missing
cumulative_spending = drug_spending.loc[:, :, 'total_spending'].sum().dropna()
spending_in_billions = cumulative_spending.sort_values(ascending=False) / dollars_per_billion
spending_in_billions.head(10).map(_format_billions)

drugname_brand           drugname_generic               
LANTUS/LANTUS SOLOSTAR   INSULIN GLARGINE,HUM.REC.ANLOG     $ 14.2B
NEXIUM                   ESOMEPRAZOLE MAGNESIUM             $ 11.3B
CRESTOR                  ROSUVASTATIN CALCIUM               $ 10.8B
ADVAIR DISKUS            FLUTICASONE/SALMETEROL             $ 10.4B
ABILIFY                  ARIPIPRAZOLE                       $  9.4B
SPIRIVA                  TIOTROPIUM BROMIDE                 $  9.2B
HARVONI                  LEDIPASVIR/SOFOSBUVIR              $  7.7B
JANUVIA                  SITAGLIPTIN PHOSPHATE              $  7.3B
REVLIMID                 LENALIDOMIDE                       $  6.9B
NAMENDA                  MEMANTINE HCL                      $  6.0B
dtype: object

In [73]:
# Ten drugs with the highest cumulative claim counts over 2011-2015,
# largest first, shown in millions of claims
def _format_millions(count):
    """Render a value already expressed in millions as e.g. '181.3M'."""
    return '%.1fM' % count

claims_per_million = 1000000

# Sum 'claim_count' across the years and drop drugs whose total is missing
cumulative_claims = drug_spending.loc[:, :, 'claim_count'].sum().dropna()
claims_in_millions = cumulative_claims.sort_values(ascending=False) / claims_per_million
claims_in_millions.head(10).map(_format_millions)

drugname_brand              drugname_generic          
SIMVASTATIN                 SIMVASTATIN                   181.3M
LISINOPRIL                  LISINOPRIL                    181.1M
LEVOTHYROXINE SODIUM        LEVOTHYROXINE SODIUM          169.9M
AMLODIPINE BESYLATE         AMLODIPINE BESYLATE           167.6M
HYDROCODONE-ACETAMINOPHEN   HYDROCODONE/ACETAMINOPHEN     162.0M
OMEPRAZOLE                  OMEPRAZOLE                    155.9M
FUROSEMIDE                  FUROSEMIDE                    129.2M
ATORVASTATIN CALCIUM        ATORVASTATIN CALCIUM          114.8M
METFORMIN HCL               METFORMIN HCL                 109.0M
METOPROLOL TARTRATE         METOPROLOL TARTRATE           102.1M
dtype: object

In [112]:
# Claims and spending in 2011 for every drug whose generic name mentions OMEPRAZOLE
spending_2011 = drug_spending[2011]

# Boolean row mask: True where the generic name contains the substring 'OMEPRAZOLE'.
# NOTE(review): being a substring test, this also matches ESOMEPRAZOLE products — confirm that is intended
all_omeprazoles = ['OMEPRAZOLE' in generic for (brand, generic) in spending_2011.index.values]

# Keep the two columns of interest, drop rows with missing values, most-claimed first
omeprazole_2011 = spending_2011[all_omeprazoles][['claim_count', 'total_spending']].dropna()
omeprazole_2011.sort_values(by='claim_count', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,claim_count,total_spending
drugname_brand,drugname_generic,Unnamed: 2_level_1,Unnamed: 3_level_1
OMEPRAZOLE,OMEPRAZOLE,27360685.0,676847500.0
NEXIUM,ESOMEPRAZOLE MAGNESIUM,8224024.0,1970138000.0
OMEPRAZOLE-SODIUM BICARBONATE,OMEPRAZOLE/SODIUM BICARBONATE,71956.0,15475950.0
VIMOVO,NAPROXEN/ESOMEPRAZOLE MAG,34216.0,3201527.0
ZEGERID,OMEPRAZOLE/SODIUM BICARBONATE,25275.0,7420869.0
PRILOSEC,OMEPRAZOLE,10950.0,4080108.0
PRILOSEC,OMEPRAZOLE MAGNESIUM,2056.0,626024.7
NEXIUM I.V.,ESOMEPRAZOLE SODIUM,704.0,216451.5
