In [2]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.offline as py
import requests
import os

I want to look at how PHRMA spending leading up to the 2018 midterms is correlated with potential interests, including opioid overdose, marijuana use and regulations. We'll start by looking for a potential correlation with opioid death rates and absolute number of deaths.

Overdose rates and totals from the CDC data portal. Citation as follows:

Centers for Disease Control and Prevention, National Center for Health Statistics. Multiple Cause of Death 1999-2016 on CDC WONDER Online Database, released December, 2017. Data are from the Multiple Cause of Death Files, 1999-2016, as compiled from data provided by the 57 vital statistics jurisdictions through the Vital Statistics Cooperative Program. Accessed at http://wonder.cdc.gov/mcd-icd10.html on Jan 4, 2018 4:27:00 PM

In [3]:
#Loading in overdose rate, total overdose deaths
overdose_by_county = pd.read_csv('overdose_by_county.csv', delimiter='\t')[:1000]
overdose_by_state = pd.read_csv('overdose_by_state.csv', delimiter='\t')[:51]

In [4]:
#State-level overdose statistics
overdose_by_state.describe()

Unnamed: 0,State Code,Deaths,Population,Crude Rate
count,51.0,51.0,51.0,51.0
mean,28.960784,1318.921569,6335834.0,21.927451
std,15.832828,1336.585453,7243849.0,9.186361
min,1.0,75.0,585501.0,7.7
25%,16.5,335.5,1757121.0,14.95
50%,29.0,912.0,4436974.0,20.1
75%,41.5,1657.0,7109536.0,26.7
max,56.0,5094.0,39250020.0,49.8


In [5]:
#County-level overdose statistics
overdose_by_county.describe()

Unnamed: 0,State Code,County Code,Deaths,Population
count,1000.0,1000.0,1000.0,1000.0
mean,30.503,30590.808,61.55,279284.4
std,15.174263,15188.83238,101.227677,544665.9
min,1.0,1003.0,10.0,14640.0
25%,18.0,18062.5,15.0,65145.5
50%,34.0,34018.0,25.0,124685.5
75%,42.0,42061.5,60.0,271041.5
max,56.0,56037.0,1130.0,10137920.0


In [6]:
#Sort states by rate of overdose
overdose_by_state = overdose_by_state.sort_values('Crude Rate', axis=0, ascending=False)
overdose_by_state

Unnamed: 0,Notes,State,State Code,Deaths,Population,Crude Rate
48,,West Virginia,54.0,912.0,1831102.0,49.8
8,,District of Columbia,11.0,276.0,681170.0,40.5
35,,Ohio,39.0,4477.0,11614373.0,38.5
38,,Pennsylvania,42.0,4762.0,12784227.0,37.2
29,,New Hampshire,33.0,495.0,1334795.0,37.1
21,,Massachusetts,25.0,2379.0,6811779.0,34.9
20,,Maryland,24.0,2089.0,6016447.0,34.7
17,,Kentucky,21.0,1525.0,4436974.0,34.4
39,,Rhode Island,44.0,330.0,1056426.0,31.2
7,,Delaware,10.0,288.0,952065.0,30.3


In [7]:
overdose_by_county

Unnamed: 0,Notes,State,State Code,County,County Code,Deaths,Population,Crude Rate
0,,Alabama,1.0,"Baldwin County, AL",1003.0,34.0,208563.0,16.3
1,,Alabama,1.0,"Blount County, AL",1009.0,15.0,57704.0,Unreliable
2,,Alabama,1.0,"Calhoun County, AL",1015.0,16.0,114611.0,Unreliable
3,,Alabama,1.0,"Cullman County, AL",1043.0,20.0,82471.0,24.3
4,,Alabama,1.0,"DeKalb County, AL",1049.0,19.0,70900.0,Unreliable
5,,Alabama,1.0,"Escambia County, AL",1053.0,11.0,37728.0,Unreliable
6,,Alabama,1.0,"Etowah County, AL",1055.0,18.0,102564.0,Unreliable
7,,Alabama,1.0,"Jefferson County, AL",1073.0,256.0,659521.0,38.8
8,,Alabama,1.0,"Lauderdale County, AL",1077.0,13.0,92318.0,Unreliable
9,,Alabama,1.0,"Lee County, AL",1081.0,11.0,158991.0,Unreliable


OK, so let's first see if we can establish that big pharma is targeting Congressional races by the year. If we can do that, it may be possible to demonstrate that they're also shifting their money year after year depending on *where* the deaths are occurring.

In [21]:
#Set up Center for Responsive Politics (OpenSecrets) API
from crpapi import CRP
OPENSECRETS_API_KEY = os.environ['OPENSECRETS_API_KEY']
PROPUBLICA_CAMPAIGN_FINANCE_API_KEY = os.environ['PROPUBLICA_CAMPAIGN_FINANCE_API_KEY']
PHARMA_MANUFACTURING = 'H4300'
PHARMA_WHOLESALE = 'H4400'
opensecrets = CRP(OPENSECRETS_API_KEY)

In [22]:
#Set up data scraping parameters
years = [2013,2014,2015,2016,2017,2018]
candidates_uri = 'https://api.propublica.org/campaign-finance/v1/{cycle}/candidates/search'
print(candidates_uri.format(cycle=2012))
payload = {
    'X-API-Key':PROPUBLICA_CAMPAIGN_FINANCE_API_KEY
}
r = requests.get(candidates_uri.format(cycle=2012), params=payload)
r.json()

https://api.propublica.org/campaign-finance/v1/2012/candidates/search


{'message': 'Unauthorized'}