In [1]:
import json
import numpy as np
import pandas as pd
import psycopg2

%load_ext jupyternotify

<IPython.core.display.Javascript object>

# How many doctors on Medicaid drug utilization boards have received payments from drug manufacturers?

We will use Open Payments data to determine the extent of payments from 2013 through 2016 from drug manufacturers to doctors serving on Medicaid drug utilization boards between the start of 2016 and March of 2018.

## Create tables

First, connect to the database.

In [2]:
# Read the database connection settings from config.json.
# JSON is decoded as UTF-8 explicitly so the result does not depend on the
# machine's locale (which matters for non-ASCII passwords).
with open("config.json", encoding="utf-8") as config_file:
    conf = json.load(config_file)

# The connection string built later reads exactly these four keys; fail here
# with a message naming what is missing instead of a bare KeyError later on.
missing_config_keys = [
    key for key in ("host", "database", "user", "password") if key not in conf
]
if missing_config_keys:
    raise KeyError(
        "config.json is missing required keys: {}".format(", ".join(missing_config_keys))
    )

In [3]:
# Build the libpq connection string with psycopg2's own helper rather than
# str.format: make_dsn quotes/escapes each value, so a password (or host) that
# contains spaces, quotes or backslashes still yields a valid DSN.
conn_str = psycopg2.extensions.make_dsn(
    host=conf["host"],
    dbname=conf["database"],
    user=conf["user"],
    password=conf["password"],
)

In [4]:
# Open the database connection used by every query below.
conn = psycopg2.connect(dsn=conn_str)

# Autocommit lets the notebook's statements (such as CREATE TABLE) take effect
# on the connected database without an explicit commit.
conn.autocommit = True

Query the database to create a new table of all Medicaid drug utilization board members and the general purpose payments they received from drug manufacturers.

In [5]:
%%notify
# Create (if it does not already exist) and load the table of board members
# with their general-payment totals. The SQL string holds three statements:
#   1. CREATE TABLE IF NOT EXISTS ... AS: committee_members is LEFT JOINed to
#      open_payments_general on first name, last name and state, then grouped
#      to one row per (physician_profile_id, name, state, degrees). Members
#      with no name/state match keep a NULL physician_profile_id. That result
#      is LEFT JOINed to per-physician_profile_id totals (sum of payment
#      amounts and count of distinct record_id) from open_payments_general.
#   2. GRANT ALL on the new table to the "redash" role.
#   3. SELECT * from the table.
# NOTE(review): the DataFrame presumably holds the rows of the final SELECT;
# confirm how the driver handles a multi-statement string.
# Because of IF NOT EXISTS, an existing table is left as-is; it must be
# dropped manually for a change to this query to take effect.
general_payments_by_doctor = pd.read_sql("""CREATE TABLE IF NOT EXISTS general_payments_by_doctor AS
SELECT physician_profile_id1 AS physician_profile_id,
       first_name,
       last_name,
       state_name,
       degree_1,
       degree_2,
       general_payments_amount,
       number_of_general_payments
FROM
  (SELECT physician_profile_id AS physician_profile_id1,
          first_name,
          last_name,
          state_name,
          degree_1,
          degree_2
   FROM committee_members
   LEFT JOIN open_payments_general ON first_name = physician_first_name
   AND last_name = physician_last_name
   AND state_name = recipient_state
   GROUP BY physician_profile_id,
            first_name,
            last_name,
            state_name,
            degree_1,
            degree_2) AS matching_committee_members
LEFT JOIN
  (SELECT physician_profile_id AS physician_profile_id2,
          sum(total_amount_of_payment_usdollars) AS general_payments_amount,
          count(DISTINCT record_id) AS number_of_general_payments
   FROM open_payments_general
   GROUP BY physician_profile_id) AS totals_by_id ON matching_committee_members.physician_profile_id1 = totals_by_id.physician_profile_id2;


GRANT ALL ON TABLE general_payments_by_doctor TO redash;


SELECT *
FROM general_payments_by_doctor;""", con=conn)

<IPython.core.display.Javascript object>

The only consistent data we have about members serving on state drug utilization boards are the members' first name, last name and state. As such, that’s all we have to join against the Open Payments data. To understand the extent of possible duplicates in this data, we will count, for each committee member, how many distinct doctors with the same first name, last name and state appear in the Open Payments general payments table and in the [NPPES](https://npiregistry.cms.hhs.gov/) registry table (a national dataset of physicians).

In [6]:
%%notify
# Create (if it does not already exist) and load a copy of
# general_payments_by_doctor with two extra columns that measure how ambiguous
# each first-name/last-name/state match is:
#   - possible_duplicates_open_payments: number of distinct
#     physician_profile_id values in open_payments_general sharing that
#     first name, last name and recipient state.
#   - possible_duplicates_nppes: number of distinct npi values in nppes
#     sharing that first name, legal last name and practice-location state.
# Both lookups are LEFT JOINs, so a member with no match in a source gets NULL
# (not 0) for that source's count.
# The string also GRANTs access to the "redash" role and ends with a SELECT *
# of the table. Because of IF NOT EXISTS, an existing table is not rebuilt.
general_payments_by_doctor_with_counts = pd.read_sql("""
CREATE TABLE IF NOT EXISTS general_payments_by_doctor_with_counts AS
SELECT general_payments_by_doctor.physician_profile_id,
       general_payments_by_doctor.first_name,
       general_payments_by_doctor.last_name,
       general_payments_by_doctor.state_name,
       general_payments_by_doctor.degree_1,
       general_payments_by_doctor.degree_2,
       general_payments_amount,
       number_of_general_payments,
       possible_duplicates_open_payments,
       possible_duplicates_nppes
FROM general_payments_by_doctor
LEFT JOIN
  (SELECT physician_first_name,
          physician_last_name,
          recipient_state,
          count(DISTINCT physician_profile_id) AS possible_duplicates_open_payments
   FROM open_payments_general
   GROUP BY physician_first_name,
            physician_last_name,
            recipient_state) AS open_payments_grouped_by_first_last_state ON general_payments_by_doctor.first_name = open_payments_grouped_by_first_last_state.physician_first_name
AND general_payments_by_doctor.last_name = open_payments_grouped_by_first_last_state.physician_last_name
AND general_payments_by_doctor.state_name = open_payments_grouped_by_first_last_state.recipient_state
LEFT JOIN
  (SELECT provider_first_name,
          provider_last_name_legal_name,
          provider_business_practice_location_address_state_name,
          count(DISTINCT npi) AS possible_duplicates_nppes
   FROM nppes
   GROUP BY provider_first_name,
            provider_last_name_legal_name,
            provider_business_practice_location_address_state_name) AS nppes_grouped_by_first_last_state ON general_payments_by_doctor.first_name = nppes_grouped_by_first_last_state.provider_first_name
AND general_payments_by_doctor.last_name = nppes_grouped_by_first_last_state.provider_last_name_legal_name
AND general_payments_by_doctor.state_name = nppes_grouped_by_first_last_state.provider_business_practice_location_address_state_name;


GRANT ALL ON TABLE general_payments_by_doctor_with_counts TO redash;


SELECT *
FROM general_payments_by_doctor_with_counts;""", con=conn)

<IPython.core.display.Javascript object>

Return a table of deduplicated doctors and the general payments they received.

In [7]:
%%notify
deduplicated_general_payments_by_doctor_with_counts = pd.read_sql("""
CREATE TABLE IF NOT EXISTS deduplicated_general_payments_by_doctor_with_counts AS
SELECT *
FROM
  (SELECT DISTINCT ON (first_name,
                       last_name,
                       state_name) *
   FROM general_payments_by_doctor_with_counts) AS grouped_by_first_last_state;


GRANT ALL ON TABLE deduplicated_general_payments_by_doctor_with_counts TO redash;


SELECT *
FROM deduplicated_general_payments_by_doctor_with_counts;""", con=conn)

<IPython.core.display.Javascript object>

Remove all the doctors where our query turned up a duplicate in either the Open Payments or NPPES data.

In [8]:
%%notify
# Create (if it does not already exist) and load the table of unambiguous
# matches. A row is kept only when each duplicate count is either below 2 or
# NULL (no match in that source); the four OR-ed branches enumerate those
# combinations, i.e. the filter is logically
#   (possible_duplicates_nppes < 2 OR possible_duplicates_nppes IS NULL)
#   AND (possible_duplicates_open_payments < 2 OR possible_duplicates_open_payments IS NULL).
# The string also GRANTs access to the "redash" role and ends with a SELECT *
# of the table, which is what the analysis cells below operate on.
# Because of IF NOT EXISTS, an existing table is not rebuilt.
duplicates_removed = pd.read_sql("""
CREATE TABLE IF NOT EXISTS duplicates_removed AS
SELECT *
FROM deduplicated_general_payments_by_doctor_with_counts
WHERE (possible_duplicates_nppes < 2
       AND possible_duplicates_open_payments < 2)
  OR (possible_duplicates_nppes IS NULL
      AND possible_duplicates_open_payments IS NULL)
  OR (possible_duplicates_nppes < 2
      AND possible_duplicates_open_payments IS NULL)
  OR (possible_duplicates_open_payments < 2
      AND possible_duplicates_nppes IS NULL);


GRANT ALL ON TABLE duplicates_removed TO redash;


SELECT *
FROM duplicates_removed;""", con=conn)

<IPython.core.display.Javascript object>

## Analysis

How much money from drug companies have doctors sitting on drug utilization boards received?

In [9]:
# Total of the general_payments_amount column over all rows kept after
# removing ambiguous matches (missing amounts are skipped by sum()).
payment_amounts = duplicates_removed["general_payments_amount"]
payment_amounts.sum()

2409260.4400000013

OK. So doctors sitting on drug utilization boards have taken in about $2.4 million. And how many of these doctors have taken money?

In [10]:
# Per-column count of non-null values among rows whose payment total is
# above zero; rows with a missing amount compare as False and are excluded.
received_any_payment = duplicates_removed["general_payments_amount"] > 0
duplicates_removed.loc[received_any_payment].count()

physician_profile_id                 263
first_name                           263
last_name                            263
state_name                           263
degree_1                             254
degree_2                              24
general_payments_amount              263
number_of_general_payments           263
possible_duplicates_open_payments    263
possible_duplicates_nppes            256
dtype: int64

OK. So, 263 doctors on drug utilization boards have taken at least some money from drug companies. What share of all doctors sitting on these boards does that figure represent? To figure that out, we'll first need to count all the covered physicians sitting on the boards, as determined by their degree(s).

In [11]:
# Degree abbreviations this analysis treats as marking a "covered" physician;
# matched against the degree_1 / degree_2 columns further down.
covered_degrees = [
    "MD",
    "DO",
    "DMD",
    "DDS",
    "DPM",
    "OD",
    "DC",
]
covered_degrees

['MD', 'DO', 'DMD', 'DDS', 'DPM', 'OD', 'DC']

So how many doctors sitting on these boards either held one of these degrees or took money from drug companies? That's our denominator.

In [12]:
# Denominator: rows where either degree column holds a covered degree, or the
# payment total is above zero. count() then reports non-null values per column.
has_covered_degree = (
    duplicates_removed[["degree_1", "degree_2"]].isin(covered_degrees).any(axis=1)
)
received_any_payment = duplicates_removed["general_payments_amount"] > 0
duplicates_removed[has_covered_degree | received_any_payment].count()

physician_profile_id                 263
first_name                           416
last_name                            416
state_name                           416
degree_1                             407
degree_2                              40
general_payments_amount              263
number_of_general_payments           263
possible_duplicates_open_payments    263
possible_duplicates_nppes            338
dtype: int64

OK. So, it looks like 416 doctors either held one of these covered degrees or took money from drug companies (and were thus by definition covered by Open Payments). It's possible some of the doctors for whom we lack degree information are covered by Open Payments but we just don't know. So using the 416 figure as our denominator, 63 percent of covered doctors sitting on these boards took some amount of money from drug companies.

How many received more than $1,000?

In [13]:
# Per-column count of non-null values among rows whose payment total is
# strictly greater than 1,000.
over_1000 = duplicates_removed["general_payments_amount"].gt(1000)
duplicates_removed.loc[over_1000].count()

physician_profile_id                 92
first_name                           92
last_name                            92
state_name                           92
degree_1                             88
degree_2                              6
general_payments_amount              92
number_of_general_payments           92
possible_duplicates_open_payments    92
possible_duplicates_nppes            88
dtype: int64

How many received more than $10,000?

In [14]:
# Per-column count of non-null values among rows whose payment total is
# strictly greater than 10,000.
over_10000 = duplicates_removed["general_payments_amount"].gt(10000)
duplicates_removed.loc[over_10000].count()

physician_profile_id                 26
first_name                           26
last_name                            26
state_name                           26
degree_1                             23
degree_2                              2
general_payments_amount              26
number_of_general_payments           26
possible_duplicates_open_payments    26
possible_duplicates_nppes            25
dtype: int64

How many states had doctors on such a board who received money from drug companies?

In [15]:
# Total payment amount per state, as a two-column frame
# (state_name, general_payments_amount), built in one chain instead of
# mutating the frame in place.
sum_grouped_by_state = (
    duplicates_removed
    .groupby("state_name")["general_payments_amount"]
    .sum()
    .reset_index()
)

# Number of states whose board members received any money at all.
state_has_payments = sum_grouped_by_state["general_payments_amount"] > 0
sum_grouped_by_state[state_has_payments].count()

state_name                 48
general_payments_amount    48
dtype: int64

How many states had at least one doctor on such a board who received more than $1,000?

In [16]:
# Largest single-member payment total per state, as a two-column frame
# (state_name, general_payments_amount), built in one chain instead of
# mutating the frame in place.
max_grouped_by_state = (
    duplicates_removed
    .groupby("state_name")["general_payments_amount"]
    .max()
    .reset_index()
)

# Number of states where at least one member's total exceeds 1,000.
state_max_over_1000 = max_grouped_by_state["general_payments_amount"] > 1000
max_grouped_by_state[state_max_over_1000].count()

state_name                 36
general_payments_amount    36
dtype: int64