In [1]:
# Leave commented out unless you recieve and error that you do not have psycopg2 installed.

import sys
import subprocess

try:
    import psycopg2
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--user", "psycopg2-binary"])
    import psycopg2

print(psycopg2.__version__)

2.9.11 (dt dec pq3 ext lo64)


In [2]:
import pandas as pd
from sqlalchemy import create_engine, text

In [3]:
database_name = 'prescribers'   

connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

In [4]:
engine = create_engine(connection_string)

In [None]:
# Is there an association between rates of opioid prescriptions and overdose deaths by county?
# 1. opioid prescription - prescription & drug name
# 2. overdose deaths -  overdose deaths
# 3. county - fips_county
# 4. opioid flag - drug

In [5]:

query4 = """


WITH opioid_prescription AS(
	SELECT f.county,
	COUNT(p.drug_name) AS prescription_count
FROM prescription p
LEFT JOIN drug d
ON d.drug_name = p.drug_name
LEFT JOIN prescriber
ON prescriber.npi = p.npi
LEFT JOIN zip_fips z
ON z.zip = prescriber.nppes_provider_zip5
LEFT JOIN fips_county f
ON f.state = prescriber.nppes_provider_state
AND f.fipscounty = z.fipscounty 
WHERE d.opioid_drug_flag = 'Y'
AND f.state = 'TN'
GROUP BY f.county
),
od AS(
SELECT f.county,
	SUM(o.overdose_deaths) AS total_overdose_deaths
FROM overdose_deaths o
LEFT JOIN fips_county f
ON f.fipscounty::INT = o.fipscounty
WHERE f.state = 'TN'
GROUP BY f.county
)

SELECT opioid_prescription.county,
	opioid_prescription.prescription_count,
	od.total_overdose_deaths,
	(od.total_overdose_deaths*100)/opioid_prescription.prescription_count AS percentage_of_opioid_death
FROM opioid_prescription
LEFT JOIN od
ON od.county = opioid_prescription.county
ORDER BY percentage_of_opioid_death DESC;

--Ans: No, there is not an association between rates of opioid prescriptions and overdose deaths by county.
"""

In [6]:
with engine.connect() as connection:
    result = connection.execute(text(query4))
    prescription_deaths = pd.DataFrame(result.fetchall(), columns=result.keys())

In [7]:
prescription_deaths.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 4 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   county                      95 non-null     object
 1   prescription_count          95 non-null     int64 
 2   total_overdose_deaths       95 non-null     int64 
 3   percentage_of_opioid_death  95 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 3.1+ KB


In [8]:
prescription_deaths.head()

Unnamed: 0,county,prescription_count,total_overdose_deaths,percentage_of_opioid_death
0,DECATUR,44,10,22
1,TIPTON,233,52,22
2,DICKSON,254,49,19
3,KNOX,3776,683,18
4,STEWART,47,8,17
