In [None]:
import pandas as pd
from sqlalchemy import create_engine, text
import matplotlib as plt
import seaborn as sns
import re 
import numpy as np

In [None]:
database_name = 'prescriptions'

connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

In [None]:
engine = create_engine(connection_string)

In [None]:
query = 'SELECT * FROM drug'

In [None]:
with engine.connect() as connection:
    drug = pd.read_sql(query, con = connection)

# 1. Deaths over time.
###  a. How has total overdose deaths changed over time?

In [None]:
overdosedeaths = '''SELECT 
                        overdose_deaths,
                        year
                    FROM overdose_deaths'''
with engine.connect() as connection:
    overdosedeaths = pd.read_sql(overdosedeaths, con = connection)
od_by_year = overdosedeaths.groupby(['year']).sum('over_dose_deaths')
od_by_year['year'] = overdosedeaths['year'].astype('str')
od_by_year.plot()

###  b. How have overdose deaths changed over time for Davidson and Shelby counties.

In [None]:
overdosedeathsb = '''SELECT 
                        *
                    FROM overdose_deaths'''
with engine.connect() as connection:
    overdosedeathsb = pd.read_sql(overdosedeathsb, con = connection)
overdosedeathsb

In [None]:
fips_countyb = '''SELECT
                    *
                    FROM fips_county'''
with engine.connect() as connection:
    fips_countyb = pd.read_sql(fips_countyb, con = connection)
fips_countyb

In [None]:
fips_countyb['fipscounty'] = fips_countyb.fipscounty.astype('int')

In [None]:
od_county = pd.merge(overdosedeathsb, fips_countyb,
                                    on = 'fipscounty',
                                           how = 'inner')
two_county = od_county.loc[(od_county['county'] == 'DAVIDSON') | (od_county['county'] == 'SHELBY')]
dav_only = od_county.loc[(od_county['county'] == 'DAVIDSON')]
she_only = od_county.loc[(od_county['county'] == 'SHELBY')]
two_county

In [None]:
sns.lineplot(x='year', y='overdose_deaths', data=dav_only)
sns.lineplot(x='year', y='overdose_deaths', data=she_only)


###  c. Are there any counties in which overdose deaths are trending downward?

In [30]:
od_by_county = od_county.groupby('county').value_counts().to_frame().reset_index()
od_by_county

Unnamed: 0,county,overdose_deaths,year,fipscounty,state,fipsstate,count
0,ANDERSON,18,2018,47001,TN,47,1
1,ANDERSON,20,2015,47001,TN,47,1
2,ANDERSON,24,2016,47001,TN,47,1
3,ANDERSON,34,2017,47001,TN,47,1
4,BEDFORD,1,2016,47003,TN,47,1
...,...,...,...,...,...,...,...
375,WILLIAMSON,30,2018,47187,TN,47,1
376,WILSON,19,2018,47189,TN,47,1
377,WILSON,26,2015,47189,TN,47,1
378,WILSON,26,2017,47189,TN,47,1


# 2. Spending on opioids
###  a. What is the correlation between spending on opioids and overdose deaths?


In [35]:
overdosedeaths2 = '''SELECT 
                        *
                    FROM overdose_deaths'''
with engine.connect() as connection:
    overdosedeaths2 = pd.read_sql(overdosedeaths2, con = connection)
overdosedeaths2

Unnamed: 0,overdose_deaths,year,fipscounty
0,135,2015,47157
1,150,2016,47157
2,159,2017,47157
3,123,2018,47157
4,122,2015,47093
...,...,...,...
375,0,2018,47017
376,1,2015,47007
377,2,2016,47007
378,2,2017,47007


In [97]:
overdosedeaths2.loc[overdosedeaths2['year'] == 2015]['overdose_deaths'].sum()


1033

In [None]:
county_od_deaths.sort_values('percapita',ascending = False).reset_index(drop = True)

In [37]:
prescription2 = '''SELECT
	SUM(total_drug_cost) AS total_cost
FROM prescription
INNER JOIN drug
USING(drug_name)
WHERE opioid_drug_flag = 'Y'
'''
with engine.connect() as connection:
    prescription2 = pd.read_sql(prescription2, con = connection)
prescription2

Unnamed: 0,total_cost
0,105080600.0


In [38]:
od_county = pd.merge(overdosedeaths2, prescription2,
                                    on = 'fipscounty',
                                           how = 'inner')

KeyError: 'fipscounty'

###  b. What is the ratio for spending on opioid vs non-opioid prescriptions?

In [42]:
prescription2y= '''SELECT
	SUM(total_drug_cost) AS total_cost
FROM prescription
INNER JOIN drug
USING(drug_name)
WHERE opioid_drug_flag = 'Y'
'''
with engine.connect() as connection:
    prescription2y = pd.read_sql(prescription2y, con = connection)
prescription2y['total_cost']= prescription2y['total_cost'].astype(int)
pd.set_option('display.float_format', '{:.0f}'.format)
prescription2y

Unnamed: 0,total_cost
0,105080626


In [43]:
prescription2n = '''SELECT
	SUM(total_drug_cost) AS total_cost
FROM prescription
INNER JOIN drug
USING(drug_name)
WHERE opioid_drug_flag = 'N'
'''
with engine.connect() as connection:
    prescription2n = pd.read_sql(prescription2n, con = connection)
prescription2n['total_cost']= prescription2n['total_cost']#.astype(int)
pd.set_option('display.float_format', '{:.0f}'.format)
prescription2n

Unnamed: 0,total_cost
0,3011133831


In [45]:
opioidratio = prescription2n['total_cost']/prescription2y['total_cost']
opioidratio

0   29
Name: total_cost, dtype: float64

###  c. Are those who spend a higher ratio on opioids suffering from more deaths?

# 3. Per Capita
###  a. Which county has the highest overdose deaths per capita?


In [53]:
county_od_deaths = '''SELECT 
	SUM(overdose_deaths) AS total_deaths,
	county,
	population
FROM overdose_deaths
INNER JOIN population
ON overdose_deaths.fipscounty::INTEGER = population.fipscounty::INTEGER
INNER JOIN fips_county
ON overdose_deaths.fipscounty::INTEGER =fips_county.fipscounty::INTEGER
GROUP BY population,overdose_deaths.fipscounty,county
ORDER BY total_deaths DESC'''
with engine.connect() as connection:
    county_od_deaths = pd.read_sql(county_od_deaths, con = connection)
county_od_deaths

Unnamed: 0,total_deaths,county,population
0,689,DAVIDSON,678322
1,683,KNOX,452286
2,567,SHELBY,937847
3,205,RUTHERFORD,298456
4,191,HAMILTON,354589
...,...,...,...
90,2,PICKETT,5071
91,2,HAYWOOD,17944
92,2,DYER,37751
93,1,PERRY,7882


In [111]:
county_od_deaths['percapita']= county_od_deaths['total_deaths']/(county_od_deaths['population'])
county_od_deaths.sort_values('percapita',ascending = False).reset_index(drop = True)


Unnamed: 0,total_deaths,county,population,percapita
0,73,CHEATHAM,39713,0
1,10,HANCOCK,6605,0
2,683,KNOX,452286,0
3,77,ROANE,52926,0
4,11,CLAY,7684,0
...,...,...,...,...
90,4,CARROLL,28137,0
91,1,PERRY,7882,0
92,2,HAYWOOD,17944,0
93,2,DYER,37751,0


In [57]:
county_od_deaths[f"{'percapita':.12g}"]= county_od_deaths['total_deaths']/county_od_deaths['population']
county_od_deaths.sort_values('percapita',ascending = False).reset_index(drop = True)


ValueError: Unknown format code 'g' for object of type 'str'

###  b. Which county has the most spending overall per capita?

In [74]:
prescription3b= '''SELECT 
	SUM(total_drug_cost) AS total,
	population,
	county
FROM prescription
LEFT JOIN prescriber
USING(npi)
LEFT JOIN zip_fips
ON zip_fips.zip = prescriber.nppes_provider_zip5
LEFT JOIN population
USING(fipscounty)
LEFT JOIN fips_county
USING(fipscounty)
GROUP BY county,population
'''
with engine.connect() as connection:
    prescription3b = pd.read_sql(prescription3b, con = connection)
prescription3b


Unnamed: 0,total,population,county
0,3441876,,
1,2752404,11681,DECATUR
2,28949384,54074,COFFEE
3,16173404,17940,FENTRESS
4,17715902,51341,DICKSON
...,...,...,...
110,277,,GUILFORD
111,4876,,BAKER
112,51,,DALLAS
113,28095931,35262,COCKE


In [76]:
prescription3b['percapitatotal']= prescription3b['total']/prescription3b['population']
prescription3b.sort_values('percapitatotal',ascending = False).reset_index(drop = True)

Unnamed: 0,total,population,county,percapitatotal
0,40885538,6302,MOORE,6488
1,58186128,11573,JACKSON,5028
2,19339093,5675,VAN BUREN,3408
3,14063285,5071,PICKETT,2773
4,60174204,21995,OVERTON,2736
...,...,...,...,...
110,194,,WASHINGTON,
111,739,,BROOKS,
112,277,,GUILFORD,
113,4876,,BAKER,


###  c. Which county has the most spending on opioids per capita?

In [63]:
prescription3c= '''SELECT 
	SUM(total_drug_cost) as total,
	population,
	county
FROM prescription
LEFT JOIN prescriber
USING(npi)
LEFT JOIN zip_fips
ON zip_fips.zip = prescriber.nppes_provider_zip5
LEFT JOIN population
USING(fipscounty)
LEFT JOIN fips_county
USING(fipscounty)
LEFT JOIN drug
USING(drug_name)
WHERE opioid_drug_flag = 'Y'
GROUP BY county,population
'''
with engine.connect() as connection:
    prescription3c = pd.read_sql(prescription3c, con = connection)
prescription3c


Unnamed: 0,total,population,county
0,97042,,
1,34772,11681,DECATUR
2,2401119,54074,COFFEE
3,796630,17940,FENTRESS
4,1105066,51341,DICKSON
...,...,...,...
93,15498846,678322,DAVIDSON
94,599,,BROOKS
95,3258122,128874,WILSON
96,886228,35262,COCKE


In [64]:
prescription3c['percapitatotal']= prescription3c['total']/prescription3c['population']
prescription3c.sort_values('percapitatotal',ascending = False).reset_index(drop = True)

Unnamed: 0,total,population,county,percapitatotal
0,1957724,6302,MOORE,311
1,3137500,11573,JACKSON,271
2,1352207,8773,TROUSDALE,154
3,3270860,21995,OVERTON,149
4,697548,5071,PICKETT,138
...,...,...,...,...
93,34772,11681,DECATUR,3
94,2965,7588,LAKE,0
95,97042,,,
96,147,,TRAVIS,


# 4. Unemployment
### a. Is there a correlation between unemployment rate and overdose deaths?
### b. Is there a correlation between unemployment and spending on opioids?

# 5. Top prescribers
###  a. Where are the top 10 opioid prescribers located?
###  b. Who is the top prescriber in each county?
###  c. What proportion of opioids are prescribed by the top 10 prescribers?  Top 50? Top 100?

# 6. Nashville - Davidson County
###  a. Which zip codes in Davidson County have the most opioids prescribed?
###  b. Any correlation between the number of missed trash pick ups and number of opioids prescribed?