In [None]:
import os
from urllib.parse import quote_plus

from sqlalchemy import create_engine, text

In [None]:
database_name = 'prescription_drug_project'    

connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

In [None]:
# SQLAlchemy engine built from the connection string; every query cell below opens its connection from it.
engine = create_engine(connection_string)

In [None]:
import pandas as pd

In [None]:
import matplotlib.pyplot as plt

In [None]:
import numpy as np

1. Deaths over time.
  a. How has total overdose deaths changed over time?
  b. How have overdose deaths changed over time for Davidson and Shelby counties?
  c. Are there any counties in which overdose deaths are trending downward?

In [None]:
# Join every county/year overdose record to its county name.
# The SQL text and the resulting DataFrame have separate names so the cell can
# be re-run: with a single shared name, a second run would hand the DataFrame
# to text() instead of the query string.
question_1_sql = 'SELECT * FROM overdose_deaths INNER JOIN fips_county ON overdose_deaths.fipscounty=fips_county.fipscounty::int'

with engine.connect() as connection:
    question_1 = pd.read_sql(text(question_1_sql), con=connection)

question_1.head()

In [None]:
# Statewide overdose deaths per year: one row per year, columns `year` and `overdose_deaths`.
yearly_columns = ['year', 'overdose_deaths']
q1a = question_1[yearly_columns].groupby('year', as_index=False).sum()

TABLE FOR 1a

In [None]:
# Show the yearly overdose-death totals for question 1a.
q1a

In [None]:
# Line chart of statewide overdose deaths, with one x tick per year present in the data.
fig, ax = plt.subplots(figsize=(8, 6))
ax.set_xticks(q1a['year'].astype(int))
ax.plot(q1a['year'], q1a['overdose_deaths'])
ax.set_title('Overdose Deaths By Year')
ax.set_xlabel('Year')
ax.set_ylabel('Number Of Deaths')

In [None]:
# Keep only the Davidson and Shelby rows, and only the columns needed for question 1b.
target_counties = ['DAVIDSON', 'SHELBY']
davidson_shelby = question_1.loc[
    question_1['county'].isin(target_counties),
    ['county', 'year', 'overdose_deaths'],
]

TABLE FOR 1b

In [None]:
# Split the two-county frame into one frame per county.
davidson, shelby = (
    davidson_shelby[davidson_shelby['county'] == county_name]
    for county_name in ('DAVIDSON', 'SHELBY')
)

In [None]:
# Show the Davidson County rows (county, year, overdose_deaths).
davidson

In [None]:
# Show the Shelby County rows (county, year, overdose_deaths).
shelby

In [None]:
# Davidson vs. Shelby overdose deaths by year.
# The source query has no ORDER BY, and plt.plot connects points in row order,
# so each county is sorted chronologically first to keep the lines from zig-zagging.
davidson_by_year = davidson.sort_values('year')
shelby_by_year = shelby.sort_values('year')

plt.figure(figsize=(8,6))
plt.xticks(davidson_by_year['year'].astype(int))
# Labels are attached to each line so the legend cannot drift out of sync with the plot order.
plt.plot(davidson_by_year['year'], davidson_by_year['overdose_deaths'], color='blue', label='Davidson')
plt.plot(shelby_by_year['year'], shelby_by_year['overdose_deaths'], color='orange', label='Shelby')
plt.title('Davidson vs Shelby')
plt.xlabel('Year')
plt.ylabel('Number Of Deaths')
plt.legend()

In [None]:
# Rank every county/year row by overdose deaths, lowest first.
# NOTE(review): this ordering alone does not show which counties are trending downward (question 1c);
# a per-county comparison across years is still needed.
question_1.sort_values(by=['overdose_deaths'])

2. Spending on opioids
  a. What is the correlation between spending on opioids and overdose deaths?
  b. What is the ratio for spending on opioid vs non-opioid prescriptions?
  c. Are those who spend a higher ratio on opioids suffering from more deaths?

In [None]:
# Overdose deaths, drug cost, year and opioid flag, linking deaths to prescriptions by county -> zip -> prescriber.
# The SQL text and the resulting DataFrame have separate names so the cell can
# be re-run without passing a DataFrame to text().
# NOTE(review): the GROUP BY lists every selected column and there is no aggregate, so the query
# returns distinct (year, deaths, cost, flag) combinations — confirm that is the intended input
# for the yearly sums computed below.
q2_sql = 'SELECT overdose_deaths, total_drug_cost, year, opioid_drug_flag FROM overdose_deaths as od INNER JOIN zip_fips as zf ON od.fipscounty::int = zf.fipscounty::int INNER JOIN prescriber as p1 ON zf.zip = p1.nppes_provider_zip5 INNER JOIN prescription as p2 ON p1.npi = p2.npi INNER JOIN drug as d ON p2.drug_name = d.drug_name GROUP BY year, overdose_deaths, total_drug_cost,opioid_drug_flag'

with engine.connect() as connection:
    q2 = pd.read_sql(text(q2_sql), con=connection)

q2.head()

In [None]:
# Opioid-flagged rows only, totalled per year (overdose deaths and drug cost), then displayed.
is_opioid = q2['opioid_drug_flag'] == 'Y'
q2_opioid = q2[is_opioid].groupby('year')[['overdose_deaths', 'total_drug_cost']].sum()
q2_opioid

In [None]:
# Correlation between the yearly overdose-death totals and the yearly opioid drug-cost totals.
q2_opioid['overdose_deaths'].corr(q2_opioid['total_drug_cost'])

3. Per Capita
  a. Which county has the highest overdose deaths per capita?
  b. Which county has the most spending overall per capita?
  c. Which county has the most spending on opioids per capita?

4. Unemployment
 a. Is there a correlation between unemployment rate and overdose deaths?
 b. Is there a correlation between unemployment and spending on opioids?

5. Top prescribers
  a. Where are the top 10 opioid prescribers located?
  b. Who is the top prescriber in each county?
  c. What proportion of opioids are prescribed by the top 10 prescribers?  Top 50? Top 100?

6. Nashville - Davidson County
  a. Which zip codes in Davidson County have the most opioids prescribed?
  b. Is there any correlation between the number of missed trash pickups and the number of opioids prescribed?

In [None]:
# Opioid prescriptions written by prescribers whose zip code maps to Davidson County.
# The SQL text and the resulting DataFrame have separate names so the cell can
# be re-run without passing a DataFrame to text().
question_6_sql = "SELECT * FROM prescription INNER JOIN drug AS d USING(drug_name) INNER JOIN prescriber AS p USING(npi) INNER JOIN zip_fips AS z ON p.nppes_provider_zip5=z.zip INNER JOIN fips_county AS f USING(fipscounty) WHERE d.opioid_drug_flag = 'Y' AND f.county='DAVIDSON'"

with engine.connect() as connection:
    question_6 = pd.read_sql(text(question_6_sql), con=connection)

question_6.head()

In [None]:
# Narrow the Davidson opioid rows to zip code and claim count, then show them.
zip_claim_columns = ['zip', 'total_claim_count']
question_6 = question_6.loc[:, zip_claim_columns]
question_6

In [None]:
# Total opioid claims per zip code, one row per zip, then displayed.
question_6 = question_6.groupby('zip', as_index=False)['total_claim_count'].sum()
question_6

In [None]:
# Ten zip codes with the most opioid claims, shaped for plotting:
# both columns cast to int, columns renamed for display, zip codes turned back
# into strings (so they act as bar labels), and a fresh default index.
top_10_question_6 = (
    question_6
    .sort_values(by='total_claim_count', ascending=False)
    .head(10)
    .astype(int)
    .rename(columns={'zip': 'Zip Code', 'total_claim_count': 'Opioid Claim Count'})
    .astype({'Zip Code': str})
    .reset_index(drop=True)
)

In [None]:
# Bar chart of the zip codes with the most opioid claims; each bar is annotated with its count.
plt.figure(figsize=(10, 6))
plt.bar(top_10_question_6['Zip Code'], top_10_question_6['Opioid Claim Count'], width=.7, color='blue', edgecolor='black', linewidth=1.5)
plt.xlabel('Zip Code')
plt.ylabel('Opioid Claim Count')
plt.title('Top Opioid Prescriptions in Davidson County')
# Walk the two columns in parallel instead of looking rows up by label 0..n-1.
# The f-string uses double quotes: reusing the enclosing quote character inside
# an f-string is a SyntaxError before Python 3.12.
for zip_code, claim_count in zip(top_10_question_6['Zip Code'], top_10_question_6['Opioid Claim Count']):
    plt.text(zip_code, claim_count, f"{claim_count}", va='bottom', color='black', ha='center')

1. 37203 = DAVIDSON
2. 37013 = DAVIDSON
3. 37232 = DAVIDSON
4. 37076 = WILSON
5. 37205 = DAVIDSON
6. 37027 = WILLIAMSON
7. 37207 = DAVIDSON
8. 37211 = DAVIDSON
9. 37122 = RUTHERFORD AND WILSON
10. 37204 = DAVIDSON

In [None]:
# Load the trash-report extract and keep only rows whose `complaint` column equals 1.
final_trash = pd.read_csv('../data/final_trash.csv').loc[lambda trash: trash['complaint'] == 1]

In [None]:
# Count trash complaints per zip code for the seven listed zip codes.
# NOTE(review): the integer literals assume `Zip Code` was parsed as a numeric column — confirm against the CSV.
davidson_zip_codes = [37203, 37013, 37232, 37205, 37207, 37211, 37204]
davidson_trash = (
    final_trash.loc[final_trash['Zip Code'].isin(davidson_zip_codes), 'Zip Code']
    .value_counts()
    # Name the index and the counts explicitly. value_counts() only labels its
    # result 'count' from pandas 2.0 on, so renaming a 'count' column does
    # nothing on older versions and leaves the columns mis-named.
    .rename_axis('Zip Code')
    .reset_index(name='Missed Trash Count')
    # Zip codes become strings so they behave as categorical bar labels and merge keys.
    .astype({'Zip Code': str})
)

In [None]:
# Show the per-zip missed-trash counts.
davidson_trash

In [None]:
# Show the top opioid zip codes for comparison with the trash counts.
top_10_question_6

In [None]:
# Restrict the top-10 table to the seven listed zip codes.
# `Zip Code` holds strings at this point, so the membership list must hold
# strings too; integer literals would never match and the result would be empty.
top_10_question_6.loc[top_10_question_6['Zip Code'].isin(['37203', '37013', '37232', '37205', '37207', '37211', '37204'])]

In [None]:
# Bar chart of missed-trash counts for the selected zip codes; each bar is annotated with its count.
plt.figure(figsize=(10, 6))
plt.bar(davidson_trash['Zip Code'], davidson_trash['Missed Trash Count'], width=.7, color='blue', edgecolor='black', linewidth=1.5)
plt.xlabel('Zip Code')
plt.ylabel('Missed Trash Count')
plt.title('Matching Zips in Davidson County')
# Walk the two columns in parallel instead of looking rows up by label 0..n-1.
# The f-string uses double quotes: reusing the enclosing quote character inside
# an f-string is a SyntaxError before Python 3.12.
for zip_code, missed_count in zip(davidson_trash['Zip Code'], davidson_trash['Missed Trash Count']):
    plt.text(zip_code, missed_count, f"{missed_count}", va='bottom', color='black', ha='center')

In [None]:
# Combine opioid claim counts with missed-trash counts, one row per zip code.
# The right-hand table is `davidson_trash`, the only per-zip trash table this
# notebook builds; it already carries a 'Missed Trash Count' column, so no
# rename is needed after the merge.
# NOTE(review): confirm `davidson_trash` is the intended trash table for this comparison.
# The outer join keeps zip codes that have no trash count; their count is NaN.
q6_merge = pd.merge(top_10_question_6, davidson_trash, on='Zip Code', how='outer')
q6_merge

In [None]:
# Replace missing values with -1, cast every column to int, and show the result.
missing_marker = -1
q6_merge = q6_merge.fillna(missing_marker).astype(int)
q6_merge

In [None]:
# Export the top opioid zip-code table for mapping.
# to_csv needs a path string; a bare, undefined name here raises NameError.
# NOTE(review): the file name and location are an assumption taken from the placeholder name
# `q6_formapping` and the notebook's existing ../data folder — confirm the intended path.
top_10_question_6.to_csv('../data/q6_formapping.csv')