In this notebook, you'll see how to connect to a Postgres database using the sqlalchemy library.

For this notebook, you'll need both the `sqlalchemy` and `psycopg2` libraries installed.

In [10]:
#!pip install psycopg2-binary

In [11]:
#!pip install psycopg2

In [37]:
from sqlalchemy import create_engine, text



First, we need to create a connection string. The format is

 ```<dialect(+driver)>://<username>:<password>@<hostname>:<port>/<database>```

To connect to the Lahman baseball database, you can use the following connection string.

In [53]:
database_name = 'prescribers'    # Fill this in with your prescribers database name

connection_string = f"postgresql://postgres:postgres@localhost:5432/{'prescribers'}"

Now, we need to create an engine and use it to connect.

In [64]:
engine = create_engine(connection_string)

sqlalchemy works well with pandas to convert query results into dataframes.

In [66]:
import pandas as pd

First, let's write a meaningful query.

In [68]:
prescriber_query = 'SELECT * FROM prescriber'

Now, bring it all together using the following syntax.

In [76]:
# prescriber table
prescriber_query = 'SELECT * FROM prescriber'


with engine.connect() as connection:
    prescriber_df = pd.read_sql(text(prescriber_query), con = connection)

prescriber_df.head()

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_provider_first_name,nppes_provider_mi,nppes_credentials,nppes_provider_gender,nppes_entity_code,nppes_provider_street1,nppes_provider_street2,nppes_provider_city,nppes_provider_zip5,nppes_provider_zip4,nppes_provider_state,nppes_provider_country,specialty_description,description_flag,medicare_prvdr_enroll_status
0,1003000000.0,BLAKEMORE,ROSIE,K,FNP,F,I,TENNESSEE PRISON FOR WOMEN,3881 STEWARTS LANE,NASHVILLE,37243,1,TN,US,Nurse Practitioner,S,N
1,1003012000.0,CUDZILO,COREY,,M.D.,M,I,2240 SUTHERLAND AVE,SUITE 103,KNOXVILLE,37919,2333,TN,US,Pulmonary Disease,S,E
2,1003013000.0,GRABENSTEIN,WILLIAM,P,M.D.,M,I,1822 MEMORIAL DR,,CLARKSVILLE,37043,4605,TN,US,Family Practice,S,E
3,1003014000.0,OTTO,ROBERT,J,M.D.,M,I,2400 PATTERSON STREET SUITE 100,,NASHVILLE,37203,2786,TN,US,Orthopedic Surgery,S,E
4,1003018000.0,TODD,JOSHUA,W,M.D.,M,I,1819 W CLINCH AVE,SUITE 108,KNOXVILLE,37916,2435,TN,US,Cardiology,S,E


In [78]:
# prescription table
prescription_query = 'SELECT * FROM prescription'


with engine.connect() as connection:
    prescription_df = pd.read_sql(text(prescription_query), con = connection)

prescription_df.head()

Unnamed: 0,npi,drug_name,bene_count,total_claim_count,total_30_day_fill_count,total_day_supply,total_drug_cost,bene_count_ge65,bene_count_ge65_suppress_flag,total_claim_count_ge65,ge65_suppress_flag,total_30_day_fill_count_ge65,total_day_supply_ge65,total_drug_cost_ge65
0,1245531000.0,MODAFINIL,14.0,59.0,71.0,2115.0,10193.55,,#,44.0,,56.0,1665.0,5877.96
1,1831181000.0,OLANZAPINE,,36.0,36.0,1036.0,3269.42,,*,36.0,,36.0,1036.0,3269.42
2,1720101000.0,MEDROXYPROGESTERONE ACETATE,,34.0,73.2,2196.0,1882.04,0.0,,0.0,,0.0,0.0,0.0
3,1548208000.0,OPANA ER,,16.0,16.0,449.0,6704.84,,*,,*,,,
4,1720233000.0,SULFAMETHOXAZOLE-TRIMETHOPRIM,12.0,16.0,16.0,163.0,72.89,,*,,*,,,


In [80]:
# drug table
drug_query = 'SELECT * FROM drug'


with engine.connect() as connection:
    drug_df = pd.read_sql(text(drug_query), con = connection)

drug_df.head()

Unnamed: 0,drug_name,generic_name,opioid_drug_flag,long_acting_opioid_drug_flag,antibiotic_drug_flag,antipsychotic_drug_flag
0,1ST TIER UNIFINE PENTIPS,"PEN NEEDLE, DIABETIC",N,N,N,N
1,1ST TIER UNIFINE PENTIPS PLUS,"PEN NEEDLE, DIABETIC",N,N,N,N
2,ABACAVIR,ABACAVIR SULFATE,N,N,N,N
3,ABACAVIR-LAMIVUDINE,ABACAVIR SULFATE/LAMIVUDINE,N,N,N,N
4,ABACAVIR-LAMIVUDINE-ZIDOVUDINE,ABACAVIR/LAMIVUDINE/ZIDOVUDINE,N,N,N,N


In [84]:
# zip_fips table
zip_fips_query = 'SELECT * FROM zip_fips'


with engine.connect() as connection:
    zip_fips_df = pd.read_sql(text(zip_fips_query), con = connection)

zip_fips_df.head()

Unnamed: 0,zip,fipscounty,res_ratio,bus_ratio,oth_ratio,tot_ratio
0,501,36103,0.0,1.0,0.0,1.0
1,601,72113,0.160724,0.20098,0.128834,0.1625
2,601,72001,0.839276,0.79902,0.871166,0.8375
3,602,72003,1.0,0.9988,1.0,0.999919
4,602,72005,0.0,0.0012,0.0,8.1e-05


In [86]:
# cbsa table
cbsa_query = 'SELECT * FROM cbsa'


with engine.connect() as connection:
    cbsa_df = pd.read_sql(text(cbsa_query), con = connection)

cbsa_df.head()

Unnamed: 0,fipscounty,cbsa,cbsaname
0,1001,33860,"Montgomery, AL"
1,1003,19300,"Daphne-Fairhope-Foley, AL"
2,1007,13820,"Birmingham-Hoover, AL"
3,1009,13820,"Birmingham-Hoover, AL"
4,1015,11500,"Anniston-Oxford-Jacksonville, AL"


In [90]:
# population table
population_query = 'SELECT * FROM population'


with engine.connect() as connection:
    population_df = pd.read_sql(text(population_query), con = connection)

population_df.head()

Unnamed: 0,fipscounty,population
0,47017,28137.0
1,47023,17097.0
2,47039,11681.0
3,47037,678322.0
4,47087,11573.0


In [94]:
# fips_county table
fips_county_query = 'SELECT * FROM fips_county'


with engine.connect() as connection:
    fips_county_df = pd.read_sql(text(fips_county_query), con = connection)

fips_county_df.head()

Unnamed: 0,county,state,fipscounty,fipsstate
0,AUTAUGA,AL,1001,1
1,BALDWIN,AL,1003,1
2,BARBOUR,AL,1005,1
3,BIBB,AL,1007,1
4,BLOUNT,AL,1009,1


In [96]:
# overdose_deaths table
overdose_deaths_query = 'SELECT * FROM overdose_deaths'


with engine.connect() as connection:
    overdose_deaths_df = pd.read_sql(text(overdose_deaths_query), con = connection)

overdose_deaths_df.head()

Unnamed: 0,overdose_deaths,year,fipscounty
0,135,2015,47157
1,150,2016,47157
2,159,2017,47157
3,123,2018,47157
4,122,2015,47093


1. Which Tennessee counties had a disproportionately high number of opioid prescriptions?

In [114]:
prescriber_prescription_drug_merged = prescriber_df.merge(prescription_df, on ='npi').merge(drug_df, on = 'drug_name')
print(prescriber_prescription_drug_merged)

                 npi nppes_provider_last_org_name nppes_provider_first_name  \
0       1.003000e+09                    BLAKEMORE                     ROSIE   
1       1.003000e+09                    BLAKEMORE                     ROSIE   
2       1.003012e+09                      CUDZILO                     COREY   
3       1.003012e+09                      CUDZILO                     COREY   
4       1.003012e+09                      CUDZILO                     COREY   
...              ...                          ...                       ...   
705010  1.992996e+09                        GILES                    WESLEY   
705011  1.992996e+09                        GILES                    WESLEY   
705012  1.993000e+09                       THOMAS                    SHELIA   
705013  1.993000e+09                       THOMAS                    SHELIA   
705014  1.993000e+09                       THOMAS                    SHELIA   

       nppes_provider_mi nppes_credentials nppes_pr

2. Who are the top opioid prescibers for the state of Tennessee?

3. What did the trend in overdose deaths due to opioids look like in Tennessee from 2015 to 2018?

4. Is there an association between rates of opioid prescriptions and overdose deaths by county?

5. Is there any association between a particular type of opioid and number of overdose deaths?

For much more information about SQLAlchemy and to see a more “Pythonic” way to execute queries, see Introduction to Databases in Python: https://www.datacamp.com/courses/introduction-to-relational-databases-in-python