In this notebook, you'll see how to connect to a Postgres database using the sqlalchemy library.

For this notebook, you'll need both the `sqlalchemy` and `psycopg2` libraries installed.

In [1]:
# Uncomment and run once if the psycopg2 driver is not installed yet.
# `%pip` installs into the environment of the running kernel.
# %pip install psycopg2-binary

In [2]:
# Alternative to the psycopg2-binary package; presumably only one of the two
# needs to be installed — confirm for your environment.
# %pip install psycopg2

In [3]:
import pandas as pd

In [4]:
import matplotlib.pyplot as plt

In [5]:
# Show every column when a wide DataFrame is displayed (None = no limit).
pd.options.display.max_columns = None

In [6]:
from sqlalchemy import create_engine, text



First, we need to create a connection string. The format is

 ```<dialect(+driver)>://<username>:<password>@<hostname>:<port>/<database>```

To connect to the prescribers database, you can use the following connection string.

In [7]:
import os
from urllib.parse import quote


def build_connection_string(database, user="postgres", password=None,
                            host="localhost", port=5432):
    """Return a ``postgresql://`` URL for ``database``.

    Parameters
    ----------
    database : str
        Name of the database to connect to.
    user : str
        Database role name. Percent-encoded before being placed in the URL.
    password : str or None
        Password for ``user``. Percent-encoded so characters such as
        ``@``, ``/``, ``?`` or ``!`` cannot be mistaken for URL syntax.
        When empty or None, the ``:password`` part is left out of the URL.
    host : str
        Server host name.
    port : int or str
        Server port.

    Returns
    -------
    str
        ``postgresql://<user>[:<password>]@<host>:<port>/<database>``
    """
    credentials = quote(user, safe="")
    if password:
        credentials += ":" + quote(password, safe="")
    return f"postgresql://{credentials}@{host}:{port}/{database}"


database_name = 'prescribers'    # Fill this in with your prescribers database name

# Never store the password in the notebook: it would be saved (and shared)
# together with the file. Set PGPASSWORD (and optionally PGUSER / PGHOST /
# PGPORT) in the environment before starting Jupyter instead.
connection_string = build_connection_string(
    database_name,
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD"),
    host=os.environ.get("PGHOST", "localhost"),
    port=os.environ.get("PGPORT", "5432"),
)

Now, we need to create an engine and use it to connect.

In [8]:
# Create the engine from the connection string; later cells call
# engine.connect() on it to obtain a connection for each query.
engine = create_engine(connection_string)

sqlalchemy works well with pandas to convert query results into dataframes.

In [9]:
import pandas as pd

First, let's write a meaningful query.

In [10]:
# Copy and reuse: the SQL text that the read_sql cell sends to the database.
query = "SELECT * FROM prescriber"

Now, bring it all together using the following syntax.

In [11]:
# Copy and reuse: open a connection for the duration of the `with` block
# and load the query result into a DataFrame.
with engine.connect() as conn:
    people = pd.read_sql(sql=text(query), con=conn)

# Preview the first five rows only.
people.head()

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_provider_first_name,nppes_provider_mi,nppes_credentials,nppes_provider_gender,nppes_entity_code,nppes_provider_street1,nppes_provider_street2,nppes_provider_city,nppes_provider_zip5,nppes_provider_zip4,nppes_provider_state,nppes_provider_country,specialty_description,description_flag,medicare_prvdr_enroll_status
0,1003000000.0,BLAKEMORE,ROSIE,K,FNP,F,I,TENNESSEE PRISON FOR WOMEN,3881 STEWARTS LANE,NASHVILLE,37243,1,TN,US,Nurse Practitioner,S,N
1,1003012000.0,CUDZILO,COREY,,M.D.,M,I,2240 SUTHERLAND AVE,SUITE 103,KNOXVILLE,37919,2333,TN,US,Pulmonary Disease,S,E
2,1003013000.0,GRABENSTEIN,WILLIAM,P,M.D.,M,I,1822 MEMORIAL DR,,CLARKSVILLE,37043,4605,TN,US,Family Practice,S,E
3,1003014000.0,OTTO,ROBERT,J,M.D.,M,I,2400 PATTERSON STREET SUITE 100,,NASHVILLE,37203,2786,TN,US,Orthopedic Surgery,S,E
4,1003018000.0,TODD,JOSHUA,W,M.D.,M,I,1819 W CLINCH AVE,SUITE 108,KNOXVILLE,37916,2435,TN,US,Cardiology,S,E


Columns needed for the next query: `opioid_drug_flag` comes from the `drug` table, and `county` comes from the `fips_county` table.

In [12]:
# SQL for opioid claims in Tennessee: drug name, claim count and county.
# Join path: prescription -> prescriber (npi) -> zip_fips (provider zip5)
#            -> population and fips_county (fipscounty) -> drug (drug_name).
# Keeps rows where the drug is flagged as an opioid and the county's state
# is 'TN', ordered by the second selected column (claims), largest first.
# NOTE(review): no column from `population` is selected; its inner join only
# restricts rows to counties present in that table — confirm this is intended.
# NOTE(review): if one zip maps to several counties in zip_fips, the same
# claim count would be listed under each of them — verify against the table.
# The name TN_opioid holds the SQL text here; the cell that runs the query
# rebinds it to the resulting DataFrame.
TN_opioid = """select prescription.drug_name as drug_name, prescription.total_claim_count as claims, fips_county.county as county
from prescription
join prescriber 
using (npi)
join zip_fips
on prescriber.nppes_provider_zip5 = zip_fips.zip
join population
using (fipscounty)
join fips_county
using (fipscounty)
join drug
using (drug_name)
where opioid_drug_flag = 'Y'
	and fips_county.state = 'TN'
order by 2
desc;"""

In [13]:
# `TN_opioid` starts out as the SQL text and is rebound below to the query
# result. Keep the SQL under its own name first; otherwise re-running this
# cell would hand a DataFrame to text() and fail.
if isinstance(TN_opioid, str):
    TN_opioid_query = TN_opioid

with engine.connect() as connection:
    TN_opioid = pd.read_sql(text(TN_opioid_query), con=connection)

# Display the result; pandas truncates long frames to head and tail rows.
TN_opioid

Unnamed: 0,drug_name,claims,county
0,OXYCODONE HCL,4538.0,SCOTT
1,HYDROCODONE-ACETAMINOPHEN,3376.0,SCOTT
2,OXYCODONE HCL,2977.0,CAMPBELL
3,OXYCODONE HCL,2813.0,KNOX
4,OXYCODONE HCL,2478.0,MONTGOMERY
...,...,...,...
52584,TRAMADOL HCL,11.0,CARROLL
52585,MORPHINE SULFATE,11.0,OVERTON
52586,TRAMADOL HCL ER,11.0,HAMILTON
52587,EMBEDA,11.0,SHELBY


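Note — generic pandas syntax for merging two DataFrames (`customers` and `orders` are placeholder names, not objects defined in this notebook):

```python
merged = pd.merge(customers, orders, on='customer_id', how='outer')
```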

In [14]:
# Summarize the result: row count, column dtypes, non-null counts, memory use.
TN_opioid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52589 entries, 0 to 52588
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   drug_name  52589 non-null  object 
 1   claims     52589 non-null  float64
 2   county     52589 non-null  object 
dtypes: float64(1), object(2)
memory usage: 1.2+ MB


#  Which Tennessee counties had a disproportionately high number of opioid prescriptions?

In [26]:
# Total opioid claims per county: group the claim counts by county name,
# then add them up. These are raw totals, not adjusted for population.
claims_by_county = TN_opioid.groupby('county')['claims']
test_plot = claims_by_county.sum()

In [27]:
# Peek at the first five entries. groupby sorts its keys by default, so these
# are the alphabetically first counties, not the ones with the most claims.
test_plot.head()

county
ANDERSON    52701.0
BEDFORD     41506.0
BENTON      12046.0
BLEDSOE     22145.0
BLOUNT      62747.0
Name: claims, dtype: float64

For much more information about SQLAlchemy and to see a more “Pythonic” way to execute queries, see Introduction to Databases in Python: https://www.datacamp.com/courses/introduction-to-relational-databases-in-python