In this notebook, you'll see how to connect to a Postgres database using the sqlalchemy library.

For this notebook, you'll need both the `sqlalchemy` and `psycopg2` libraries installed.

In [2]:
#!pip install psycopg2-binary

In [3]:
#!pip install psycopg2

In [4]:
from sqlalchemy import create_engine, text



First, we need to create a connection string. The format is

 ```<dialect(+driver)>://<username>:<password>@<hostname>:<port>/<database>```

To connect to the Lahman baseball database, you can use the following connection string.

In [6]:
database_name = 'prescribers'    # Fill this in with your prescribers database name

connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

Now, we need to create an engine and use it to connect.

In [8]:
engine = create_engine(connection_string)

sqlalchemy works well with pandas to convert query results into dataframes.

In [10]:
import pandas as pd

First, let's write a meaningful query.

In [12]:
query = 'SELECT * FROM prescriber'

Now, bring it all together using the following syntax.

In [14]:
with engine.connect() as connection:
    people = pd.read_sql(text(query), con = connection)

people.head()

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_provider_first_name,nppes_provider_mi,nppes_credentials,nppes_provider_gender,nppes_entity_code,nppes_provider_street1,nppes_provider_street2,nppes_provider_city,nppes_provider_zip5,nppes_provider_zip4,nppes_provider_state,nppes_provider_country,specialty_description,description_flag,medicare_prvdr_enroll_status
0,1003000000.0,BLAKEMORE,ROSIE,K,FNP,F,I,TENNESSEE PRISON FOR WOMEN,3881 STEWARTS LANE,NASHVILLE,37243,1,TN,US,Nurse Practitioner,S,N
1,1003012000.0,CUDZILO,COREY,,M.D.,M,I,2240 SUTHERLAND AVE,SUITE 103,KNOXVILLE,37919,2333,TN,US,Pulmonary Disease,S,E
2,1003013000.0,GRABENSTEIN,WILLIAM,P,M.D.,M,I,1822 MEMORIAL DR,,CLARKSVILLE,37043,4605,TN,US,Family Practice,S,E
3,1003014000.0,OTTO,ROBERT,J,M.D.,M,I,2400 PATTERSON STREET SUITE 100,,NASHVILLE,37203,2786,TN,US,Orthopedic Surgery,S,E
4,1003018000.0,TODD,JOSHUA,W,M.D.,M,I,1819 W CLINCH AVE,SUITE 108,KNOXVILLE,37916,2435,TN,US,Cardiology,S,E


In [15]:
people.groupby(['nppes_provider_last_org_name']).nunique().sort_values('npi', ascending = False)

Unnamed: 0_level_0,npi,nppes_provider_first_name,nppes_provider_mi,nppes_credentials,nppes_provider_gender,nppes_entity_code,nppes_provider_street1,nppes_provider_street2,nppes_provider_city,nppes_provider_zip5,nppes_provider_zip4,nppes_provider_state,nppes_provider_country,specialty_description,description_flag,medicare_prvdr_enroll_status
nppes_provider_last_org_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
SMITH,279,196,20,46,2,1,254,73,71,120,201,1,1,46,2,3
JOHNSON,156,124,20,39,2,1,144,48,55,90,125,1,1,34,2,3
JONES,152,120,21,39,2,1,146,47,54,90,122,1,1,28,2,3
WILLIAMS,137,104,20,39,2,1,125,47,49,75,112,1,1,32,2,2
MILLER,106,81,20,29,2,1,97,42,37,58,84,1,1,29,2,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HELDERMAN,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
HELLERVIK,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1
HELLGREN,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1
HELLMANN,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1


For much more information about SQLAlchemy and to see a more “Pythonic” way to execute queries, see Introduction to Databases in Python: https://www.datacamp.com/courses/introduction-to-relational-databases-in-python

In [17]:
rx_query = 'SELECT * FROM prescription'

In [18]:
with engine.connect() as connection:
    rx = pd.read_sql(text(rx_query), con = connection)

rx.head()

Unnamed: 0,npi,drug_name,bene_count,total_claim_count,total_30_day_fill_count,total_day_supply,total_drug_cost,bene_count_ge65,bene_count_ge65_suppress_flag,total_claim_count_ge65,ge65_suppress_flag,total_30_day_fill_count_ge65,total_day_supply_ge65,total_drug_cost_ge65
0,1427076000.0,RALOXIFENE HCL,,18.0,28.0,840.0,1009.66,,*,18.0,,28.0,840.0,1009.66
1,1003858000.0,GLIMEPIRIDE,,12.0,16.0,480.0,270.86,,*,,*,,,
2,1184627000.0,TAMSULOSIN HCL,,14.0,24.0,698.0,353.62,,#,,#,,,
3,1306111000.0,SPIRIVA,,13.0,13.0,390.0,4783.28,,*,,*,,,
4,1285658000.0,SPIRIVA,,13.0,13.0,390.0,4855.95,,#,,#,,,


In [19]:
drug_query = 'SELECT * FROM drug'

In [20]:
with engine.connect() as connection:
    drug_df = pd.read_sql(text(drug_query), con = connection)

drug_df.head()

Unnamed: 0,drug_name,generic_name,opioid_drug_flag,long_acting_opioid_drug_flag,antibiotic_drug_flag,antipsychotic_drug_flag
0,1ST TIER UNIFINE PENTIPS,"PEN NEEDLE, DIABETIC",N,N,N,N
1,1ST TIER UNIFINE PENTIPS PLUS,"PEN NEEDLE, DIABETIC",N,N,N,N
2,ABACAVIR,ABACAVIR SULFATE,N,N,N,N
3,ABACAVIR-LAMIVUDINE,ABACAVIR SULFATE/LAMIVUDINE,N,N,N,N
4,ABACAVIR-LAMIVUDINE-ZIDOVUDINE,ABACAVIR/LAMIVUDINE/ZIDOVUDINE,N,N,N,N


In [21]:
zip_query = 'SELECT * FROM zip_fips'

In [22]:
with engine.connect() as connection:
    zips = pd.read_sql(text(zip_query), con = connection)

zips.head()

Unnamed: 0,zip,fipscounty,res_ratio,bus_ratio,oth_ratio,tot_ratio
0,501,36103,0.0,1.0,0.0,1.0
1,601,72113,0.160724,0.20098,0.128834,0.1625
2,601,72001,0.839276,0.79902,0.871166,0.8375
3,602,72003,1.0,0.9988,1.0,0.999919
4,602,72005,0.0,0.0012,0.0,8.1e-05


In [23]:
cty_query = 'SELECT * FROM fips_county'

In [24]:
with engine.connect() as connection:
    cty_df = pd.read_sql(text(cty_query), con = connection)

cty_df.head()

Unnamed: 0,county,state,fipscounty,fipsstate
0,AUTAUGA,AL,1001,1
1,BALDWIN,AL,1003,1
2,BARBOUR,AL,1005,1
3,BIBB,AL,1007,1
4,BLOUNT,AL,1009,1


In [25]:
true_zip = zips.groupby(['fipscounty'])['tot_ratio'].max().reset_index()
true_zip

Unnamed: 0,fipscounty,tot_ratio
0,01001,1.000000
1,01003,1.000000
2,01005,1.000000
3,01007,1.000000
4,01009,1.000000
...,...,...
3222,72151,0.998907
3223,72153,0.994599
3224,78010,1.000000
3225,78020,1.000000


In [44]:
true_zip2 = pd.merge(true_zip, zips, on = 'fipscounty', how = 'inner')
true_zip2

Unnamed: 0,fipscounty,tot_ratio_x,zip,res_ratio,bus_ratio,oth_ratio,tot_ratio_y
0,01001,1.0,36003,1.000000,1.000000,1.000000,1.000000
1,01001,1.0,36006,0.711048,0.529412,0.555556,0.704918
2,01001,1.0,36008,1.000000,1.000000,0.000000,1.000000
3,01001,1.0,36022,0.366556,0.242991,0.350000,0.364454
4,01001,1.0,36051,0.652498,0.800000,1.000000,0.660445
...,...,...,...,...,...,...,...
54176,78010,1.0,00820,1.000000,1.000000,1.000000,1.000000
54177,78010,1.0,00840,1.000000,1.000000,1.000000,1.000000
54178,78010,1.0,00850,1.000000,1.000000,1.000000,1.000000
54179,78020,1.0,00830,1.000000,1.000000,1.000000,1.000000


In [None]:
zips.drop_duplicates(subset='A', keep="last")


In [46]:
cbsa_query = 'SELECT * FROM cbsa'

In [48]:
with engine.connect() as connection:
    cbsa = pd.read_sql(text(cbsa_query), con = connection)

cbsa.head()

Unnamed: 0,fipscounty,cbsa,cbsaname
0,1001,33860,"Montgomery, AL"
1,1003,19300,"Daphne-Fairhope-Foley, AL"
2,1007,13820,"Birmingham-Hoover, AL"
3,1009,13820,"Birmingham-Hoover, AL"
4,1015,11500,"Anniston-Oxford-Jacksonville, AL"


In [50]:
od_query = 'SELECT * FROM overdose_deaths'

In [52]:
with engine.connect() as connection:
    od_df = pd.read_sql(text(od_query), con = connection)

od_df.head()

Unnamed: 0,overdose_deaths,year,fipscounty
0,135,2015,47157
1,150,2016,47157
2,159,2017,47157
3,123,2018,47157
4,122,2015,47093


In [54]:
df1 = pd.merge(people, rx, on = 'npi')
df1

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_provider_first_name,nppes_provider_mi,nppes_credentials,nppes_provider_gender,nppes_entity_code,nppes_provider_street1,nppes_provider_street2,nppes_provider_city,...,total_30_day_fill_count,total_day_supply,total_drug_cost,bene_count_ge65,bene_count_ge65_suppress_flag,total_claim_count_ge65,ge65_suppress_flag,total_30_day_fill_count_ge65,total_day_supply_ge65,total_drug_cost_ge65
0,1.003000e+09,BLAKEMORE,ROSIE,K,FNP,F,I,TENNESSEE PRISON FOR WOMEN,3881 STEWARTS LANE,NASHVILLE,...,34.0,620.0,383.12,,*,32.0,,34.0,620.0,383.12
1,1.003000e+09,BLAKEMORE,ROSIE,K,FNP,F,I,TENNESSEE PRISON FOR WOMEN,3881 STEWARTS LANE,NASHVILLE,...,32.0,852.0,276.87,,*,11.0,,21.0,522.0,163.02
2,1.003012e+09,CUDZILO,COREY,,M.D.,M,I,2240 SUTHERLAND AVE,SUITE 103,KNOXVILLE,...,30.0,900.0,13195.05,,*,30.0,,30.0,900.0,13195.05
3,1.003012e+09,CUDZILO,COREY,,M.D.,M,I,2240 SUTHERLAND AVE,SUITE 103,KNOXVILLE,...,13.0,359.0,252.30,,*,,*,,,
4,1.003012e+09,CUDZILO,COREY,,M.D.,M,I,2240 SUTHERLAND AVE,SUITE 103,KNOXVILLE,...,29.0,870.0,10602.62,,*,27.0,,29.0,870.0,10602.62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
656053,1.992996e+09,GILES,WESLEY,H,MD,M,I,979 E 3RD ST STE 300,,CHATTANOOGA,...,25.0,99.0,151.96,,#,,#,,,
656054,1.992996e+09,GILES,WESLEY,H,MD,M,I,979 E 3RD ST STE 300,,CHATTANOOGA,...,133.0,3990.0,1508.74,,#,89.0,,97.0,2910.0,1010.50
656055,1.993000e+09,THOMAS,SHELIA,K,"RN, APN",F,I,6266 POPLAR AVE,,MEMPHIS,...,42.0,1230.0,782.27,,*,19.0,,19.0,570.0,292.28
656056,1.993000e+09,THOMAS,SHELIA,K,"RN, APN",F,I,6266 POPLAR AVE,,MEMPHIS,...,12.0,360.0,474.10,0.0,,0.0,,0.0,0.0,0.00


In [56]:
people.shape

(25050, 17)

In [58]:
rx.shape

(656058, 14)

In [60]:
df2 = pd.merge(df1, drug_df, on = 'drug_name', how = 'left')
df2

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_provider_first_name,nppes_provider_mi,nppes_credentials,nppes_provider_gender,nppes_entity_code,nppes_provider_street1,nppes_provider_street2,nppes_provider_city,...,total_claim_count_ge65,ge65_suppress_flag,total_30_day_fill_count_ge65,total_day_supply_ge65,total_drug_cost_ge65,generic_name,opioid_drug_flag,long_acting_opioid_drug_flag,antibiotic_drug_flag,antipsychotic_drug_flag
0,1.003000e+09,BLAKEMORE,ROSIE,K,FNP,F,I,TENNESSEE PRISON FOR WOMEN,3881 STEWARTS LANE,NASHVILLE,...,32.0,,34.0,620.0,383.12,CALCITRIOL,N,N,N,N
1,1.003000e+09,BLAKEMORE,ROSIE,K,FNP,F,I,TENNESSEE PRISON FOR WOMEN,3881 STEWARTS LANE,NASHVILLE,...,11.0,,21.0,522.0,163.02,ALLOPURINOL,N,N,N,N
2,1.003012e+09,CUDZILO,COREY,,M.D.,M,I,2240 SUTHERLAND AVE,SUITE 103,KNOXVILLE,...,30.0,,30.0,900.0,13195.05,FLUTICASONE/SALMETEROL,N,N,N,N
3,1.003012e+09,CUDZILO,COREY,,M.D.,M,I,2240 SUTHERLAND AVE,SUITE 103,KNOXVILLE,...,,*,,,,AZITHROMYCIN,N,N,Y,N
4,1.003012e+09,CUDZILO,COREY,,M.D.,M,I,2240 SUTHERLAND AVE,SUITE 103,KNOXVILLE,...,27.0,,29.0,870.0,10602.62,TIOTROPIUM BROMIDE,N,N,N,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
705010,1.992996e+09,GILES,WESLEY,H,MD,M,I,979 E 3RD ST STE 300,,CHATTANOOGA,...,,#,,,,HYDROCODONE/ACETAMINOPHEN,Y,N,N,N
705011,1.992996e+09,GILES,WESLEY,H,MD,M,I,979 E 3RD ST STE 300,,CHATTANOOGA,...,89.0,,97.0,2910.0,1010.50,LEVOTHYROXINE SODIUM,N,N,N,N
705012,1.993000e+09,THOMAS,SHELIA,K,"RN, APN",F,I,6266 POPLAR AVE,,MEMPHIS,...,19.0,,19.0,570.0,292.28,HYDROCODONE/ACETAMINOPHEN,Y,N,N,N
705013,1.993000e+09,THOMAS,SHELIA,K,"RN, APN",F,I,6266 POPLAR AVE,,MEMPHIS,...,0.0,,0.0,0.0,0.00,OXYCODONE HCL/ACETAMINOPHEN,Y,N,N,N
