In [1]:
from dotenv import dotenv_values
import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from sshtunnel import SSHTunnelForwarder

In [2]:
# Load credentials from .env file.
# dotenv_values() returns an empty mapping when the file cannot be found, so
# check up front that every setting read by the connection cells is present.
# Otherwise the failure only surfaces later as a bare KeyError in another cell.
envDict = dotenv_values("secrets/.env")

REQUIRED_ENV_KEYS = (
    "SSH_HOST",
    "SSH_USERNAME",
    "SSH_PKEY_PATH",
    "SSH_PKEY_PASSWORD",
    "DB_REMOTE_REF_URL",
    "DB_REMOTE_REF_PORT",
    "DB_USERNAME",
    "DB_PASSWORD",
)
# Only presence is checked (not truthiness): an empty value may be legitimate,
# e.g. a private key without a passphrase.
missing_env_keys = [key for key in REQUIRED_ENV_KEYS if key not in envDict]
if missing_env_keys:
    raise KeyError(
        "secrets/.env is missing required settings: " + ", ".join(missing_env_keys)
    )

In [3]:
# Create a bind to forward connections on a local port to the mysql port on the server.
# NOTE(review): call server.stop() once the analysis is finished; no shutdown is
# visible in this section of the notebook.
server = SSHTunnelForwarder(
    # Host URL and Login
    envDict['SSH_HOST'],
    ssh_username = envDict['SSH_USERNAME'],
    # Private key for SSH connections
    ssh_pkey = envDict["SSH_PKEY_PATH"],
    ssh_private_key_password = envDict['SSH_PKEY_PASSWORD'],
    # Bind to mysql port on server
    remote_bind_address = (envDict["DB_REMOTE_REF_URL"], int(envDict["DB_REMOTE_REF_PORT"])),
    # Listen on the loopback interface only (port 0 = let the OS pick a free port).
    # When no local bind address is given, sshtunnel listens on 0.0.0.0, which
    # exposes the tunnelled database port to every host that can reach this machine.
    local_bind_address = ("127.0.0.1", 0),
    # Don't look for keys on the local machine
    allow_agent = False,
    host_pkey_directories = [],
)
server.start()

In [4]:
# Create the engine to connect to the database using the bound port.
# URL.create() takes each component unescaped, so a username or password that
# contains characters such as '@', ':', '/' or '%' cannot corrupt the connection
# URL the way it would when formatted straight into a URL string.
engine = create_engine(
    URL.create(
        drivername="mysql+pymysql",
        username=envDict["DB_USERNAME"],
        password=envDict["DB_PASSWORD"],
        # The tunnel is reached through the loopback interface
        host="127.0.0.1",
        port=server.local_bind_port,
        database="hospital_price_transparency",
    )
)

In [5]:
# Fetch every price row whose code contains "84206" anywhere in the string.
# Per the original author: this wildcard match crashes locally and takes
# about 35-40 seconds on the server.
wildcard_query = text("SELECT * FROM prices WHERE code LIKE '%84206%'")

with engine.connect() as connection:
    prices_df = pd.read_sql_query(sql=wildcard_query, con=connection)

# Preview the first few matching rows
prices_df.head()

Unnamed: 0,code,npi_number,payer,price
0,84206,1003139775.0,CASH,631.02
1,84206,1003858408.0,CASH,200.0
2,84206,1003858408.0,CASH,200.0
3,84206,1003862053.0,CASH,113.0
4,84206,1003908443.0,CASH,51.0


In [8]:
# Collect the distinct code strings matched by the wildcard query and report
# how many there are, followed by the codes themselves.
uniques = prices_df["code"].unique()
print(f"Unique count:  {len(uniques)}")
print(f"Unique codes:  {uniques}")

Unique count:  253
Unique codes:  ['084206' '084206,1' '84206' '84206,1' '84206,2' '84206,3' '84206,4'
 '84206,90' '84206-00' '84206-01' '84206-02' '84206-1' 'CPT 84206'
 'CPTÂ® 84206' 'CPTÂ® 84206,1' 'CPTÂ® 84206,10' 'CPTÂ® 84206,100'
 'CPTÂ® 84206,101' 'CPTÂ® 84206,102' 'CPTÂ® 84206,103' 'CPTÂ® 84206,104'
 'CPTÂ® 84206,105' 'CPTÂ® 84206,106' 'CPTÂ® 84206,107' 'CPTÂ® 84206,108'
 'CPTÂ® 84206,109' 'CPTÂ® 84206,11' 'CPTÂ® 84206,110' 'CPTÂ® 84206,111'
 'CPTÂ® 84206,112' 'CPTÂ® 84206,113' 'CPTÂ® 84206,114' 'CPTÂ® 84206,115'
 'CPTÂ® 84206,116' 'CPTÂ® 84206,117' 'CPTÂ® 84206,118' 'CPTÂ® 84206,119'
 'CPTÂ® 84206,12' 'CPTÂ® 84206,120' 'CPTÂ® 84206,121' 'CPTÂ® 84206,122'
 'CPTÂ® 84206,123' 'CPTÂ® 84206,124' 'CPTÂ® 84206,125' 'CPTÂ® 84206,126'
 'CPTÂ® 84206,127' 'CPTÂ® 84206,128' 'CPTÂ® 84206,129' 'CPTÂ® 84206,13'
 'CPTÂ® 84206,130' 'CPTÂ® 84206,131' 'CPTÂ® 84206,132' 'CPTÂ® 84206,133'
 'CPTÂ® 84206,134' 'CPTÂ® 84206,135' 'CPTÂ® 84206,136' 'CPTÂ® 84206,137'
 'CPTÂ® 84206,138' 'CPTÂ® 84206,139' 