# Connect to PostgreSQL database

A database called *analytics* already exists. This notebook connects to *analytics*.


1. Install psycopg2 (use one of the following)
    - `conda install -c anaconda psycopg2`
    - `pip install psycopg2`

2. Store the connection secrets (e.g. as a JSON object with the keys `ENDPOINT`, `PORT`, `USER`, `PASSWORD` and `DATABASE`)

3. Create a database connection 

4. Create cursor


In [15]:
import psycopg2
import pandas as pd
from aws_secrets import get_secret

In [16]:
def create_rw_conn(secrets, sslmode='prefer',
                   sslrootcert="[full path]rds-combined-ca-bundle.pem"):
    """Open a psycopg2 connection using values taken from ``secrets``.

    Parameters
    ----------
    secrets : mapping
        Must provide the keys ``'ENDPOINT'``, ``'PORT'``, ``'USER'``,
        ``'PASSWORD'`` and ``'DATABASE'``.
    sslmode : str, optional
        Forwarded to ``psycopg2.connect``. Defaults to ``'prefer'``.
        NOTE(review): per the libpq documentation, ``'prefer'`` does not
        verify the server certificate and may fall back to an unencrypted
        connection; pass ``'verify-full'`` together with a real
        ``sslrootcert`` when verification is required.
    sslrootcert : str, optional
        Forwarded to ``psycopg2.connect``. The default contains a literal
        ``[full path]`` marker and looks like a placeholder -- replace it
        with the actual location of the CA bundle.

    Returns
    -------
    The connection object returned by ``psycopg2.connect``.

    Raises
    ------
    KeyError
        If one or more required keys are absent from ``secrets``. The
        message lists every missing key name (never any secret value).
    """
    # Map each psycopg2.connect keyword to the secrets key that supplies it.
    key_for_kwarg = {
        'host': 'ENDPOINT',
        'port': 'PORT',
        'user': 'USER',
        'database': 'DATABASE',
        'password': 'PASSWORD',
    }

    connect_kwargs = {}
    missing = []
    for kwarg, key in key_for_kwarg.items():
        try:
            connect_kwargs[kwarg] = secrets[key]
        except KeyError:
            # Collect all missing keys so they are reported in one error.
            missing.append(key)
    if missing:
        raise KeyError(
            "secrets is missing required key(s): " + ", ".join(missing)
        )

    # Open the connection; the SSL options are passed through unchanged.
    conn = psycopg2.connect(sslmode=sslmode, sslrootcert=sslrootcert,
                            **connect_kwargs)
    return conn



In [17]:
# Fetch the connection secrets via get_secret() (imported from aws_secrets)
# and open a connection using a read/write user.
secrets = get_secret()
conn = create_rw_conn(secrets=secrets)

In [18]:
# Open a cursor on the connection to perform database operations.
# NOTE(review): this cursor is never closed in the cells shown here --
# consider calling cur.close() once the results have been fetched.
cur = conn.cursor()

In [19]:
# database: analytics
# schema: texas
# table: weather_county
cur.execute("SELECT * FROM texas.weather_county;")

In [20]:
cur.fetchmany(3) # fetch up to 3 records from the query executed on this cursor
# Alternatives on the same cursor (DB-API):
# cur.fetchall()  -> all remaining rows
# cur.fetchone()  -> the next single row

[(1,
  '48225',
  'Houston',
  Decimal('69'),
  True,
  datetime.datetime(2022, 11, 25, 5, 19, 15, 914938),
  datetime.datetime(2022, 11, 25, 5, 19, 15, 914938)),
 (2,
  '48019',
  'Bandera',
  Decimal('71'),
  True,
  datetime.datetime(2022, 11, 25, 5, 19, 15, 914938),
  datetime.datetime(2022, 11, 25, 5, 19, 15, 914938)),
 (3,
  '48113',
  'Dallas',
  Decimal('71'),
  True,
  datetime.datetime(2022, 11, 25, 5, 19, 15, 914938),
  datetime.datetime(2022, 11, 25, 5, 19, 15, 914938))]

In [21]:
def fetch_results(sql, conn, params=None):
    """Execute ``sql`` on ``conn`` and return the full result set as a DataFrame.

    Parameters
    ----------
    sql : str
        The statement to execute.
    conn : DB-API connection
        Any object exposing ``cursor()``; a fresh cursor is opened for this
        call and always closed before returning.
    params : sequence or mapping, optional
        Values bound to placeholders in ``sql`` by the driver. Prefer this
        over building SQL with string formatting. When omitted, ``sql`` is
        executed as-is.

    Returns
    -------
    pandas.DataFrame
        One row per fetched record, with column names taken from the first
        element of each entry in ``cursor.description``. An empty DataFrame
        is returned when the statement produces no result set
        (``cursor.description`` is ``None``).

    Raises
    ------
    Any exception raised by the driver while executing or fetching is
    propagated unchanged; the cursor is still closed.
    """
    cur = conn.cursor()
    try:
        if params is None:
            cur.execute(sql)
        else:
            cur.execute(sql, params)

        # description is None for statements that return no rows (e.g. DDL),
        # in which case there is nothing to fetch.
        if cur.description is None:
            return pd.DataFrame()

        columns = [desc[0] for desc in cur.description]
        return pd.DataFrame(cur.fetchall(), columns=columns)
    finally:
        # Release the cursor even if execute/fetchall raised.
        cur.close()

In [22]:
# Run the same query through fetch_results to get a DataFrame, then preview it.
sql = "SELECT * FROM texas.weather_county;"
df = fetch_results(sql=sql, conn=conn)
df.head(10)  # display at most the first 10 rows

Unnamed: 0,id,fips_code,county_name,temperature,fahrenheit,updated_at,created_at
0,1,48225,Houston,69,True,2022-11-25 05:19:15.914938,2022-11-25 05:19:15.914938
1,2,48019,Bandera,71,True,2022-11-25 05:19:15.914938,2022-11-25 05:19:15.914938
2,3,48113,Dallas,71,True,2022-11-25 05:19:15.914938,2022-11-25 05:19:15.914938
3,4,48141,El Paso,71,True,2022-11-25 05:19:15.914938,2022-11-25 05:19:15.914938


In [10]:
# NOTE(review): the recorded output below has dtype object and reports
# count/unique/top/freq instead of mean/std/quantiles. The fetched temperature
# values are Decimal objects, so pandas treats the column as non-numeric.
# For numeric statistics cast first, e.g. df['temperature'].astype(float).describe().
# NOTE(review): this cell's execution count (In [10]) is lower than the cells
# above it -- re-run the notebook top to bottom to confirm it still works.
df['temperature'].describe()

count      4
unique     2
top       71
freq       3
Name: temperature, dtype: object