# Connect to PostgreSQL database

A database called *analytics* already exists. This notebook connects to *analytics*.


1. Install psycopg2 (use either one of the following)
    - `conda install -c anaconda psycopg2`
    - `pip install psycopg2`

2. Store the connection secrets (e.g. as a JSON object)

3. Create a database connection 

4. Create cursor


In [1]:
import psycopg2
import pandas as pd
from aws_secrets import get_secret

In [2]:
def create_rw_conn(secrets, sslmode='prefer',
                   sslrootcert="[full path]rds-combined-ca-bundle.pem"):
    """Open a psycopg2 connection using values from a secrets mapping.

    Parameters
    ----------
    secrets : mapping
        Must provide the keys 'ENDPOINT', 'PORT', 'USER', 'PASSWORD' and
        'DATABASE'; each value is passed to ``psycopg2.connect`` unchanged.
    sslmode : str, optional
        Passed through to ``psycopg2.connect``. Defaults to 'prefer', the
        value that used to be hard-coded here.
    sslrootcert : str, optional
        Path to the CA bundle, passed through to ``psycopg2.connect``. The
        default is the placeholder that used to be hard-coded; replace
        '[full path]' with the real directory, or pass your own path.

    Returns
    -------
    The connection object returned by ``psycopg2.connect``.

    Raises
    ------
    KeyError
        If ``secrets`` is a dict and one of the required keys is missing.
    """
    # Look the secrets up first, in a fixed order, so a missing key fails
    # before any connection attempt is made.
    endpoint = secrets['ENDPOINT']
    port = secrets['PORT']
    user = secrets['USER']
    password = secrets['PASSWORD']
    database = secrets['DATABASE']

    # NOTE(review): per the libpq SSL documentation, sslmode='prefer' does not
    # verify the server certificate, so sslrootcert presumably only matters
    # with 'verify-ca' / 'verify-full' -- confirm which mode is intended.
    return psycopg2.connect(
        host=endpoint,
        port=port,
        user=user,
        database=database,
        password=password,
        sslmode=sslmode,
        sslrootcert=sslrootcert,
    )



In [3]:
# Connect using a read/write user: get_secret() (imported from aws_secrets)
# supplies the connection settings, which create_rw_conn turns into a connection.
secrets = get_secret()
conn = create_rw_conn(secrets=secrets)

In [4]:
# Open a cursor to perform database operations; the following cells reuse `cur`.
cur = conn.cursor()

In [5]:
# database: analytics (per the notebook intro; the actual name comes from secrets['DATABASE'])
# schema: texas
# table: weather_county
cur.execute("SELECT * FROM texas.weather_county;")

In [6]:
cur.fetchmany(3) # fetch the next 3 records from the executed query
# Alternatives to fetchmany:
# cur.fetchall()   -> all remaining rows
# cur.fetchone()   -> a single row

[(1,
  '48225',
  'Houston',
  Decimal('71'),
  True,
  datetime.datetime(2022, 11, 25, 22, 20, 23, 632376),
  datetime.datetime(2022, 11, 25, 22, 20, 23, 632376)),
 (2,
  '48019',
  'Bandera',
  Decimal('71'),
  True,
  datetime.datetime(2022, 11, 25, 22, 20, 23, 632376),
  datetime.datetime(2022, 11, 25, 22, 20, 23, 632376)),
 (3,
  '48113',
  'Dallas',
  Decimal('71'),
  True,
  datetime.datetime(2022, 11, 25, 22, 20, 23, 632376),
  datetime.datetime(2022, 11, 25, 22, 20, 23, 632376))]

In [7]:
# Column metadata (name, type code) for the query last executed on this cursor.
cur.description

(Column(name='id', type_code=23),
 Column(name='fips_code', type_code=1043),
 Column(name='county_name', type_code=1043),
 Column(name='temperature', type_code=1700),
 Column(name='fahrenheit', type_code=16),
 Column(name='updated_at', type_code=1114),
 Column(name='created_at', type_code=1114))

In [8]:
def fetch_results(sql, conn, params=None):
    """Run a query and return its whole result set as a pandas DataFrame.

    Parameters
    ----------
    sql : str
        The statement to execute. It must produce a result set, because the
        column names are read from ``cursor.description``.
    conn : connection
        An open connection; a new cursor is created from it for this call.
    params : sequence or mapping, optional
        Query parameters handed to ``cursor.execute`` alongside ``sql``.
        When omitted, ``sql`` is executed on its own, exactly as before.
        Prefer this over building SQL with string formatting.

    Returns
    -------
    pandas.DataFrame
        One row per fetched record, with columns named after the first
        element of each ``cursor.description`` entry.
    """
    cur = conn.cursor()
    try:
        if params is None:
            cur.execute(sql)
        else:
            cur.execute(sql, params)
        columns = [desc[0] for desc in cur.description]
        return pd.DataFrame(cur.fetchall(), columns=columns)
    finally:
        # Close the cursor even when execute/fetchall raises, so a failed
        # query does not leak it.
        cur.close()

In [9]:
# Run the weather_county query through fetch_results to get a DataFrame.
sql = "SELECT * FROM texas.weather_county;"
df = fetch_results(sql=sql, conn=conn)
# Display at most the first 10 rows.
df.head(10)

Unnamed: 0,id,fips_code,county_name,temperature,fahrenheit,updated_at,created_at
0,1,48225,Houston,71,True,2022-11-25 22:20:23.632376,2022-11-25 22:20:23.632376
1,2,48019,Bandera,71,True,2022-11-25 22:20:23.632376,2022-11-25 22:20:23.632376
2,3,48113,Dallas,71,True,2022-11-25 22:20:23.632376,2022-11-25 22:20:23.632376
3,4,48141,El Paso,71,True,2022-11-25 22:20:23.632376,2022-11-25 22:20:23.632376
