In [1]:
#import dependencies 
import pandas as pd 
import requests 
from sqlalchemy import create_engine
import psycopg2
import config 

In [2]:
#open a connection to the postgres database using the credentials held in config
conn = psycopg2.connect(
    database="donniedata",
    user=config.db_user,
    password=config.db_password,
    sslmode="disable",
)
cur = conn.cursor()
#autocommit: every statement is committed as soon as it executes,
#so no explicit conn.commit() is needed after the inserts below
conn.autocommit = True

In [3]:
#get sample of data - 1000 rows
endpoint = "https://data.sfgov.org/resource/imvp-dq3v.json?$limit=1000"
#requests has no default timeout; without one a stalled connection hangs the kernel
r = requests.get(url=endpoint, timeout=30)
#fail here on an HTTP error instead of later when the body is parsed as JSON
r.raise_for_status()

In [295]:
#parse the JSON response body; the sample output below shows a list of dicts, one per meter transaction
data = r.json()

In [297]:
#peek at the first two records to see the available fields
data[:2]

[{'transmission_datetime': '196158860_4_06092020073616',
  'post_id': '700-14150',
  'street_block': 'VALENCIA ST 1400',
  'payment_type': 'CASH',
  'session_start_dt': '2020-06-09T07:36:16.000',
  'session_end_dt': '2020-06-09T10:36:00.000',
  'meter_event_type': 'NS',
  'gross_paid_amt': '0.8'},
 {'transmission_datetime': '196158861_4_06092020073722',
  'post_id': '471-07180',
  'street_block': 'HARRISON ST 700',
  'payment_type': 'CREDIT CARD',
  'session_start_dt': '2020-06-09T07:37:22.000',
  'session_end_dt': '2020-06-09T17:30:10.000',
  'meter_event_type': 'NS',
  'gross_paid_amt': '4.25'}]

## Payment table

In [299]:
#insert data into payment dimension
#The DO block makes the insert idempotent: a payment type is only inserted when
#no row with that value exists yet, so re-running this cell adds no duplicates.
sql = '''
DO $$
BEGIN 
IF NOT EXISTS (select 1 from sf_ticket_trans.dim_payment where payment_type = %(payment)s) 
THEN INSERT INTO sf_ticket_trans.dim_payment (payment_type) VALUES (%(payment)s);
END IF;
END;
$$ 

'''

#Only distinct values need a round trip to the database. dict.fromkeys keeps
#first-seen order, so rows are inserted in the same order as iterating every record.
unique_payment_types = list(dict.fromkeys(record['payment_type'] for record in data))

#plain loop rather than a list comprehension: execute() is called for its side effect only
for payment_type in unique_payment_types:
    cur.execute(sql, {'payment': payment_type})
    

In [302]:
#data insertion check: the row count in the dimension table is printed next to
#the number of distinct payment types in the pulled data so they can be compared
cur.execute('select count(*) from sf_ticket_trans.dim_payment')
(inserted_count,) = cur.fetchone()
print('Unique values inserted:', inserted_count)

payment_type_list = [record['payment_type'] for record in data]
distinct_payment_types = set(payment_type_list)
print('Unique values from data pull:', len(distinct_payment_types))

Unique values inserted: 3
Unique values from data pull: 3


## Streets data 

In [277]:
#collect the street_block value of every record (duplicates included)
street_list = [record['street_block'] for record in data]

In [279]:
#insert data into street dimension
#The DO block makes the insert idempotent: a street block is only inserted when
#no row with that value exists yet, so re-running this cell adds no duplicates.
sql = '''
DO $$
BEGIN 
IF NOT EXISTS (select 1 from sf_ticket_trans.dim_street where street_block = %(street)s) 
THEN INSERT INTO sf_ticket_trans.dim_street (street_block) VALUES (%(street)s);
END IF;
END;
$$ 

'''

#street_list contains one entry per record, so most values repeat; send each
#distinct street block once. dict.fromkeys keeps first-seen order, so rows are
#inserted in the same order as iterating the full list.
#plain loop rather than a list comprehension: execute() is called for its side effect only
for street in dict.fromkeys(street_list):
    cur.execute(sql, {'street': street})
    

In [285]:
#data insertion check: the row count in the dimension table is printed next to
#the number of distinct street blocks in the pulled data so they can be compared
cur.execute('select count(*) from sf_ticket_trans.dim_street')
(inserted_count,) = cur.fetchone()
print('Unique values inserted:', inserted_count)
distinct_streets = set(street_list)
print('Unique values from data pull:', len(distinct_streets))

Unique values inserted: 491
Unique values from data pull: 491


## Fact table

In [291]:
#field names available on a record (taken from the first one)
list(data[0].keys())

['transmission_datetime',
 'post_id',
 'street_block',
 'payment_type',
 'session_start_dt',
 'session_end_dt',
 'meter_event_type',
 'gross_paid_amt']

In [292]:
#Insert one row into the fact table.
#The two dimension keys are resolved with scalar subqueries that look up the id
#for the record's payment_type / street_block value in the dimension tables.
#Placeholder names match the keys of the API records, so a record from `data`
#can be passed directly as the parameter mapping: cur.execute(sql, record)
#NOTE(review): the column names payment_type_id, street_block_id and
#session_end_date are carried over from the earlier draft of this statement;
#the table definitions are not visible here - confirm them against the schema.
sql = '''

INSERT INTO sf_ticket_trans.fact_transactions (
    transmission_datetime ,
    payment_type_id ,
    street_block_id ,
    post_id ,
    meter_event_type ,
    gross_paid_amt ,
    session_start_dt ,
    session_end_date
    )
    VALUES (
    %(transmission_datetime)s ,
    (SELECT payment_type_id FROM sf_ticket_trans.dim_payment WHERE payment_type = %(payment_type)s) ,
    (SELECT street_block_id FROM sf_ticket_trans.dim_street WHERE street_block = %(street_block)s) ,
    %(post_id)s ,
    %(meter_event_type)s ,
    %(gross_paid_amt)s ,
    %(session_start_dt)s ,
    %(session_end_dt)s
    )

'''
