In [1]:
#import dependencies 
import pandas as pd 
import requests 
from sqlalchemy import create_engine
import psycopg2
import config 

# Data 

In [20]:
#get sample of data -1000 rows
endpoint = "https://data.sfgov.org/resource/imvp-dq3v.json?$limit=1000"
r = requests.get(url=endpoint)

In [28]:
#convert to datframe 
data_df = pd.DataFrame(r.json())

In [29]:
data_df.shape

(1000, 8)

In [30]:
#review dataframe 
data_df.head()

Unnamed: 0,transmission_datetime,post_id,street_block,payment_type,session_start_dt,session_end_dt,meter_event_type,gross_paid_amt
0,135482724_4_09302017104221,352-05200,CASTRO ST 500,CREDIT CARD,2017-09-30T10:42:21.000,2017-09-30T11:02:21.000,NS,0.75
1,135482725_4_09302017104224,418-08670,FOLSOM ST 800,CREDIT CARD,2017-09-30T10:42:24.000,2017-09-30T12:42:17.000,NS,5.54
2,135482726_4_09302017104223,568-45310,MISSION ST 4500,CASH,2017-09-30T10:42:23.000,2017-09-30T11:26:07.000,NS,0.75
3,135482727_4_09302017104226,350-24090,CALIFORNIA ST 2400,CASH,2017-09-30T10:42:26.000,2017-09-30T11:27:56.000,AT,0.15
4,135482728_4_09302017104226,226-34480,26TH ST 3400,CASH,2017-09-30T10:42:26.000,2017-09-30T11:42:26.000,NS,2.25


### Review Dimensions that can be normailized 

In [39]:
data_df.meter_event_type.unique()

array(['NS', 'AT'], dtype=object)

In [67]:
len(data_df.street_block.unique())

534

In [64]:
pd.DataFrame(data_df.street_block.unique())

Unnamed: 0,0
0,CASTRO ST 500
1,FOLSOM ST 800
2,MISSION ST 4500
3,CALIFORNIA ST 2400
4,26TH ST 3400
...,...
529,TREAT AVE 1100
530,03RD ST 700
531,BAY ST 400
532,POLK ST 800


# Database Design 

In [2]:
#connect to postgres database 
conn = psycopg2.connect(database="donniedata", user=config.db_user, password=config.db_password, sslmode="disable")
cur = conn.cursor();
conn.autocommit = True

## Tables

In [78]:
#create transaction fact table 
sql = '''
CREATE TABLE sf_ticket_trans.fact_transactions ( 
transmission_datetime TEXT,
payment_type_id INTEGER,
street_block_id INTEGER,
post_id TEXT,
meter_event_type TEXT,
gross_paid_amt DECIMAL,
session_start_dt TIMESTAMP,
session_end_date TIMESTAMP
)
'''

cur.execute(sql)

In [80]:
#create payment type dimension table 
sql = '''
CREATE TABLE sf_ticket_trans.dim_payment ( 
payment_type_id INTEGER,
payment_type TEXT 
)
'''

cur.execute(sql)

In [17]:
#create street dimension table 
sql = '''
CREATE TABLE sf_ticket_trans.dim_street( 
street_block_id INTEGER,
street_block TEXT
)
'''

cur.execute(sql)

In [None]:
ALTER TABLE sf_ticket_trans.dim_street
ALTER COLUMN street_block_id TYPE IDENTITY;
  

In [15]:
#update dimension tables id column to identy - auto incremting and cant be isnerted on 
sql = '''

ALTER TABLE sf_ticket_trans.dim_payment
ALTER COLUMN payment_type_id ADD GENERATED ALWAYS AS IDENTITY;
  
ALTER TABLE sf_ticket_trans.dim_street
ALTER COLUMN street_block_id ADD GENERATED ALWAYS AS IDENTITY;
  
'''
cur.execute(sql)

## Constriants 

In [129]:
#add constraints to tables 
#transaction 
trans_constraint = '''
ALTER TABLE sf_ticket_trans.fact_transactions
ADD PRIMARY KEY (transmission_datetime),

ADD CONSTRAINT fk_streetblockid FOREIGN KEY (street_block_id) 
    REFERENCES sf_ticket_trans.dim_street (street_block_id);

'''



cur.execute(trans_constraint)

In [114]:
#dim_street
street_constraint = '''

ALTER TABLE sf_ticket_trans.dim_street
ADD PRIMARY KEY (street_block_id),
ADD CONSTRAINT unique_block UNIQUE (street_block)

'''

cur.execute(street_constraint)

In [127]:
#dim_payment 
payment_constraint = '''
ALTER TABLE sf_ticket_trans.dim_payment
ADD PRIMARY KEY (payment_type_id),
ADD CONSTRAINT unique_payment_type UNIQUE (payment_type)
'''

cur.execute(sql)

## Review 

In [94]:
#check tables 
sql = '''
select table_schema, table_name from information_schema.tables where table_schema = 'sf_ticket_trans'
'''
cur.execute(sql)
df = pd.DataFrame(cur.fetchall())
df.columns = [i[0] for i in cur.description]
df

Unnamed: 0,table_schema,table_name
0,sf_ticket_trans,fact_transactions
1,sf_ticket_trans,dim_street
2,sf_ticket_trans,dim_payment


In [134]:
#review constraints 
sql = '''
SELECT * FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS 
WHERE constraint_schema = 'sf_ticket_trans' ORDER BY  table_name
'''
cur.execute(sql)
df = pd.DataFrame(cur.fetchall())
df.columns = [i[0] for i in cur.description]
df

Unnamed: 0,constraint_catalog,constraint_schema,constraint_name,table_catalog,table_schema,table_name,constraint_type,is_deferrable,initially_deferred,enforced
0,donniedata,sf_ticket_trans,145793_145824_1_not_null,donniedata,sf_ticket_trans,dim_payment,CHECK,NO,NO,YES
1,donniedata,sf_ticket_trans,unique_payment_type,donniedata,sf_ticket_trans,dim_payment,UNIQUE,NO,NO,YES
2,donniedata,sf_ticket_trans,dim_payment_pkey,donniedata,sf_ticket_trans,dim_payment,PRIMARY KEY,NO,NO,YES
3,donniedata,sf_ticket_trans,145793_145818_1_not_null,donniedata,sf_ticket_trans,dim_street,CHECK,NO,NO,YES
4,donniedata,sf_ticket_trans,dim_street_pkey,donniedata,sf_ticket_trans,dim_street,PRIMARY KEY,NO,NO,YES
5,donniedata,sf_ticket_trans,unique_block,donniedata,sf_ticket_trans,dim_street,UNIQUE,NO,NO,YES
6,donniedata,sf_ticket_trans,145793_145812_1_not_null,donniedata,sf_ticket_trans,fact_transactions,CHECK,NO,NO,YES
7,donniedata,sf_ticket_trans,fact_transactions_pkey,donniedata,sf_ticket_trans,fact_transactions,PRIMARY KEY,NO,NO,YES
8,donniedata,sf_ticket_trans,fk_streetblockid,donniedata,sf_ticket_trans,fact_transactions,FOREIGN KEY,NO,NO,YES
