In [38]:
import pandas as pd
import requests
from secrets_config import api_key_id, api_secret_key 
import jinja2 as j2 

Extract data

In [39]:
# retrieve data for tesla between 2020-01-01 to 2020-01-02
# auth example: https://alpaca.markets/docs/api-references/trading-api/

stock_ticker = "tsla"
base_url = f"https://data.alpaca.markets/v2/stocks/{stock_ticker}/trades"
start_time = "2020-01-01T00:00:00.00Z"
end_time = "2020-01-02T00:00:00.00Z"

response_data = []

params = {
    "start": start_time,
    "end": end_time
}

headers = {
    "APCA-API-KEY-ID": api_key_id,
    "APCA-API-SECRET-KEY": api_secret_key
}
response = requests.get(base_url, params=params, headers=headers)
if response.json().get("trades") is not None: 
    response_data.extend(response.json().get("trades"))

In [40]:
# read trade data into a dataframe
df = pd.json_normalize(data=response_data, max_level=0)

In [41]:
# read exchange codes data into a dataframe 
df_exchange_codes = pd.read_csv("data/exchange_codes.csv")

Load data

In [42]:
# import libraries for sql 
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData, Float, JSON 
from sqlalchemy.engine import URL
from sqlalchemy.dialects import postgresql
from secrets_config import db_user, db_password, db_server_name, db_database_name

In [43]:
# create connection to database 
connection_url = URL.create(
    drivername = "postgresql+pg8000", 
    username = db_user,
    password = db_password,
    host = db_server_name, 
    port = 5432,
    database = db_database_name, 
)

engine = create_engine(connection_url)

In [44]:
# create meta object 
meta = MetaData()

# specify trade data table schema 
# see field definition here: https://alpaca.markets/docs/api-references/market-data-api/stock-pricing-data/historical/#trade 
trades_table = Table(
    "raw_trades", meta, 
    Column("i", Integer, primary_key=True),
    Column("t", String),
    Column("x", String),
    Column("p", Float),
    Column("s", Integer),
    Column("c", String),
    Column("z", String)
)

# specify exchange codes table schema 
exchange_codes_table = Table(
    "raw_exchange_codes", meta, 
    Column("exchange_code", String, primary_key=True), 
    Column("exchange_name", String)
)

# creates table if it does not exist 
meta.create_all(engine) 

In [45]:
# upsert data to trade table 
insert_statement = postgresql.insert(trades_table).values(df.to_dict(orient='records'))
upsert_statement = insert_statement.on_conflict_do_update(
    index_elements=['i'],
    set_={c.key: c for c in insert_statement.excluded if c.key not in ['i']})
engine.execute(upsert_statement)

# upsert data to exchange codes table 
insert_statement = postgresql.insert(exchange_codes_table).values(df_exchange_codes.to_dict(orient='records'))
upsert_statement = insert_statement.on_conflict_do_update(
    index_elements=['exchange_code'],
    set_={c.key: c for c in insert_statement.excluded if c.key not in ['exchange_code']})
engine.execute(upsert_statement)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fed848c7790>

Transform

In [46]:
import os 
import logging 

def build_model(model, engine, models_path="models")->bool:
    """
    Builds models with a matching file name in the models_path folder. 
    - `model`: the name of the model (without .sql)
    - `models_path`: the path to the models directory containing the sql files. defaults to `models`
    """
    logging.basicConfig(level=logging.INFO, format="[%(levelname)s][%(asctime)s]: %(message)s")
    
    if f"{model}.sql" in os.listdir(models_path):
        logging.info(f"Building model: {model}")
    
        # read sql contents into a variable 
        with open(f"{models_path}/{model}.sql") as f: 
            raw_sql = f.read()

        # parse sql using jinja 
        parsed_sql = j2.Template(raw_sql).render(target_table = model, engine=engine)

        # execute parsed sql 
        result = engine.execute(parsed_sql)
        logging.info(f"Successfully built model: {model}, rows inserted/updated: {result.rowcount}")
        return True 
    else: 
        logging.error(f"Could not find model: {model}")

In [47]:
# import TopologicalSorter
from graphlib import TopologicalSorter

In [48]:
# create a DAG of models using TopologicalSorter
ts = TopologicalSorter()
ts.add("staging_trades")
ts.add("staging_exchange_codes")
ts.add("serving_trades", "staging_trades", "staging_exchange_codes")
ts.add("serving_exchange_summary", "serving_trades")
dag = tuple(ts.static_order())
print(dag)

('staging_trades', 'staging_exchange_codes', 'serving_trades', 'serving_exchange_summary')


In [49]:
# execute each node in the dag in order using a for loop 
for node in dag: 
    build_model(model=node, engine=engine, models_path="models/")

[INFO][2022-07-24 00:12:30,125]: Building model: staging_trades
[INFO][2022-07-24 00:12:30,146]: Successfully built model: staging_trades, rows inserted/updated: 0
[INFO][2022-07-24 00:12:30,147]: Building model: staging_exchange_codes
[INFO][2022-07-24 00:12:30,161]: Successfully built model: staging_exchange_codes, rows inserted/updated: 22
[INFO][2022-07-24 00:12:30,162]: Building model: serving_trades
[INFO][2022-07-24 00:12:30,253]: Successfully built model: serving_trades, rows inserted/updated: 0
[INFO][2022-07-24 00:12:30,254]: Building model: serving_exchange_summary
[INFO][2022-07-24 00:12:30,265]: Successfully built model: serving_exchange_summary, rows inserted/updated: 0
