In [38]:
import requests
# get alpaca api keys using this guide: https://alpaca.markets/docs/market-data/getting-started/#creating-an-alpaca-account-and-finding-your-api-keys
from secrets_config import api_key_id, api_secret_key 
import pandas as pd

In [39]:
# docs: https://alpaca.markets/docs/api-references/market-data-api/stock-pricing-data/historical/

# Request historical trades with the following parameters:
# - stock: Tesla (ticker "tsla")
# - start: 2020-01-01T00:00:00Z
# - end:   2020-01-02T00:00:00Z
stock_ticker = "tsla"  # tsla is Tesla's ticker symbol
base_url = f"https://data.alpaca.markets/v2/stocks/{stock_ticker}/trades"
start_time = "2020-01-01T00:00:00.00Z"
end_time = "2020-01-02T00:00:00.00Z"

# Authentication: every request must carry the APCA-API-KEY-ID and
# APCA-API-SECRET-KEY headers, e.g.
#     requests.get(url=url, params=params, headers=headers)
# auth example: https://alpaca.markets/docs/api-references/trading-api/
headers = {
    "APCA-API-KEY-ID": api_key_id,
    "APCA-API-SECRET-KEY": api_secret_key
}

params = {
    "start": start_time,
    "end": end_time
}

# Accumulates the raw trade records from every page of the response.
response_data = []

# The API returns results one page at a time; follow `next_page_token` until
# it is absent so trades beyond the first page are not silently dropped.
# `params` itself is left untouched; the page token lives on a copy.
page_params = dict(params)
while True:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(base_url, params=page_params, headers=headers, timeout=30)
    # Fail loudly on HTTP/auth errors instead of carrying on with no data.
    response.raise_for_status()
    payload = response.json()
    # "trades" may be missing or null when a page holds no trades.
    response_data.extend(payload.get("trades") or [])
    next_page_token = payload.get("next_page_token")
    if not next_page_token:
        break
    page_params["page_token"] = next_page_token

In [40]:
# Flatten the list of raw trade records into a DataFrame.
df_quotes = pd.json_normalize(data=response_data, meta=["symbol"])

# Map the API's single-letter field names to descriptive column names.
trade_column_names = dict(
    t="timestamp",
    x="exchange",
    p="price",
    s="size",
)
df_quotes_renamed = df_quotes.rename(columns=trade_column_names)

In [25]:
# Keep just the columns used downstream, in this order.
selected_columns = ['timestamp', 'exchange', 'price', 'size']
df_quotes_selected = df_quotes_renamed[selected_columns]

In [26]:
# Load the lookup table of exchange codes (joined on `exchange_code` to obtain `exchange_name`).
exchange_codes_path = "data/exchange_codes.csv"
df_exchange_codes = pd.read_csv(exchange_codes_path)

In [27]:
# Replace the exchange code on each trade with the exchange's name.
# The join is an inner join, so trades whose code has no match in the
# lookup table are not carried forward.
df_exchange = (
    df_quotes_selected
    .merge(
        df_exchange_codes,
        how="inner",
        left_on="exchange",
        right_on="exchange_code",
    )
    .drop(columns=["exchange_code", "exchange"])
    .rename(columns={"exchange_name": "exchange"})
)
df_exchange.head()

Unnamed: 0,timestamp,price,size,exchange
0,2020-01-01T00:00:20.4997Z,418.93,60,Cboe EDGX
1,2020-01-01T00:00:38.5731Z,418.7,10,Cboe EDGX
2,2020-01-01T00:09:35.5596Z,418.75,20,Cboe EDGX
3,2020-01-01T00:10:13.0418Z,418.75,115,Cboe EDGX
4,2020-01-01T00:10:28.0684Z,418.75,200,Cboe EDGX


In [28]:
# De-duplicate on the key (timestamp, exchange): rows sharing a key are
# collapsed into one, taking the mean of their prices and the sum of their sizes.
dedup_keys = ["timestamp", "exchange"]
df_ask_bid_exchange_de_dup = (
    df_exchange
    .groupby(dedup_keys)
    .agg(
        price=("price", "mean"),
        size=("size", "sum"),
    )
    .reset_index()
)
df_ask_bid_exchange_de_dup.head()

Unnamed: 0,timestamp,exchange,price,size
0,2020-01-01T00:00:20.4997Z,Cboe EDGX,418.93,60
1,2020-01-01T00:00:38.5731Z,Cboe EDGX,418.7,10
2,2020-01-01T00:00:50.8222Z,NASDAQ Int,418.74,5
3,2020-01-01T00:06:12.9975Z,FINRA ADF,418.75,8
4,2020-01-01T00:09:35.5596Z,Cboe EDGX,418.75,20


Write the de-duplicated data to a new Parquet file whose name includes the ingestion start and end times

In [29]:
# The file name embeds the ingestion window, so each window gets its own file.
parquet_path = f"data/exchange_data_{start_time}_{end_time}.parquet"
df_ask_bid_exchange_de_dup.to_parquet(parquet_path, index=False)

Upsert the data into a Postgres database table, using (timestamp, exchange) as the composite primary key

In [30]:
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData, Float # https://www.tutorialspoint.com/sqlalchemy/sqlalchemy_core_creating_table.htm
from sqlalchemy.engine import URL
from sqlalchemy.dialects import postgresql
from secrets_config import db_user, db_password, db_server_name, db_database_name
from sqlalchemy.schema import CreateTable 

In [31]:
# Assemble the database URL from the imported settings, then build the engine.
connection_settings = {
    "drivername": "postgresql+pg8000",
    "username": db_user,
    "password": db_password,
    "host": db_server_name,
    "port": 5432,
    "database": db_database_name,
}
connection_url = URL.create(**connection_settings)

engine = create_engine(connection_url)


In [32]:
# Inspect the column names of the de-duplicated frame.
df_ask_bid_exchange_de_dup.columns

Index(['timestamp', 'exchange', 'price', 'size'], dtype='object')

In [35]:
# Preview the first rows only, rather than rendering the entire frame.
df_ask_bid_exchange_de_dup.head(10)

Unnamed: 0,timestamp,exchange,price,size
0,2020-01-01T00:00:20.4997Z,Cboe EDGX,418.93,60
1,2020-01-01T00:00:38.5731Z,Cboe EDGX,418.7,10
2,2020-01-01T00:00:50.8222Z,NASDAQ Int,418.74,5
3,2020-01-01T00:06:12.9975Z,FINRA ADF,418.75,8
4,2020-01-01T00:09:35.5596Z,Cboe EDGX,418.75,20
5,2020-01-01T00:10:13.0418Z,Cboe EDGX,418.75,115
6,2020-01-01T00:10:28.0684Z,Cboe EDGX,418.75,200
7,2020-01-01T00:10:59.0336Z,Cboe EDGX,418.75,100
8,2020-01-01T00:12:00.7345Z,FINRA ADF,418.75,10
9,2020-01-01T00:13:44.0227Z,Cboe EDGX,418.7,31


In [36]:
# Describe the target table. `timestamp` and `exchange` are both flagged as
# primary-key columns, so together they form a composite primary key.
meta = MetaData()
stock_price_tesla_columns = [
    Column("timestamp", String, primary_key=True),
    Column("exchange", String, primary_key=True),
    Column("price", Float),
    Column("size", Integer),
]
stock_price_tesla_table = Table("stock_price_tesla", meta, *stock_price_tesla_columns)

# Issue CREATE TABLE only if the table does not already exist.
meta.create_all(bind=engine)


In [37]:
# Upsert the de-duplicated rows: insert each one, and when a row with the same
# (timestamp, exchange) key already exists, overwrite its non-key columns.
upsert_key_columns = ['timestamp', 'exchange']
upsert_records = df_ask_bid_exchange_de_dup.to_dict(orient='records')

# Rows are sent in batches because a single multi-row INSERT binds one
# parameter per value, and the number of bind parameters per statement is capped.
UPSERT_BATCH_SIZE = 1000

# engine.begin() opens one transaction for all batches: it commits when the
# block exits cleanly and rolls back if any batch raises. It replaces
# engine.execute(), which is legacy in SQLAlchemy 1.4 and removed in 2.0.
# An empty frame yields no batches, so nothing is executed.
with engine.begin() as connection:
    for batch_start in range(0, len(upsert_records), UPSERT_BATCH_SIZE):
        batch = upsert_records[batch_start:batch_start + UPSERT_BATCH_SIZE]
        insert_statement = postgresql.insert(stock_price_tesla_table).values(batch)
        upsert_statement = insert_statement.on_conflict_do_update(
            index_elements=upsert_key_columns,
            # `excluded` refers to the row that was proposed for insertion.
            set_={c.key: c for c in insert_statement.excluded if c.key not in upsert_key_columns})
        connection.execute(upsert_statement)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fbfb24cd640>