In [0]:
#import libraries
from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as f
from delta.tables import DeltaTable

In [0]:
#create Spark session
spark = SparkSession.builder.appName('alpaca-batch').getOrCreate()
spark.getActiveSession()

In [0]:
%sql
--create database

CREATE DATABASE IF NOT EXISTS feature_store;

In [0]:
%sql
--dropping tables if they exists
--used only when rerunning the commands below which create CakeAll1 and CakeMA1
DROP TABLE IF EXISTS feature_store.CakeALL3;

DROP TABLE IF EXISTS feature_store.CakeMA3

In [0]:
%sql
--reading in "alpaca.bars" (silver) table using SQL and writing it to a new table called "CakeALL"
--CREATE OR REPLACE TABLE feature_store.CakeALL1 USING CSV
CREATE TABLE feature_store.CakeALL3 USING CSV

AS

SELECT open
  ,high
  ,low
  ,close
  ,symbol
  ,year(timestamp) AS Year
  ,date(timestamp) AS Date
  ,hour(timestamp) AS Time
  ,trade_count
  ,volume
  ,vwap

FROM alpaca.bars

WHERE symbol = 'CAKE'

ORDER BY year(timestamp) desc,
date(timestamp) desc,
hour(timestamp) desc

In [0]:
%sql
--creating 100 day moving average table

CREATE TABLE feature_store.CakeMA3 USING CSV

AS

SELECT Year
  ,Date
  ,Time
  ,Symbol
  ,AVG(Close)
OVER (
    ORDER BY Date
    ROWS BETWEEN 100 PRECEDING AND 100 FOLLOWING
--    ROWS BETWEEN 100 PRECEDING AND CURRENT ROW
  )MA100Day

FROM feature_store.CakeALL3

WHERE symbol = 'CAKE'

ORDER BY Year DESC,
  Date DESC,
  Time DESC

In [0]:
%sql
DROP TABLE IF EXISTS feature_store.CakeMain1

In [0]:
%sql
--merging silver table with moving-average table

CREATE TABLE feature_store.CakeMain1 USING CSV

AS

SELECT feature_store.CakeAll3.Year,
  feature_store.CakeAll3.Date,
  feature_store.CakeAll3.Time,
  feature_store.CakeAll3.Symbol,
  feature_store.CakeMA3.MA100Day
  
FROM feature_store.CakeAll3
  
INNER JOIN feature_store.CakeMA3
  ON feature_store.CakeAll3.Year = feature_store.CakeMA3.Year
  AND feature_store.CakeAll3.Date = feature_store.CakeMA3.Date
  AND feature_store.CakeAll3.Time = feature_store.CakeMA3.Time
  AND feature_store.CakeAll3.Symbol = feature_store.CakeMA3.Symbol

In [0]:
%sql
DROP TABLE IF EXISTS feature_store.MainLag1

In [0]:
%sql
--creating lag and lead features in main table
CREATE TABLE feature_store.MainLag1 USING CSV

AS

SELECT Year,
  Date,
  Time,
  Symbol,
  MA100Day,
  LAG(MA100Day,1) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_minus1,
  LAG(MA100Day,2) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_minus2,
  LAG(MA100Day,3) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_minus3,
  LAG(MA100Day,4) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_minus4,
  LAG(MA100Day,5) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_minus5,
  LAG(MA100Day,6) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_minus6,
  LEAD(MA100Day,1) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_plus1,
  LEAD(MA100Day,2) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_plus2,
  LEAD(MA100Day,3) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_plus3,
  LEAD(MA100Day,4) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_plus4,
  LEAD(MA100Day,5) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_plus5,
  LEAD(MA100Day,6) OVER (PARTITION BY symbol ORDER BY Date desc, Time desc) AS MA_plus6

FROM feature_store.CakeMain1

ORDER BY Year DESC,
  Date DESC,
  Time DESC

In [0]:
%sql
--drop gold table if it exists

DROP TABLE IF EXISTS feature_store.gold_table

In [0]:
%sql
--creating gold table
CREATE TABLE feature_store.gold_table

(
Year INT,
Date DATE,
Time INT,
Symbol STRING,
MA100Day DOUBLE,
MA_minus1 DOUBLE,
MA_minus2 DOUBLE,
MA_minus3 DOUBLE,
MA_minus4 DOUBLE,
MA_minus5 DOUBLE,
MA_minus6 DOUBLE,
MA_plus1 DOUBLE,
MA_plus2 DOUBLE,
MA_plus3 DOUBLE,
MA_plus4 DOUBLE,
MA_plus5 DOUBLE,
MA_plus6 DOUBLE
)

USING DELTA

--TBLPROPERTIES(DELTA.enableChangeDataFeed = true)

LOCATION '/alpaca/barsNEW'

In [0]:
%sql
--writing data to gold table
INSERT OVERWRITE TABLE feature_store.gold_table
SELECT *
FROM feature_store.MainLag1

num_affected_rows,num_inserted_rows
2944,2944


In [0]:
%sql

select *

FROM feature_store.gold_table

Year,Date,Time,Symbol,MA100Day,MA_minus1,MA_minus2,MA_minus3,MA_minus4,MA_minus5,MA_minus6,MA_plus1,MA_plus2,MA_plus3,MA_plus4,MA_plus5,MA_plus6
2020,2020-12-31,21,CAKE,37.12094636363634,,,,,,,37.13352385321099,37.14864907407406,37.16508504672896,37.16881226415093,37.17261047619046,37.17523173076921
2020,2020-12-31,20,CAKE,37.13352385321099,37.12094636363634,,,,,,37.14864907407406,37.16508504672896,37.16881226415093,37.17261047619046,37.17523173076921,37.17809805825241
2020,2020-12-31,19,CAKE,37.14864907407406,37.13352385321099,37.12094636363634,,,,,37.16508504672896,37.16881226415093,37.17261047619046,37.17523173076921,37.17809805825241,37.18082450980391
2020,2020-12-31,18,CAKE,37.16508504672896,37.14864907407406,37.13352385321099,37.12094636363634,,,,37.16881226415093,37.17261047619046,37.17523173076921,37.17809805825241,37.18082450980391,37.18291287128712
2020,2020-12-31,17,CAKE,37.16881226415093,37.16508504672896,37.14864907407406,37.13352385321099,37.12094636363634,,,37.17261047619046,37.17523173076921,37.17809805825241,37.18082450980391,37.18291287128712,37.09364789915968
2020,2020-12-31,16,CAKE,37.17261047619046,37.16881226415093,37.16508504672896,37.14864907407406,37.13352385321099,37.12094636363634,,37.17523173076921,37.17809805825241,37.18082450980391,37.18291287128712,37.09364789915968,37.09189915254238
2020,2020-12-31,15,CAKE,37.17523173076921,37.17261047619046,37.16881226415093,37.16508504672896,37.14864907407406,37.13352385321099,37.12094636363634,37.17809805825241,37.18082450980391,37.18291287128712,37.09364789915968,37.09189915254238,37.08952222222223
2020,2020-12-31,14,CAKE,37.17809805825241,37.17523173076921,37.17261047619046,37.16881226415093,37.16508504672896,37.14864907407406,37.13352385321099,37.18082450980391,37.18291287128712,37.09364789915968,37.09189915254238,37.08952222222223,37.08693189655173
2020,2020-12-31,13,CAKE,37.18082450980391,37.17809805825241,37.17523173076921,37.17261047619046,37.16881226415093,37.16508504672896,37.14864907407406,37.18291287128712,37.09364789915968,37.09189915254238,37.08952222222223,37.08693189655173,37.08751391304348
2020,2020-12-31,0,CAKE,37.18291287128712,37.18082450980391,37.17809805825241,37.17523173076921,37.17261047619046,37.16881226415093,37.16508504672896,37.09364789915968,37.09189915254238,37.08952222222223,37.08693189655173,37.08751391304348,37.08867631578947
