In [2]:
import os
import numpy as np
import pandas as pd
import json
import tensorflow as tf
from datetime import datetime, timedelta

from dotenv import load_dotenv

from alpaca.data.requests import StockBarsRequest
from alpaca.data.historical.stock import StockHistoricalDataClient
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Reshape

import matplotlib.pyplot as plt

import hvplot.pandas

In [3]:
# Load .env file

load_dotenv()

True

In [4]:
# Set Alpaca API key and secret

ALPACA_API_KEY = os.getenv("ALPACA_API_KEY")
ALPACA_SECRET_KEY = os.getenv("ALPACA_API_SECRET")
client = StockHistoricalDataClient(ALPACA_API_KEY, ALPACA_SECRET_KEY)

# Create the Alpaca API object

timeframe = TimeFrame(1, TimeFrameUnit.Day)
symbol = 'SPY'
start = datetime.utcnow() - timedelta(days=3650)
end=datetime.utcnow() - timedelta(days=730)
request = StockBarsRequest(symbol_or_symbols=symbol, start=start, end=end, timeframe=timeframe)

In [5]:
source_df = client.get_stock_bars(request).df.tz_convert('America/New_York', level=1)

In [6]:
display(source_df.head())
display(source_df.tail())

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,trade_count,vwap
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SPY,2016-01-04 00:00:00-05:00,200.49,201.03,198.59,201.0192,225903783.0,655489.0,200.656423
SPY,2016-01-05 00:00:00-05:00,201.405,201.9,200.05,201.36,112719152.0,418709.0,201.08428
SPY,2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646
SPY,2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911
SPY,2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537


Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,trade_count,vwap
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SPY,2022-02-18 00:00:00-05:00,437.33,438.66,431.82,434.23,132619100.0,1134344.0,435.238076
SPY,2022-02-22 00:00:00-05:00,431.89,435.5,425.86,429.57,126971668.0,1213515.0,430.690333
SPY,2022-02-23 00:00:00-05:00,432.66,433.26,421.35,422.07,132864067.0,1166042.0,425.926191
SPY,2022-02-24 00:00:00-05:00,411.02,428.76,410.64,427.99,220033458.0,2316988.0,419.89052
SPY,2022-02-25 00:00:00-05:00,429.61,437.84,427.86,437.75,123640399.0,1140051.0,434.397245


In [7]:
bars_df = source_df.copy()
# pct_change is profit from last close
bars_df["pct_change"] = bars_df["close"].pct_change()
# signal for when we want to be in or out of a stock
#bars_df["signal"] = np.where(bars_df["pct_change"] > 0, 1.0, 0.0)
# reaction is the signal diff
#bars_df["reaction"] = bars_df["signal"].diff()
# action is if we could perfectly predict the next close
#bars_df["action"] = bars_df["reaction"].shift(-1)
# these values are the high, low, and open as a percentage of the current close
bars_df["high %"] = (bars_df["high"] - bars_df["close"])/bars_df["close"]
bars_df["low %"] = (bars_df["low"] - bars_df["close"])/bars_df["close"]
bars_df["open %"] = (bars_df["open"] - bars_df["close"])/bars_df["close"]

In [8]:
bars_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1549 entries, ('SPY', Timestamp('2016-01-04 00:00:00-0500', tz='America/New_York')) to ('SPY', Timestamp('2022-02-25 00:00:00-0500', tz='America/New_York'))
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   open         1549 non-null   float64
 1   high         1549 non-null   float64
 2   low          1549 non-null   float64
 3   close        1549 non-null   float64
 4   volume       1549 non-null   float64
 5   trade_count  1549 non-null   float64
 6   vwap         1549 non-null   float64
 7   pct_change   1548 non-null   float64
 8   high %       1549 non-null   float64
 9   low %        1549 non-null   float64
 10  open %       1549 non-null   float64
dtypes: float64(11)
memory usage: 182.3+ KB


In [9]:
# Cleanup DF for model

bars_df = bars_df.droplevel(level=0).dropna()
bars_df.head()

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-05 00:00:00-05:00,201.405,201.9,200.05,201.36,112719152.0,418709.0,201.08428,0.001695,0.002682,-0.006506,0.000223
2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,-0.006136,-0.002465
2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,-0.002371,0.006596
2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,-0.001787,0.017022
2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,-0.01192,0.004685


In [10]:
# Normalize data set

scaler = StandardScaler()
bars_df_scaled = scaler.fit_transform(bars_df)


In [11]:
# Define constants

# input shape

input_shape = bars_df_scaled.shape[1]
latent_dim = 11
num_samples = bars_df_scaled.shape[0]
batch_size = 16


In [12]:
# Define generator (Sequential)

build_generator = Sequential([
    Dense(128,input_shape=(input_shape,), activation="relu"),
    Dense(256, activation="relu"),
    Dense(512, activation="relu"),
    Dense(input_shape, activation="sigmoid")
])

# Define descriminator

build_discriminator = Sequential([
    Dense(512, input_shape=(input_shape,), activation="relu"),
    Dense(256, activation="relu"),
    Dense(128, activation="relu"),
    Dense(1, activation="sigmoid")
])

2024-02-27 20:09:44.427752: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Max
2024-02-27 20:09:44.427777: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 64.00 GB
2024-02-27 20:09:44.427787: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 24.00 GB
2024-02-27 20:09:44.427847: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-02-27 20:09:44.427879: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [13]:
# Compile generator
build_generator.compile(loss = "mse", optimizer="adam")

# Compile discriminator
build_discriminator.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [14]:
# Combine models

# Confirm that discriminator weights are not training during generators training

build_discriminator.trainable=False

# Setup for generators data

z = tf.keras.Input(shape=(latent_dim,))

# Generated data by the generator

generated_data = build_generator(z)

# Discriminators verdict

validity = build_discriminator(generated_data)

In [15]:
# Define combined models

combined = tf.keras.Model(z, validity)
combined.compile(optimizer="adam", loss="mse", metrics=["accuracy"])

# Print summary of the combined model
combined.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 11)]              0         
                                                                 
 sequential (Sequential)     (None, 11)                171787    
                                                                 
 sequential_1 (Sequential)   (None, 1)                 170497    
                                                                 
Total params: 342284 (1.31 MB)
Trainable params: 171787 (671.04 KB)
Non-trainable params: 170497 (666.00 KB)
_________________________________________________________________


In [16]:
# Define training loop

epochs = 10000

generator_weights_path = "./generator_model.h5"
discriminator_weights_path = "./discriminator_model.h5"

for epoch in range(epochs):

    # Train discriminator
    # Sample real data
    idx = np.random.choice(num_samples, batch_size, replace=False)
    real_data = bars_df_scaled[idx]

 # Generate fake data (Random Walk noise)

    gaussian_noise = np.random.normal(0,1,size=(batch_size,input_shape))
    random_walk_noise = np.cumsum(gaussian_noise, axis=0)

    fake_data = build_generator.predict(random_walk_noise)

    # Train discriminator

    d_loss_real = build_discriminator.train_on_batch(real_data, np.ones((batch_size, 1)))
    d_loss_fake = build_discriminator.train_on_batch(fake_data, np.zeros((batch_size, 1)))
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    
    # Train generator 

    noise = np.random.normal(0, 1, (batch_size, input_shape))
    g_loss = combined.train_on_batch(noise, np.ones((batch_size, 1)))
    
    # Print progress

    print(f"Epoch {epoch}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}")




2024-02-27 20:09:44.788122: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2024-02-27 20:09:44.988708: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 0, Discriminator Loss: [0.64417121 0.84375   ], Generator Loss: [0.42269283533096313, 0.0]


2024-02-27 20:09:45.391842: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 1, Discriminator Loss: [0.57947214 0.84375   ], Generator Loss: [0.5774932503700256, 0.0]
Epoch 2, Discriminator Loss: [0.6446152 0.84375  ], Generator Loss: [0.6719194054603577, 0.0]
Epoch 3, Discriminator Loss: [0.80646469 0.84375   ], Generator Loss: [0.6574541926383972, 0.0]
Epoch 4, Discriminator Loss: [0.62423594 0.8125    ], Generator Loss: [0.6067336797714233, 0.0]
Epoch 5, Discriminator Loss: [0.86669205 0.71875   ], Generator Loss: [0.5459598302841187, 0.0]
Epoch 6, Discriminator Loss: [0.39708297 0.9375    ], Generator Loss: [0.5187119245529175, 0.0]
Epoch 7, Discriminator Loss: [0.6797449 0.8125   ], Generator Loss: [0.4673958420753479, 0.0]
Epoch 8, Discriminator Loss: [0.56141844 0.84375   ], Generator Loss: [0.6007342338562012, 0.0]
Epoch 9, Discriminator Loss: [0.39011303 0.84375   ], Generator Loss: [0.7245146036148071, 0.0]
Epoch 10, Discriminator Loss: [0.49789628 0.8125    ], Generator Loss: [0.7508599758148193, 0.0]
Epoch 11, Discriminator Loss: [0.33678913 0

In [17]:
# Saving Model Weights

build_generator.save("generator_model.h5")
build_discriminator.save("discriminator_model.h5")

  saving_api.save_model(


In [18]:
# Create alpaca object for testing

timeframe = TimeFrame(1, TimeFrameUnit.Day)
symbol = 'SPY'
start = datetime.utcnow() - timedelta(days=730)
end=datetime.utcnow() - timedelta(days=1)
request = StockBarsRequest(symbol_or_symbols=symbol, start=start, end=end, timeframe=timeframe)

In [19]:
# Create df for testing

test_df = client.get_stock_bars(request).df.tz_convert('America/New_York', level=1)

# data preprocessing for testing

# pct_change is profit from last close
test_df["pct_change"] = test_df["close"].pct_change()
# signal for when we want to be in or out of a stock
#bars_df["signal"] = np.where(bars_df["pct_change"] > 0, 1.0, 0.0)
# reaction is the signal diff
#bars_df["reaction"] = bars_df["signal"].diff()
# action is if we could perfectly predict the next close
#bars_df["action"] = bars_df["reaction"].shift(-1)
# these values are the high, low, and open as a percentage of the current close
test_df["high %"] = (test_df["high"] - test_df["close"])/test_df["close"]
test_df["low %"] = (test_df["low"] - test_df["close"])/test_df["close"]
test_df["open %"] = (test_df["open"] - test_df["close"])/test_df["close"]

# set timestamp as index, drop nan

test_df = test_df.droplevel(level=0).dropna()

test_df.info()
display(test_df.head())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 500 entries, 2022-03-01 00:00:00-05:00 to 2024-02-26 00:00:00-05:00
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   open         500 non-null    float64
 1   high         500 non-null    float64
 2   low          500 non-null    float64
 3   close        500 non-null    float64
 4   volume       500 non-null    float64
 5   trade_count  500 non-null    float64
 6   vwap         500 non-null    float64
 7   pct_change   500 non-null    float64
 8   high %       500 non-null    float64
 9   low %        500 non-null    float64
 10  open %       500 non-null    float64
dtypes: float64(11)
memory usage: 46.9 KB


Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-03-01 00:00:00-05:00,435.04,437.17,427.11,429.98,139991559.0,1422070.0,431.644816,-0.01523,0.016722,-0.006675,0.011768
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,-0.012606
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,0.010925
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,-0.000972
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,0.028896


In [20]:
# Normalize test data set

test_df_scaled = scaler.fit_transform(test_df)

print(test_df_scaled)
print(len(test_df_scaled))

[[ 0.44109752  0.42846558  0.27715549 ...  1.49491728  0.08071015
   1.25560793]
 [ 0.35843155  0.50883098  0.41268044 ... -0.31876709 -1.15477737
  -1.27141897]
 [ 0.60921597  0.552638    0.48044292 ...  0.86905813  0.44556662
   1.16817748]
 ...
 [ 2.57648042  2.67617544  2.58381448 ... -0.64099764 -0.26213429
  -0.67744396]
 [ 2.73933548  2.72786142  2.70779246 ... -0.27388504  0.90848282
   0.32542412]
 [ 2.70930328  2.68436956  2.67011288 ... -0.13432385  1.10275172
   0.50885119]]
500


In [21]:
# Test generator against real data

#lists to store generated and timestamps

column_names=[
    "Predicted Open",
    "Predicted High",
    "Predicted Low",
    "Predicted Close",
    "Predicted Volume",
    "Predicted Trade Count",
    "Predicted VWAP",
    "Predicted pct_change",
    "Predicted high %",
    "Predicted low %",
    "Predicted open %"
    ]

# sliding window

window_size = 1

# init lists to store generated data with associated timestamps

timestamps = []
predictions = []

In [22]:
# shape of scaled test data

print(test_df_scaled.shape)

(500, 11)


In [23]:
predictions_df = pd.DataFrame(columns = column_names)

predictions_df[:] = 0

In [24]:
print(test_df.index)
print(window_size)

DatetimeIndex(['2022-03-01 00:00:00-05:00', '2022-03-02 00:00:00-05:00',
               '2022-03-03 00:00:00-05:00', '2022-03-04 00:00:00-05:00',
               '2022-03-07 00:00:00-05:00', '2022-03-08 00:00:00-05:00',
               '2022-03-09 00:00:00-05:00', '2022-03-10 00:00:00-05:00',
               '2022-03-11 00:00:00-05:00', '2022-03-14 00:00:00-04:00',
               ...
               '2024-02-12 00:00:00-05:00', '2024-02-13 00:00:00-05:00',
               '2024-02-14 00:00:00-05:00', '2024-02-15 00:00:00-05:00',
               '2024-02-16 00:00:00-05:00', '2024-02-20 00:00:00-05:00',
               '2024-02-21 00:00:00-05:00', '2024-02-22 00:00:00-05:00',
               '2024-02-23 00:00:00-05:00', '2024-02-26 00:00:00-05:00'],
              dtype='datetime64[ns, America/New_York]', name='timestamp', length=500, freq=None)
1


In [25]:
# Prediction loop

for i in range(0,len(test_df_scaled),window_size):
    print(i)
    # extract sliding window of real data

    input_data = test_df_scaled[i:i+window_size]

    # predict the next row using generator

    shaped_data = input_data.reshape(1,window_size,11)
    print(shaped_data)

    predicted_row = build_generator.predict(shaped_data)
    
    #predicted_row = build_generator.predict(input_data)

    #append generated row to list
    # predictions.append(predicted_row)

    # extract timestamp for the prediction

    if (i + window_size) < len(test_df):
        timestamp = test_df.index[i+window_size]
    else:
        timestamp = test_df.index[-1] + timedelta(days=1)
    
    print(timestamp)
    print(predicted_row.squeeze())
    predictions_df.loc[timestamp] = predicted_row.squeeze()
    print(predictions_df.head())
    

    # predictions_df = pd.concat([predictions_df,predicted_row],ignore_index=True)


0
[[[ 0.44109752  0.42846558  0.27715549  0.27821595  2.23585754
    3.68164804  0.33464607 -1.30236988  1.49491728  0.08071015
    1.25560793]]]
2022-03-02 00:00:00-05:00
[1.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00 0.0000000e+00
 2.5489964e-37 1.0000000e+00 2.2469451e-35 0.0000000e+00 2.6530172e-13
 0.0000000e+00]
                           Predicted Open  Predicted High  Predicted Low  \
2022-03-02 00:00:00-05:00             1.0             1.0            1.0   

                           Predicted Close  Predicted Volume  \
2022-03-02 00:00:00-05:00              1.0               0.0   

                           Predicted Trade Count  Predicted VWAP  \
2022-03-02 00:00:00-05:00           2.548996e-37             1.0   

                           Predicted pct_change  Predicted high %  \
2022-03-02 00:00:00-05:00          2.246945e-35               0.0   

                           Predicted low %  Predicted open %  
2022-03-02 00:00:00-05:00     2.653017e-13       

2024-02-27 20:21:28.149872: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


2022-03-08 00:00:00-05:00
[1.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00 0.0000000e+00
 0.0000000e+00 1.0000000e+00 4.1589005e-38 0.0000000e+00 9.9942327e-01
 0.0000000e+00]
                           Predicted Open  Predicted High  Predicted Low  \
2022-03-02 00:00:00-05:00             1.0             1.0            1.0   
2022-03-03 00:00:00-05:00             1.0             1.0            1.0   
2022-03-04 00:00:00-05:00             1.0             1.0            1.0   
2022-03-07 00:00:00-05:00             1.0             1.0            1.0   
2022-03-08 00:00:00-05:00             1.0             1.0            1.0   

                           Predicted Close  Predicted Volume  \
2022-03-02 00:00:00-05:00              1.0      0.000000e+00   
2022-03-03 00:00:00-05:00              1.0      0.000000e+00   
2022-03-04 00:00:00-05:00              1.0      7.022287e-30   
2022-03-07 00:00:00-05:00              1.0      1.102519e-18   
2022-03-08 00:00:00-05:00              

In [26]:
index = predictions_df.index

predictions_df_inverse_scaled = scaler.inverse_transform(predictions_df)
generated_data_df = pd.DataFrame(predictions_df_inverse_scaled, columns=column_names,index=index)
generated_data_df.head()



Unnamed: 0,Predicted Open,Predicted High,Predicted Low,Predicted Close,Predicted Volume,Predicted Trade Count,Predicted VWAP,Predicted pct_change,Predicted high %,Predicted low %,Predicted open %
2022-03-02 00:00:00-05:00,453.091797,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343
2022-03-03 00:00:00-05:00,453.091797,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343
2022-03-04 00:00:00-05:00,453.091797,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343
2022-03-07 00:00:00-05:00,453.091797,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343
2022-03-08 00:00:00-05:00,453.091797,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.000906,-0.000343


In [27]:
test_df.head()

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-03-01 00:00:00-05:00,435.04,437.17,427.11,429.98,139991559.0,1422070.0,431.644816,-0.01523,0.016722,-0.006675,0.011768
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,-0.012606
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,0.010925
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,-0.000972
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,0.028896


In [28]:
bars_df = pd.concat([test_df,generated_data_df],axis=1)

In [29]:
bars_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,Predicted High,Predicted Low,Predicted Close,Predicted Volume,Predicted Trade Count,Predicted VWAP,Predicted pct_change,Predicted high %,Predicted low %,Predicted open %
2022-03-01 00:00:00-05:00,435.04,437.17,427.11,429.98,139991559.0,1422070.0,431.644816,-0.01523,0.016722,-0.006675,...,,,,,,,,,,
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,...,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,...,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,...,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,...,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343


In [30]:
test_shift_df = test_df.copy()

test_shift_df = test_shift_df.rename(columns={
    "open": "next open",
    "high": "next high",
    "low": "next low",
    "close": "next close",
    "volume": "next volume",
    "trade_count": "next trade_count",
    "vwap": "next vwap",
    "pct_change": "next pct_change",
    "high %": "next high %",
    "low %": "next low %",
    "open %": "next open %"
})

test_shift_df.head()


Unnamed: 0_level_0,next open,next high,next low,next close,next volume,next trade_count,next vwap,next pct_change,next high %,next low %,next open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-03-01 00:00:00-05:00,435.04,437.17,427.11,429.98,139991559.0,1422070.0,431.644816,-0.01523,0.016722,-0.006675,0.011768
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,-0.012606
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,0.010925
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,-0.000972
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,0.028896


In [31]:
test_shift_df = test_shift_df.shift(-1)

In [32]:
bars_df = bars_df.dropna()
bars_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,Predicted High,Predicted Low,Predicted Close,Predicted Volume,Predicted Trade Count,Predicted VWAP,Predicted pct_change,Predicted high %,Predicted low %,Predicted open %
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,...,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,...,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,...,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,...,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.007182,-0.000343
2022-03-08 00:00:00-05:00,419.62,427.21,415.12,416.25,167638902.0,1950904.0,419.707824,-0.007582,0.02633,-0.002715,...,455.30484,450.898132,453.371094,85294904.0,622406.625,453.127838,0.000367,0.006384,-0.000906,-0.000343


In [33]:
bars_df = pd.concat([bars_df,test_shift_df],axis=1)
bars_df = bars_df.dropna()
bars_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,next high,next low,next close,next volume,next trade_count,next vwap,next pct_change,next high %,next low %,next open %
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,...,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,0.010925
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,...,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,-0.000972
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,...,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,0.028896
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,...,427.21,415.12,416.25,167638902.0,1950904.0,419.707824,-0.007582,0.02633,-0.002715,0.008096
2022-03-08 00:00:00-05:00,419.62,427.21,415.12,416.25,167638902.0,1950904.0,419.707824,-0.007582,0.02633,-0.002715,...,429.51,422.82,427.41,117271076.0,1133573.0,426.179666,0.026811,0.004913,-0.010739,-0.005311


In [34]:
bars_df["High/Low Success"] = np.where(
    (bars_df["next close"] >= bars_df["Predicted Low"]),
    np.where(
        bars_df["close"] <= bars_df["Predicted High"], 1, 0
    ), 0
)

In [35]:
bars_df["Predicted Close Delta"] = bars_df["Predicted Close"] - bars_df["close"]
bars_df["Next Close Delta"] = bars_df["next close"] - bars_df["close"]
bars_df["Close Product"] = bars_df["Next Close Delta"] * bars_df["Predicted Close Delta"]
bars_df["Close Success"] = np.where(
    bars_df["Close Product"] >= 0, 1, 0
)

In [36]:
bars_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,next vwap,next pct_change,next high %,next low %,next open %,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,...,436.908456,-0.004978,0.012394,-0.004384,0.010925,0,15.481094,-2.18,-33.748784,0
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,...,430.920534,-0.008125,0.002777,-0.009927,-0.000972,0,17.661094,-3.54,-62.520272,0
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,...,423.968129,-0.029479,0.030689,-0.000167,0.028896,0,21.201094,-12.74,-270.101934,0
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,...,419.707824,-0.007582,0.02633,-0.002715,0.008096,0,33.941094,-3.18,-107.932678,0
2022-03-08 00:00:00-05:00,419.62,427.21,415.12,416.25,167638902.0,1950904.0,419.707824,-0.007582,0.02633,-0.002715,...,426.179666,0.026811,0.004913,-0.010739,-0.005311,0,37.121094,11.16,414.271406,1


In [37]:
bars_df = bars_df.rename_axis("Date")
bars_df.reset_index(inplace=True)
bars_df.head()

Unnamed: 0,Date,open,high,low,close,volume,trade_count,vwap,pct_change,high %,...,next vwap,next pct_change,next high %,next low %,next open %,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success
0,2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,...,436.908456,-0.004978,0.012394,-0.004384,0.010925,0,15.481094,-2.18,-33.748784,0
1,2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,...,430.920534,-0.008125,0.002777,-0.009927,-0.000972,0,17.661094,-3.54,-62.520272,0
2,2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,...,423.968129,-0.029479,0.030689,-0.000167,0.028896,0,21.201094,-12.74,-270.101934,0
3,2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,...,419.707824,-0.007582,0.02633,-0.002715,0.008096,0,33.941094,-3.18,-107.932678,0
4,2022-03-08 00:00:00-05:00,419.62,427.21,415.12,416.25,167638902.0,1950904.0,419.707824,-0.007582,0.02633,...,426.179666,0.026811,0.004913,-0.010739,-0.005311,0,37.121094,11.16,414.271406,1


In [38]:
actual_close = bars_df.hvplot.line(
    x="Date",
    y="next close",
)

In [39]:
predicted_high = bars_df.hvplot.line(
    x="Date",
    y="Predicted High",
)

In [40]:
predicted_low = bars_df.hvplot.line(
    x="Date",
    y="Predicted Low",
)

In [41]:
predicted_close = bars_df.hvplot.line(
    x="Date",
    y="Predicted Close",
)

In [42]:
actual_close * predicted_high * predicted_low * predicted_close

In [43]:
bars_df.describe()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,next vwap,next pct_change,next high %,next low %,next open %,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success
count,498.0,498.0,498.0,498.0,498.0,498.0,498.0,498.0,498.0,498.0,...,498.0,498.0,498.0,498.0,498.0,498.0,498.0,498.0,498.0,498.0
mean,420.588818,423.376421,417.794308,420.77492,85255230.0,621179.8,420.644851,0.000406,0.006365,-0.007196,...,420.787077,0.000362,0.006367,-0.007168,-0.000342,0.036145,19.729352,0.136747,7.389577,0.465863
std,32.150314,31.589022,32.768932,32.277389,24363750.0,214713.6,32.149312,0.01199,0.006921,0.006291,...,32.374377,0.011964,0.00692,0.00629,0.009643,0.186837,42.200249,4.870481,253.632909,0.499335
min,349.205,359.8179,348.11,356.56,29737380.0,184647.0,357.98524,-0.043482,0.0,-0.048802,...,357.98524,-0.043482,0.0,-0.048802,-0.04581,0.0,-86.874261,-17.87,-940.852188,0.0
25%,395.815,399.2825,393.165,396.05,68274640.0,488379.5,395.966001,-0.006378,0.001478,-0.009904,...,395.966001,-0.006378,0.001478,-0.009826,-0.005944,0.0,-15.654269,-2.68,-87.0814,0.0
50%,414.88,417.1,411.8,414.565,80560560.0,576104.5,414.127244,0.000376,0.003791,-0.00553,...,414.127244,0.000367,0.003791,-0.005517,-0.00078,0.0,23.771584,0.155,-4.589632,0.0
75%,443.865,445.7,441.7025,443.565,96225020.0,672470.0,443.474206,0.007562,0.009223,-0.002644,...,443.619707,0.007512,0.009223,-0.002618,0.004985,0.0,54.868594,3.0775,76.913236,1.0
max,509.27,510.13,507.1,507.85,189443800.0,1950904.0,508.438618,0.054954,0.036706,0.0,...,508.438618,0.054954,0.036706,0.0,0.034739,1.0,96.811094,20.56,1629.196888,1.0


In [44]:
bars_df["Action"] = np.where(
    bars_df["next close"] > bars_df["close"], 1, -1
)

In [58]:
display(bars_df.head())
display(bars_df.tail())
bars_df.describe()

Unnamed: 0,Date,open,high,low,close,volume,trade_count,vwap,pct_change,high %,...,Action,Position,Cash,Holdings,Strategy Value,Stock Value,Stock Returns,Strategy Returns,Stock Cumulative Returns,Strategy Cumulative Returns
0,2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,...,-1,0.0,1000.0,0.0,1000.0,875.78,,,,
1,2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,...,-1,0.0,1000.0,0.0,1000.0,871.42,-0.004978,0.0,0.995022,1.0
2,2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,...,-1,0.0,1000.0,0.0,1000.0,864.34,-0.008125,0.0,0.986937,1.0
3,2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,...,-1,0.0,1000.0,0.0,1000.0,838.86,-0.029479,0.0,0.957843,1.0
4,2022-03-08 00:00:00-05:00,419.62,427.21,415.12,416.25,167638902.0,1950904.0,419.707824,-0.007582,0.02633,...,1,1.0,583.75,416.25,1000.0,832.5,-0.007582,0.0,0.950581,1.0


Unnamed: 0,Date,open,high,low,close,volume,trade_count,vwap,pct_change,high %,...,Action,Position,Cash,Holdings,Strategy Value,Stock Value,Stock Returns,Strategy Returns,Stock Cumulative Returns,Strategy Cumulative Returns
493,2024-02-16 00:00:00-05:00,501.7,502.87,498.75,499.51,75481032.0,531239.0,500.92474,-0.00498,0.006727,...,-1,2.0,1447.65,999.02,2446.67,999.02,-0.00498,-0.003056,1.14072,2.44667
494,2024-02-20 00:00:00-05:00,497.72,498.41,494.45,496.76,71736740.0,595486.0,496.441094,-0.005505,0.003322,...,1,3.0,950.89,1490.28,2441.17,993.52,-0.005505,-0.002248,1.13444,2.44117
495,2024-02-21 00:00:00-05:00,495.42,497.37,493.56,497.21,59603771.0,499405.0,495.831166,0.000906,0.000322,...,1,4.0,453.68,1988.84,2442.52,994.42,0.000906,0.000553,1.135468,2.44252
496,2024-02-22 00:00:00-05:00,504.01,508.49,503.02,507.5,76402535.0,613961.0,505.832024,0.020695,0.001951,...,1,4.0,453.68,2030.0,2483.68,1015.0,0.020695,0.016851,1.158967,2.48368
497,2024-02-23 00:00:00-05:00,509.27,510.13,507.1,507.85,61309016.0,489047.0,508.438618,0.00069,0.00449,...,-1,3.0,961.53,1523.55,2485.08,1015.7,0.00069,0.000564,1.159766,2.48508


Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,Action,Position,Cash,Holdings,Strategy Value,Stock Value,Stock Returns,Strategy Returns,Stock Cumulative Returns,Strategy Cumulative Returns
count,498.0,498.0,498.0,498.0,498.0,498.0,498.0,498.0,498.0,498.0,...,498.0,498.0,498.0,498.0,498.0,498.0,497.0,497.0,497.0,497.0
mean,420.588818,423.376421,417.794308,420.77492,85255230.0,621179.8,420.644851,0.000406,0.006365,-0.007196,...,0.016064,1.809237,934.517129,779.919839,1714.436968,841.549839,0.00037,0.001848,0.960836,1.715874
std,32.150314,31.589022,32.768932,32.277389,24363750.0,214713.6,32.149312,0.01199,0.006921,0.006291,...,1.000876,1.393343,523.527696,621.752358,396.316386,64.554779,0.011975,0.005436,0.073765,0.395414
min,349.205,359.8179,348.11,356.56,29737380.0,184647.0,357.98524,-0.043482,0.0,-0.048802,...,-1.0,0.0,55.53,0.0,1000.0,713.12,-0.043482,-0.025083,0.814268,1.0
25%,395.815,399.2825,393.165,396.05,68274640.0,488379.5,395.966001,-0.006378,0.001478,-0.009904,...,-1.0,1.0,481.35,376.8075,1381.71,792.1,-0.006394,-0.000115,0.904405,1.38171
50%,414.88,417.1,411.8,414.565,80560560.0,576104.5,414.127244,0.000376,0.003791,-0.00553,...,1.0,2.0,916.52,788.13,1726.18,829.13,0.000375,0.000139,0.946539,1.72618
75%,443.865,445.7,441.7025,443.565,96225020.0,672470.0,443.474206,0.007562,0.009223,-0.002644,...,1.0,3.0,1331.81,1274.9775,2035.105,887.13,0.007539,0.003972,1.013108,2.03511
max,509.27,510.13,507.1,507.85,189443800.0,1950904.0,508.438618,0.054954,0.036706,0.0,...,1.0,4.0,2338.76,2030.0,2485.08,1015.7,0.054954,0.041363,1.159766,2.48508


In [46]:
current_position = 0
starting_cash = 1000
current_cash = starting_cash
max_position = 50
for index, row in bars_df.iterrows():
    # Get the current action
    close = row["close"]
    action = row["Action"]

    # Take the action if possible
    if action > 0:
        if action + current_position <= max_position and action * close < current_cash:
            current_position += action
            current_cash -= action*close
        else:
            action = 0
    elif action < 0:
        if action + current_position >= 0:
            current_position += action
            current_cash += -action*close
        else:
            action = 0

    # Update enabled and position
    bars_df.loc[index, "Position"] = current_position
    bars_df.loc[index, "Cash"] = current_cash

In [47]:
import math

# Compute Holdings
bars_df["Holdings"] = bars_df["close"] * bars_df["Position"]

# Compute Profit
bars_df["Strategy Value"] = bars_df["Holdings"] + bars_df["Cash"]
starting_close = bars_df.iloc[0]["close"] 
display(f"starting_close {starting_close}")
starting_shares = math.floor(starting_cash / starting_close)
display(f"starting_shares {starting_shares}")
bars_df["Stock Value"] =  bars_df["close"] * starting_shares

# Compute Returns
bars_df["Stock Returns"] = bars_df["close"].pct_change()
bars_df["Strategy Returns"] = bars_df["Strategy Value"].pct_change()

# Compute Cumulative Daily Returns
bars_df["Stock Cumulative Returns"] = (bars_df["Stock Returns"] + 1).cumprod()
bars_df["Strategy Cumulative Returns"] = (bars_df["Strategy Returns"] + 1).cumprod()

'starting_close 437.89'

'starting_shares 2'

In [48]:
bars_df.tail()

Unnamed: 0,Date,open,high,low,close,volume,trade_count,vwap,pct_change,high %,...,Action,Position,Cash,Holdings,Strategy Value,Stock Value,Stock Returns,Strategy Returns,Stock Cumulative Returns,Strategy Cumulative Returns
493,2024-02-16 00:00:00-05:00,501.7,502.87,498.75,499.51,75481032.0,531239.0,500.92474,-0.00498,0.006727,...,-1,2.0,1447.65,999.02,2446.67,999.02,-0.00498,-0.003056,1.14072,2.44667
494,2024-02-20 00:00:00-05:00,497.72,498.41,494.45,496.76,71736740.0,595486.0,496.441094,-0.005505,0.003322,...,1,3.0,950.89,1490.28,2441.17,993.52,-0.005505,-0.002248,1.13444,2.44117
495,2024-02-21 00:00:00-05:00,495.42,497.37,493.56,497.21,59603771.0,499405.0,495.831166,0.000906,0.000322,...,1,4.0,453.68,1988.84,2442.52,994.42,0.000906,0.000553,1.135468,2.44252
496,2024-02-22 00:00:00-05:00,504.01,508.49,503.02,507.5,76402535.0,613961.0,505.832024,0.020695,0.001951,...,1,4.0,453.68,2030.0,2483.68,1015.0,0.020695,0.016851,1.158967,2.48368
497,2024-02-23 00:00:00-05:00,509.27,510.13,507.1,507.85,61309016.0,489047.0,508.438618,0.00069,0.00449,...,-1,3.0,961.53,1523.55,2485.08,1015.7,0.00069,0.000564,1.159766,2.48508


In [49]:
stock_cumulative_returns = bars_df.hvplot.line(
    x="Date",
    y="Stock Cumulative Returns",
)

In [61]:
import panel as pn
pn.config.theme = 'dark'

In [62]:
returns = bars_df.hvplot.line(
    x="Date",
    y=["Strategy Cumulative Returns", "Stock Cumulative Returns"],
).opts(
    title=f"Stock vs Strategy Returns for {symbol} using LSGAN",
    ylabel="Percent Returns"
)
returns

In [50]:
strategy_cumulative_returns = bars_df.hvplot.line(
    x="Date",
    y="Strategy Cumulative Returns",
)

In [51]:
stock_cumulative_returns * strategy_cumulative_returns

In [52]:
cash = bars_df.hvplot.line(
    x="Date",
    y="Cash",
)

In [53]:
holdings = bars_df.hvplot.line(
    x="Date",
    y="Holdings",
)

In [54]:
cash*holdings

In [63]:
position = bars_df.hvplot.line(
    x="Date",
    y=["Position"],
).opts(
    title=f"Number of shares of {symbol} using LSGAN",
    ylabel="Number of Shares"
)
position

In [55]:
position = bars_df.hvplot.line(
    x="Date",
    y="Position",
)
position

In [60]:
bars_df.describe()["High/Low Success"]

count    498.000000
mean       0.036145
std        0.186837
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max        1.000000
Name: High/Low Success, dtype: float64