In [1]:
import os
import numpy as np
import pandas as pd
import json
import tensorflow as tf
from datetime import datetime, timedelta

from dotenv import load_dotenv

from alpaca.data.requests import StockBarsRequest
from alpaca.data.historical.stock import StockHistoricalDataClient
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Reshape

import matplotlib.pyplot as plt

import hvplot.pandas

In [2]:
# Load .env file

load_dotenv()

True

In [3]:
# Set Alpaca API key and secret

ALPACA_API_KEY = os.getenv("ALPACA_API_KEY")
ALPACA_SECRET_KEY = os.getenv("ALPACA_API_SECRET")
client = StockHistoricalDataClient(ALPACA_API_KEY, ALPACA_SECRET_KEY)

# Create the Alpaca API object

timeframe = TimeFrame(1, TimeFrameUnit.Day)
symbol = 'SPY'
start = datetime.utcnow() - timedelta(days=3650)
end=datetime.utcnow() - timedelta(days=730)
request = StockBarsRequest(symbol_or_symbols=symbol, start=start, end=end, timeframe=timeframe)

In [4]:
source_df = client.get_stock_bars(request).df.tz_convert('America/New_York', level=1)

In [5]:
display(source_df.head())
display(source_df.tail())

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,trade_count,vwap
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SPY,2016-01-04 00:00:00-05:00,200.49,201.03,198.59,201.0192,225903783.0,655489.0,200.656423
SPY,2016-01-05 00:00:00-05:00,201.405,201.9,200.05,201.36,112719152.0,418709.0,201.08428
SPY,2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646
SPY,2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911
SPY,2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537


Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,trade_count,vwap
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SPY,2022-02-23 00:00:00-05:00,432.66,433.26,421.35,422.07,132864067.0,1166042.0,425.926191
SPY,2022-02-24 00:00:00-05:00,411.02,428.76,410.64,427.99,220033458.0,2316988.0,419.89052
SPY,2022-02-25 00:00:00-05:00,429.61,437.84,427.86,437.75,123640399.0,1140051.0,434.397245
SPY,2022-02-28 00:00:00-05:00,432.03,438.2,430.7,436.63,149221337.0,1305978.0,434.612834
SPY,2022-03-01 00:00:00-05:00,435.04,437.17,427.11,429.98,139991559.0,1422070.0,431.644816


In [6]:
bars_df = source_df.copy()
# pct_change is profit from last close
bars_df["pct_change"] = bars_df["close"].pct_change()
# signal for when we want to be in or out of a stock
#bars_df["signal"] = np.where(bars_df["pct_change"] > 0, 1.0, 0.0)
# reaction is the signal diff
#bars_df["reaction"] = bars_df["signal"].diff()
# action is if we could perfectly predict the next close
#bars_df["action"] = bars_df["reaction"].shift(-1)
# these values are the high, low, and open as a percentage of the current close
bars_df["high %"] = (bars_df["high"] - bars_df["close"])/bars_df["close"]
bars_df["low %"] = (bars_df["low"] - bars_df["close"])/bars_df["close"]
bars_df["open %"] = (bars_df["open"] - bars_df["close"])/bars_df["close"]

In [7]:
bars_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1551 entries, ('SPY', Timestamp('2016-01-04 00:00:00-0500', tz='America/New_York')) to ('SPY', Timestamp('2022-03-01 00:00:00-0500', tz='America/New_York'))
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   open         1551 non-null   float64
 1   high         1551 non-null   float64
 2   low          1551 non-null   float64
 3   close        1551 non-null   float64
 4   volume       1551 non-null   float64
 5   trade_count  1551 non-null   float64
 6   vwap         1551 non-null   float64
 7   pct_change   1550 non-null   float64
 8   high %       1551 non-null   float64
 9   low %        1551 non-null   float64
 10  open %       1551 non-null   float64
dtypes: float64(11)
memory usage: 182.5+ KB


In [8]:
# Cleanup DF for model

bars_df = bars_df.droplevel(level=0).dropna()
bars_df.head()

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-05 00:00:00-05:00,201.405,201.9,200.05,201.36,112719152.0,418709.0,201.08428,0.001695,0.002682,-0.006506,0.000223
2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,-0.006136,-0.002465
2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,-0.002371,0.006596
2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,-0.001787,0.017022
2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,-0.01192,0.004685


In [9]:
# Normalize data set

scaler = StandardScaler()
bars_df_scaled = scaler.fit_transform(bars_df)


In [10]:
# Define constants

# input shape

input_shape = bars_df_scaled.shape[1]
latent_dim = 11
num_samples = bars_df_scaled.shape[0]
batch_size = 16


In [11]:
# Define generator (Sequential)

build_generator = Sequential([
    Dense(128,input_shape=(input_shape,), activation="relu"),
    Dense(256, activation="relu"),
    Dense(512, activation="relu"),
    Dense(input_shape, activation="sigmoid")
])

# Define descriminator

build_discriminator = Sequential([
    Dense(512, input_shape=(input_shape,), activation="relu"),
    Dense(256, activation="relu"),
    Dense(128, activation="relu"),
    Dense(1, activation="sigmoid")
])

2024-02-29 17:45:21.694395: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Max
2024-02-29 17:45:21.694416: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 64.00 GB
2024-02-29 17:45:21.694425: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 24.00 GB
2024-02-29 17:45:21.694458: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-02-29 17:45:21.694474: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [12]:
# Compile generator
build_generator.compile(loss = "mse", optimizer="adam")

# Compile discriminator
build_discriminator.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [13]:
# Combine models

# Confirm that discriminator weights are not training during generators training

build_discriminator.trainable=False

# Setup for generators data

z = tf.keras.Input(shape=(latent_dim,))

# Generated data by the generator

generated_data = build_generator(z)

# Discriminators verdict

validity = build_discriminator(generated_data)

In [14]:
# Define combined models

combined = tf.keras.Model(z, validity)
combined.compile(optimizer="adam", loss="mse", metrics=["accuracy"])

# Print summary of the combined model
combined.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 11)]              0         
                                                                 
 sequential (Sequential)     (None, 11)                171787    
                                                                 
 sequential_1 (Sequential)   (None, 1)                 170497    
                                                                 
Total params: 342284 (1.31 MB)
Trainable params: 171787 (671.04 KB)
Non-trainable params: 170497 (666.00 KB)
_________________________________________________________________


In [15]:
# Define training loop

epochs = 10000

generator_weights_path = "./generator_model.h5"
discriminator_weights_path = "./discriminator_model.h5"

for epoch in range(epochs):

    # Train discriminator
    # Sample real data
    idx = np.random.choice(num_samples, batch_size, replace=False)
    real_data = bars_df_scaled[idx]

 # Generate fake data (Random Walk noise)

    gaussian_noise = np.random.normal(0,1,size=(batch_size,input_shape))
    random_walk_noise = np.cumsum(gaussian_noise, axis=0)

    fake_data = build_generator.predict(random_walk_noise)

    # Train discriminator

    d_loss_real = build_discriminator.train_on_batch(real_data, np.ones((batch_size, 1)))
    d_loss_fake = build_discriminator.train_on_batch(fake_data, np.zeros((batch_size, 1)))
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    
    # Train generator 

    noise = np.random.normal(0, 1, (batch_size, input_shape))
    g_loss = combined.train_on_batch(noise, np.ones((batch_size, 1)))
    
    # Print progress

    print(f"Epoch {epoch}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}")




2024-02-29 17:45:22.033100: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2024-02-29 17:45:22.234134: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 0, Discriminator Loss: [0.58883739 0.875     ], Generator Loss: [0.4791582524776459, 0.0]


2024-02-29 17:45:22.611011: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 1, Discriminator Loss: [0.56863466 0.8125    ], Generator Loss: [0.6053946614265442, 0.0]
Epoch 2, Discriminator Loss: [0.5203789 0.8125   ], Generator Loss: [0.6557582020759583, 0.0]
Epoch 3, Discriminator Loss: [0.39613602 0.90625   ], Generator Loss: [0.7430413961410522, 0.0]
Epoch 4, Discriminator Loss: [0.58910995 0.78125   ], Generator Loss: [0.7379785776138306, 0.0]
Epoch 5, Discriminator Loss: [0.28813163 0.90625   ], Generator Loss: [0.763230562210083, 0.0]
Epoch 6, Discriminator Loss: [0.50616568 0.84375   ], Generator Loss: [0.771754264831543, 0.0]
Epoch 7, Discriminator Loss: [0.57279149 0.875     ], Generator Loss: [0.8046104311943054, 0.0]
Epoch 8, Discriminator Loss: [0.57072848 0.71875   ], Generator Loss: [0.779057502746582, 0.0]
Epoch 9, Discriminator Loss: [0.77314626 0.78125   ], Generator Loss: [0.7599878907203674, 0.0]
Epoch 10, Discriminator Loss: [0.54680881 0.875     ], Generator Loss: [0.7393326163291931, 0.0]
Epoch 11, Discriminator Loss: [0.53204155 0.

In [16]:
# Saving Model Weights

build_generator.save("generator_model.h5")
build_discriminator.save("discriminator_model.h5")

  saving_api.save_model(


In [17]:
# Create alpaca object for testing

timeframe = TimeFrame(1, TimeFrameUnit.Day)
symbol = 'SPY'
start = datetime.utcnow() - timedelta(days=3650)
end=datetime.utcnow() - timedelta(days=1)
request = StockBarsRequest(symbol_or_symbols=symbol, start=start, end=end, timeframe=timeframe)

In [18]:
# Create df for testing

test_df = client.get_stock_bars(request).df.tz_convert('America/New_York', level=1)

# data preprocessing for testing

# pct_change is profit from last close
test_df["pct_change"] = test_df["close"].pct_change()
# signal for when we want to be in or out of a stock
#bars_df["signal"] = np.where(bars_df["pct_change"] > 0, 1.0, 0.0)
# reaction is the signal diff
#bars_df["reaction"] = bars_df["signal"].diff()
# action is if we could perfectly predict the next close
#bars_df["action"] = bars_df["reaction"].shift(-1)
# these values are the high, low, and open as a percentage of the current close
test_df["high %"] = (test_df["high"] - test_df["close"])/test_df["close"]
test_df["low %"] = (test_df["low"] - test_df["close"])/test_df["close"]
test_df["open %"] = (test_df["open"] - test_df["close"])/test_df["close"]

# set timestamp as index, drop nan

test_df = test_df.droplevel(level=0).dropna()

test_df.info()
display(test_df.head())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2051 entries, 2016-01-05 00:00:00-05:00 to 2024-02-28 00:00:00-05:00
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   open         2051 non-null   float64
 1   high         2051 non-null   float64
 2   low          2051 non-null   float64
 3   close        2051 non-null   float64
 4   volume       2051 non-null   float64
 5   trade_count  2051 non-null   float64
 6   vwap         2051 non-null   float64
 7   pct_change   2051 non-null   float64
 8   high %       2051 non-null   float64
 9   low %        2051 non-null   float64
 10  open %       2051 non-null   float64
dtypes: float64(11)
memory usage: 192.3 KB


Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-05 00:00:00-05:00,201.405,201.9,200.05,201.36,112719152.0,418709.0,201.08428,0.001695,0.002682,-0.006506,0.000223
2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,-0.006136,-0.002465
2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,-0.002371,0.006596
2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,-0.001787,0.017022
2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,-0.01192,0.004685


In [19]:
# Normalize test data set

test_df_scaled = scaler.fit_transform(test_df)

print(test_df_scaled)
print(len(test_df_scaled))

[[-1.49294744 -1.49966193 -1.49510909 ... -0.38251642 -0.04738244
   0.05014812]
 [-1.52906807 -1.52114268 -1.52407379 ...  0.12186319  0.00887147
  -0.27076464]
 [-1.56430771 -1.55172939 -1.57148133 ...  1.71556409  0.58208247
   0.81096779]
 ...
 [ 2.11200915  2.08260233  2.12027679 ...  0.01089449  0.90381431
   0.56850261]
 [ 2.09321468  2.06404016  2.10715401 ... -0.69862719  0.28831759
  -0.03069928]
 [ 2.07712191  2.06047949  2.1096367  ... -0.59625201  0.55204533
  -0.19584499]]
2051


In [20]:
# Test generator against real data

#lists to store generated and timestamps

column_names=[
    "Predicted Open",
    "Predicted High",
    "Predicted Low",
    "Predicted Close",
    "Predicted Volume",
    "Predicted Trade Count",
    "Predicted VWAP",
    "Predicted pct_change",
    "Predicted high %",
    "Predicted low %",
    "Predicted open %"
    ]

# sliding window

window_size = 1

# init lists to store generated data with associated timestamps

timestamps = []
predictions = []

In [21]:
# shape of scaled test data

print(test_df_scaled.shape)

(2051, 11)


In [22]:
predictions_df = pd.DataFrame(columns = column_names)

predictions_df[:] = 0

In [23]:
print(test_df.index)
print(window_size)

DatetimeIndex(['2016-01-05 00:00:00-05:00', '2016-01-06 00:00:00-05:00',
               '2016-01-07 00:00:00-05:00', '2016-01-08 00:00:00-05:00',
               '2016-01-11 00:00:00-05:00', '2016-01-12 00:00:00-05:00',
               '2016-01-13 00:00:00-05:00', '2016-01-14 00:00:00-05:00',
               '2016-01-15 00:00:00-05:00', '2016-01-19 00:00:00-05:00',
               ...
               '2024-02-14 00:00:00-05:00', '2024-02-15 00:00:00-05:00',
               '2024-02-16 00:00:00-05:00', '2024-02-20 00:00:00-05:00',
               '2024-02-21 00:00:00-05:00', '2024-02-22 00:00:00-05:00',
               '2024-02-23 00:00:00-05:00', '2024-02-26 00:00:00-05:00',
               '2024-02-27 00:00:00-05:00', '2024-02-28 00:00:00-05:00'],
              dtype='datetime64[ns, America/New_York]', name='timestamp', length=2051, freq=None)
1


In [24]:
# Prediction loop

for i in range(0,len(test_df_scaled),window_size):
    print(i)
    # extract sliding window of real data

    input_data = test_df_scaled[i:i+window_size]

    # predict the next row using generator

    shaped_data = input_data.reshape(1,window_size,11)
    print(shaped_data)

    predicted_row = build_generator.predict(shaped_data)
    
    #predicted_row = build_generator.predict(input_data)

    #append generated row to list
    # predictions.append(predicted_row)

    # extract timestamp for the prediction

    if (i + window_size) < len(test_df):
        timestamp = test_df.index[i+window_size]
    else:
        timestamp = test_df.index[-1] + timedelta(days=1)
    
    print(timestamp)
    print(predicted_row.squeeze())
    predictions_df.loc[timestamp] = predicted_row.squeeze()
    print(predictions_df.head())
    

    # predictions_df = pd.concat([predictions_df,predicted_row],ignore_index=True)


0
[[[-1.49294744 -1.49966193 -1.49510909 -1.49376581  0.5748571
   -0.14767612 -1.49674001  0.10387462 -0.38251642 -0.04738244
    0.05014812]]]
2016-01-06 00:00:00-05:00
[1.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00 0.0000000e+00
 9.9993503e-01 1.0000000e+00 6.0946929e-12 0.0000000e+00 1.3858943e-26
 0.0000000e+00]
                           Predicted Open  Predicted High  Predicted Low  \
2016-01-06 00:00:00-05:00             1.0             1.0            1.0   

                           Predicted Close  Predicted Volume  \
2016-01-06 00:00:00-05:00              1.0               0.0   

                           Predicted Trade Count  Predicted VWAP  \
2016-01-06 00:00:00-05:00               0.999935             1.0   

                           Predicted pct_change  Predicted high %  \
2016-01-06 00:00:00-05:00          6.094693e-12               0.0   

                           Predicted low %  Predicted open %  
2016-01-06 00:00:00-05:00     1.385894e-26        

2024-02-29 17:57:20.185322: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


2016-01-13 00:00:00-05:00
[1.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00 5.2473221e-24
 5.2205348e-01 1.0000000e+00 1.2362995e-10 5.7764524e-19 2.0019588e-07
 3.8394064e-15]
                           Predicted Open  Predicted High  Predicted Low  \
2016-01-06 00:00:00-05:00             1.0             1.0            1.0   
2016-01-07 00:00:00-05:00             1.0             1.0            1.0   
2016-01-08 00:00:00-05:00             1.0             1.0            1.0   
2016-01-11 00:00:00-05:00             1.0             1.0            1.0   
2016-01-12 00:00:00-05:00             1.0             1.0            1.0   

                           Predicted Close  Predicted Volume  \
2016-01-06 00:00:00-05:00              1.0      0.000000e+00   
2016-01-07 00:00:00-05:00              1.0      0.000000e+00   
2016-01-08 00:00:00-05:00              1.0      0.000000e+00   
2016-01-11 00:00:00-05:00              1.0      0.000000e+00   
2016-01-12 00:00:00-05:00              

In [25]:
index = predictions_df.index

predictions_df_inverse_scaled = scaler.inverse_transform(predictions_df)
generated_data_df = pd.DataFrame(predictions_df_inverse_scaled, columns=column_names,index=index)
generated_data_df.head()



Unnamed: 0,Predicted Open,Predicted High,Predicted Low,Predicted Close,Predicted Volume,Predicted Trade Count,Predicted VWAP,Predicted pct_change,Predicted high %,Predicted low %,Predicted open %
2016-01-06 00:00:00-05:00,413.633118,416.016327,411.100586,413.744843,88155216.0,800728.0625,413.623016,0.000515,0.005378,-0.006194,-0.000197
2016-01-07 00:00:00-05:00,413.633118,416.016327,411.100586,413.744843,88155216.0,800749.6875,413.623016,0.011876,0.012209,-0.00602,-0.000197
2016-01-08 00:00:00-05:00,413.633118,416.016327,411.100586,413.744843,88155216.0,800749.6875,413.623016,0.011876,0.012426,0.000375,-0.000197
2016-01-11 00:00:00-05:00,413.633118,416.016327,411.100586,413.744843,88155216.0,800749.6875,413.623016,0.011876,0.005378,-0.006194,-0.000197
2016-01-12 00:00:00-05:00,413.633118,416.016327,411.100586,413.744843,88155216.0,800749.6875,413.623016,0.008698,0.005719,0.000263,-0.000197


In [26]:
test_df.head()

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-05 00:00:00-05:00,201.405,201.9,200.05,201.36,112719152.0,418709.0,201.08428,0.001695,0.002682,-0.006506,0.000223
2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,-0.006136,-0.002465
2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,-0.002371,0.006596
2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,-0.001787,0.017022
2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,-0.01192,0.004685


In [27]:
bars_df = pd.concat([test_df,generated_data_df],axis=1)

In [28]:
bars_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,Predicted High,Predicted Low,Predicted Close,Predicted Volume,Predicted Trade Count,Predicted VWAP,Predicted pct_change,Predicted high %,Predicted low %,Predicted open %
2016-01-05 00:00:00-05:00,201.405,201.9,200.05,201.36,112719152.0,418709.0,201.08428,0.001695,0.002682,-0.006506,...,,,,,,,,,,
2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,-0.006136,...,416.016327,411.100586,413.744843,88155216.0,800728.0625,413.623016,0.000515,0.005378,-0.006194,-0.000197
2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,-0.002371,...,416.016327,411.100586,413.744843,88155216.0,800749.6875,413.623016,0.011876,0.012209,-0.00602,-0.000197
2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,-0.001787,...,416.016327,411.100586,413.744843,88155216.0,800749.6875,413.623016,0.011876,0.012426,0.000375,-0.000197
2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,-0.01192,...,416.016327,411.100586,413.744843,88155216.0,800749.6875,413.623016,0.011876,0.005378,-0.006194,-0.000197


In [29]:
test_shift_df = test_df.copy()

test_shift_df = test_shift_df.rename(columns={
    "open": "next open",
    "high": "next high",
    "low": "next low",
    "close": "next close",
    "volume": "next volume",
    "trade_count": "next trade_count",
    "vwap": "next vwap",
    "pct_change": "next pct_change",
    "high %": "next high %",
    "low %": "next low %",
    "open %": "next open %"
})

test_shift_df.head()


Unnamed: 0_level_0,next open,next high,next low,next close,next volume,next trade_count,next vwap,next pct_change,next high %,next low %,next open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-05 00:00:00-05:00,201.405,201.9,200.05,201.36,112719152.0,418709.0,201.08428,0.001695,0.002682,-0.006506,0.000223
2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,-0.006136,-0.002465
2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,-0.002371,0.006596
2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,-0.001787,0.017022
2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,-0.01192,0.004685


In [30]:
test_shift_df = test_shift_df.shift(-1)

In [31]:
bars_df = bars_df.dropna()
bars_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,Predicted High,Predicted Low,Predicted Close,Predicted Volume,Predicted Trade Count,Predicted VWAP,Predicted pct_change,Predicted high %,Predicted low %,Predicted open %
2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,-0.006136,...,416.016327,411.100586,413.744843,88155216.0,800728.0625,413.623016,0.000515,0.005378,-0.006194,-0.000197
2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,-0.002371,...,416.016327,411.100586,413.744843,88155216.0,800749.6875,413.623016,0.011876,0.012209,-0.00602,-0.000197
2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,-0.001787,...,416.016327,411.100586,413.744843,88155216.0,800749.6875,413.623016,0.011876,0.012426,0.000375,-0.000197
2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,-0.01192,...,416.016327,411.100586,413.744843,88155216.0,800749.6875,413.623016,0.011876,0.005378,-0.006194,-0.000197
2016-01-12 00:00:00-05:00,193.82,194.55,191.14,193.6608,175844276.0,635749.0,192.902295,0.008072,0.004592,-0.013017,...,416.016327,411.100586,413.744843,88155216.0,800749.6875,413.623016,0.008698,0.005719,0.000263,-0.000197


In [32]:
bars_df = pd.concat([bars_df,test_shift_df],axis=1)
bars_df = bars_df.dropna()
bars_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,next high,next low,next close,next volume,next trade_count,next vwap,next pct_change,next high %,next low %,next open %
2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,-0.006136,...,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,-0.002371,0.006596
2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,-0.002371,...,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,-0.001787,0.017022
2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,-0.001787,...,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,-0.01192,0.004685
2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,-0.01192,...,194.55,191.14,193.6608,175844276.0,635749.0,192.902295,0.008072,0.004592,-0.013017,0.000822
2016-01-12 00:00:00-05:00,193.82,194.55,191.14,193.6608,175844276.0,635749.0,192.902295,0.008072,0.004592,-0.013017,...,194.86,188.38,188.83,223632834.0,812694.0,191.13459,-0.024945,0.031933,-0.002383,0.029762


In [33]:
bars_df["High/Low Success"] = np.where(
    (bars_df["next close"] >= bars_df["Predicted Low"]),
    np.where(
        bars_df["close"] <= bars_df["Predicted High"], 1, 0
    ), 0
)

In [34]:
bars_df["Predicted Close Delta"] = bars_df["Predicted Close"] - bars_df["close"]
bars_df["Next Close Delta"] = bars_df["next close"] - bars_df["close"]
bars_df["Close Product"] = bars_df["Next Close Delta"] * bars_df["Predicted Close Delta"]
bars_df["Close Success"] = np.where(
    bars_df["Close Product"] >= 0, 1, 0
)

In [35]:
bars_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,next vwap,next pct_change,next high %,next low %,next open %,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success
2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,-0.006136,...,195.345911,-0.023992,0.01747,-0.002371,0.006596,0,214.924843,-4.77,-1025.191499,0
2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,-0.002371,...,193.644537,-0.010961,0.020461,-0.001787,0.017022,0,219.694843,-2.127,-467.29093,0
2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,-0.001787,...,191.757659,0.000974,0.006767,-0.01192,0.004685,0,221.821843,0.187,41.480685,1
2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,-0.01192,...,192.902295,0.008072,0.004592,-0.013017,0.000822,0,221.634843,1.5508,343.711314,1
2016-01-12 00:00:00-05:00,193.82,194.55,191.14,193.6608,175844276.0,635749.0,192.902295,0.008072,0.004592,-0.013017,...,191.13459,-0.024945,0.031933,-0.002383,0.029762,0,220.084043,-4.8308,-1063.181993,0


In [36]:
bars_df = bars_df.rename_axis("Date")
bars_df.reset_index(inplace=True)
bars_df.head()

Unnamed: 0,Date,open,high,low,close,volume,trade_count,vwap,pct_change,high %,...,next vwap,next pct_change,next high %,next low %,next open %,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success
0,2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,...,195.345911,-0.023992,0.01747,-0.002371,0.006596,0,214.924843,-4.77,-1025.191499,0
1,2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,...,193.644537,-0.010961,0.020461,-0.001787,0.017022,0,219.694843,-2.127,-467.29093,0
2,2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,...,191.757659,0.000974,0.006767,-0.01192,0.004685,0,221.821843,0.187,41.480685,1
3,2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,...,192.902295,0.008072,0.004592,-0.013017,0.000822,0,221.634843,1.5508,343.711314,1
4,2016-01-12 00:00:00-05:00,193.82,194.55,191.14,193.6608,175844276.0,635749.0,192.902295,0.008072,0.004592,...,191.13459,-0.024945,0.031933,-0.002383,0.029762,0,220.084043,-4.8308,-1063.181993,0


In [37]:
actual_close = bars_df.hvplot.line(
    x="Date",
    y="next close",
)

In [38]:
predicted_high = bars_df.hvplot.line(
    x="Date",
    y="Predicted High",
)

In [39]:
predicted_low = bars_df.hvplot.line(
    x="Date",
    y="Predicted Low",
)

In [40]:
predicted_close = bars_df.hvplot.line(
    x="Date",
    y="Predicted Close",
)

In [41]:
actual_close * predicted_high * predicted_low * predicted_close

In [42]:
bars_df.describe()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,next vwap,next pct_change,next high %,next low %,next open %,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success
count,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,...,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0
mean,328.477434,330.334768,326.489503,328.553901,88158700.0,467915.2,328.472048,0.000516,0.005381,-0.006196,...,328.621916,0.000521,0.005379,-0.006194,-0.000196,0.016105,53.234261,0.150044,17.951364,0.533431
std,85.05772,85.584999,84.50956,85.091759,42752610.0,333122.2,85.051851,0.011369,0.007053,0.006574,...,85.094061,0.011365,0.007053,0.006574,0.008382,0.125912,114.955663,3.751187,366.127331,0.499003
min,182.34,184.1,181.02,182.86,20550100.0,84307.0,182.856127,-0.107828,0.0,-0.080223,...,182.856127,-0.107828,0.0,-0.080223,-0.04581,0.0,-179.271478,-29.135,-2951.764587,0.0
25%,259.4,262.43,257.81,259.85,61281270.0,259517.0,259.650633,-0.003671,0.001207,-0.00808,...,259.731067,-0.003662,0.001206,-0.00808,-0.004186,0.0,-62.981478,-1.2,-109.662144,0.0
50%,303.47,306.205,300.68,303.34,77933380.0,393451.0,303.426502,0.000636,0.002876,-0.004318,...,303.439282,0.000636,0.002876,-0.004309,-0.000549,0.0,82.884843,0.19,11.732531,1.0
75%,410.58,413.07,408.14,410.97,101244600.0,569817.0,411.156512,0.005967,0.006499,-0.002169,...,411.187191,0.005967,0.006499,-0.002169,0.003091,0.0,148.604843,1.85,165.655461,1.0
max,509.27,510.13,507.1,507.85,394825000.0,3709929.0,508.438618,0.086761,0.065693,0.0,...,508.438618,0.086761,0.065693,0.0,0.057236,1.0,230.884843,20.56,3691.372758,1.0


In [43]:
bars_df["Action"] = np.where(
    bars_df["next close"] > bars_df["close"], 1, -1
)

In [44]:
display(bars_df.head())
display(bars_df.tail())
bars_df.describe()

Unnamed: 0,Date,open,high,low,close,volume,trade_count,vwap,pct_change,high %,...,next pct_change,next high %,next low %,next open %,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success,Action
0,2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,...,-0.023992,0.01747,-0.002371,0.006596,0,214.924843,-4.77,-1025.191499,0,-1
1,2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,...,-0.010961,0.020461,-0.001787,0.017022,0,219.694843,-2.127,-467.29093,0,-1
2,2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,...,0.000974,0.006767,-0.01192,0.004685,0,221.821843,0.187,41.480685,1,1
3,2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,...,0.008072,0.004592,-0.013017,0.000822,0,221.634843,1.5508,343.711314,1,1
4,2016-01-12 00:00:00-05:00,193.82,194.55,191.14,193.6608,175844276.0,635749.0,192.902295,0.008072,0.004592,...,-0.024945,0.031933,-0.002383,0.029762,0,220.084043,-4.8308,-1063.181993,0,-1


Unnamed: 0,Date,open,high,low,close,volume,trade_count,vwap,pct_change,high %,...,next pct_change,next high %,next low %,next open %,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success,Action
2044,2024-02-21 00:00:00-05:00,495.42,497.37,493.56,497.21,59603771.0,499405.0,495.831166,0.000906,0.000322,...,0.020695,0.001951,-0.008828,-0.006877,0,-168.631478,10.29,-1735.217911,0,1
2045,2024-02-22 00:00:00-05:00,504.01,508.49,503.02,507.5,76402535.0,613961.0,505.832024,0.020695,0.001951,...,0.00069,0.00449,-0.001477,0.002796,0,-178.921478,0.35,-62.622517,0,1
2046,2024-02-23 00:00:00-05:00,509.27,510.13,507.1,507.85,61309016.0,489047.0,508.438618,0.00069,0.00449,...,-0.003662,0.005455,-0.000257,0.004565,0,-179.271478,-1.86,333.44495,1,-1
2047,2024-02-26 00:00:00-05:00,508.3,508.75,505.86,505.99,50386738.0,434429.0,507.079425,-0.003662,0.005455,...,0.001858,0.000454,-0.0043,-0.000454,0,-177.411478,0.94,-166.76679,0,1
2048,2024-02-27 00:00:00-05:00,506.7,507.16,504.75,506.93,48831828.0,400260.0,506.231051,0.001858,0.000454,...,-0.001322,0.001175,-0.002568,-0.001837,0,-178.351478,-0.67,119.49549,1,-1


Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,next pct_change,next high %,next low %,next open %,High/Low Success,Predicted Close Delta,Next Close Delta,Close Product,Close Success,Action
count,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,...,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0,2049.0
mean,328.477434,330.334768,326.489503,328.553901,88158700.0,467915.2,328.472048,0.000516,0.005381,-0.006196,...,0.000521,0.005379,-0.006194,-0.000196,0.016105,53.234261,0.150044,17.951364,0.533431,0.088336
std,85.05772,85.584999,84.50956,85.091759,42752610.0,333122.2,85.051851,0.011369,0.007053,0.006574,...,0.011365,0.007053,0.006574,0.008382,0.125912,114.955663,3.751187,366.127331,0.499003,0.996334
min,182.34,184.1,181.02,182.86,20550100.0,84307.0,182.856127,-0.107828,0.0,-0.080223,...,-0.107828,0.0,-0.080223,-0.04581,0.0,-179.271478,-29.135,-2951.764587,0.0,-1.0
25%,259.4,262.43,257.81,259.85,61281270.0,259517.0,259.650633,-0.003671,0.001207,-0.00808,...,-0.003662,0.001206,-0.00808,-0.004186,0.0,-62.981478,-1.2,-109.662144,0.0,-1.0
50%,303.47,306.205,300.68,303.34,77933380.0,393451.0,303.426502,0.000636,0.002876,-0.004318,...,0.000636,0.002876,-0.004309,-0.000549,0.0,82.884843,0.19,11.732531,1.0,1.0
75%,410.58,413.07,408.14,410.97,101244600.0,569817.0,411.156512,0.005967,0.006499,-0.002169,...,0.005967,0.006499,-0.002169,0.003091,0.0,148.604843,1.85,165.655461,1.0,1.0
max,509.27,510.13,507.1,507.85,394825000.0,3709929.0,508.438618,0.086761,0.065693,0.0,...,0.086761,0.065693,0.0,0.057236,1.0,230.884843,20.56,3691.372758,1.0,1.0


In [45]:
current_position = 0
starting_cash = 1000
current_cash = starting_cash
max_position = 50
for index, row in bars_df.iterrows():
    # Get the current action
    close = row["close"]
    action = row["Action"]

    # Take the action if possible
    if action > 0:
        if action + current_position <= max_position and action * close < current_cash:
            current_position += action
            current_cash -= action*close
        else:
            action = 0
    elif action < 0:
        if action + current_position >= 0:
            current_position += action
            current_cash += -action*close
        else:
            action = 0

    # Update enabled and position
    bars_df.loc[index, "Position"] = current_position
    bars_df.loc[index, "Cash"] = current_cash

In [46]:
import math

# Compute Holdings
bars_df["Holdings"] = bars_df["close"] * bars_df["Position"]

# Compute Profit
bars_df["Strategy Value"] = bars_df["Holdings"] + bars_df["Cash"]
starting_close = bars_df.iloc[0]["close"] 
display(f"starting_close {starting_close}")
starting_shares = math.floor(starting_cash / starting_close)
display(f"starting_shares {starting_shares}")
bars_df["Stock Value"] =  bars_df["close"] * starting_shares

# Compute Returns
bars_df["Stock Returns"] = bars_df["close"].pct_change()
bars_df["Strategy Returns"] = bars_df["Strategy Value"].pct_change()

# Compute Cumulative Daily Returns
bars_df["Stock Cumulative Returns"] = (bars_df["Stock Returns"] + 1).cumprod()
bars_df["Strategy Cumulative Returns"] = (bars_df["Strategy Returns"] + 1).cumprod()

'starting_close 198.82'

'starting_shares 5'

In [47]:
bars_df.tail()

Unnamed: 0,Date,open,high,low,close,volume,trade_count,vwap,pct_change,high %,...,Action,Position,Cash,Holdings,Strategy Value,Stock Value,Stock Returns,Strategy Returns,Stock Cumulative Returns,Strategy Cumulative Returns
2044,2024-02-21 00:00:00-05:00,495.42,497.37,493.56,497.21,59603771.0,499405.0,495.831166,0.000906,0.000322,...,1,14.0,346.7318,6960.94,7307.6718,2486.05,0.000906,0.000801,2.500805,7.307672
2045,2024-02-22 00:00:00-05:00,504.01,508.49,503.02,507.5,76402535.0,613961.0,505.832024,0.020695,0.001951,...,1,14.0,346.7318,7105.0,7451.7318,2537.5,0.020695,0.019714,2.55256,7.451732
2046,2024-02-23 00:00:00-05:00,509.27,510.13,507.1,507.85,61309016.0,489047.0,508.438618,0.00069,0.00449,...,-1,13.0,854.5818,6602.05,7456.6318,2539.25,0.00069,0.000658,2.55432,7.456632
2047,2024-02-26 00:00:00-05:00,508.3,508.75,505.86,505.99,50386738.0,434429.0,507.079425,-0.003662,0.005455,...,1,14.0,348.5918,7083.86,7432.4518,2529.95,-0.003662,-0.003243,2.544965,7.432452
2048,2024-02-27 00:00:00-05:00,506.7,507.16,504.75,506.93,48831828.0,400260.0,506.231051,0.001858,0.000454,...,-1,13.0,855.5218,6590.09,7445.6118,2534.65,0.001858,0.001771,2.549693,7.445612


In [48]:
stock_cumulative_returns = bars_df.hvplot.line(
    x="Date",
    y="Stock Cumulative Returns",
)

In [49]:
import panel as pn
pn.config.theme = 'dark'

In [50]:
returns = bars_df.hvplot.line(
    x="Date",
    y=["Strategy Cumulative Returns", "Stock Cumulative Returns"],
).opts(
    title=f"Stock vs Strategy Returns for {symbol} using LSGAN",
    ylabel="Percent Returns"
)
returns

In [51]:
strategy_cumulative_returns = bars_df.hvplot.line(
    x="Date",
    y="Strategy Cumulative Returns",
)

In [52]:
stock_cumulative_returns * strategy_cumulative_returns

In [53]:
cash = bars_df.hvplot.line(
    x="Date",
    y="Cash",
)

In [54]:
holdings = bars_df.hvplot.line(
    x="Date",
    y="Holdings",
)

In [55]:
cash*holdings

In [56]:
position = bars_df.hvplot.line(
    x="Date",
    y=["Position"],
).opts(
    title=f"Number of shares of {symbol} using LSGAN",
    ylabel="Number of Shares"
)
position

In [57]:
position = bars_df.hvplot.line(
    x="Date",
    y="Position",
)
position

In [58]:
bars_df.describe()["High/Low Success"]

count    2049.000000
mean        0.016105
std         0.125912
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max         1.000000
Name: High/Low Success, dtype: float64