In [106]:
import os
import numpy as np
import pandas as pd
import json
import tensorflow as tf
from datetime import datetime, timedelta

from dotenv import load_dotenv

from alpaca.data.requests import StockBarsRequest
from alpaca.data.historical.stock import StockHistoricalDataClient
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Reshape

import matplotlib.pyplot as plt

import hvplot.pandas

In [107]:
# Load .env file

load_dotenv()

True

In [108]:
# Set Alpaca API key and secret

ALPACA_API_KEY = os.getenv("ALPACA_API_KEY")
ALPACA_SECRET_KEY = os.getenv("ALPACA_API_SECRET")
client = StockHistoricalDataClient(ALPACA_API_KEY, ALPACA_SECRET_KEY)

# Create the Alpaca API object

timeframe = TimeFrame(1, TimeFrameUnit.Day)
symbol = 'SPY'
start = datetime.utcnow() - timedelta(days=3650)
end=datetime.utcnow() - timedelta(days=730)
request = StockBarsRequest(symbol_or_symbols=symbol, start=start, end=end, timeframe=timeframe)

In [109]:
source_df = client.get_stock_bars(request).df.tz_convert('America/New_York', level=1)

In [110]:
display(source_df.head())
display(source_df.tail())

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,trade_count,vwap
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SPY,2016-01-04 00:00:00-05:00,200.49,201.03,198.59,201.0192,225903783.0,655489.0,200.656423
SPY,2016-01-05 00:00:00-05:00,201.405,201.9,200.05,201.36,112719152.0,418709.0,201.08428
SPY,2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646
SPY,2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911
SPY,2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537


Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,trade_count,vwap
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SPY,2022-02-18 00:00:00-05:00,437.33,438.66,431.82,434.23,132619100.0,1134344.0,435.238076
SPY,2022-02-22 00:00:00-05:00,431.89,435.5,425.86,429.57,126971668.0,1213515.0,430.690333
SPY,2022-02-23 00:00:00-05:00,432.66,433.26,421.35,422.07,132864067.0,1166042.0,425.926191
SPY,2022-02-24 00:00:00-05:00,411.02,428.76,410.64,427.99,220033458.0,2316988.0,419.89052
SPY,2022-02-25 00:00:00-05:00,429.61,437.84,427.86,437.75,123640399.0,1140051.0,434.397245


In [111]:
bars_df = source_df.copy()
# pct_change is profit from last close
bars_df["pct_change"] = bars_df["close"].pct_change()
# signal for when we want to be in or out of a stock
#bars_df["signal"] = np.where(bars_df["pct_change"] > 0, 1.0, 0.0)
# reaction is the signal diff
#bars_df["reaction"] = bars_df["signal"].diff()
# action is if we could perfectly predict the next close
#bars_df["action"] = bars_df["reaction"].shift(-1)
# these values are the high, low, and open as a percentage of the current close
bars_df["high %"] = (bars_df["high"] - bars_df["close"])/bars_df["close"]
bars_df["low %"] = (bars_df["low"] - bars_df["close"])/bars_df["close"]
bars_df["open %"] = (bars_df["open"] - bars_df["close"])/bars_df["close"]

In [112]:
bars_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1549 entries, ('SPY', Timestamp('2016-01-04 00:00:00-0500', tz='America/New_York')) to ('SPY', Timestamp('2022-02-25 00:00:00-0500', tz='America/New_York'))
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   open         1549 non-null   float64
 1   high         1549 non-null   float64
 2   low          1549 non-null   float64
 3   close        1549 non-null   float64
 4   volume       1549 non-null   float64
 5   trade_count  1549 non-null   float64
 6   vwap         1549 non-null   float64
 7   pct_change   1548 non-null   float64
 8   high %       1549 non-null   float64
 9   low %        1549 non-null   float64
 10  open %       1549 non-null   float64
dtypes: float64(11)
memory usage: 182.3+ KB


In [113]:
# Cleanup DF for model

bars_df = bars_df.droplevel(level=0).dropna()
bars_df.head()

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-05 00:00:00-05:00,201.405,201.9,200.05,201.36,112719152.0,418709.0,201.08428,0.001695,0.002682,-0.006506,0.000223
2016-01-06 00:00:00-05:00,198.33,200.06,197.6,198.82,153948196.0,548386.0,198.95646,-0.012614,0.006237,-0.006136,-0.002465
2016-01-07 00:00:00-05:00,195.33,197.44,193.59,194.05,216191953.0,796451.0,195.345911,-0.023992,0.01747,-0.002371,0.006596
2016-01-08 00:00:00-05:00,195.19,195.85,191.58,191.923,216105404.0,754102.0,193.644537,-0.010961,0.020461,-0.001787,0.017022
2016-01-11 00:00:00-05:00,193.01,193.41,189.82,192.11,205368067.0,701548.0,191.757659,0.000974,0.006767,-0.01192,0.004685


In [114]:
# Normalize data set

scaler = StandardScaler()
bars_df_scaled = scaler.fit_transform(bars_df)


In [115]:
# Define constants

# input shape

input_shape = bars_df_scaled.shape[1]
latent_dim = 11
num_samples = bars_df_scaled.shape[0]
batch_size = 16


In [116]:
# Define generator (Sequential)

build_generator = Sequential([
    Dense(128,input_shape=(input_shape,), activation="relu"),
    Dense(256, activation="relu"),
    Dense(512, activation="relu"),
    Dense(input_shape, activation="sigmoid")
])

# Define descriminator

build_discriminator = Sequential([
    Dense(512, input_shape=(input_shape,), activation="relu"),
    Dense(256, activation="relu"),
    Dense(128, activation="relu"),
    Dense(1, activation="sigmoid")
])

In [117]:
# Compile generator
build_generator.compile(loss = "mse", optimizer="adam")

# Compile discriminator
build_discriminator.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [118]:
# Combine models

# Confirm that discriminator weights are not training during generators training

build_discriminator.trainable=False

# Setup for generators data

z = tf.keras.Input(shape=(latent_dim,))

# Generated data by the generator

generated_data = build_generator(z)

# Discriminators verdict

validity = build_discriminator(generated_data)

In [119]:
# Define combined models

combined = tf.keras.Model(z, validity)
combined.compile(optimizer="adam", loss="mse", metrics=["accuracy"])

# Print summary of the combined model
combined.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 11)]              0         
                                                                 
 sequential_6 (Sequential)   (None, 11)                171787    
                                                                 
 sequential_7 (Sequential)   (None, 1)                 170497    
                                                                 
Total params: 342284 (1.31 MB)
Trainable params: 171787 (671.04 KB)
Non-trainable params: 170497 (666.00 KB)
_________________________________________________________________


In [120]:
# Define training loop

epochs = 10000

generator_weights_path = "./generator_model.h5"
discriminator_weights_path = "./discriminator_model.h5"

for epoch in range(epochs):

    # Train discriminator
    # Sample real data
    idx = np.random.choice(num_samples, batch_size, replace=False)
    real_data = bars_df_scaled[idx]

 # Generate fake data (Random Walk noise)

    gaussian_noise = np.random.normal(0,1,size=(batch_size,input_shape))
    random_walk_noise = np.cumsum(gaussian_noise, axis=0)

    fake_data = build_generator.predict(random_walk_noise)

    # Train discriminator

    d_loss_real = build_discriminator.train_on_batch(real_data, np.ones((batch_size, 1)))
    d_loss_fake = build_discriminator.train_on_batch(fake_data, np.zeros((batch_size, 1)))
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    
    # Train generator 

    noise = np.random.normal(0, 1, (batch_size, input_shape))
    g_loss = combined.train_on_batch(noise, np.ones((batch_size, 1)))
    
    # Print progress

    print(f"Epoch {epoch}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}")




2024-02-26 21:04:40.463935: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2024-02-26 21:04:40.602385: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 0, Discriminator Loss: [0.83127254 0.3125    ], Generator Loss: [0.23217466473579407, 1.0]


2024-02-26 21:04:40.901925: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 1, Discriminator Loss: [0.71004203 0.46875   ], Generator Loss: [0.3342113196849823, 0.0]
Epoch 2, Discriminator Loss: [0.57701379 0.84375   ], Generator Loss: [0.5027201771736145, 0.0]
Epoch 3, Discriminator Loss: [0.60114183 0.75      ], Generator Loss: [0.5922297239303589, 0.0]
Epoch 4, Discriminator Loss: [0.53986181 0.84375   ], Generator Loss: [0.6899710893630981, 0.0]
Epoch 5, Discriminator Loss: [0.51843806 0.8125    ], Generator Loss: [0.73101407289505, 0.0]
Epoch 6, Discriminator Loss: [0.42764026 0.8125    ], Generator Loss: [0.7907794713973999, 0.0]
Epoch 7, Discriminator Loss: [0.288794 0.90625 ], Generator Loss: [0.854180097579956, 0.0]
Epoch 8, Discriminator Loss: [0.47024684 0.84375   ], Generator Loss: [0.8406004905700684, 0.0]
Epoch 9, Discriminator Loss: [0.42950429 0.75      ], Generator Loss: [0.8517748117446899, 0.0]
Epoch 10, Discriminator Loss: [0.68373026 0.8125    ], Generator Loss: [0.822345495223999, 0.0]
Epoch 11, Discriminator Loss: [0.44736055 0.812

In [121]:
# Saving Model Weights

build_generator.save("generator_model.h5")
build_discriminator.save("discriminator_model.h5")

  saving_api.save_model(


In [122]:
# Create alpaca object for testing

timeframe = TimeFrame(1, TimeFrameUnit.Day)
symbol = 'SPY'
start = datetime.utcnow() - timedelta(days=730)
end=datetime.utcnow() - timedelta(days=1)
request = StockBarsRequest(symbol_or_symbols=symbol, start=start, end=end, timeframe=timeframe)

In [123]:
# Create df for testing

test_df = client.get_stock_bars(request).df.tz_convert('America/New_York', level=1)

# data preprocessing for testing

# pct_change is profit from last close
test_df["pct_change"] = test_df["close"].pct_change()
# signal for when we want to be in or out of a stock
#bars_df["signal"] = np.where(bars_df["pct_change"] > 0, 1.0, 0.0)
# reaction is the signal diff
#bars_df["reaction"] = bars_df["signal"].diff()
# action is if we could perfectly predict the next close
#bars_df["action"] = bars_df["reaction"].shift(-1)
# these values are the high, low, and open as a percentage of the current close
test_df["high %"] = (test_df["high"] - test_df["close"])/test_df["close"]
test_df["low %"] = (test_df["low"] - test_df["close"])/test_df["close"]
test_df["open %"] = (test_df["open"] - test_df["close"])/test_df["close"]

# set timestamp as index, drop nan

test_df = test_df.droplevel(level=0).dropna()

test_df.info()
display(test_df.head())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 499 entries, 2022-03-01 00:00:00-05:00 to 2024-02-23 00:00:00-05:00
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   open         499 non-null    float64
 1   high         499 non-null    float64
 2   low          499 non-null    float64
 3   close        499 non-null    float64
 4   volume       499 non-null    float64
 5   trade_count  499 non-null    float64
 6   vwap         499 non-null    float64
 7   pct_change   499 non-null    float64
 8   high %       499 non-null    float64
 9   low %        499 non-null    float64
 10  open %       499 non-null    float64
dtypes: float64(11)
memory usage: 46.8 KB


Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-03-01 00:00:00-05:00,435.04,437.17,427.11,429.98,139991559.0,1422070.0,431.644816,-0.01523,0.016722,-0.006675,0.011768
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,-0.012606
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,0.010925
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,-0.000972
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,0.028896


In [124]:
# Normalize test data set

test_df_scaled = scaler.fit_transform(test_df)

print(test_df_scaled)
print(len(test_df_scaled))

[[ 0.44939782  0.43657467  0.28426178 ...  1.4931797   0.08293824
   1.25569624]
 [ 0.36620036  0.5174457   0.42062882 ... -0.31872284 -1.15281987
  -1.26945759]
 [ 0.61859714  0.56152834  0.48881235 ...  0.86793541  0.44787461
   1.1683306 ]
 ...
 [ 2.33084439  2.34576518  2.3160084  ... -0.87595423 -0.02318329
  -0.33644817]
 [ 2.59850961  2.69842629  2.60525329 ... -0.64063682 -0.2599813
  -0.67592284]
 [ 2.76241172  2.75043746  2.73000161 ... -0.27388488  0.91089219
   0.3262019 ]]
499


In [125]:
# Test generator against real data

#lists to store generated and timestamps

column_names=[
    "Predicted Open",
    "Predicted High",
    "Predicted Low",
    "Predicted Close",
    "Predicted Volume",
    "Predicted Trade Count",
    "Predicted VWAP",
    "Predicted pct_change",
    "Predicted high %",
    "Predicted low %",
    "Predicted open %"
    ]

# sliding window

window_size = 1

# init lists to store generated data with associated timestamps

timestamps = []
predictions = []

In [126]:
# shape of scaled test data

print(test_df_scaled.shape)

(499, 11)


In [127]:
predictions_df = pd.DataFrame(columns = column_names)

predictions_df[:] = 0

In [128]:
print(test_df.index)
print(window_size)

DatetimeIndex(['2022-03-01 00:00:00-05:00', '2022-03-02 00:00:00-05:00',
               '2022-03-03 00:00:00-05:00', '2022-03-04 00:00:00-05:00',
               '2022-03-07 00:00:00-05:00', '2022-03-08 00:00:00-05:00',
               '2022-03-09 00:00:00-05:00', '2022-03-10 00:00:00-05:00',
               '2022-03-11 00:00:00-05:00', '2022-03-14 00:00:00-04:00',
               ...
               '2024-02-09 00:00:00-05:00', '2024-02-12 00:00:00-05:00',
               '2024-02-13 00:00:00-05:00', '2024-02-14 00:00:00-05:00',
               '2024-02-15 00:00:00-05:00', '2024-02-16 00:00:00-05:00',
               '2024-02-20 00:00:00-05:00', '2024-02-21 00:00:00-05:00',
               '2024-02-22 00:00:00-05:00', '2024-02-23 00:00:00-05:00'],
              dtype='datetime64[ns, America/New_York]', name='timestamp', length=499, freq=None)
1


In [129]:
# Prediction loop

for i in range(0,len(test_df_scaled),window_size):
    print(i)
    # extract sliding window of real data

    input_data = test_df_scaled[i:i+window_size]

    # predict the next row using generator

    shaped_data = input_data.reshape(1,window_size,11)
    print(shaped_data)

    predicted_row = build_generator.predict(shaped_data)
    
    #predicted_row = build_generator.predict(input_data)

    #append generated row to list
    # predictions.append(predicted_row)

    # extract timestamp for the prediction

    if (i + window_size) < len(test_df):
        timestamp = test_df.index[i+window_size]
    else:
        timestamp = test_df.index[-1] + timedelta(days=1)
    
    print(timestamp)
    print(predicted_row.squeeze())
    predictions_df.loc[timestamp] = predicted_row.squeeze()
    print(predictions_df.head())
    

    # predictions_df = pd.concat([predictions_df,predicted_row],ignore_index=True)


0
[[[ 0.44939782  0.43657467  0.28426178  0.28516393  2.23533508
    3.67900843  0.34211291 -1.30188808  1.4931797   0.08293824
    1.25569624]]]
2022-03-02 00:00:00-05:00
[1.         1.         1.         1.         0.         1.
 1.         0.10321881 1.         1.         1.        ]
                           Predicted Open  Predicted High  Predicted Low  \
2022-03-02 00:00:00-05:00             1.0             1.0            1.0   

                           Predicted Close  Predicted Volume  \
2022-03-02 00:00:00-05:00              1.0               0.0   

                           Predicted Trade Count  Predicted VWAP  \
2022-03-02 00:00:00-05:00                    1.0             1.0   

                           Predicted pct_change  Predicted high %  \
2022-03-02 00:00:00-05:00              0.103219               1.0   

                           Predicted low %  Predicted open %  
2022-03-02 00:00:00-05:00              1.0               1.0  
1
[[[ 0.36620036  0.5174457 

2024-02-26 21:15:59.618510: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


2022-03-10 00:00:00-05:00
[1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1.]
                           Predicted Open  Predicted High  Predicted Low  \
2022-03-02 00:00:00-05:00             1.0             1.0            1.0   
2022-03-03 00:00:00-05:00             1.0             1.0            1.0   
2022-03-04 00:00:00-05:00             1.0             1.0            1.0   
2022-03-07 00:00:00-05:00             1.0             1.0            1.0   
2022-03-08 00:00:00-05:00             1.0             1.0            1.0   

                           Predicted Close  Predicted Volume  \
2022-03-02 00:00:00-05:00              1.0      0.000000e+00   
2022-03-03 00:00:00-05:00              1.0      0.000000e+00   
2022-03-04 00:00:00-05:00              1.0      1.194250e-34   
2022-03-07 00:00:00-05:00              1.0      2.936097e-35   
2022-03-08 00:00:00-05:00              1.0      0.000000e+00   

                           Predicted Trade Count  Predicted VWAP  \
2022-03-02 00:00:00-05:00    

In [130]:
index = predictions_df.index

predictions_df_inverse_scaled = scaler.inverse_transform(predictions_df)
generated_data_df = pd.DataFrame(predictions_df_inverse_scaled, columns=column_names,index=index)
generated_data_df.head()



Unnamed: 0,Predicted Open,Predicted High,Predicted Low,Predicted Close,Predicted Volume,Predicted Trade Count,Predicted VWAP,Predicted pct_change,Predicted high %,Predicted low %,Predicted open %
2022-03-02 00:00:00-05:00,452.710114,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.001612,0.013308,-0.000917,0.0093
2022-03-03 00:00:00-05:00,452.710114,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.012361,0.013308,-0.000917,0.000132
2022-03-04 00:00:00-05:00,452.710114,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.000378,0.013308,-0.000917,0.0093
2022-03-07 00:00:00-05:00,452.710114,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.012361,0.013308,-0.000917,-0.000353
2022-03-08 00:00:00-05:00,452.710114,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.000375,0.013308,-0.000917,0.0093


In [131]:
test_df.head()

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-03-01 00:00:00-05:00,435.04,437.17,427.11,429.98,139991559.0,1422070.0,431.644816,-0.01523,0.016722,-0.006675,0.011768
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,-0.012606
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,0.010925
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,-0.000972
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,0.028896


In [132]:
bars_df = pd.concat([test_df,generated_data_df],axis=1)

In [133]:
bars_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,Predicted High,Predicted Low,Predicted Close,Predicted Volume,Predicted Trade Count,Predicted VWAP,Predicted pct_change,Predicted high %,Predicted low %,Predicted open %
2022-03-01 00:00:00-05:00,435.04,437.17,427.11,429.98,139991559.0,1422070.0,431.644816,-0.01523,0.016722,-0.006675,...,,,,,,,,,,
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,...,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.001612,0.013308,-0.000917,0.0093
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,...,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.012361,0.013308,-0.000917,0.000132
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,...,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.000378,0.013308,-0.000917,0.0093
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,...,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.012361,0.013308,-0.000917,-0.000353


In [134]:
test_shift_df = test_df.copy()

test_shift_df = test_shift_df.rename(columns={
    "open": "next open",
    "high": "next high",
    "low": "next low",
    "close": "next close",
    "volumne": "next volume",
    "trade_count": "next trade_count",
    "vwap": "next vwap",
    "pct_change": "next pct_change",
    "high %": "next high %",
    "low %": "next low %",
    "open %": "next open %"
})

test_shift_df.head()


Unnamed: 0_level_0,next open,next high,next low,next close,volume,next trade_count,next vwap,next pct_change,next high %,next low %,next open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-03-01 00:00:00-05:00,435.04,437.17,427.11,429.98,139991559.0,1422070.0,431.644816,-0.01523,0.016722,-0.006675,0.011768
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,-0.012606
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,0.010925
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,-0.000972
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,0.028896


In [135]:
test_shift_df = test_shift_df.shift(-1)

In [136]:
bars_df = bars_df.dropna()
bars_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,Predicted High,Predicted Low,Predicted Close,Predicted Volume,Predicted Trade Count,Predicted VWAP,Predicted pct_change,Predicted high %,Predicted low %,Predicted open %
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,...,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.001612,0.013308,-0.000917,0.0093
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,...,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.012361,0.013308,-0.000917,0.000132
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,...,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.000378,0.013308,-0.000917,0.0093
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,...,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.012361,0.013308,-0.000917,-0.000353
2022-03-08 00:00:00-05:00,419.62,427.21,415.12,416.25,167638902.0,1950904.0,419.707824,-0.007582,0.02633,-0.002715,...,454.93576,450.518829,453.008636,85364928.0,840040.375,452.755493,0.000375,0.013308,-0.000917,0.0093


In [137]:
bars_df = pd.concat([bars_df,test_shift_df],axis=1)
bars_df = bars_df.dropna()
bars_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,...,next high,next low,next close,volume.1,next trade_count,next vwap,next pct_change,next high %,next low %,next open %
2022-03-02 00:00:00-05:00,432.37,439.72,431.57,437.89,120234911.0,1252759.0,436.251696,0.018396,0.004179,-0.014433,...,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,0.010925
2022-03-03 00:00:00-05:00,440.47,441.11,433.8,435.71,105501865.0,1039162.0,436.908456,-0.004978,0.012394,-0.004384,...,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,-0.000972
2022-03-04 00:00:00-05:00,431.75,433.37,427.88,432.17,113974262.0,1083494.0,430.920534,-0.008125,0.002777,-0.009927,...,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,0.028896
2022-03-07 00:00:00-05:00,431.55,432.3018,419.36,419.43,138047941.0,1448623.0,423.968129,-0.029479,0.030689,-0.000167,...,427.21,415.12,416.25,167638902.0,1950904.0,419.707824,-0.007582,0.02633,-0.002715,0.008096
2022-03-08 00:00:00-05:00,419.62,427.21,415.12,416.25,167638902.0,1950904.0,419.707824,-0.007582,0.02633,-0.002715,...,429.51,422.82,427.41,117271076.0,1133573.0,426.179666,0.026811,0.004913,-0.010739,-0.005311


In [138]:
bars_df["High/Low Success"] = np.where(
    (bars_df["next close"] >= bars_df["Predicted Low"]),
    np.where(
        bars_df["close"] <= bars_df["Predicted High"], 1, 0
    ), 0
)

In [139]:
bars_df["Predicted Close Delta"] = bars_df["Predicted Close"] - bars_df["close"]
bars_df["Next Close Delta"] = bars_df["next close"] - bars_df["close"]
bars_df["Close Product"] = bars_df["Next Close Delta"] * bars_df["Predicted Close Delta"]
bars_df["Close Success"] = np.where(
    bars_df["Close Product"] >= 0, 1, 0
)

In [140]:


actual_close = bars_df.hvplot.line(
    x=bars_df.index,
    y="Next Close",
)

DataError: Dimensions may not reference duplicated DataFrame columns (found duplicate 'volume' columns). If you want to plot a column against itself simply declare two dimensions with the same name. 

PandasInterface expects tabular data, for more information on supported datatypes see http://holoviews.org/user_guide/Tabular_Datasets.html

In [None]:
predicted_high = bars_df.hvplot.line(
    x=bars_df.index,
    y="Predicted High",
)

In [None]:
predicted_low = bars_df.hvplot.line(
    x=bars_df.index,
    y="Predicted Low",
)

In [None]:
predicted_close = bars_df.hvplot.line(
    x=bars_df.index,
    y="Predicted Close",
)

In [None]:
actual_close * predicted_high * predicted_low * predicted_close

In [None]:
bars_df.describe()

In [None]:
bars_df["Action"] = np.where(
    bars_df["Next Close"] > bars_df["Close"], 1, -1
)

In [None]:
display(bars_df.head())
display(bars_df.tail())

In [None]:
current_position = 0
starting_cash = 1000
current_cash = starting_cash
max_position = 50
for index, row in bars_df.iterrows():
    # Get the current action
    close = row["close"]
    action = row["Action"]

    # Take the action if possible
    if action > 0:
        if action + current_position <= max_position and action * close < current_cash:
            current_position += action
            current_cash -= action*close
        else:
            action = 0
    elif action < 0:
        if action + current_position >= 0:
            current_position += action
            current_cash += -action*close
        else:
            action = 0

    # Update enabled and position
    bars_df.loc[index, "Position"] = current_position
    bars_df.loc[index, "Cash"] = current_cash

In [None]:
# Compute Holdings
bars_df["Holdings"] = bars_df["close"] * bars_df["Position"]

# Compute Profit
bars_df["Strategy Value"] = bars_df["Holdings"] + bars_df["Cash"]
starting_close = bars_df.iloc[0]["close"] 
display(f"starting_close {starting_close}")
starting_shares = math.floor(starting_cash / starting_close)
display(f"starting_shares {starting_shares}")
bars_df["Stock Value"] =  bars_df["close"] * starting_shares

# Compute Returns
bars_df["Stock Returns"] = bars_df["close"].pct_change()
bars_df["Strategy Returns"] = bars_df["Strategy Value"].pct_change()

# Compute Cumulative Daily Returns
bars_df["Stock Cumulative Returns"] = (bars_df["Stock Returns"] + 1).cumprod()
bars_df["Strategy Cumulative Returns"] = (bars_df["Strategy Returns"] + 1).cumprod()

In [None]:
bars_df.tail()

In [None]:
stock_cumulative_returns = bars_df.hvplot.line(
    x=bars_df.index,
    y="Stock Cumulative Returns",
)

In [None]:
strategy_cumulative_returns = bars_df.hvplot.line(
    x=bars_df.index,
    y="Strategy Cumulative Returns",
)

In [None]:
stock_cumulative_returns * strategy_cumulative_returns

In [None]:
cash = bars_df.hvplot.line(
    x=bars_df.index,
    y="Cash",
)

In [None]:
holdings = bars_df.hvplot.line(
    x=bars_df.index,
    y="Holdings",
)

In [None]:
cash*holdings

In [None]:
position = bars_df.hvplot.line(
    x=bars_df.index,
    y="Position",
)
position