In [1]:
import numpy as np
import os
import pandas as pd
import tensorflow as tf
#from tensorflow.keras import layers, models, optimizers
#from tensorflow.keras.layers import Dense, Reshape, Flatten, Conv2D, Conv2DTranspose
#from tensorflow.keras.models import Sequential
#from tensorflow.keras.optimizers import Adam
from keras.optimizers.legacy import Adam
from keras.models import Sequential
from keras.layers import Dense, Reshape, Flatten, Conv2D, Conv2DTranspose


from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler

from datetime import datetime, timedelta
from dotenv import load_dotenv

from alpaca.data.requests import StockBarsRequest
from alpaca.data.historical.stock import StockHistoricalDataClient
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit

In [2]:
# Load .env file

load_dotenv()

True

In [3]:
# Set Alpaca API key and secret

ALPACA_API_KEY = os.getenv("ALPACA_API_KEY")
ALPACA_SECRET_KEY = os.getenv("ALPACA_API_SECRET")
client = StockHistoricalDataClient(ALPACA_API_KEY, ALPACA_SECRET_KEY)

# Create the Alpaca API object

timeframe = TimeFrame(5, TimeFrameUnit.Minute)
symbol = 'TSLA'
start = datetime.utcnow() - timedelta(days=5)
end=datetime.utcnow() - timedelta(days=1)
request = StockBarsRequest(symbol_or_symbols=symbol, start=start, end=end, timeframe=timeframe)

In [4]:
source_df = client.get_stock_bars(request).df.tz_convert('America/New_York', level=1)

In [5]:
display(source_df.tail())

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,trade_count,vwap
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TSLA,2024-02-21 15:10:00-05:00,192.73,192.99,192.62,192.9199,517743.0,4942.0,192.84758
TSLA,2024-02-21 15:15:00-05:00,192.88,193.285,192.57,193.0839,722334.0,6472.0,192.969811
TSLA,2024-02-21 15:20:00-05:00,193.1,193.1199,192.6,193.0515,651434.0,5880.0,192.885015
TSLA,2024-02-21 15:25:00-05:00,193.045,193.53,193.025,193.36,735144.0,6787.0,193.333905
TSLA,2024-02-21 15:30:00-05:00,193.37,193.8499,193.285,193.74,808397.0,7811.0,193.628305


In [6]:
bars_df = source_df.copy()
# pct_change is profit from last close
bars_df["pct_change"] = bars_df["close"].pct_change()
# signal for when we want to be in or out of a stock
#bars_df["signal"] = np.where(bars_df["pct_change"] > 0, 1.0, 0.0)
# reaction is the signal diff
#bars_df["reaction"] = bars_df["signal"].diff()
# action is if we could perfectly predict the next close
#bars_df["action"] = bars_df["reaction"].shift(-1)
# these values are the high, low, and open as a percentage of the current close
bars_df["high %"] = (bars_df["high"] - bars_df["close"])/bars_df["close"]
bars_df["low %"] = (bars_df["low"] - bars_df["close"])/bars_df["close"]
bars_df["open %"] = (bars_df["open"] - bars_df["close"])/bars_df["close"]

In [7]:
bars_df.info()
display(bars_df.head())

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 328 entries, ('TSLA', Timestamp('2024-02-20 04:00:00-0500', tz='America/New_York')) to ('TSLA', Timestamp('2024-02-21 15:30:00-0500', tz='America/New_York'))
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   open         328 non-null    float64
 1   high         328 non-null    float64
 2   low          328 non-null    float64
 3   close        328 non-null    float64
 4   volume       328 non-null    float64
 5   trade_count  328 non-null    float64
 6   vwap         328 non-null    float64
 7   pct_change   327 non-null    float64
 8   high %       328 non-null    float64
 9   low %        328 non-null    float64
 10  open %       328 non-null    float64
dtypes: float64(11)
memory usage: 40.0+ KB


Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,open %
symbol,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
TSLA,2024-02-20 04:00:00-05:00,196.98,198.23,196.37,197.3,45358.0,1353.0,197.439241,,0.004714,-0.004714,-0.001622
TSLA,2024-02-20 04:05:00-05:00,197.19,197.36,196.82,196.82,17128.0,523.0,197.04608,-0.002433,0.002744,0.0,0.00188
TSLA,2024-02-20 04:10:00-05:00,196.82,197.11,196.72,197.11,23857.0,548.0,196.912064,0.001473,0.0,-0.001979,-0.001471
TSLA,2024-02-20 04:15:00-05:00,197.15,197.5,197.06,197.24,17774.0,460.0,197.375357,0.00066,0.001318,-0.000913,-0.000456
TSLA,2024-02-20 04:20:00-05:00,197.18,197.29,197.0,197.0,7822.0,367.0,197.117828,-0.001217,0.001472,0.0,0.000914


In [8]:
# Cleanup DF for model

bars_df = bars_df.droplevel(level=0)
bars_df = bars_df.dropna()
bars_df.head()

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,pct_change,high %,low %,open %
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-02-20 04:05:00-05:00,197.19,197.36,196.82,196.82,17128.0,523.0,197.04608,-0.002433,0.002744,0.0,0.00188
2024-02-20 04:10:00-05:00,196.82,197.11,196.72,197.11,23857.0,548.0,196.912064,0.001473,0.0,-0.001979,-0.001471
2024-02-20 04:15:00-05:00,197.15,197.5,197.06,197.24,17774.0,460.0,197.375357,0.00066,0.001318,-0.000913,-0.000456
2024-02-20 04:20:00-05:00,197.18,197.29,197.0,197.0,7822.0,367.0,197.117828,-0.001217,0.001472,0.0,0.000914
2024-02-20 04:25:00-05:00,197.0,197.0,196.46,196.67,23537.0,625.0,196.697067,-0.001675,0.001678,-0.001068,0.001678


In [9]:
# Define constants

latent_dim = 100
height, width, channels = 28,28,1

In [10]:
# Normalize data set

scaler = StandardScaler()
bars_df_scaled = scaler.fit_transform(bars_df)


In [11]:
# Define generator

def build_generator():
    generator = Sequential([
        Dense(128, input_shape=(latent_dim,)),
        Reshape((1, 1, 128)),
        Conv2DTranspose(256, (4, 4), strides=(1, 1), padding='valid', activation='relu'),
        Conv2DTranspose(128, (4, 4), strides=(2, 2), padding='same', activation='relu'),
        Conv2DTranspose(64, (4, 4), strides=(2, 2), padding='same', activation='relu'),
        Conv2DTranspose(1, (4, 4), strides=(2, 2), padding='same', activation='tanh')
    ])
    return generator

# Define descriminator

def build_discriminator():
    discriminator = Sequential([
        Conv2D(64, (4, 4), strides=(2, 2), padding='same', input_shape=(height, width, channels)),
        Conv2D(128, (4, 4), strides=(2, 2), padding='same', activation='relu'),
        Conv2D(256, (4, 4), strides=(2, 2), padding='same', activation='relu'),
        Conv2D(512, (4, 4), strides=(2, 2), padding='same', activation='relu'),
        Flatten(),
        Dense(1)
    ])
    return discriminator

In [12]:
# build and compile generator

generator = build_generator()

#build and compile descriminator

discriminator = build_discriminator()
discriminator.compile(loss="mse", optimizer=Adam(learning_rate=0.0002,beta_1=0.5))

# Combine models

z = tf.keras.Input(shape=(latent_dim,))
img = generator(z)
discriminator.trainable = False
validity = discriminator(img)
combined = tf.keras.Model(z,validity)
combined.compile(loss="mse", optimizer=Adam(learning_rate=0.0002,beta_1=0.5))

2024-02-22 14:31:51.803538: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Max
2024-02-22 14:31:51.803562: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 64.00 GB
2024-02-22 14:31:51.803570: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 24.00 GB
2024-02-22 14:31:51.803603: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-02-22 14:31:51.803619: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [13]:
# Training loop
epochs = 100
batch_size = 128

for epoch in range(epochs):

    # Train Discriminator
    idx = np.random.randint(0, bars_df_scaled.shape[0], batch_size)
    imgs = bars_df_scaled[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    gen_imgs = generator.predict(noise)

    d_loss_real = discriminator.train_on_batch(imgs, np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch(gen_imgs, np.zeros((batch_size, 1)))
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train Generator
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = combined.train_on_batch(noise, np.ones((batch_size, 1)))

    # Print progress
    print(f"Epoch {epoch}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}")

: 

: 

In [None]:
# # Define GridSearch Paramters for both generator and discriminator

# generator_grid = {
#     "num_layers":[2,3],
#     "layer_size":[128,256],
#     "activation":["relu","tanh"],
#     "output_activation":["tanh","sigmoid"]
# }

# discriminator_grid = {
#     "num_layers":[2,3],
#     "layer_size":[128,256],
#     "activation":["relu","tanh"],
# }

In [None]:
# # Define constants

# latent_dim = 100
# input_shape = bars_df.shape[1]
# output_shape = bars_df.shape[1]
# num_samples = 1000
# gaussian_noise = np.random.normal(0,1,size=(num_samples, latent_dim))
# random_walk_noise = np.cumsum(gaussian_noise,axis=0)

In [None]:
# # define model building functions

# def build_generator(num_layers,layer_size,activation,output_activation):
#     model = models.Sequential()
#     model.add(layers.Dense(layer_size, activation = activation, input_dim=num_layers))

#     for _ in range(num_layers-1):
#         model.add(layers.Dense(layer_size, activation = activation))

#     model.add(layers.Dense(output_shape,activation = output_activation))

#     pass

# # def build_discriminator(layers,layer_size,activation):

# def build_discriminator(num_layers,layer_size,activation):
#     model = models.Sequential()
#     model.add(layers.Dense(layer_size, activation = activation, input_dim = input_shape))

#     for _ in range(num_layers - 1):
#         model.add(layers.Dense(layer_size, activation = activation))

#     model.add(layers.Dense(1))

#     pass

In [None]:
# Normalize data set

scaler = StandardScaler()
bars_df_scaled = scaler.fit_transform(bars_df)


In [None]:
from sklearn.base import BaseEstimator

class GeneratorWrapper(BaseEstimator):
    def __init__(self, num_layers, layer_size, activation, output_activation):
        self.num_layers = num_layers
        self.layer_size = layer_size
        self.activation = activation
        self.output_activation = output_activation
        self.model = None

    def fit(self, X, y=None):
        # Build generator model
        self.model = build_generator(self.num_layers, self.layer_size, self.activation, self.output_activation)
        # Compile the model
        self.model.compile(loss='binary_crossentropy', optimizer='adam')
        # Fit the model
        self.model.fit(X, X, epochs=3, batch_size=64, verbose=0)  # Assuming autoencoder-like training
        return self

class DiscriminatorWrapper(BaseEstimator):
    def __init__(self, num_layers=2, layer_size=128, activation='relu'):
        self.num_layers = num_layers
        self.layer_size = layer_size
        self.activation = activation
        self.model = None

    def fit(self, X, y=None):
        # Build discriminator model
        self.model = build_discriminator(self.num_layers, self.layer_size, self.activation)
        # Compile the model
        self.model.compile(loss='binary_crossentropy', optimizer='adam')
        # Fit the model
        self.model.fit(X, y, epochs=3, batch_size=64, verbose=0)  # Assuming binary classification
        return self

In [None]:
# # Perform girdsearch for generator

# generator_search = GridSearchCV(estimator=build_generator, param_grid=generator_grid, cv = 3)
# generator_search.fit(random_walk_noise, bars_df_scaled)
# best_generator_params = generator_search.best_params_

# # get the best output layer activation from gridsearch

# output_activation = best_generator_params.pop("output_activation")

# # Build the generator with the best parameters
# generator = build_generator(latent_dim, output_shape, output_activation, **best_generator_params)

# # Perform grid search for discriminator
# discriminator_search = GridSearchCV(estimator=build_discriminator, param_grid=discriminator_grid, cv=3)
# discriminator_search.fit(random_walk_noise,bars_df_scaled)
# best_discriminator_params = discriminator_search.best_params_

# # Build the discriminator with the best parameters
# discriminator = build_discriminator(**best_discriminator_params)



In [None]:
# Create GridSearchCV instance for the generator
generator_search = GridSearchCV(estimator=GeneratorWrapper(), param_grid=generator_grid, cv=3)
generator_search.fit(random_walk_noise, bars_df_scaled)
best_generator_params = generator_search.best_params_

# Create GridSearchCV instance for the discriminator
discriminator_search = GridSearchCV(estimator=DiscriminatorWrapper(), param_grid=discriminator_grid, cv=3)
discriminator_search.fit(bars_df_scaled)
best_discriminator_params = discriminator_search.best_params_

print("Best parameters for the generator:", best_generator_params)
print("Best parameters for the discriminator:", best_discriminator_params)

TypeError: GeneratorWrapper.__init__() missing 4 required positional arguments: 'num_layers', 'layer_size', 'activation', and 'output_activation'