In [None]:
# Import the required libraries
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path
from finta import TA
from pandas.tseries.offsets import DateOffset
import os
import requests
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout

%matplotlib inline

In [None]:
# Load the pre-computed AAPL indicator table, beginning with the 1-minute file.
df = pd.read_csv(filepath_or_buffer="../algotrader2/resources/aapl_1min_indc_df.csv")
# Preview the first rows to confirm the file was read.
df.head()


In [None]:
# Inspect the 'timestamp' column as loaded from the CSV, before it is passed
# through pd.to_datetime in the following cell.
hello = df['timestamp']
hello

In [None]:
# Convert the 'timestamp' column to pandas datetimes and display the result.
# (The column is promoted to the index in the next cell, not here.)
df['timestamp'] = pd.to_datetime(df['timestamp'])
hello = df['timestamp']
hello

In [None]:
# Use the parsed timestamps as the row index so the data can be sliced by date.
df = df.set_index('timestamp')
df.head()

In [None]:
# Feature matrix: every column of df other than the 'new_signal' target.
X = df.drop(columns='new_signal')
# Optional (disabled): also remove selected indicator columns.
# X = X.drop(["SQZMI", "FVE", "STC"], axis=1)
X.head()

In [None]:
# Lag every feature by one row: after the shift, the row at timestamp t holds
# the indicator values recorded on the previous row, so the signal at t is
# predicted only from information that was already available.
# The shift leaves the first row empty; dropna() removes it together with any
# other row that contains a NaN.
X = X.shift(periods=1).dropna(axis=0, how='any')

X.head()

A possible extension is to vary the size of the shift and see how the results change: for example, predicting 5 minutes into the future, or even an hour ahead.

In [None]:
# y is the NEW signal column, restricted to the timestamps that are still
# present in X. X lost rows in shift().dropna(); taking the whole df column
# would leave y with rows that have no matching features, and the train/test
# slices of X and y could then differ in length.
y = df.loc[df.index.isin(X.index), "new_signal"]


In [None]:
# The training window starts at the earliest timestamp present in the feature index.
training_begin = X.index.min()

print(training_begin)

In [None]:
# The training window covers the first 9 months after the earliest timestamp;
# everything later is held back for testing.
training_end = training_begin + DateOffset(months=9)

print(training_end)

In [None]:
# Generate the X_train and y_train DataFrames.
# Label slicing with .loc includes BOTH endpoints, so the training window
# contains any row stamped exactly training_end.
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Generate the X_test and y_test DataFrames.
# Use a strict "later than training_end" mask instead of .loc[training_end:],
# which would put the training_end row in both the training and the test set.
X_test = X.loc[X.index > training_end]
y_test = y.loc[y.index > training_end]

In [None]:
# Standardize the features. The scaler is fitted on the training rows only and
# the same fitted scaler is then applied to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Show the (rows, columns) shape of the scaled training and test arrays.
display(X_train_scaled.shape, X_test_scaled.shape)

In [None]:
# Show the scaled training features (bare expression, rendered as the cell output).
X_train_scaled

In [None]:
# Show the scaled test features (bare expression, rendered as the cell output).
X_test_scaled

In [None]:
# Show the training targats' source: the 'new_signal' values inside the training window.
y_train

In [None]:
# Show the 'new_signal' values held back for testing.
y_test

### Now that we have scaled our data, we can build our neural network.

In [None]:
# One network input per feature column.
num_predictors = X.shape[1]

# The model predicts position 0 or 1 through a single sigmoid output, so
# num_classes is the number of output units (1), not the number of labels.
num_classes = 1

num_predictors

In [None]:
# Start an empty Sequential model; layers are added in the following cells.
nn_model = Sequential()

In [None]:
# Hidden layer: 10 ReLU units that receive every predictor as input.
nn_model.add(Dense(units=10, activation='relu', input_dim=num_predictors))

In [None]:
# Output layer: num_classes sigmoid unit(s) producing a value between 0 and 1.
nn_model.add(Dense(units=num_classes, activation="sigmoid"))

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
nn_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the model.
nn_model.summary()

In [None]:
# Number of passes over the training data.
num_epochs = 50

# validation_split=0.2 holds back 20% of the supplied training rows; the model
# is not trained on them, and loss/accuracy on them is reported after every
# epoch so overfitting can be spotted. Per the Keras `fit` documentation the
# held-back rows are taken from the end of the data, before shuffling.
nn_model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=100,
    epochs=num_epochs,
    validation_split=0.2,
    shuffle=True,
)

In [None]:
# Score the trained model on the held-out test split.
# evaluate() returns the loss followed by the compiled metric (accuracy).
model_loss, model_accuracy = nn_model.evaluate(x=X_test_scaled, y=y_test, verbose=2)

# Report both numbers.
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Now we backtest with the TEST portion of the data

In [None]:
# Predicted probabilities for the test split first, then for the training split.
nn_test_predictions, nn_train_predictions = (
    nn_model.predict(features) for features in (X_test_scaled, X_train_scaled)
)

In [None]:
# Convert probabilities to class labels (0 or 1) using 0.5 as the threshold.
# No import is needed here: numpy is already imported in the first cell, and
# this cell does not use it.
nn_test_predictions_labels = (nn_test_predictions > 0.5).astype(int)
nn_train_predictions_labels = (nn_train_predictions > 0.5).astype(int)


In [None]:
# Training split: predicted probabilities followed by the thresholded labels.
print(nn_train_predictions, nn_train_predictions_labels, sep="\n")

In [None]:
# Test split: predicted probabilities followed by the thresholded labels.
print(nn_test_predictions, nn_test_predictions_labels, sep="\n")

In [None]:
# Classification report comparing the training labels with the model's predicted labels.
train_class_report = classification_report(
    y_true=y_train,
    y_pred=nn_train_predictions_labels,
)
print(train_class_report)

In [None]:
# Classification report comparing the test labels with the model's predicted labels.
test_class_report = classification_report(
    y_true=y_test,
    y_pred=nn_test_predictions_labels,
)
print(test_class_report)

In [None]:
# # Save model history for further manipulation
# # (after fit(), Keras keeps the per-epoch metrics dict on nn_model.history.history)
# model_history = nn_model.history.history

In [None]:
# # Now we can plot the accuracy for training and validation
# # (the model is compiled with metrics=['accuracy'], so the recorded keys are 'accuracy' and 'val_accuracy')

# training_results = pd.DataFrame(index=range(1, num_epochs+1))
# training_results['Training'] = nn_model.history.history['accuracy']
# training_results['Validation'] = nn_model.history.history['val_accuracy']
# training_results.plot(title = 'Training and Validation Performance')

### Now using the 3 minute data

In [None]:
# Load the 3-minute AAPL indicator file and preview its first rows.
df = pd.read_csv(filepath_or_buffer="../algotrader2/resources/aapl_3min_indc_df.csv")
df.head()

# Alternative input (disabled): the 1-minute pivot-point indicator file.
# df = pd.read_csv("../algotrader2/resources/aapl_1min_pivot_point_indicator_df.csv")
# df.head()

In [None]:
# Parse the timestamp column and make it the DataFrame index.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Features: every column except the target, lagged by one row so the signal at
# timestamp t is predicted only from the indicator values of the previous row.
X = df.drop('new_signal', axis=1).shift()

# Keep only rows whose lagged features are complete (same rows dropna() would
# keep), and select the target with the SAME mask so X and y always contain
# exactly the same rows.
complete_rows = X.notna().all(axis=1)
X = X.loc[complete_rows]
display(X.head())
y = df.loc[complete_rows, 'new_signal']

# Set start of training period
training_begin = X.index.min()
print(f"Start date: {training_begin}")
# The training window covers the first 9 months; the rest is used for testing.
training_end = training_begin + DateOffset(months=9)
print(f"End date: {training_end}")

# Split with one boolean mask. Slicing with .loc[begin:end] and .loc[end:]
# includes the end label in both slices, which would put a row stamped exactly
# training_end in the training AND the test set.
in_training = (X.index >= training_begin) & (X.index <= training_end)

# Generate the X_train and y_train DataFrames
X_train = X.loc[in_training]
y_train = y.loc[in_training]

# Generate the X_test and y_test DataFrames (strictly later than training_end)
X_test = X.loc[~in_training]
y_test = y.loc[~in_training]

In [None]:
# Standardize the features: fit the scaler on the training rows only, then
# apply that same fitted scaler to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)
# Show the (rows, columns) shape of each scaled array.
display(X_train_scaled.shape, X_test_scaled.shape)

In [None]:
# NEURAL NETWORK
# One input per feature column and a single sigmoid output unit.
num_predictors = X.shape[1]
num_classes = 1
nn_model = Sequential()
# Hidden layer: 10 ReLU units fed by all predictors
nn_model.add(Dense(units=10, activation='relu', input_dim=num_predictors))
# Optional drop-out layer (disabled)
# nn_model.add(Dropout(.2,input_shape=(10,)))
# Optional regularized dense layer (disabled; would also need `from keras.regularizers import l2`)
#nn_model.add(Dense(5, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
# Output layer: number of outputs equals num_classes
# (disabled variant without an activation)
#nn_model.add(Dense(num_classes))
nn_model.add(Dense(units=num_classes, activation="sigmoid"))

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
nn_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=['accuracy'],
)
# Print the layer-by-layer summary of the model.
nn_model.summary()

In [None]:
# Number of passes over the training data.
num_epochs = 100

# validation_split=0.2 holds back 20% of the supplied training rows; the model
# is not trained on them, and loss/accuracy on them is reported after every
# epoch so overfitting can be spotted. Per the Keras `fit` documentation the
# held-back rows are taken from the end of the data, before shuffling.
nn_model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=100,
    epochs=num_epochs,
    validation_split=0.2,
    shuffle=True,
)

In [None]:
# Score the trained model on the held-out test split.
# evaluate() returns the loss followed by the compiled metric (accuracy).
model_loss, model_accuracy = nn_model.evaluate(x=X_test_scaled, y=y_test, verbose=2)
# Report both numbers.
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Predicted probabilities for the test split first, then for the training split.
nn_test_predictions, nn_train_predictions = (
    nn_model.predict(features) for features in (X_test_scaled, X_train_scaled)
)

In [None]:
# Turn the sigmoid outputs into 0/1 labels: anything above 0.5 becomes 1.
nn_test_predictions_labels = (nn_test_predictions > 0.5).astype(int)
nn_train_predictions_labels = (nn_train_predictions > 0.5).astype(int)

# Classification report for the training split
train_class_report = classification_report(
    y_true=y_train,
    y_pred=nn_train_predictions_labels,
)
print(train_class_report)

# Classification report for the test split
test_class_report = classification_report(
    y_true=y_test,
    y_pred=nn_test_predictions_labels,
)
print(test_class_report)

In [None]:
# # Save model history for further manipulation
# # (after fit(), Keras keeps the per-epoch metrics dict on nn_model.history.history)
# model_history = nn_model.history.history

### Now 15 minute

In [None]:
# Load the 15-minute AAPL indicator file and preview its first rows.
df = pd.read_csv(filepath_or_buffer="../algotrader2/resources/aapl_15min_indc_df.csv")
df.head()

In [None]:
# Parse the timestamp column and make it the DataFrame index.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Features: every column except the target, lagged by one row so the signal at
# timestamp t is predicted only from the indicator values of the previous row.
X = df.drop('new_signal', axis=1).shift()

# Keep only rows whose lagged features are complete (same rows dropna() would
# keep), and select the target with the SAME mask so X and y always contain
# exactly the same rows.
complete_rows = X.notna().all(axis=1)
X = X.loc[complete_rows]
display(X.head())
y = df.loc[complete_rows, 'new_signal']

# Set start of training period
training_begin = X.index.min()
print(f"Start date: {training_begin}")
# The training window covers the first 9 months; the rest is used for testing.
training_end = training_begin + DateOffset(months=9)
print(f"End date: {training_end}")

# Split with one boolean mask. Slicing with .loc[begin:end] and .loc[end:]
# includes the end label in both slices, which would put a row stamped exactly
# training_end in the training AND the test set.
in_training = (X.index >= training_begin) & (X.index <= training_end)

# Generate the X_train and y_train DataFrames
X_train = X.loc[in_training]
y_train = y.loc[in_training]
# Generate the X_test and y_test DataFrames (strictly later than training_end)
X_test = X.loc[~in_training]
y_test = y.loc[~in_training]

In [None]:
# Standardize the features: fit the scaler on the training rows only, then
# apply that same fitted scaler to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled, X_test_scaled = map(X_scaler.transform, (X_train, X_test))
# Show the (rows, columns) shape of each scaled array.
display(X_train_scaled.shape, X_test_scaled.shape)

In [None]:
# NEURAL NETWORK
# One input per feature column and a single sigmoid output unit.
num_predictors = X.shape[1]
num_classes = 1
nn_model = Sequential()
# Hidden layer: 10 ReLU units fed by all predictors
nn_model.add(Dense(units=10, activation='relu', input_dim=num_predictors))
# Optional drop-out layer (disabled)
# nn_model.add(Dropout(.2,input_shape=(10,)))
# Optional regularized dense layer (disabled; would also need `from keras.regularizers import l2`)
#nn_model.add(Dense(5, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
# Output layer: number of outputs equals num_classes
# (disabled variant without an activation)
#nn_model.add(Dense(num_classes))
nn_model.add(Dense(units=num_classes, activation="sigmoid"))

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
nn_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the model.
nn_model.summary()

In [None]:
# Number of passes over the training data.
num_epochs = 100

# validation_split=0.2 holds back 20% of the supplied training rows; the model
# is not trained on them, and loss/accuracy on them is reported after every
# epoch so overfitting can be spotted. Per the Keras `fit` documentation the
# held-back rows are taken from the end of the data, before shuffling.
nn_model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=100,
    epochs=num_epochs,
    validation_split=0.2,
    shuffle=True,
)

In [None]:
# Score the trained model on the held-out test split.
# evaluate() returns the loss followed by the compiled metric (accuracy).
model_loss, model_accuracy = nn_model.evaluate(x=X_test_scaled, y=y_test, verbose=2)

# Report both numbers.
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Predicted probabilities for the test split first, then for the training split.
nn_test_predictions, nn_train_predictions = (
    nn_model.predict(features) for features in (X_test_scaled, X_train_scaled)
)

In [None]:
# Turn the sigmoid outputs into 0/1 labels: anything above 0.5 becomes 1.
nn_test_predictions_labels = (nn_test_predictions > 0.5).astype(int)
nn_train_predictions_labels = (nn_train_predictions > 0.5).astype(int)

# Classification report for the training split
train_class_report = classification_report(
    y_true=y_train,
    y_pred=nn_train_predictions_labels,
)
print(train_class_report)

# Classification report for the test split
test_class_report = classification_report(
    y_true=y_test,
    y_pred=nn_test_predictions_labels,
)
print(test_class_report)

In [None]:
# # Save model history for further manipulation
# # (after fit(), Keras keeps the per-epoch metrics dict on nn_model.history.history)
# model_history = nn_model.history.history

In [None]:
# # Deployment (Hypothetical)
# while True:
#     current_data = your_trading_api.get_real_time_data()
#     current_data_processed = preprocess_data(current_data)
#     prediction = model.predict(current_data_processed)
#     if prediction > some_threshold:
#         your_trading_api.execute_trade()

# # Placeholder Functions
# def combine_data(historical, news):
#     # Combine and return data
#     pass

# def split_data(data):
#     # Split and return data
#     pass

# def backtest_strategy(model, data):
#     # Implement backtesting logic
#     pass

# def preprocess_data(data):
#     # Data preprocessing steps
#     pass