In [None]:
# Standard library
import os
from pathlib import Path

# Data handling and plotting
import hvplot.pandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.tseries.offsets import DateOffset

# Market data and technical indicators (finta provides the TA module)
import alpaca_trade_api as tradeapi
from finta import TA

# Machine learning
from imblearn.under_sampling import RandomUnderSampler
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Needed for decision tree visualization
import pydotplus
from IPython.display import Image

In [None]:
# API credentials are read from the environment so that secrets never live in the notebook.
# NOTE: the key pair that used to be hardcoded in this cell must be treated as leaked and rotated.
ALPACA_API_KEY = os.environ.get("ALPACA_API_KEY")
ALPACA_SECRET_KEY = os.environ.get("ALPACA_SECRET_KEY")
if not ALPACA_API_KEY or not ALPACA_SECRET_KEY:
    raise RuntimeError(
        "Set the ALPACA_API_KEY and ALPACA_SECRET_KEY environment variables before running this cell."
    )
ALPACA_API_BASE_URL = "https://paper-api.alpaca.markets"

# Create a connection to the API
api = tradeapi.REST(ALPACA_API_KEY, ALPACA_SECRET_KEY, ALPACA_API_BASE_URL, api_version="v2")

# Set signal variable
signal = 1

# Translate the signal into an order side: 1 -> "buy", anything else -> "sell"
if signal == 1:
    orderSide = "buy"
else:
    orderSide = "sell"

# Set the ticker symbol and the number of shares to buy
ticker = "AAPL"
number_of_shares = 1

# Download 5-minute bars for the ticker (raw, i.e. unadjusted, prices)
signals_df = api.get_bars(ticker, "5Min", "2022-08-22", "2022-10-14", adjustment='raw').df

# Reorganize the DataFrame: label the columns with the ticker actually requested
# (the key used to be the unrelated literal "TSLA") ...
signals_df = pd.concat([signals_df], axis=1, keys=[ticker])

# ... then drop that outer level again, which leaves the original flat column names
signals_df.columns = signals_df.columns.droplevel(0)

# Review the DataFrame that all signals are built on
signals_df

In [None]:
# Moving averages used for the cross signals.
# longest_MA_window is reused later to skip the warm-up rows of the slowest average.
longest_MA_window = 200
for column_name, window in (("9EMA", 9), ("20EMA", 20), ("50EMA", 50)):
    signals_df[column_name] = TA.EMA(signals_df, window)
signals_df["200SMA"] = TA.SMA(signals_df, longest_MA_window)

# Remaining indicators; BBANDS and MACD return DataFrames, ATR and RSI are stored as single columns
signals_df["ATR"] = TA.ATR(signals_df)
bbands_df = TA.BBANDS(signals_df)
macd_df = TA.MACD(signals_df)
signals_df["RSI"] = TA.RSI(signals_df)

# Append the Bollinger Band and MACD columns to signals_df, then discard the "SIGNAL" column
bbands_df = pd.concat([bbands_df, macd_df], axis=1)
signals_df = pd.concat([signals_df, bbands_df], axis=1).drop(columns="SIGNAL")

# Review DataFrame
signals_df

In [None]:
# Either discrete or continuous features (technical indicators) may be used for the models
continuous_features = [
    "volume", "trade_count", "vwap",
    "9EMA", "20EMA", "50EMA", "200SMA",
    "ATR", "RSI",
    "BB_UPPER", "BB_LOWER",
    "MACD",
]
discrete_features = [
    "Bollinger_Bands_Above_Upper_BB",
    "Bollinger_Bands_Below_Lower_BB",
    "9EMA/20EMA_Cross, 9EMA>20EMA",
    "9EMA/20EMA_Cross, 9EMA<20EMA",
    "50EMA/200SMA_Cross, 50EMA>200SMA",
    "50EMA/200SMA_Cross, 50EMA<200SMA",
    "RSI_Over_70",
    "RSI_Under_30",
    "VWAP_Cross_From_Above",
    "VWAP_Cross_From_Below",
]
# Note: all_features additionally lists "BB_MIDDLE", which continuous_features does not contain
all_features = [
    "volume", "trade_count", "vwap",
    "9EMA", "20EMA", "50EMA", "200SMA",
    "ATR", "RSI",
    "BB_UPPER", "BB_MIDDLE", "BB_LOWER",
    "MACD",
    "Bollinger_Bands_Above_Upper_BB",
    "Bollinger_Bands_Below_Lower_BB",
    "9EMA/20EMA_Cross, 9EMA>20EMA",
    "9EMA/20EMA_Cross, 9EMA<20EMA",
    "50EMA/200SMA_Cross, 50EMA>200SMA",
    "50EMA/200SMA_Cross, 50EMA<200SMA",
    "RSI_Over_70",
    "RSI_Under_30",
    "VWAP_Cross_From_Above",
    "VWAP_Cross_From_Below",
]

# Start every discrete flag column at 0.0; the next cell switches individual rows on
for flag_column in discrete_features:
    signals_df[flag_column] = 0.0

# Review DataFrame
signals_df

In [None]:
# Bollinger Band flags: each column is set to 1 on candles where the condition holds and keeps
# its initial 0.0 elsewhere. Comparisons against NaN bands are False, so warm-up rows stay 0.
# Boolean-mask .loc assignment replaces the former row-by-row iterrows() loop.
signals_df.loc[signals_df["close"] < signals_df["BB_LOWER"], "Bollinger_Bands_Below_Lower_BB"] = 1
signals_df.loc[signals_df["close"] > signals_df["BB_UPPER"], "Bollinger_Bands_Above_Upper_BB"] = 1

# 9EMA / 20EMA cross.
# State series: 1.0 while the 9EMA is above the 20EMA, else 0.0; the first 9 rows are forced to 0.
# The state is built as a standalone Series and assigned as a whole column: the previous chained
# form `signals_df[col][9:] = ...` raises SettingWithCopyWarning and does nothing under
# pandas Copy-on-Write.
ema9_above_ema20 = (signals_df["9EMA"] > signals_df["20EMA"]).astype(float)
ema9_above_ema20.iloc[:9] = 0.0
# diff() turns the state into events: +1 when the 9EMA moves above the 20EMA, -1 when it moves
# back below, 0 otherwise (NaN on the very first row)
signals_df["9EMA/20EMA_Cross, 9EMA>20EMA"] = ema9_above_ema20.diff()
# The mirrored column carries the same events with the sign flipped
signals_df["9EMA/20EMA_Cross, 9EMA<20EMA"] = signals_df["9EMA/20EMA_Cross, 9EMA>20EMA"] * -1

# 50EMA / 200SMA cross, built the same way; the first 50 rows are forced to 0 and rows where
# the 200SMA is still NaN compare as False
ema50_above_sma200 = (signals_df["50EMA"] > signals_df["200SMA"]).astype(float)
ema50_above_sma200.iloc[:50] = 0.0
signals_df["50EMA/200SMA_Cross, 50EMA>200SMA"] = ema50_above_sma200.diff()
signals_df["50EMA/200SMA_Cross, 50EMA<200SMA"] = signals_df["50EMA/200SMA_Cross, 50EMA>200SMA"] * -1

# TODO: add a "MACD_Rate" feature (1 while the MACD is increasing, 0 while it is decreasing,
# then diff() to mark the turning points). Not implemented yet.

# RSI flags: 1 where the RSI is below 30 / above 70, otherwise the initial 0.0
signals_df.loc[signals_df["RSI"] < 30, "RSI_Under_30"] = 1
signals_df.loc[signals_df["RSI"] > 70, "RSI_Over_70"] = 1

# VWAP cross.
# State: 1.0 while the close is at or above the VWAP, else 0.
# After diff(), "VWAP_Cross_From_Above" is +1 on the candle where the close moves from below the
# VWAP to at/above it and -1 on the opposite move; "VWAP_Cross_From_Below" is the sign-flipped copy.
signals_df["VWAP_Cross_From_Above"] = np.where(
    signals_df["vwap"] <= signals_df["close"], 1.0, 0)
signals_df["VWAP_Cross_From_Above"] = signals_df["VWAP_Cross_From_Above"].diff()
signals_df["VWAP_Cross_From_Below"] = signals_df["VWAP_Cross_From_Above"] * -1

# "Exit" is the labeled target for ML, "Exit Price" is for use in PnL metrics.
# "Exit Price" starts as a float column because stop/target prices are written into it later.
signals_df["Exit Price"] = 0.0
signals_df["Exit"] = 0

# Review DataFrame
signals_df.head(100)

In [None]:
# Here we create the "Exit" column, our "y", for use in supervised ML,
# together with the "Exit Price" at which each labelled trade would have closed.

def label_exits(bars, start, reward=3, risk=1):
    """Label every candle from ``start`` onward by which ATR-based exit is reached first.

    For candle ``j`` the entry is its close, the stop is ``close - risk * ATR`` and the
    target is ``close + reward * ATR``. Later candles are scanned in order; the first one
    whose low is at or below the stop, or whose high is at or above the target, decides the
    label. When a single candle touches both levels the stop takes precedence.

    Parameters
    ----------
    bars : pandas.DataFrame
        Frame with "close", "high", "low" and "ATR" columns.
    start : int
        Positional index of the first candle to label; earlier rows keep label 0.
    reward, risk : numeric
        ATR multipliers for the target and the stop respectively.

    Returns
    -------
    tuple of numpy.ndarray
        ``(exit_price, exit_label)``, one entry per row of ``bars``. ``exit_label`` is 1 when
        the target is hit first, -1 when the stop is hit first and 0 when neither is reached
        (or the row is before ``start``); ``exit_price`` is the matching price, or 0.0.
    """
    close = bars["close"].to_numpy(dtype=float)
    high = bars["high"].to_numpy(dtype=float)
    low = bars["low"].to_numpy(dtype=float)
    atr = bars["ATR"].to_numpy(dtype=float)

    n_rows = len(bars)
    exit_price = np.zeros(n_rows, dtype=float)
    exit_label = np.zeros(n_rows, dtype=int)

    for j in range(start, n_rows):
        # entries are on candle close; stop and target are sized by that candle's ATR
        stop = close[j] - risk * atr[j]
        target = close[j] + reward * atr[j]

        # only candles after the entry candle can trigger an exit
        future_low = low[j + 1:]
        future_high = high[j + 1:]
        hits = np.flatnonzero((future_low <= stop) | (future_high >= target))
        if hits.size == 0:
            # neither level reached before the data ends (also the case when ATR is NaN)
            continue

        first_hit = hits[0]
        if future_low[first_hit] <= stop:
            # stop is checked first, so a candle touching both levels counts as a loss
            exit_price[j] = stop
            exit_label[j] = -1
        else:
            exit_price[j] = target
            exit_label[j] = 1

    return exit_price, exit_label


# How many rows are in the signals_df?
num_rows_in_df = signals_df.shape[0]

# reward:risk ratio
reward = 3
risk = 1

# Label from longest_MA_window onward to avoid the NaN warm-up rows of the moving averages.
# Whole columns are assigned at once: the former `signals_df["Exit"].iloc[j] = ...` was chained
# assignment, which raises SettingWithCopyWarning and does nothing under pandas Copy-on-Write.
exit_price, exit_label = label_exits(signals_df, longest_MA_window, reward, risk)
signals_df["Exit Price"] = exit_price
signals_df["Exit"] = exit_label

# Drop the beginning rows to avoid NaN values from the EMA/SMA calculations.
# .copy() makes the result an independent frame that later cells can modify safely.
signals_df = signals_df.iloc[longest_MA_window:].copy()

signals_df

In [None]:
# Check whether there are potentially unwanted zeros in the "Exit" column.
# The labelling cell writes 1 when the target is hit first and -1 when the stop is hit first;
# rows it never writes to keep the initial 0.
signals_df["Exit"].value_counts()

In [None]:
# Remove all rows whose "Exit" label is 0, i.e. keep only rows labelled 1 or -1.
# .copy() makes the filtered result an independent frame: later cells assign columns on it,
# which would otherwise trigger SettingWithCopyWarning on a slice of the previous frame.
signals_df = signals_df.loc[signals_df["Exit"] != 0].copy()
signals_df["Exit"].value_counts()

In [None]:
# Show the distribution of values in every discrete feature column
for feature_name in discrete_features:
    feature_counts = signals_df[feature_name].value_counts()
    print(feature_counts)

In [None]:
# Replace zero-valued and -1 entries with 0 in every discrete feature column,
# then print the resulting distribution of each column
for feature_name in discrete_features:
    cleaned = signals_df[feature_name].replace(-0, 0).replace(-1, 0)
    signals_df[feature_name] = cleaned
    print(cleaned.value_counts())

In [None]:
# Training window: from the earliest remaining timestamp to one calendar month after it.
# Both bounds are kept as strings for label-based slicing.
first_timestamp = signals_df.index.min()
training_begin = str(first_timestamp)
training_end = str(first_timestamp + DateOffset(months=1))

training_begin, training_end

In [None]:
# Feature matrix built from the discrete (flag) features
discrete_X = signals_df.loc[:, discrete_features]

# Target: "Exit" is 1 when the target price was reached first, -1 when the stop was reached first
discrete_y = signals_df.loc[:, "Exit"]
discrete_X.head()

Split the data into training and testing sets.

In [None]:
# Chronological split into train and test sets.
# Training rows: everything from training_begin through training_end (label slicing is inclusive).
discrete_X_train = discrete_X.loc[training_begin: training_end]
discrete_y_train = discrete_y.loc[training_begin: training_end]

# Test rows: strictly after training_end. The former `.loc[training_end:]` is inclusive as well,
# so a bar stamped exactly at training_end landed in both the training and the test set.
training_end_ts = pd.Timestamp(training_end)
discrete_X_test = discrete_X.loc[discrete_X.index > training_end_ts]
discrete_y_test = discrete_y.loc[discrete_y.index > training_end_ts]

In [None]:
# Resample the discrete training split with a seeded RandomUnderSampler (training data only)
discrete_rus = RandomUnderSampler(random_state=1)
(
    undersampled_discrete_X_train,
    undersampled_discrete_y_train,
) = discrete_rus.fit_resample(discrete_X_train, discrete_y_train)

In [None]:
# Feature matrix built from the continuous (raw indicator) features
continuous_X = signals_df.loc[:, continuous_features]

# Target: "Exit" is 1 when the target price was reached first, -1 when the stop was reached first
continuous_y = signals_df.loc[:, "Exit"]
continuous_X.head()

In [None]:
# Chronological split into train and test sets.
# Training rows: everything from training_begin through training_end (label slicing is inclusive).
continuous_X_train = continuous_X.loc[training_begin: training_end]
continuous_y_train = continuous_y.loc[training_begin: training_end]

# Test rows: strictly after training_end. The former `.loc[training_end:]` is inclusive as well,
# so a bar stamped exactly at training_end landed in both the training and the test set.
training_end_ts = pd.Timestamp(training_end)
continuous_X_test = continuous_X.loc[continuous_X.index > training_end_ts]
continuous_y_test = continuous_y.loc[continuous_y.index > training_end_ts]

In [None]:
# Standardise the continuous features: the scaler is fitted on the training split only
# and then applied to both the training and the test split
scaler = StandardScaler()
X_scaler = scaler.fit(continuous_X_train)
continuous_X_train_scaled, continuous_X_test_scaled = (
    X_scaler.transform(split) for split in (continuous_X_train, continuous_X_test)
)

In [None]:
# Resample the scaled continuous training split with a seeded RandomUnderSampler (training data only)
continuous_rus = RandomUnderSampler(random_state=1)
(
    undersampled_continuous_X_train_scaled,
    undersampled_continuous_y_train,
) = continuous_rus.fit_resample(continuous_X_train_scaled, continuous_y_train)

In [None]:
### TIMS SVC MODEL STARTS HERE
# Imports
from sklearn import svm



In [None]:
# Build an SVC with default settings and fit it on the scaled, undersampled training data
svm_model = svm.SVC().fit(
    undersampled_continuous_X_train_scaled,
    undersampled_continuous_y_train,
)

# In-sample predictions on the same data the model was fitted on
training_signal_predictions = svm_model.predict(undersampled_continuous_X_train_scaled)

# Display the first ten predictions
training_signal_predictions[:10]

In [None]:
# Classification report for the SVC on the (undersampled) training data
svm_training_report = classification_report(
    y_true=undersampled_continuous_y_train,
    y_pred=training_signal_predictions,
)
print(svm_training_report)

In [None]:
 # Use the trained model to predict the trading signals for the testing data.
# The test features were transformed with the scaler fitted on the training split
# and are not resampled.
svm_testing_signal_predictions = svm_model.predict(continuous_X_test_scaled)

In [None]:
 # Evaluate the model's ability to predict the trading signal for the testing data
svm_testing_report = classification_report(
    y_true=continuous_y_test,
    y_pred=svm_testing_signal_predictions,
)
print(svm_testing_report)

In [None]:
#ARANDIS SGD MODEL STARTS HERE
from sklearn.linear_model import SGDClassifier




In [None]:
# Build a seeded SGDClassifier and fit it on the scaled, undersampled training data
SGD_model = SGDClassifier(random_state=0)
SGD_model.fit(undersampled_continuous_X_train_scaled, undersampled_continuous_y_train)

# Predict on the training data first, then on the (non-resampled) test data
SGD_training_predictions, SGD_testing_signal_predictions = (
    SGD_model.predict(features)
    for features in (undersampled_continuous_X_train_scaled, continuous_X_test_scaled)
)

# Display the first five training predictions
SGD_training_predictions[:5]

In [None]:
# Classification report for the SGD model on the (undersampled) training data
SGD_training_report = classification_report(
    y_true=undersampled_continuous_y_train,
    y_pred=SGD_training_predictions,
)
print(SGD_training_report)

In [None]:
# Classification report for the SGD model on the test data
SGD_testing_report = classification_report(
    y_true=continuous_y_test,
    y_pred=SGD_testing_signal_predictions,
)
print(SGD_testing_report)

In [None]:
# DAVIDS RF MODEL STARTS HERE




In [None]:
# Create the decision tree classifier instance.
# Note: despite the `rf_` prefix, this is a single DecisionTreeClassifier,
# not a RandomForestClassifier. random_state=1 is passed to its constructor.
rf_model = tree.DecisionTreeClassifier(random_state=1)



In [None]:
# Fit the model on the undersampled discrete-feature training split;
# the value returned by fit() is assigned back to rf_model
rf_model = rf_model.fit(undersampled_discrete_X_train, undersampled_discrete_y_train)


In [None]:
# Predictions for the testing data (computed once; the original ran this predict call twice)
rf_testing_signal_predictions = rf_model.predict(discrete_X_test)
# `predictions` is kept as a second name for the same test-set predictions
predictions = rf_testing_signal_predictions

# Predictions for the (undersampled) training data
rf_training_signal_predictions = rf_model.predict(undersampled_discrete_X_train)


In [None]:
# Classification report for the decision tree on the (undersampled) training data
rf_training_report = classification_report(
    y_true=undersampled_discrete_y_train,
    y_pred=rf_training_signal_predictions,
)
print(rf_training_report)

In [None]:
# Classification report for the decision tree on the test data
rf_testing_report = classification_report(
    y_true=discrete_y_test,
    y_pred=rf_testing_signal_predictions,
)
print(rf_testing_report)

In [None]:
# Create DOT data.
# class_names must follow the ascending order of the fitted classes (rf_model.classes_);
# the previous hardcoded ["1", "-1"] was reversed relative to the -1/1 labels, which swapped
# the class shown on every node. Deriving the names from the model keeps them aligned.
# feature_names is passed as a plain list because some scikit-learn releases reject a pandas Index.
dot_data = tree.export_graphviz(
    rf_model,
    out_file=None,
    feature_names=list(discrete_X.columns),
    class_names=[str(label) for label in rf_model.classes_],
    filled=True,
)

# Draw graph
graph = pydotplus.graph_from_dot_data(dot_data)

# Show graph
Image(graph.create_png())



In [None]:
# When saving the image, Path() is not used because graph.write_<file_type>() must take a string object.
# `file_path` is reused for both outputs, so after this cell it holds the PNG file name.

# Saving the tree as PDF
file_path = "transactions_tree.pdf"
graph.write_pdf(file_path)

# Saving the tree as PNG
file_path = "transactions_tree.png"
graph.write_png(file_path)


In [None]:
# Pair every importance score with its feature name and rank from most to least important
importances = rf_model.feature_importances_
importances_sorted = sorted(
    zip(importances, discrete_X.columns),
    reverse=True,
)
# Show the ten highest-ranked (importance, feature) pairs
importances_sorted[:10]