In [1]:
# Imports
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from imblearn.metrics import classification_report_imbalanced
import keras.metrics
import keras.optimizers
from tensorflow import keras

In [2]:
# Import the MACD dataset into a Pandas Dataframe
macd_df = pd.read_csv(
    Path("./Resources/macd_new.csv"),
    index_col = 'Date',
    infer_datetime_format=True, 
    parse_dates=True,
)

# Create a variable to allow line skips inside of an f-string
n='\n'

# Drop columns that are irrelevant to machine learning
macd_df.drop(columns=['Portfolio Holdings','Portfolio Cash','Portfolio Total','Portfolio Period Returns','Portfolio Cumulative Returns','value'], inplace=True)
  
# Review the macd_df DataFrame, data types, and number of columns
display(macd_df.head(),macd_df.dtypes,len(macd_df.columns))

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,hist_adj,Signal,Entry/Exit,Position,Entry/Exit Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2021-04-19 15:30:00-04:00,415.23999,414.325012,415.339996,14176944,414.140015,0.621316,-0.520959,1.142274,0.071674,1.0,1.0,1.0,1.0
2021-04-20 09:30:00-04:00,413.920013,413.910004,414.679993,11437142,413.660004,0.463053,-0.543377,1.00643,-0.022418,0.0,-1.0,0.0,-1.0
2021-04-20 10:30:00-04:00,411.575012,413.92099,413.929993,17431474,411.119995,0.146716,-0.687771,0.834487,-0.144395,0.0,0.0,0.0,0.0
2021-04-20 11:30:00-04:00,412.144989,411.575012,412.399994,10103321,410.619995,-0.05733,-0.713454,0.656124,-0.025683,0.0,0.0,0.0,0.0
2021-04-20 12:30:00-04:00,411.290009,412.149994,412.220001,5748608,411.25,-0.284746,-0.752696,0.46795,-0.039242,0.0,0.0,0.0,0.0


Close                  float64
Open                   float64
High                   float64
Volume                   int64
Low                    float64
MACD_12_26_9           float64
MACDh_12_26_9          float64
MACDs_12_26_9          float64
hist_adj               float64
Signal                 float64
Entry/Exit             float64
Position               float64
Entry/Exit Position    float64
dtype: object

13

In [3]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)
    # sparse = False, results in an array
    # sparse = True (default), results in a sparse matrix
    
# Create a list of categorical variables
categorical_variables = list(macd_df[['Entry/Exit']])
    # set categorical_variables to whatever column you'd like OneHotEncoded
# Display the categorical variables list
display(categorical_variables[0:5])

# Encode the categorcal variables using OneHotEncoder
encoded_data = enc.fit_transform(macd_df[categorical_variables])

# Create a DataFrame with the encoded variables
encoded_df = pd.DataFrame(
    encoded_data,
    columns = enc.get_feature_names_out(categorical_variables)
        # function gathers column names and assigns them to the new DataFrame
)

# Set encoded_df's index to macd_df's index
encoded_df.set_index(macd_df.index, inplace=True)

# Review encoded_data array and the encoded_df DataFrame
display(encoded_data[0:5],encoded_df.head())

['Entry/Exit']

array([[0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]])

Unnamed: 0_level_0,Entry/Exit_-1.0,Entry/Exit_0.0,Entry/Exit_1.0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-04-19 15:30:00-04:00,0.0,0.0,1.0
2021-04-20 09:30:00-04:00,1.0,0.0,0.0
2021-04-20 10:30:00-04:00,0.0,1.0,0.0
2021-04-20 11:30:00-04:00,0.0,1.0,0.0
2021-04-20 12:30:00-04:00,0.0,1.0,0.0


In [4]:
# Set the numerical variable DataFrame
side_numeric = macd_df.drop(columns=['Entry/Exit'])

# Add the numerical variables from the original DataFrame to the one-hot encoding DataFrame
macd_ohe_df = pd.concat([encoded_df,side_numeric],axis=1)

# Review the side_numeric data, macd_ohe DF, and macd_ohe data types
display(side_numeric.head(),macd_ohe_df.head(),macd_ohe_df.dtypes)

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,hist_adj,Signal,Position,Entry/Exit Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-04-19 15:30:00-04:00,415.23999,414.325012,415.339996,14176944,414.140015,0.621316,-0.520959,1.142274,0.071674,1.0,1.0,1.0
2021-04-20 09:30:00-04:00,413.920013,413.910004,414.679993,11437142,413.660004,0.463053,-0.543377,1.00643,-0.022418,0.0,0.0,-1.0
2021-04-20 10:30:00-04:00,411.575012,413.92099,413.929993,17431474,411.119995,0.146716,-0.687771,0.834487,-0.144395,0.0,0.0,0.0
2021-04-20 11:30:00-04:00,412.144989,411.575012,412.399994,10103321,410.619995,-0.05733,-0.713454,0.656124,-0.025683,0.0,0.0,0.0
2021-04-20 12:30:00-04:00,411.290009,412.149994,412.220001,5748608,411.25,-0.284746,-0.752696,0.46795,-0.039242,0.0,0.0,0.0


Unnamed: 0_level_0,Entry/Exit_-1.0,Entry/Exit_0.0,Entry/Exit_1.0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,hist_adj,Signal,Position,Entry/Exit Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2021-04-19 15:30:00-04:00,0.0,0.0,1.0,415.23999,414.325012,415.339996,14176944,414.140015,0.621316,-0.520959,1.142274,0.071674,1.0,1.0,1.0
2021-04-20 09:30:00-04:00,1.0,0.0,0.0,413.920013,413.910004,414.679993,11437142,413.660004,0.463053,-0.543377,1.00643,-0.022418,0.0,0.0,-1.0
2021-04-20 10:30:00-04:00,0.0,1.0,0.0,411.575012,413.92099,413.929993,17431474,411.119995,0.146716,-0.687771,0.834487,-0.144395,0.0,0.0,0.0
2021-04-20 11:30:00-04:00,0.0,1.0,0.0,412.144989,411.575012,412.399994,10103321,410.619995,-0.05733,-0.713454,0.656124,-0.025683,0.0,0.0,0.0
2021-04-20 12:30:00-04:00,0.0,1.0,0.0,411.290009,412.149994,412.220001,5748608,411.25,-0.284746,-0.752696,0.46795,-0.039242,0.0,0.0,0.0


Entry/Exit_-1.0        float64
Entry/Exit_0.0         float64
Entry/Exit_1.0         float64
Close                  float64
Open                   float64
High                   float64
Volume                   int64
Low                    float64
MACD_12_26_9           float64
MACDh_12_26_9          float64
MACDs_12_26_9          float64
hist_adj               float64
Signal                 float64
Position               float64
Entry/Exit Position    float64
dtype: object

In [5]:
# Create the features set selecting all features besides the target set and assign it to X
X = macd_ohe_df.drop(columns=['Entry/Exit_-1.0','Entry/Exit_0.0','Entry/Exit_1.0'])

# Create the target set selecting the Signal column and assiging it to y
# We have three targets due to multi-class classification and OneHotEncoder 
y = macd_ohe_df[['Entry/Exit_-1.0','Entry/Exit_0.0','Entry/Exit_1.0']]

# Use train_test_split to create our X and y training and testing variables
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 1)
    # test_size=x
    
# Select the start of the training period
training_begin = X.index.min() + DateOffset(hours=1)

# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=3)
    # Keep training less than 50% of total DataFrame

# Display the training begin date
print(training_begin)
print(training_end)

2021-04-19 16:30:00-04:00
2021-07-19 15:30:00-04:00


In [6]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end+DateOffset(hours=1):]
y_test = y.loc[training_end+DateOffset(hours=1):]

# Review the X_train DataFrame
X_train.head()

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,hist_adj,Signal,Position,Entry/Exit Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-04-20 09:30:00-04:00,413.920013,413.910004,414.679993,11437142,413.660004,0.463053,-0.543377,1.00643,-0.022418,0.0,0.0,-1.0
2021-04-20 10:30:00-04:00,411.575012,413.92099,413.929993,17431474,411.119995,0.146716,-0.687771,0.834487,-0.144395,0.0,0.0,0.0
2021-04-20 11:30:00-04:00,412.144989,411.575012,412.399994,10103321,410.619995,-0.05733,-0.713454,0.656124,-0.025683,0.0,0.0,0.0
2021-04-20 12:30:00-04:00,411.290009,412.149994,412.220001,5748608,411.25,-0.284746,-0.752696,0.46795,-0.039242,0.0,0.0,0.0
2021-04-20 13:30:00-04:00,411.535004,411.309692,412.100006,5744918,411.200012,-0.440132,-0.726465,0.286333,0.02623,1.0,1.0,1.0


In [7]:
# Scale the features DataFrame
# Create a StandardScaler instance
scaler = StandardScaler()

# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
# Define the the number of inputs (features) to the model
number_input_features = len(X_train.iloc[0])
    # input features should equal the number of features in X

# Define the number of neurons in the output layer
number_output_neurons = len(y.columns)
    # we use 3 output neurons because we have three targets, Entry/Exit_-1.0, Entry/Exit_0.0, and Entry/Exit_1.0.
    # should be equal to the number of target columns (len(y.columns)) we are trying to predict.

# Define the number of hidden nodes for all hidden layers
hidden_nodes_layer1 = 7
hidden_nodes_layer2 = 5

# Review the number of input features, output neurons, and hidden nodes
print(f'Number of input features:{n}{number_input_features}{n}Number of output neurons:{n}{number_output_neurons}{n}' 
    f'Hidden layer Neuron #:{n}First Layer: {hidden_nodes_layer1}{n}Second Layer: {hidden_nodes_layer2}')

Number of input features:
12
Number of output neurons:
3
Hidden layer Neuron #:
First Layer: 7
Second Layer: 5


In [9]:
# Create the Sequential model instance
nn = Sequential()

# Add the first hidden layer
nn.add(Dense(units=hidden_nodes_layer1,input_dim=number_input_features,activation='relu'))

# Add the second hidden layer
nn.add(Dense(units=hidden_nodes_layer2,activation='relu'))

# Add the output layer to the model specifying the number of output neurons and activation function
nn.add(Dense(units=number_output_neurons,activation='softmax'))
    # choice is between softmax and sigmoid

# Display the Sequential model summary
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 7)                 91        
                                                                 
 dense_1 (Dense)             (None, 5)                 40        
                                                                 
 dense_2 (Dense)             (None, 3)                 18        
                                                                 
Total params: 149
Trainable params: 149
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Compile the Sequential model
nn.compile(loss=keras.losses.BinaryCrossentropy(), optimizer=keras.optimizers.Adam(), metrics=[keras.metrics.CategoricalAccuracy()])
    # loss options: keras.losses.CategoricalCrossentropy(), keras.losses.BinaryCrossentropy()

# Fit the model using X epochs and the training data
nn.fit(X_train_scaled,y_train,epochs=500)
    # make sure to use X_train_scaled rather than X_train
    # verbose=3, reduces the graphics displayed per epoch. in turn this increases the overall speed of the epochs.

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x23ffe4d3b88>

In [11]:
# Evaluate the model's keras.metrics.CategoricalAccuracy() metrics using the evaluate method and the test data
nn_macd_keras = nn.evaluate(X_test_scaled,y_test)
    # is there a way to see the metrics of each output neuron individually?



In [12]:
# Clean the original macd_df for log reg and random forest.
# Create the features set selecting all features besides the target set and assign it to X
X = macd_df.drop(columns=['Entry/Exit'])

# Create the target set selecting the Signal column (target) and assiging it to y
y = macd_df['Entry/Exit']

# Use train_test_split to create our X and y training and testing variables
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 1)
    # test_size=
    
# Select the start of the training period
training_begin = X.index.min() + DateOffset(hours=1)

# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=3)
    # Keep training less than 50% of total DataFrame

# Display the training beginning and end date
print(training_begin)
print(training_end)

2021-04-19 16:30:00-04:00
2021-07-19 15:30:00-04:00


In [13]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end+DateOffset(hours=1):]
y_test = y.loc[training_end+DateOffset(hours=1):]

# Review the X_train and X_test DataFrames
display(X_train.head())
display(X_test.head())

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,hist_adj,Signal,Position,Entry/Exit Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-04-20 09:30:00-04:00,413.920013,413.910004,414.679993,11437142,413.660004,0.463053,-0.543377,1.00643,-0.022418,0.0,0.0,-1.0
2021-04-20 10:30:00-04:00,411.575012,413.92099,413.929993,17431474,411.119995,0.146716,-0.687771,0.834487,-0.144395,0.0,0.0,0.0
2021-04-20 11:30:00-04:00,412.144989,411.575012,412.399994,10103321,410.619995,-0.05733,-0.713454,0.656124,-0.025683,0.0,0.0,0.0
2021-04-20 12:30:00-04:00,411.290009,412.149994,412.220001,5748608,411.25,-0.284746,-0.752696,0.46795,-0.039242,0.0,0.0,0.0
2021-04-20 13:30:00-04:00,411.535004,411.309692,412.100006,5744918,411.200012,-0.440132,-0.726465,0.286333,0.02623,1.0,1.0,1.0


Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,hist_adj,Signal,Position,Entry/Exit Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-07-20 09:30:00-04:00,429.019989,425.679993,429.470001,22332446,424.829987,-2.727512,-0.308362,-2.41915,0.506535,1.0,1.0,0.0
2021-07-20 10:30:00-04:00,429.920013,429.019989,430.559998,11728727,428.809998,-2.288172,0.104782,-2.392954,0.413144,1.0,1.0,0.0
2021-07-20 11:30:00-04:00,431.220001,429.920013,431.440002,9536033,429.839996,-1.814182,0.463018,-2.2772,0.358236,1.0,1.0,0.0
2021-07-20 12:30:00-04:00,430.959991,431.220001,432.079987,9371889,430.839996,-1.442889,0.667449,-2.110338,0.204431,1.0,1.0,0.0
2021-07-20 13:30:00-04:00,431.799988,430.959991,431.829987,7831322,430.73999,-1.068539,0.833439,-1.901978,0.165991,1.0,1.0,0.0


In [14]:
# Scale the features DataFrames
# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)

## Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [15]:
# Assign X_train_scaled data to X_train's columns
X_train_scaled_col = pd.DataFrame(X_train_scaled, columns=X_train.columns)

In [16]:
# Instantiate RandomForestClassifier classifier model instance
macd_tree = RandomForestClassifier(random_state=1)
 
# Fit the model to the data using the training data
macd_tree.fit(X_train_scaled_col,y_train)
 
# Use the testing data to make the model predictions
y_macd_tree_pred = macd_tree.predict(X_test)

# Create and save confusion matrix and classification report to a variable name
macd_tree_class = classification_report(y_test,y_macd_tree_pred)
macd_tree_matrix = confusion_matrix(y_test,y_macd_tree_pred)

In [17]:
# Instantiate the random oversampler model
random_sampler = RandomOverSampler(random_state=1)

# Fit the original training data to the random_oversampler model
X_resampled, y_resampled = random_sampler.fit_resample(X_train,y_train)

In [18]:
# Instantiate oversampled RandomForestClassifier classifier model instance
macd_tree_os = RandomForestClassifier(random_state=1)
 
# Fit the model to the data using the training data
macd_tree_os.fit(X_resampled,y_resampled)
 
# Use the testing data to make the model predictions
y_macd_tree_pred_os = macd_tree_os.predict(X_test)

# Create and save confusion matrix and classification report to a variable name
macd_tree_class_os = classification_report_imbalanced(y_test,y_macd_tree_pred_os)
macd_tree_matrix_os = confusion_matrix(y_test,y_macd_tree_pred_os)

In [19]:
# Instantiate LogisticRegression classifier model instance
macd_log = LogisticRegression(random_state=1)
 
# Fit the model to the data using the training data
macd_log.fit(X_train_scaled,y_train)
 
# Use the testing data to make the model predictions
y_macd_log_pred = macd_log.predict(X_test_scaled)

# Create and save confusion matrix and classification report to a variable name
macd_log_matrix = confusion_matrix(y_test,y_macd_log_pred)
macd_log_class = classification_report(y_test,y_macd_log_pred)

In [20]:
# Import the BTC dataset into a Pandas Dataframe
btc_df = pd.read_csv(
    Path("./Resources/bitcoin_new.csv"),
    index_col = 'Date',
    infer_datetime_format=True, 
    parse_dates=True
)

# Drop columns that are irrelevant to machine learning
btc_df.drop(columns=['value','Portfolio Cumulative Returns','Portfolio Period Returns','Portfolio Total','Portfolio Cash','Portfolio Holdings'], inplace=True)

# Review the btc_df DataFrame, data types, and number of columns
display(btc_df.head(),btc_df.dtypes,len(btc_df.columns))

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,hist_adj,Signal,Entry/Exit,Position,Entry/Exit Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2021-04-16 03:00:00+00:00,63060.675781,62897.472656,63242.539062,645509120,62867.0,76.206577,3.677745,72.528833,-14.900307,0.0,0.0,0.0,0.0
2021-04-16 04:00:00+00:00,61900.785156,63024.421875,63024.421875,137613312,61900.785156,-28.142434,-80.537014,52.394579,-84.214758,0.0,0.0,0.0,0.0
2021-04-16 05:00:00+00:00,61948.710938,61817.359375,62318.277344,1772748800,61695.523438,-105.753508,-126.51847,20.764962,-45.981456,0.0,0.0,0.0,0.0
2021-04-16 06:00:00+00:00,61562.84375,61943.515625,61943.515625,1777618944,61289.355469,-196.136164,-173.520901,-22.615263,-47.002431,0.0,0.0,0.0,0.0
2021-04-16 07:00:00+00:00,61626.21875,61558.09375,61724.355469,550719488,61278.59375,-259.658061,-189.634238,-70.023823,-16.113337,0.0,0.0,0.0,0.0


Close                  float64
Open                   float64
High                   float64
Volume                   int64
Low                    float64
MACD_12_26_9           float64
MACDh_12_26_9          float64
MACDs_12_26_9          float64
hist_adj               float64
Signal                 float64
Entry/Exit             float64
Position               float64
Entry/Exit Position    float64
dtype: object

13

In [21]:
# Encode the categorcal variables using OneHotEncoder
encoded_data = enc.fit_transform(btc_df[categorical_variables])

# Create a list of categorical variables
# We are replicating the encoding of the macd_df DataFrame used for the Neural Network
categorical_variables = list(btc_df[['Entry/Exit']])

# Create a DataFrame with the encoded variables
encoded_df = pd.DataFrame(
    encoded_data,
    columns = enc.get_feature_names_out(categorical_variables)
        # function gathers column names and assigns them to the new DataFrame
)

# Set encoded_df's index to btc_df's index
encoded_df.set_index(btc_df.index, inplace=True)

# Review encoded_data array and the encoded_df DataFrame
display(encoded_data[0:5],encoded_df.head())

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]])

Unnamed: 0_level_0,Entry/Exit_-1.0,Entry/Exit_0.0,Entry/Exit_1.0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-04-16 03:00:00+00:00,0.0,1.0,0.0
2021-04-16 04:00:00+00:00,0.0,1.0,0.0
2021-04-16 05:00:00+00:00,0.0,1.0,0.0
2021-04-16 06:00:00+00:00,0.0,1.0,0.0
2021-04-16 07:00:00+00:00,0.0,1.0,0.0


In [22]:
# Set the numerical variable DataFrame
side_numeric = btc_df.drop(columns=['Entry/Exit'])

# Add the numerical variables from the original DataFrame to the one-hot encoding DataFrame
btc_ohe_df = pd.concat([encoded_df,side_numeric],axis=1)

# Review the side_numeric data, btc_ohe_df DF, and btc_ohe_df data types
display(side_numeric.head(),btc_ohe_df.head(),btc_ohe_df.dtypes)

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,hist_adj,Signal,Position,Entry/Exit Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-04-16 03:00:00+00:00,63060.675781,62897.472656,63242.539062,645509120,62867.0,76.206577,3.677745,72.528833,-14.900307,0.0,0.0,0.0
2021-04-16 04:00:00+00:00,61900.785156,63024.421875,63024.421875,137613312,61900.785156,-28.142434,-80.537014,52.394579,-84.214758,0.0,0.0,0.0
2021-04-16 05:00:00+00:00,61948.710938,61817.359375,62318.277344,1772748800,61695.523438,-105.753508,-126.51847,20.764962,-45.981456,0.0,0.0,0.0
2021-04-16 06:00:00+00:00,61562.84375,61943.515625,61943.515625,1777618944,61289.355469,-196.136164,-173.520901,-22.615263,-47.002431,0.0,0.0,0.0
2021-04-16 07:00:00+00:00,61626.21875,61558.09375,61724.355469,550719488,61278.59375,-259.658061,-189.634238,-70.023823,-16.113337,0.0,0.0,0.0


Unnamed: 0_level_0,Entry/Exit_-1.0,Entry/Exit_0.0,Entry/Exit_1.0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,hist_adj,Signal,Position,Entry/Exit Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2021-04-16 03:00:00+00:00,0.0,1.0,0.0,63060.675781,62897.472656,63242.539062,645509120,62867.0,76.206577,3.677745,72.528833,-14.900307,0.0,0.0,0.0
2021-04-16 04:00:00+00:00,0.0,1.0,0.0,61900.785156,63024.421875,63024.421875,137613312,61900.785156,-28.142434,-80.537014,52.394579,-84.214758,0.0,0.0,0.0
2021-04-16 05:00:00+00:00,0.0,1.0,0.0,61948.710938,61817.359375,62318.277344,1772748800,61695.523438,-105.753508,-126.51847,20.764962,-45.981456,0.0,0.0,0.0
2021-04-16 06:00:00+00:00,0.0,1.0,0.0,61562.84375,61943.515625,61943.515625,1777618944,61289.355469,-196.136164,-173.520901,-22.615263,-47.002431,0.0,0.0,0.0
2021-04-16 07:00:00+00:00,0.0,1.0,0.0,61626.21875,61558.09375,61724.355469,550719488,61278.59375,-259.658061,-189.634238,-70.023823,-16.113337,0.0,0.0,0.0


Entry/Exit_-1.0        float64
Entry/Exit_0.0         float64
Entry/Exit_1.0         float64
Close                  float64
Open                   float64
High                   float64
Volume                   int64
Low                    float64
MACD_12_26_9           float64
MACDh_12_26_9          float64
MACDs_12_26_9          float64
hist_adj               float64
Signal                 float64
Position               float64
Entry/Exit Position    float64
dtype: object

In [23]:
# Create the features set selecting all features besides the target set and assign it to X
X_test = btc_ohe_df.drop(columns=['Entry/Exit_-1.0','Entry/Exit_0.0','Entry/Exit_1.0'])

# Create the target set selecting the Signal column and assiging it to y
# We have three targets due to multi-class classification and OneHotEncoder 
y_test = btc_ohe_df[['Entry/Exit_-1.0','Entry/Exit_0.0','Entry/Exit_1.0']]

In [24]:
# Apply the scaler model to fit_transform the X-test data
# Transform the X_test DataFrame using the X_scaler
X_scaled = X_scaler.fit_transform(X_test)

In [25]:
# predict the new data using tthe macd nn
y_btc_pred = nn.predict(X_scaled)

# turn y_btc_pred into a Dataframe then save as csv
y_btc_df = pd.DataFrame(y_btc_pred,columns=['Sell','Hold','Buy'])
y_btc_df.to_csv('Resources/btc_nn_signal.csv')

In [26]:
# Evaluate the btc neural network's keras.metrics.CategoricalAccuracy() metrics using the evaluate method and the test data
nn_macd_btc = nn.evaluate(X_scaled,y_test)



In [27]:
# Display all results found in notebook
a = '                 '

print(f'NN -- BTC -- macd:{n}Loss:{a}Categorical_accuracy:{n}{nn_macd_btc}{n}{n}NN -- macd:{n}Loss:{a}Categorical_accuracy:{n}{nn_macd_keras}{n}Number of input features:{n}{number_input_features}{n}'
    f'Number of output neurons:{n}{number_output_neurons}{n}Hidden layer Neuron #:{n}First Layer: {hidden_nodes_layer1}{n}Second Layer: {hidden_nodes_layer2}{n}LogReg:{n}{macd_log_class}{n}'
    f'{macd_log_matrix}{n}Random Forest:{n}{macd_tree_class}{n}{macd_tree_matrix}{n}Random Forest Oversampled:{n}{macd_tree_class_os}{n}{macd_tree_matrix_os}')

NN -- BTC -- macd:
Loss:                 Categorical_accuracy:
[0.00036099759745411575, 0.9996464848518372]

NN -- macd:
Loss:                 Categorical_accuracy:
[0.00042263854993507266, 1.0]
Number of input features:
12
Number of output neurons:
3
Hidden layer Neuron #:
First Layer: 7
Second Layer: 5
LogReg:
              precision    recall  f1-score   support

        -1.0       1.00      1.00      1.00       138
         0.0       1.00      1.00      1.00      1024
         1.0       1.00      1.00      1.00       138

    accuracy                           1.00      1300
   macro avg       1.00      1.00      1.00      1300
weighted avg       1.00      1.00      1.00      1300

[[ 138    0    0]
 [   0 1024    0]
 [   0    0  138]]
Random Forest:
              precision    recall  f1-score   support

        -1.0       1.00      1.00      1.00       138
         0.0       1.00      1.00      1.00      1024
         1.0       1.00      1.00      1.00       138

    accuracy     