In [1]:
# Imports
import pandas as pd
from pathlib import Path
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder

In [2]:
# Read the .csv file from the Resources folder into a Pandas DataFrame
bitcoin_data_df =pd.read_csv(Path('./Resources/Bitcoin_Historical.csv'))

In [3]:
# Review the DataFrame
print(bitcoin_data_df.head())
print(bitcoin_data_df.tail())

           Date     Price      Open      High       Low     Vol. Change %
0  May 13, 2021  49,688.6  49,398.2  51,337.3  46,331.1  269.65K    0.62%
1  May 12, 2021  49,384.2  56,694.5  57,938.5  49,187.0  160.74K  -12.90%
2  May 11, 2021  56,695.7  55,846.1  56,871.1  54,550.4   96.47K    1.52%
3  May 10, 2021  55,848.9  58,251.2  59,523.9  53,678.3  142.61K   -4.10%
4  May 09, 2021  58,238.3  58,840.6  59,227.0  56,414.4  103.59K   -1.02%
              Date    Price     Open     High      Low     Vol. Change %
1589  Jan 05, 2017    989.3  1,135.4  1,150.6    874.5  240.01K  -12.86%
1590  Jan 04, 2017  1,135.4  1,033.3  1,148.5  1,022.3  156.27K    9.88%
1591  Jan 03, 2017  1,033.3  1,017.0  1,035.5  1,006.5   54.79K    1.60%
1592  Jan 02, 2017  1,017.0    995.4  1,031.7    990.2   64.95K    2.17%
1593  Jan 01, 2017    995.4    963.4  1,001.6    956.1   41.15K    3.33%


In [4]:
# Review the data types associated with the columns
bitcoin_data_df.dtypes

Date        object
Price       object
Open        object
High        object
Low         object
Vol.        object
Change %    object
dtype: object

In [5]:
# Drop unnecessary information from the dataframe
bitcoin_data_df = bitcoin_data_df.drop(columns=['Date'])

# Review the DataFrame
bitcoin_data_df

Unnamed: 0,Price,Open,High,Low,Vol.,Change %
0,49688.6,49398.2,51337.3,46331.1,269.65K,0.62%
1,49384.2,56694.5,57938.5,49187.0,160.74K,-12.90%
2,56695.7,55846.1,56871.1,54550.4,96.47K,1.52%
3,55848.9,58251.2,59523.9,53678.3,142.61K,-4.10%
4,58238.3,58840.6,59227.0,56414.4,103.59K,-1.02%
...,...,...,...,...,...,...
1589,989.3,1135.4,1150.6,874.5,240.01K,-12.86%
1590,1135.4,1033.3,1148.5,1022.3,156.27K,9.88%
1591,1033.3,1017.0,1035.5,1006.5,54.79K,1.60%
1592,1017.0,995.4,1031.7,990.2,64.95K,2.17%


In [6]:
# Apply a binary "signal" where positive changes are to labeled as 1, all other to be labeled as 0
bitcoin_data_df['Signal'] = bitcoin_data_df['Change %'].apply(lambda x: '1' if x >= '0' else '0')

# Review the DataFrame
bitcoin_data_df

Unnamed: 0,Price,Open,High,Low,Vol.,Change %,Signal
0,49688.6,49398.2,51337.3,46331.1,269.65K,0.62%,1
1,49384.2,56694.5,57938.5,49187.0,160.74K,-12.90%,0
2,56695.7,55846.1,56871.1,54550.4,96.47K,1.52%,1
3,55848.9,58251.2,59523.9,53678.3,142.61K,-4.10%,0
4,58238.3,58840.6,59227.0,56414.4,103.59K,-1.02%,0
...,...,...,...,...,...,...,...
1589,989.3,1135.4,1150.6,874.5,240.01K,-12.86%,0
1590,1135.4,1033.3,1148.5,1022.3,156.27K,9.88%,1
1591,1033.3,1017.0,1035.5,1006.5,54.79K,1.60%,1
1592,1017.0,995.4,1031.7,990.2,64.95K,2.17%,1


In [7]:
# Review the data types associated with the columns
bitcoin_data_df.dtypes

# Reassign datatype for 
bitcoin_data_df = bitcoin_data_df.astype({'Signal':np.float})

# Review the data types associated with the columns
bitcoin_data_df.dtypes

Price        object
Open         object
High         object
Low          object
Vol.         object
Change %     object
Signal      float64
dtype: object

In [8]:
# Create a list of categorical variables 
categorical_variables = list(bitcoin_data_df.dtypes[bitcoin_data_df.dtypes == "object"].index)

# Display the categorical variables list
categorical_variables


['Price', 'Open', 'High', 'Low', 'Vol.', 'Change %']

In [9]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Encode the categorcal variables using OneHotEncoder
encoded_data = enc.fit_transform(bitcoin_data_df[categorical_variables])

# Create a DataFrame with the encoded variables
encoded_variables_df = pd.DataFrame(
    encoded_data,
    columns= enc.get_feature_names(categorical_variables)
)

# Review the DataFrame
encoded_variables_df

Unnamed: 0,"Price_1,004.0","Price_1,008.2","Price_1,008.3","Price_1,013.0","Price_1,016.1","Price_1,017.0","Price_1,022.6","Price_1,024.7","Price_1,031.8","Price_1,031.9",...,Change %_9.65%,Change %_9.70%,Change %_9.74%,Change %_9.77%,Change %_9.81%,Change %_9.83%,Change %_9.87%,Change %_9.88%,Change %_9.89%,Change %_9.93%
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1589,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1590,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1591,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1592,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Add the numerical variables from the original DataFrame to the one-hot encoding DataFrame
numerical_variables_df = bitcoin_data_df.drop(columns=categorical_variables)
encoded_df = pd.concat(
    [
        numerical_variables_df,
        encoded_variables_df
    ],
    axis=1
)

# Review the Dataframe
encoded_df.head()

Unnamed: 0,Signal,"Price_1,004.0","Price_1,008.2","Price_1,008.3","Price_1,013.0","Price_1,016.1","Price_1,017.0","Price_1,022.6","Price_1,024.7","Price_1,031.8",...,Change %_9.65%,Change %_9.70%,Change %_9.74%,Change %_9.77%,Change %_9.81%,Change %_9.83%,Change %_9.87%,Change %_9.88%,Change %_9.89%,Change %_9.93%
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Define target variable y
y = encoded_df['Signal']

# Display sample of target data
y[:5]

0    1.0
1    0.0
2    1.0
3    0.0
4    0.0
Name: Signal, dtype: float64

In [12]:
# Define test variable x
X = encoded_df.drop(columns=["Signal"]).copy()

# Display sample of testing data
X.head()

Unnamed: 0,"Price_1,004.0","Price_1,008.2","Price_1,008.3","Price_1,013.0","Price_1,016.1","Price_1,017.0","Price_1,022.6","Price_1,024.7","Price_1,031.8","Price_1,031.9",...,Change %_9.65%,Change %_9.70%,Change %_9.74%,Change %_9.77%,Change %_9.81%,Change %_9.83%,Change %_9.87%,Change %_9.88%,Change %_9.89%,Change %_9.93%
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 1)

In [14]:
# Create a StandardScaler instance
X_scaler = StandardScaler()

# Fit the scaler to the features training dataset
X_scaler.fit(X_train)

# Fit the scaler to the features training dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [15]:
# Define the the number of inputs (features) to the model
number_input_features = len(X_train.iloc[0])

# Review the number of features
number_input_features

8774

In [16]:
# Define the number of neurons in the output layer
number_output_neurons = 1

In [17]:
# Define the number of hidden nodes for the first hidden layer
hidden_nodes_layer1 =  (number_input_features +1) // 2

# Review the number hidden nodes in the first layer
hidden_nodes_layer1

4387

In [18]:
# Define the number of hidden nodes for the second hidden layer
hidden_nodes_layer2 =  (hidden_nodes_layer1 +1) //2

# Review the number hidden nodes in the second layer
hidden_nodes_layer2

2194

In [19]:
# Create the Sequential model instance
nn = Sequential()

In [20]:
# Add the first hidden layer
nn.add(Dense(units=hidden_nodes_layer1, input_dim = number_input_features, activation='relu'))

In [21]:
# Add the second hidden layer
nn.add(Dense(units=hidden_nodes_layer2, activation='selu'))

In [22]:
# Add the output layer to the model specifying the number of output neurons and activation function
nn.add(Dense(units=1, activation = 'linear'))

In [23]:
# Display the Sequential model summary
nn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 4387)              38495925  
_________________________________________________________________
dense_1 (Dense)              (None, 2194)              9627272   
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 2195      
Total params: 48,125,392
Trainable params: 48,125,392
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Compile the Sequential model
nn.compile(loss='binary_crossentropy', optimizer = 'adam', metrics=['accuracy'])

In [25]:
# Fit the model using 100 epochs and the training data
fit_model = nn.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [26]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)

13/13 - 2s - loss: 6.4207 - accuracy: 0.5789


In [27]:
# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Loss: 6.420733451843262, Accuracy: 0.5789473652839661


In [28]:
print("Test Model Results")

# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Test Model Results
13/13 - 1s - loss: 6.4207 - accuracy: 0.5789
Loss: 6.420733451843262, Accuracy: 0.5789473652839661


In [29]:
### Develop a secondary model for comparison ###

In [30]:
# Define the the number of inputs (features) to the model
number_input_features = len(X_train.iloc[0])

# Review the number of features
number_input_features

8774

In [31]:
# Define the number of neurons in the output layer
number_output_neurons_A1 = 1

In [32]:
# Define the number of hidden nodes for the first hidden layer
hidden_nodes_layer1_A1 = (number_input_features + 1) // 2

# Review the number of hidden nodes in the first layer
hidden_nodes_layer1_A1

4387

In [33]:
# Define the number of hidden nodes for the second hidden layer
hidden_nodes_layer2_A1 = (hidden_nodes_layer1_A1 + 1) // 2

# Review the number of hidden nodes in the second layer
hidden_nodes_layer2_A1

2194

In [34]:
# Define the number of hidden nodes for the third hidden layer
hidden_nodes_layer3_A1 = (hidden_nodes_layer2_A1 + 1) // 2

# Review the number of hidden nodes in the third layer
hidden_nodes_layer3_A1

1097

In [35]:
# Define the number of hidden nodes for the fourth hidden layer
hidden_nodes_layer4_A1 = (hidden_nodes_layer3_A1 + 1) // 2

# Review the number of hidden nodes in the fourth layer
hidden_nodes_layer4_A1

549

In [36]:
# Create the Sequential model instance
nn_A1 = Sequential()

In [37]:
# First hidden layer
nn_A1.add(Dense(units=hidden_nodes_layer1_A1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn_A1.add(Dense(units=hidden_nodes_layer2_A1, activation="selu"))

# Third hidden layer
nn_A1.add(Dense(units=hidden_nodes_layer3_A1, activation="gelu"))

# Fourth hidden layer
nn_A1.add(Dense(units=hidden_nodes_layer4_A1, activation="relu"))

# Output layer
nn_A1.add(Dense(units=1, activation="linear"))

# Check the structure of the model
nn_A1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 4387)              38495925  
_________________________________________________________________
dense_4 (Dense)              (None, 2194)              9627272   
_________________________________________________________________
dense_5 (Dense)              (None, 1097)              2407915   
_________________________________________________________________
dense_6 (Dense)              (None, 549)               602802    
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 550       
Total params: 51,134,464
Trainable params: 51,134,464
Non-trainable params: 0
_________________________________________________________________


In [38]:
# Compile the model
nn_A1.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [39]:
# Fit the model
fit_model_A1 = nn_A1.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [40]:
print("Alternative Model 1 Results")

# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn_A1.evaluate(X_test_scaled, y_test, verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Alternative Model 1 Results
13/13 - 1s - loss: 6.4207 - accuracy: 0.5789
Loss: 6.420733451843262, Accuracy: 0.5789473652839661
