In [None]:
# Imports
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from imblearn.metrics import classification_report_imbalanced
import keras.metrics

In [None]:
# Load the Bitcoin CSV, using the 'Date' column as a parsed datetime index.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0, where it no longer has any effect and only raises a warning.
btc_df = pd.read_csv(
    Path("./Resources/bitcoin.csv"),
    index_col="Date",
    parse_dates=True,
)

# Review the DataFrame
btc_df.head()

In [None]:
# Inspect the dtype pandas assigned to each column of the loaded data
btc_df.dtypes

In [None]:
# Names of the categorical columns to one-hot encode: only 'custom_signal'.
# Selecting the column from btc_df first means a missing column fails here.
categorical_variables = btc_df[['custom_signal']].columns.tolist()

# Show (at most) the first six names
display(categorical_variables[:6])

In [None]:
# Create a OneHotEncoder instance that returns a dense array instead of a
# sparse matrix.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so try the new spelling first and fall back to the
# old one on releases that do not know it yet.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

In [None]:
# One-hot encode the categorical columns (fits the encoder and transforms in one step)
categorical_frame = btc_df[categorical_variables]
encoded_data = enc.fit_transform(categorical_frame)

# Peek at the first encoded row
encoded_data[:1]

In [None]:
# Column names for the encoded array, as "<source column>_<category>".
# `get_feature_names` was removed in scikit-learn 1.2; use its replacement
# `get_feature_names_out` when available and fall back on older releases.
if hasattr(enc, "get_feature_names_out"):
    encoded_columns = enc.get_feature_names_out(categorical_variables)
else:
    encoded_columns = enc.get_feature_names(categorical_variables)

# Build the encoded DataFrame directly on btc_df's index so its rows line up
# with the original data (no in-place mutation afterwards).
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoded_columns,
    index=btc_df.index,
)

# Review the DataFrame
encoded_df.head()

In [None]:
#encoded_df.drop(columns=['Stoch_Entry/Exit_nan','MACD_Entry/Exit_nan'],inplace=True)

# Review the DataFrame
#encoded_df.head()

In [None]:
# Everything except the categorical 'custom_signal' column; these columns are
# later concatenated with the one-hot encoded frame.
side_numeric = btc_df.drop('custom_signal', axis='columns')

# Review the DataFrame
side_numeric.head()

In [None]:
# Put the one-hot encoded columns and the remaining columns side by side
btc_ohe_df = pd.concat((encoded_df, side_numeric), axis='columns')

# Review the number of columns
btc_ohe_df.shape[1]

In [None]:
# Features: every column except the three one-hot encoded target columns
X = btc_ohe_df.drop(
    labels=['custom_signal_-2', 'custom_signal_0', 'custom_signal_2'],
    axis=1,
)

# Review the number of columns
X.shape[1]

In [None]:
# Target: the three one-hot encoded 'custom_signal' columns
y = btc_ohe_df.loc[:, ['custom_signal_-2', 'custom_signal_0', 'custom_signal_2']]

# Review the number of columns
y.shape[1]

In [None]:
# Random (shuffled) split with the default test size.
# NOTE(review): X_train/X_test/y_train/y_test are reassigned by the date-based
# split in the following cells before any model uses them.
random_split = train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = random_split

# Review the DataFrame
X_test.head()

In [None]:
# Training starts one hour after the earliest timestamp in the index
earliest_timestamp = X.index.min()
training_begin = earliest_timestamp + DateOffset(hours=1)

# Display the training begin date
print(training_begin)

In [None]:
# Training ends three months after the earliest timestamp in the index
# (keep training less than 50% of the total DataFrame)
three_months = DateOffset(months=3)
training_end = X.index.min() + three_months

# Display the training end date
print(training_end)

In [None]:
# Training window: label-based slice from training_begin to training_end
training_window = slice(training_begin, training_end)
X_train = X.loc[training_window]
y_train = y.loc[training_window]

# Review the X_train DataFrame
X_train.head()

In [None]:
# Test window: everything from one hour after the end of training onwards
testing_begin = training_end + DateOffset(hours=1)
X_test = X.loc[testing_begin:]
y_test = y.loc[testing_begin:]

# Review the X_test DataFrame
X_test.head()

In [None]:
# Standardize the features: fit on the training window only, then apply the
# same fitted scaler to both the training and the test features.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# Neural network for the one-hot encoded custom_signal target
# Number of inputs (features) to the model = number of columns in X_train.
# Reading it from .shape works even if the training slice has no rows and
# avoids building a row Series just to count its entries.
number_input_features = X_train.shape[1]

# Review the number of features
number_input_features

In [None]:
# Define the number of neurons in the output layer
number_output_neurons = 3
    # one neuron per target column: y holds the three one-hot encoded columns
    # custom_signal_-2, custom_signal_0 and custom_signal_2.

In [None]:
n = '\n'

# Each hidden layer is sized as the integer midpoint between the previous
# layer's width (starting from the input width) and the output width.
layer_widths = []
previous_width = number_input_features
for _ in range(5):
    previous_width = (previous_width + number_output_neurons) // 2
    layer_widths.append(previous_width)

(
    hidden_nodes_layer1,
    hidden_nodes_layer2,
    hidden_nodes_layer3,
    hidden_nodes_layer4,
    hidden_nodes_layer5,
) = layer_widths

# Report the width of every hidden layer, one per line
layer_ordinals = ('first', 'second', 'third', 'fourth', 'fifth')
width_report = n.join(
    f'# of neurons in the {ordinal} hidden layer: {width}'
    for ordinal, width in zip(layer_ordinals, layer_widths)
)
print(width_report + n)

In [None]:
# Create an empty Sequential model; the layers are appended to it by the cells below
nn = Sequential()

In [None]:
# First hidden layer: also declares the input width of the network
nn.add(
    Dense(
        hidden_nodes_layer1,
        activation='relu',
        input_dim=number_input_features,
    )
)

In [None]:
# Second hidden layer (ReLU)
nn.add(Dense(hidden_nodes_layer2, activation='relu'))

In [None]:
# Third hidden layer (ReLU)
nn.add(Dense(hidden_nodes_layer3, activation='relu'))

In [None]:
# Add the fourth hidden layer
#nn.add(Dense(units=hidden_nodes_layer4,activation='relu'))

In [None]:
# Add the fifth hidden layer
#nn.add(Dense(units=hidden_nodes_layer5,activation='relu'))

In [None]:
# Output layer: one softmax neuron per target column
nn.add(Dense(number_output_neurons, activation='softmax'))

In [None]:
# Display the Sequential model summary (the layers added so far)
nn.summary()

In [None]:
# Compile the Sequential model.
# The loss and the metric are taken from `tf.keras`, the same namespace that
# provides `Sequential` and `Dense`, instead of the standalone `keras`
# package: the two can be different Keras versions, and `keras.losses` was
# only reachable as a side effect of `import keras.metrics`.
nn.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer='adam',
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

    # metric exploration

In [None]:
# Fit the model on the scaled training data for 100 epochs
nn.fit(X_train_scaled,y_train,epochs=100, verbose=3)
    # make sure to use X_train_scaled rather than X_train
    # NOTE(review): verbose=3 is meant to reduce the per-epoch output; Keras documents only 0, 1 and 2 — confirm the behaviour on the installed version.

In [None]:
# Evaluate the model on the scaled test data; `evaluate` returns the loss
# followed by the metrics configured in `compile`.
nn_btc_keras = nn.evaluate(X_test_scaled, y_test, verbose=3)

# Display the model loss and accuracy results.
# (print() returns None, so its result is not assigned to a variable.)
print(f"KERAS: {nn_btc_keras}")

In [None]:
# Loss: 0.2134855091571808, Accuracy: 0.9853846430778503
# # of neurons in the first hidden layer: 14
# of neurons in the second hidden layer: 8
# of neurons in the third hidden layer: 5
# of neurons in the fourth hidden layer: 4
# of neurons in the fifth hidden layer: 3
# relu activations functions with softmax for the output layer
# loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy, mse']
# 3 output neurons

In [None]:
# Second experiment: start again from the original DataFrame (first rows)
btc_df.head()

In [None]:
# Column dtypes of the original DataFrame
btc_df.dtypes

In [None]:
# Names of the indicator signal columns to one-hot encode
signal_columns = btc_df[[
    'MACD_Signal',
    'RSI_Signal',
    'Stoch_Signal',
    'MACD_Entry/Exit',
    'Stoch_Entry/Exit',
]]
categorical_variables = signal_columns.columns.tolist()
categorical_variables

In [None]:
# One-hot encode the selected signal columns (this refits the shared encoder)
categorical_frame = btc_df[categorical_variables]
encoded_data = enc.fit_transform(categorical_frame)

# Peek at the first five encoded rows
encoded_data[:5]

In [None]:
# Column names for the encoded array, as "<source column>_<category>".
# `get_feature_names` was removed in scikit-learn 1.2; use its replacement
# `get_feature_names_out` when available and fall back on older releases.
if hasattr(enc, "get_feature_names_out"):
    encoded_columns = enc.get_feature_names_out(categorical_variables)
else:
    encoded_columns = enc.get_feature_names(categorical_variables)

# Build the encoded DataFrame directly on btc_df's index so its rows line up
# with the original data (no in-place mutation afterwards).
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoded_columns,
    index=btc_df.index,
)

# Review the DataFrame
encoded_df.head()

In [None]:
# Add the numerical variables from the original DataFrame to the one-hot encoding DataFrame
#side_numeric = btc_df.drop(columns=['MACD_Signal_-1.0','MACD_Signal_1.0','RSI_Signal_-1.0','RSI_Signal_0.0','RSI_Signal_1.0','Stoch_Signal_-1.0','Stoch_Signal_1.0','','','','',''])
#side_numeric.head()

In [None]:
#btc_df = pd.concat([encoded_df,side_numeric],axis=1)
#btc_df.head()

In [None]:
# Display btc_df itself; the concat that would have replaced it with the
# encoded columns is commented out above, so this is still the original data.
btc_df

In [None]:
# Features: every original column except the 'custom_signal' target
X = btc_df.drop('custom_signal', axis='columns')

X.head()

In [None]:
# Target: the raw (not one-hot encoded) 'custom_signal' column as a Series
y = btc_df.loc[:, 'custom_signal']

y

In [None]:
# Random (shuffled) split with the default test size.
# NOTE(review): these four names are reassigned by the date-based split in
# the cells further down.
random_split = train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = random_split

# Number of rows in the test portion
X_test.shape[0]

In [None]:
# Class balance of the training labels (at this point y_train comes from the
# random train_test_split in the previous cell)
y_train.value_counts()

In [None]:
# Training starts one hour after the earliest timestamp in the index
earliest_timestamp = X.index.min()
training_begin = earliest_timestamp + DateOffset(hours=1)

# Display the training begin date
print(training_begin)

In [None]:
# Training ends three months after the earliest timestamp in the index
# (keep training less than 50% of the total DataFrame)
three_months = DateOffset(months=3)
training_end = X.index.min() + three_months

# Display the training end date
print(training_end)

In [None]:
# Training window: label-based slice from training_begin to training_end
training_window = slice(training_begin, training_end)
X_train = X.loc[training_window]
y_train = y.loc[training_window]

# Review the X_train DataFrame
X_train.head()

In [None]:
# Test window: everything from one hour after the end of training onwards.
# The one-hour offset matters because label slices with .loc include both
# endpoints: without it, a row stamped exactly at training_end would land in
# both the training and the test set.
testing_begin = training_end + DateOffset(hours=1)
X_test = X.loc[testing_begin:]
y_test = y.loc[testing_begin:]

# Review both ends of the X_test DataFrame
display(X_test.head())
display(X_test.tail())

In [None]:
# Scale the features DataFrames
# Create a fresh StandardScaler instance so this cell does not depend on (or
# refit) the scaler object created for the neural-network experiment.
scaler = StandardScaler()

# Fit the scaler on the training features only
X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the fitted X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Random forest baseline: fit on the scaled training data (fit returns the
# estimator itself, so creation and fitting are chained)
btc_tree = RandomForestClassifier(random_state=1).fit(X_train_scaled, y_train)

# Predict on the scaled test data
y_btc_tree_pred = btc_tree.predict(X_test_scaled)

# Keep the confusion matrix and classification report for the final summary
btc_tree_matrix = confusion_matrix(y_test, y_btc_tree_pred)
btc_tree_class = classification_report(y_test, y_btc_tree_pred)

In [None]:
# Random oversampler with a fixed seed
random_sampler = RandomOverSampler(random_state=1)

# Resample the (unscaled) training data
resampled_pair = random_sampler.fit_resample(X_train, y_train)
X_resampled, y_resampled = resampled_pair

# Class counts after resampling
y_resampled.value_counts()

# TODO(review): open question from the author — does this need to be created earlier?

In [None]:
# Random forest trained on the oversampled (unscaled) training data
btc_tree_os = RandomForestClassifier(random_state=1).fit(X_resampled, y_resampled)

# Predict on the unscaled test data, matching the unscaled training input
y_btc_tree_pred_os = btc_tree_os.predict(X_test)

# Keep the confusion matrix and classification report for the final summary
btc_tree_matrix_os = confusion_matrix(y_test, y_btc_tree_pred_os)
btc_tree_class_os = classification_report(y_test, y_btc_tree_pred_os)

In [None]:
# Logistic regression baseline: fit on the scaled training data (fit returns
# the estimator itself, so creation and fitting are chained)
btc_log = LogisticRegression(random_state=1).fit(X_train_scaled, y_train)

# Predict on the scaled test data
y_btc_log_pred = btc_log.predict(X_test_scaled)

# Keep the classification report and confusion matrix for later display
btc_log_class = classification_report(y_test, y_btc_log_pred)
btc_log_matrix = confusion_matrix(y_test, y_btc_log_pred)

In [None]:
# Confusion matrix first, then the classification report, each on its own line(s)
print(btc_log_matrix, btc_log_class, sep='\n')

In [None]:
# Logistic regression trained on the randomly oversampled training data.
# The resampled rows go through the same fitted X_scaler as the baseline
# logistic regression and predictions use X_test_scaled, so the two models
# differ only in the oversampling step and the solver sees standardized
# inputs. (The original fitted and predicted on unscaled features here.)
btc_log_os = LogisticRegression(random_state=1)

# Fit the model on the scaled, oversampled training data
X_resampled_scaled = X_scaler.transform(X_resampled)
btc_log_os.fit(X_resampled_scaled, y_resampled)

# Use the scaled testing data to make the model predictions
y_btc_log_pred_os = btc_log_os.predict(X_test_scaled)

# Create and save confusion matrix and classification report to a variable name
btc_log_matrix_os = confusion_matrix(y_test, y_btc_log_pred_os)
btc_log_class_os = classification_report(y_test, y_btc_log_pred_os)

In [None]:
# TODO: try to create a model that uses many more one-hot encoded categorical variables

In [None]:
# Side-by-side summary of every model evaluated above.
# Each section prints that model's own classification report and confusion
# matrix; the 'log Reg' section shows the baseline (non-oversampled) results,
# not a second copy of the oversampled ones.
print('NN')
print(f'KERAS:{nn_btc_keras}')
print('Random Forest')
print(btc_tree_class)
print(btc_tree_matrix)
print('OS -- Random Forest')
print(btc_tree_class_os)
print(btc_tree_matrix_os)
print('log Reg')
print(btc_log_class)
print(btc_log_matrix)
print('OS -- Log Reg')
print(btc_log_class_os)
print(btc_log_matrix_os)