In [1]:
import hopsworks

# Open a Hopsworks session; `project` is the handle the two lookups below hang off.
project = hopsworks.login()

# `fs` is used by later cells to fetch the feature group / feature view,
# `mr` to fetch the trained model from the model registry.
fs = project.get_feature_store()
mr = project.get_model_registry()

  from .autonotebook import tqdm as notebook_tqdm


Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/556181
Connected. Call `.close()` to terminate connection gracefully.
Connected. Call `.close()` to terminate connection gracefully.


In [2]:
# Look up version 2 of the training feature view by name.
# NOTE(review): `feature_view` is reassigned two cells later by
# `fs.get_or_create_feature_view(...)` with the same name and version, so this
# lookup looks redundant — confirm before removing.
feature_view = fs.get_feature_view(
    name='bitcoin_price_movement_training_fv',
    version=2
)

In [3]:
# Fetch version 2 of the `bitcoin_price_movement` feature group; the next cell
# builds its query from this group via `select_all()`.
bitcoin_fg = fs.get_feature_group(
    name='bitcoin_price_movement',
    version=2
)

In [4]:
# Query selecting every feature of the feature group.
data = bitcoin_fg.select_all()
version = 2
# Presumably returns the existing feature view for this name/version and only
# creates it from `data` when it is missing (going by the method name — confirm
# against the Hopsworks docs).
feature_view = fs.get_or_create_feature_view(
    name='bitcoin_price_movement_training_fv',
    version=version,
    query=data
)

In [5]:
# Read the feature view's data from Hopsworks into `df` (sorted by `id` in the next cell).
df = feature_view.get_batch_data()

Finished: Reading data from Hopsworks, using ArrowFlight (1.38s) 


In [6]:
# Order rows by `id`; in the preview below `id` increases together with `date`,
# so this is what puts the series in time order. The row index is not reset —
# later cells read columns positionally through `.values` / `.to_numpy()`.
sorted_df = df.sort_values(by='id')

In [18]:
# Preview the first rows of the id-sorted frame.
sorted_df.head()

Unnamed: 0,date,open,high,low,close,volume,ma7,ma21,bollinger_upper,bollinger_lower,lag7,volatility,close_usd_index,close_oil,close_gold,hash_rate,id
485,2015-01-24 00:00:00+00:00,232.699997,248.210007,230.022003,247.847,24782500,225.363429,242.852998,312.179592,173.526404,199.259995,13.918947,95.0,45.59,1292.599976,264487.024401,1
1156,2015-01-25 00:00:00+00:00,247.352005,255.074005,243.889999,253.718002,33582700,231.560429,242.354093,311.184643,173.523543,210.339005,15.662329,95.0,45.59,1292.599976,264487.024401,2
1776,2015-01-26 00:00:00+00:00,254.078995,309.384003,254.078995,273.472992,106794000,239.933572,242.306426,311.044877,173.567976,214.860992,20.243832,94.800003,45.150002,1279.400024,264487.024401,3
2062,2015-01-27 00:00:00+00:00,273.166992,275.480011,250.653,263.475006,44399000,247.385001,241.224808,307.742075,174.707541,211.315002,17.346404,94.019997,46.23,1291.699951,264487.024401,4
3297,2015-01-28 00:00:00+00:00,263.351013,266.535004,227.046005,233.914993,44352200,248.387571,238.347569,300.285301,176.409837,226.897003,16.124688,94.470001,44.450001,1285.900024,324166.338972,5


In [7]:
import numpy as np

def _percent_change(levels):
    """Row-over-row percentage change along axis 0, padded with a leading zero.

    The zero inserted at position 0 stands in for the first row (which has no
    predecessor) so the result has as many rows as ``levels``.
    """
    step_change = np.diff(levels, axis=0) / levels[:-1] * 100
    return np.insert(step_change, 0, 0, axis=0)


# Daily mid price = average of the day's high and low.
high_prices = sorted_df['high'].values
low_prices = sorted_df['low'].values
mid_prices = (high_prices + low_prices) / 2.0
mid_price_changes = _percent_change(mid_prices)

# Explanatory features, each turned into a percentage change as well.
feature_columns = [
    'volume', 'ma7', 'ma21', 'bollinger_upper', 'bollinger_lower',
    'volatility', 'close_usd_index', 'close_oil', 'close_gold', 'hash_rate',
]
features = sorted_df[feature_columns].values
feature_changes = _percent_change(features)

# Column 0 is the mid-price change; the remaining columns follow `feature_columns` order.
combined_features = np.column_stack((mid_price_changes.reshape(-1, 1), feature_changes))

In [8]:
# Each sample is a window of `sequence_length` consecutive rows; its label says
# whether the mid-price change on the row right after the window is positive.
sequence_length = 100
window_count = len(combined_features) - sequence_length

sequence_data = np.array([
    combined_features[start:start + sequence_length]
    for start in range(window_count)
])
# 1 = next mid-price change is positive, 0 = it is zero or negative.
sequence_labels = np.array([
    int(mid_price_changes[start + sequence_length] > 0)
    for start in range(window_count)
])

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import classification_report

# Positional 80/20 split: the first 80% of the windows train, the rest test.
split_index = int(len(sequence_data) * 0.8)
train_labels, test_labels = sequence_labels[:split_index], sequence_labels[split_index:]

# `train_data` / `test_data` are the TensorDatasets (window tensor, label tensor);
# a later cell reads the window tensors back through `.tensors[0]`.
train_data = TensorDataset(
    torch.from_numpy(sequence_data[:split_index]),
    torch.from_numpy(train_labels),
)
test_data = TensorDataset(
    torch.from_numpy(sequence_data[split_index:]),
    torch.from_numpy(test_labels),
)

# Only the training batches are shuffled; test batches keep their order.
train_loader = DataLoader(train_data, shuffle=True, batch_size=32)
test_loader = DataLoader(test_data, shuffle=False, batch_size=32)

In [10]:
class LSTMModel(nn.Module):
    """LSTM classifier: one LSTM layer, dropout, then a linear head.

    The head is applied to the LSTM output of the last time step only, so an
    input of shape (batch, time, input_size) yields (batch, output_size).
    Dropout (p=0.2) was added to try to tackle overfitting.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Sub-modules are created in the order lstm, fc, dropout.
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Return the head's raw output (no sigmoid applied) for each sequence in ``x``."""
        hidden_states, _ = self.lstm(x)
        last_step = hidden_states[:, -1, :]
        return self.fc(self.dropout(last_step))

# Setting the input size of the model to match the number of feature columns in combined_features
# Setting the number of neurons (hidden size) to 30
# Setting the size of the output to 1, indicating that we will predict a single value (increase or decrease of the price)
input_size = combined_features.shape[1]
hidden_size = 30
output_size = 1

# NOTE(review): the model-registry cell later rebinds the name `model` to the
# registry entry; the weights actually used for prediction live in `lstm_model`.
model = LSTMModel(input_size, hidden_size, output_size)

# Hyperparameters
# NOTE(review): no training loop is visible in this notebook, so the training
# settings below look like leftovers from the training notebook — confirm.
epochs = 50
learning_rate = 0.001

train_losses = []
test_losses = []

# Early stopping parameters, these are added because early stopping can reduce the risk of overfitting
# Early stopping stops the training process when the model's performance no longer improves on a validation set
# The patience parameter is the number of epochs we wait for an improvement. If no improvement is seen after 10 epochs, training stops
# We track the loss of the model and stop when we no longer see it improve
patience = 10
best_loss = float('inf')
epochs_no_improve = 0

# We use the Binary Cross Entropy loss that applies the sigmoid itself (it takes raw logits), as needed for this binary classification problem
# We use the Adam optimiser to adjust the parameters of the model to minimise loss during training
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
import os

import torch

# Fetch version 1 of the trained LSTM from the model registry and download its
# artifacts; `download()` returns the local directory they were written to.
model = mr.get_model(
    name="bitcoin_price_movement_prediction_model_lstm",
    version=1
)

saved_model_dir = model.download()
weights_path = os.path.join(saved_model_dir, "bitcoin_price_movement_prediction_lstm.pth")

# Rebuild the architecture with the same sizes as the saved weights, then load them.
lstm_model = LSTMModel(input_size, hidden_size, output_size)
# map_location="cpu": the model is built on the CPU here, and without it a
# checkpoint saved from a CUDA device fails to load on a CPU-only machine.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code.
# Only load artifacts from a trusted registry, and consider `weights_only=True`
# if the installed torch version supports it.
state_dict = torch.load(weights_path, map_location="cpu")
lstm_model.load_state_dict(state_dict)
# eval() switches dropout off for inference; as the last expression it also
# displays the module summary.
lstm_model.eval()

Downloading model artifact (0 dirs, 5 files)... DONE



LSTMModel(
  (lstm): LSTM(11, 30, batch_first=True)
  (fc): Linear(in_features=30, out_features=1, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [12]:
def predict(model, data_loader):
    """Run ``model`` over ``data_loader`` and return hard 0/1 predictions and labels.

    Args:
        model: module returning one logit per sample.
        data_loader: iterable of ``(data, label)`` tensor batches.

    Returns:
        ``(predictions, true_labels)``: two flat lists of ints in loader order.
        A prediction is 1 when the sigmoid of the logit is at least 0.5; labels
        are thresholded the same way.
    """
    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for data, label in data_loader:
            output = model(data.float())
            # Flatten with reshape(-1) instead of squeeze(): squeeze() turns a
            # batch of one into a 0-d tensor whose tolist() is a bare float,
            # which list.extend() rejects with a TypeError.
            probabilities = torch.sigmoid(output).reshape(-1)
            predictions.extend(probabilities.tolist())
            true_labels.extend(label.reshape(-1).tolist())

    predictions = [1 if p >= 0.5 else 0 for p in predictions]
    true_labels = [1 if a >= 0.5 else 0 for a in true_labels]

    return predictions, true_labels

In [13]:
# Score the loaded model on the test windows (the last 20% of the sequences).
predictions, labels = predict(lstm_model, test_loader)

# Convert to arrays; `predictions` is sliced again by the backtest cell further down.
predictions = np.array(predictions)
true_labels = np.array(labels)

# Per-class precision / recall / F1 (class 1 = positive mid-price change).
print(classification_report(true_labels, predictions))

              precision    recall  f1-score   support

           0       0.59      0.48      0.53       305
           1       0.63      0.73      0.68       373

    accuracy                           0.62       678
   macro avg       0.61      0.61      0.60       678
weighted avg       0.61      0.62      0.61       678



In [15]:
def forecast(model, data, sequence_length, steps_ahead):
    """Roll the model forward ``steps_ahead`` steps from the end of ``data``.

    The last ``sequence_length`` rows of ``data`` form the first input window.
    After each step the oldest row is dropped and a new row is appended, so the
    window length stays constant.

    NOTE(review): the appended row sets *every* feature to the predicted class
    (0 or 1). Real rows hold other values, so every step after the first is
    driven by synthetic input — treat multi-step output with caution.

    Args:
        model: module mapping a (1, sequence_length, n_features) float tensor
            to a single logit.
        data: array-like with one row per time step (oldest first); trailing
            dimensions are flattened into the feature axis.
        sequence_length: number of rows fed to the model per step.
        steps_ahead: number of successive predictions to make.

    Returns:
        List of ``steps_ahead`` ints: 1 when the sigmoid of the logit is at
        least 0.5 (increase), otherwise 0 (decrease).

    Raises:
        ValueError: if ``sequence_length`` is below 1 or ``data`` has fewer
            than ``sequence_length`` rows. Without this check a short input
            could be reshaped into a window with the wrong feature count.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be at least 1")

    model.eval()

    data = np.asarray(data)
    if data.ndim == 0 or data.shape[0] < sequence_length:
        available = 0 if data.ndim == 0 else data.shape[0]
        raise ValueError(
            f"forecast needs at least {sequence_length} rows of data, got {available}"
        )

    # (sequence_length, n_features) window over the most recent rows.
    window = data[-sequence_length:].reshape(sequence_length, -1)
    predictions = []

    with torch.no_grad():
        for _ in range(steps_ahead):
            input_seq = torch.tensor(window, dtype=torch.float32).unsqueeze(0)
            predicted_value = torch.sigmoid(model(input_seq)).item()
            predicted_class = 1 if predicted_value >= 0.5 else 0
            predictions.append(predicted_class)

            # Slide the window: drop the oldest row, append the synthetic one.
            synthetic_row = np.full((1, window.shape[1]), predicted_class)
            window = np.concatenate((window[1:], synthetic_row), axis=0)

    return predictions

# We use the forecast function for 2 days ahead in the future and print the predictions.
# The series handed to it is `combined_features` itself. Rebuilding it from the
# train/test windows would drop the newest observation, because the last row of
# `combined_features` is only ever used as a label and never appears inside a
# window, so the forecast would start one day stale. It would also materialise
# every overlapping window just to read the final `sequence_length` rows.
all_data = combined_features
steps_ahead = 2
future_predictions = forecast(lstm_model, all_data, sequence_length, steps_ahead)
movement_interpretation = ['Increase' if pred == 1 else 'Decrease' for pred in future_predictions]

print('Future Predictions for next 2 days:', movement_interpretation)

Future Predictions for next 2 days: ['Decrease', 'Decrease']


In [16]:
def backtest_strategy(prices, predictions, initial_capital=100000):
    """Simulate an all-in / all-out strategy driven by up/down predictions.

    At step ``t`` the position is either fully in cash or fully in Bitcoin:
      * prediction 1 while in cash  -> buy with all capital at ``prices[t]``
      * prediction 0 while holding  -> sell everything at ``prices[t]``
    Any other combination leaves the position unchanged. Holdings still open at
    the end are valued at the last price.

    Args:
        prices: sequence of prices, one per step.
        predictions: sequence of 0/1 signals, at least as long as ``prices``;
            entries beyond ``len(prices)`` are ignored.
        initial_capital: starting cash.

    Returns:
        Final portfolio value; ``initial_capital`` when ``prices`` is empty.

    Raises:
        IndexError: if ``predictions`` has fewer entries than ``prices``.
    """
    n_steps = len(prices)
    if n_steps == 0:
        # Nothing to trade on; valuing holdings at prices[-1] would fail.
        return initial_capital
    if len(predictions) < n_steps:
        raise IndexError(
            f"predictions has {len(predictions)} entries but prices has {n_steps}"
        )

    capital = initial_capital
    bitcoin_holdings = 0

    for t in range(n_steps):
        signal = predictions[t]
        price = prices[t]
        if signal == 1 and bitcoin_holdings == 0:
            # Model predicts a rise and we are in cash: go all in.
            bitcoin_holdings = capital / price
            capital = 0
        elif signal == 0 and bitcoin_holdings > 0:
            # Model predicts a fall and we are holding: sell everything.
            capital = bitcoin_holdings * price
            bitcoin_holdings = 0

    # Calculate final value (if any Bitcoin left, value it at the last price)
    return capital + bitcoin_holdings * prices[-1]

In [17]:
initial_capital = 100000
backtest_days = 30  # length of the backtest window, in rows of `sorted_df`

# Closing prices of the last `backtest_days` rows, paired with the model's last
# `backtest_days` test predictions (1 for up, 0 for down).
prices = sorted_df['close'].tail(backtest_days).to_numpy()  # Example prices
example_predictions = predictions[-backtest_days:]  # Example predictions (1 for up, 0 for down)
assert len(prices) == len(example_predictions), "prices and predictions must cover the same days"

# Pass the capital explicitly: the ROI below is computed against `initial_capital`,
# so relying on the function's default would silently break if this value changed.
final_value = backtest_strategy(prices, example_predictions, initial_capital=initial_capital)
roi = ((final_value - initial_capital) / initial_capital) * 100
print(f"Return on Investment (ROI): {roi:.2f}%")
print(f"Final capital after {backtest_days} days: ${final_value:.2f}")

Return on Investment (ROI): 16.05%
Final capital after 30 days: $116047.41
