In [1]:
import hopsworks

project = hopsworks.login()

fs = project.get_feature_store()
mr = project.get_model_registry()

  from .autonotebook import tqdm as notebook_tqdm


Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/556181
Connected. Call `.close()` to terminate connection gracefully.
Connected. Call `.close()` to terminate connection gracefully.


In [2]:
feature_view = fs.get_feature_view(
    name='bitcoin_analysis_training_fv',
    version=1
)

In [3]:
import joblib
model = mr.get_model(
    name="bitcoin_price_prediction_model_random_forest", 
    version=1
)

saved_model_dir = model.download()

rf_model = joblib.load(saved_model_dir + "/bitcoin_price_prediction_model_random_forest.pkl")

Downloading model artifact (0 dirs, 5 files)... DONE

In [4]:
bitcoin_fg = fs.get_feature_group(
    name='bitcoin_analysis',
    version=1
)

In [5]:
data = bitcoin_fg.select_all()
version = 1
feature_view = fs.get_or_create_feature_view(
    name='bitcoin_analysis_training_fv',
    version=version,
    query=data
)

In [6]:
df = feature_view.get_batch_data()

Finished: Reading data from Hopsworks, using ArrowFlight (1.42s) 


In [7]:
sorted_df = df.sort_values(by='id')

In [8]:
sorted_df.tail()

Unnamed: 0,date,open,high,low,close,volume,ma7,ma21,bollinger_upper,bollinger_lower,lag7,volatility,close_usd_index,close_oil,close_gold,hash_rate,id
2969,2024-05-23 00:00:00+00:00,69121.304688,70041.273438,66356.953125,67929.5625,41895680979,68415.381696,64687.760789,71051.630458,58323.891119,65231.582031,1892.814381,105.110001,76.870003,2335.0,644799000.0,3408
175,2024-05-24 00:00:00+00:00,67928.132812,69220.296875,66622.671875,68526.101562,29197308153,68625.985491,64956.15439,71475.07722,58437.231559,67051.875,1795.323014,104.720001,77.720001,2332.5,671140000.0,3409
622,2024-05-25 00:00:00+00:00,68526.921875,69579.320312,68515.820312,69265.945312,15473071741,68958.148438,65212.081659,71972.955136,58451.208182,66940.804688,1639.943954,104.720001,77.720001,2332.5,671140000.0,3410
933,2024-05-26 00:00:00+00:00,69264.289062,69506.226562,68183.890625,68518.09375,15628433737,69278.109375,65425.746466,72312.304691,58539.18824,66278.367188,1185.488127,104.720001,77.720001,2332.5,671140000.0,3411
1741,2024-05-27 00:00:00+00:00,68512.179688,70597.882812,68232.5,69394.554688,25870990717,68984.732143,65722.537202,72735.386868,58709.687537,71448.195312,722.736435,104.720001,77.720001,2332.5,671140000.0,3412


In [9]:
import pandas as pd
import numpy as np
from features import feature_engineering
from datetime import datetime, timedelta

def make_predictions(df, model, horizon, noise_factor=0.01):
    predictions = []
    last_known_data = df.iloc[-1].copy()
    pred_df = df.copy()

    for _ in range(horizon):
        features_for_prediction = last_known_data.drop(labels=['date', 'close', 'id']).values.reshape(1, -1)
        prediction = model.predict(features_for_prediction)
        noise = np.random.normal(0, noise_factor * prediction[0])
        prediction[0] += noise
        predictions.append(prediction[0])

        new_row = last_known_data.copy()
        new_row['close'] = prediction[0]
        new_row['lag7'] = pred_df.iloc[-7]['close'] if len(pred_df) >= 7 else last_known_data['close']

        pred_df = pd.concat([pred_df, new_row.to_frame().T], ignore_index=True)
        pred_df['ma7'] = pred_df['close'].rolling(window=7).mean()
        pred_df['ma21'] = pred_df['close'].rolling(window=21).mean()
        pred_df['bollinger_upper'], pred_df['bollinger_lower'] = feature_engineering.compute_bollinger_bands(pred_df['close'], 21)
        pred_df['volatility'] = pred_df['close'].rolling(window=7).std()

        last_known_data = pred_df.iloc[-1]
    
    return predictions

In [10]:
# Preprocess the data
sorted_df['date'] = pd.to_datetime(sorted_df['date'])
sorted_df['year'] = sorted_df['date'].dt.year
sorted_df['month'] = sorted_df['date'].dt.month
sorted_df['day'] = sorted_df['date'].dt.day
sorted_df['day_of_week'] = sorted_df['date'].dt.dayofweek

In [11]:
# Forecast for the next 7 days
forecast_horizon = 7
forecast_predictions = make_predictions(sorted_df, rf_model, forecast_horizon)
forecast_dates = pd.date_range(start=sorted_df['date'].iloc[-1] + timedelta(days=1), periods=forecast_horizon)
forecast_df = pd.DataFrame({'date': forecast_dates, 'predicted_close': forecast_predictions})



In [12]:
# Forecast for the next 30 days
investment_horizon = 30
investment_predictions = make_predictions(sorted_df, rf_model, investment_horizon)
investment_forecast_df = pd.DataFrame({
    'date': pd.date_range(start=sorted_df['date'].iloc[-1] + timedelta(days=1), periods=investment_horizon),
    'predicted_close': investment_predictions
})



In [13]:
pd.options.display.float_format = '{:.4f}'.format
forecast_df

Unnamed: 0,date,predicted_close
0,2024-05-28 00:00:00+00:00,65786.8544
1,2024-05-29 00:00:00+00:00,64833.0581
2,2024-05-30 00:00:00+00:00,65978.8129
3,2024-05-31 00:00:00+00:00,65815.7971
4,2024-06-01 00:00:00+00:00,65520.0349
5,2024-06-02 00:00:00+00:00,64835.6699
6,2024-06-03 00:00:00+00:00,65247.5376


In [14]:
investment_forecast_df

Unnamed: 0,date,predicted_close
0,2024-05-28 00:00:00+00:00,65128.8812
1,2024-05-29 00:00:00+00:00,66676.538
2,2024-05-30 00:00:00+00:00,64150.752
3,2024-05-31 00:00:00+00:00,64911.4681
4,2024-06-01 00:00:00+00:00,64048.2518
5,2024-06-02 00:00:00+00:00,65019.6798
6,2024-06-03 00:00:00+00:00,65438.5464
7,2024-06-04 00:00:00+00:00,65508.7088
8,2024-06-05 00:00:00+00:00,65004.8282
9,2024-06-06 00:00:00+00:00,64819.8859


In [15]:
import os

# Create the folder if it doesn't exist
output_folder = 'prediction_dataframes'
os.makedirs(output_folder, exist_ok=True)

# Save dataframes to CSV in the specified folder
forecast_df.to_csv(os.path.join(output_folder, 'forecast_7_days.csv'), index=False)
investment_forecast_df.to_csv(os.path.join(output_folder, 'forecast_30_days.csv'), index=False)