## <span style="color:#ff5f27">📝 Imports </span>

In [1]:
import joblib
import pandas as pd
from functions import to_df

## <span style="color:#ff5f27">🔮 Connect to Hopsworks Feature Store </span>

In [2]:
import hopsworks

# Log in to Hopsworks; the returned project handle is the entry point for the calls below
project = hopsworks.login()

# Feature store handle, used later to look up the feature view
fs = project.get_feature_store()

# Model registry handle, used later to fetch the trained model
mr = project.get_model_registry()

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/2262
Connected. Call `.close()` to terminate connection gracefully.
Connected. Call `.close()` to terminate connection gracefully.


## <span style="color:#ff5f27">⚙️ Feature View Retrieval </span>

In [3]:
# Look up version 1 of the `price_fv` feature view in the feature store
feature_view = fs.get_feature_view(name='price_fv', version=1)

## <span style="color:#ff5f27">🚀 Fetch the model </span>

In [4]:
# Fetch version 1 of the registered model from the model registry
retrieved_model = mr.get_model(name="xgboost_price_model", version=1)

# Download the model files; the returned directory is used to locate the pickle
saved_model_dir = retrieved_model.download()

Downloading file ... 

In [5]:
# Location of the serialized model inside the downloaded directory
model_path = saved_model_dir + "/xgboost_price_model.pkl"

# NOTE: joblib.load is pickle-based, so only load files that come from a trusted registry
model = joblib.load(model_path)
model

## <span style="color:#ff5f27">👨🏻‍⚖️ Real-time Predictions </span>

TODO: Move these real-time predictions to a KServe pipeline.

In [6]:
# Initialise the feature view for online serving with training dataset version 1
# (keyword form, matching the init_batch_scoring call in the batch section)
feature_view.init_serving(training_dataset_version=1)

# Retrieve the feature vector for a single entry
feature_vector = feature_view.get_feature_vector(
    entry={
        "id": 1,
        "date": '2023-09-26',
    },
)
feature_vector

[datetime.date(2023, 9, 29),
 1,
 0.8972438585979629,
 0.8820895973864907,
 0.8944360469301086,
 0.7265744949408762,
 0.6365405598511238,
 0.6820573659260941,
 0.7699945108458449,
 1.0147616975908977]

In [7]:
# Convert the raw feature vector into a DataFrame with the project helper `to_df`
# (judging by the displayed output, the date value is not among the resulting columns)
feature_vector_df = to_df(feature_vector)
feature_vector_df

Unnamed: 0,id,ma_7,ma_14,ma_30,daily_rate_of_change,volatility_30_day,ema_02,ema_05,rsi
0,1,0.897244,0.88209,0.894436,0.726574,0.636541,0.682057,0.769995,1.014762


In [8]:
# Predict for the single-row feature DataFrame using the retrieved model
prediction_feature_vector = model.predict(feature_vector_df)
prediction_feature_vector

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)


array([229.98347], dtype=float32)

In [9]:
# Retrieve feature vectors for ids 1 and 2, both keyed on the same date
feature_vectors = feature_view.get_feature_vectors(
    entry=[{"id": entity_id, "date": '2023-09-26'} for entity_id in (1, 2)],
)
feature_vectors

[[datetime.date(2023, 9, 29),
  1,
  0.8972438585979629,
  0.8820895973864907,
  0.8944360469301086,
  0.7265744949408762,
  0.6365405598511238,
  0.6820573659260941,
  0.7699945108458449,
  1.0147616975908977],
 [datetime.date(2023, 9, 29),
  2,
  0.8529658478130617,
  0.8613085695272089,
  0.88490606780393,
  -0.3401872637504521,
  0.9058530582688904,
  0.4883641063885475,
  0.3281180226757261,
  -0.20759117769877702]]

In [10]:
# Convert the list of raw feature vectors into one DataFrame (one row per entry)
# with the project helper `to_df`
feature_vectors_df = to_df(feature_vectors)
feature_vectors_df

Unnamed: 0,id,ma_7,ma_14,ma_30,daily_rate_of_change,volatility_30_day,ema_02,ema_05,rsi
0,1,0.897244,0.88209,0.894436,0.726574,0.636541,0.682057,0.769995,1.014762
1,2,0.852966,0.861309,0.884906,-0.340187,0.905853,0.488364,0.328118,-0.207591


In [11]:
# Predict for every row of the feature vectors DataFrame using the retrieved model
prediction_feature_vectors = model.predict(feature_vectors_df)
prediction_feature_vectors

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)


array([229.98347, 176.07455], dtype=float32)

## <span style="color:#ff5f27">👨🏻‍⚖️ Batch Prediction </span>

In [12]:
# Prepare the feature view for batch scoring with training dataset version 1
feature_view.init_batch_scoring(training_dataset_version=1)

# Pull the batch for the requested window; per the logged SQL the end date is exclusive
batch_data = feature_view.get_batch_data(start_time='2023-09-01', end_time='2023-09-26')
batch_data.head(3)

2023-09-29 00:34:51,705 INFO: USE `maksymzh_featurestore`
2023-09-29 00:34:52,279 INFO: WITH right_fg0 AS (SELECT *
FROM (SELECT `fg1`.`date` `date`, `fg1`.`id` `id`, `fg1`.`id` `join_pk_id`, `fg1`.`date` `join_evt_date`, `fg0`.`ma_7` `ma_7`, `fg0`.`ma_14` `ma_14`, `fg0`.`ma_30` `ma_30`, `fg0`.`daily_rate_of_change` `daily_rate_of_change`, `fg0`.`volatility_30_day` `volatility_30_day`, `fg0`.`ema_02` `ema_02`, `fg0`.`ema_05` `ema_05`, `fg0`.`rsi` `rsi`, RANK() OVER (PARTITION BY `fg1`.`id`, `fg1`.`date` ORDER BY `fg0`.`date` DESC) pit_rank_hopsworks
FROM `maksymzh_featurestore`.`prices_1` `fg1`
INNER JOIN `maksymzh_featurestore`.`averages_1` `fg0` ON `fg1`.`id` = `fg0`.`id` AND `fg1`.`date` >= `fg0`.`date`
WHERE `fg1`.`date` >= DATE '2023-09-01' AND `fg1`.`date` < DATE '2023-09-26') NA
WHERE `pit_rank_hopsworks` = 1) (SELECT `right_fg0`.`date` `date`, `right_fg0`.`id` `id`, `right_fg0`.`ma_7` `ma_7`, `right_fg0`.`ma_14` `ma_14`, `right_fg0`.`ma_30` `ma_30`, `right_fg0`.`daily_rate_of_c



Unnamed: 0,date,id,ma_7,ma_14,ma_30,daily_rate_of_change,volatility_30_day,ema_02,ema_05,rsi
0,2023-09-01,0,0.846135,0.829819,0.8374,0.52715,0.585425,0.507356,0.574765,0.834656
1,2023-09-04,0,0.864649,0.848786,0.842784,0.451,0.728577,0.560601,0.600344,0.967033
2,2023-09-05,0,0.869203,0.854805,0.844238,0.453781,0.753995,0.564801,0.587654,0.868846


In [13]:
# Build the model input without mutating `batch_data`: an in-place drop makes this
# cell fail with a KeyError when it is re-run, because 'date' is already gone.
batch_features = batch_data.drop(columns=['date'])

# Predict batch data using retrieved model
predictions_batch = model.predict(batch_features)
predictions_batch[:10]

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)


array([208.79515, 208.81712, 207.08513, 209.94397, 213.29387, 204.94258,
       200.5633 , 205.45969, 207.56802, 203.4607 ], dtype=float32)

---