In [18]:
from sklearn.ensemble import HistGradientBoostingRegressor
import hopsworks
import os
from datetime import datetime
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score


# Creating the feature view

In [5]:
# Log in to Hopsworks with the API key taken from the environment and open
# the project's feature store.
project = hopsworks.login(api_key_value=os.getenv("HOPSWORKS_API_KEY"))
fs = project.get_feature_store()

# Feature groups used as sources: price/technical indicators and the
# aggregated Reddit sentiment backfill.
fg_price = fs.get_feature_group(name="solana_crypto_features", version=1)
fg_agg_sentiment = fs.get_feature_group(
    name="reddit_aggregated_sentiment_backfill", version=1
)

# Price-side input columns. 'close' is intentionally not listed here: it is
# the label, so it is appended only when the query is built below.
selected_features = [
    'timestamp',
    'high', 'low', 'open', 'volume',
    'close_7d_ma', 'close_30d_ma', 'ma_7_30_cross',
    'rsi_14', 'atr_14', 'bb_bandwidth',
    'volume_7d_ma', 'volume_ratio',
    'day_of_week', 'month',
]

# Join the price columns (inputs + label) with every sentiment column,
# matching rows on their shared 'timestamp'.
price_query = fg_price.select(selected_features + ['close'])
sentiment_query = fg_agg_sentiment.select_all()
query = price_query.join(sentiment_query, on=['timestamp'])

# Fetch or create version 2 of the feature view, with 'close' as the target.
feature_view = fs.get_or_create_feature_view(
    name="solana_with_sentiment",
    version=2,
    query=query,
    labels=["close"],  # prediction target
)

2026-01-15 16:08:04,713 INFO: Closing external client and cleaning up certificates.
Connection closed.
2026-01-15 16:08:04,718 INFO: Initializing external client
2026-01-15 16:08:04,719 INFO: Base URL: https://c.app.hopsworks.ai:443
To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'







2026-01-15 16:08:06,567 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1279131


# Instantiate the model

In [6]:
# Boundary date handed to the split as `test_start`.
# NOTE(review): presumably rows from this date onward land in the test set —
# confirm against the Hopsworks `train_test_split` documentation.
start_date_test_data = "2024-01-01"
DATE_FORMAT = "%Y-%m-%d"
test_start = datetime.strptime(start_date_test_data, DATE_FORMAT)

# The split returns features and labels for the train and test partitions.
split_frames = feature_view.train_test_split(test_start=test_start)
X_train, X_test, y_train, y_test = split_frames

# Display the training features for a visual sanity check.
X_train


Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.23s) 



Unnamed: 0,timestamp,high,low,open,volume,close_7d_ma,close_30d_ma,ma_7_30_cross,rsi_14,atr_14,bb_bandwidth,volume_7d_ma,volume_ratio,day_of_week,month,reddit_aggregated_sentiment_backfill_mean_sentiment,reddit_aggregated_sentiment_backfill_count
0,1605484800,2.360211,1.994853,2.043234,22156135,2.055092,1.825444,1,68.548454,0.277274,0.703077,1.458409e+07,1.519199,0,11,0.000000,7.0
1,1607472000,1.674277,1.503676,1.664622,10461119,1.849739,2.026277,0,27.792126,0.189585,0.402118,1.003346e+07,1.042623,2,12,0.090909,11.0
2,1609632000,2.364981,1.799902,1.799902,55073422,1.719928,1.617500,1,64.871286,0.229080,0.556823,2.527390e+07,2.179063,6,1,0.666667,3.0
3,1612396800,6.531011,5.603236,5.731748,108249482,4.906323,3.754754,1,91.401345,0.556394,0.826621,6.736586e+07,1.606889,3,2,0.000000,4.0
4,1613433600,8.980368,7.983557,8.860748,71625205,8.880791,5.719969,1,73.447684,1.013082,1.155841,9.030898e+07,0.793113,1,2,0.000000,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1837,1701820800,66.383621,60.979866,60.979866,3030993886,61.477550,57.001038,1,61.034506,3.570023,0.191287,1.756013e+09,1.726066,2,12,0.263158,19.0
1838,1701993600,75.129791,67.841232,67.846718,3612712432,64.876785,58.886300,1,78.885595,4.226890,0.326349,2.323821e+09,1.554643,4,12,0.222222,18.0
1839,1702598400,79.151070,72.366638,75.636688,2821390652,72.089778,62.753072,1,64.760184,5.689867,0.403961,2.648506e+09,1.065276,4,12,0.388889,18.0
1840,1703548800,121.143997,104.728218,121.084335,6117657160,103.993197,75.669467,1,79.330591,9.648653,0.832903,5.161978e+09,1.185138,1,12,0.235294,17.0


In [20]:
# Display the test-split feature frame returned by `train_test_split`.
X_test

Unnamed: 0,timestamp,high,low,open,volume,close_7d_ma,close_30d_ma,ma_7_30_cross,rsi_14,atr_14,bb_bandwidth,volume_7d_ma,volume_ratio,day_of_week,month,reddit_aggregated_sentiment_backfill_mean_sentiment,reddit_aggregated_sentiment_backfill_count
66,1704844800,105.082375,92.563263,99.384239,4216429514,98.203292,94.266309,1,46.324363,9.963049,0.277661,3.322546e+09,1.269036,2,1,0.235294,17.0
67,1705190400,102.173584,93.890564,95.732651,2736734725,97.246598,97.406168,0,44.724472,10.180137,0.239893,3.442700e+09,0.794938,6,1,0.350000,20.0
68,1706486400,101.771912,95.176132,95.937981,2604890575,92.000851,95.933388,0,57.426431,5.967697,0.227690,2.190202e+09,1.189338,0,1,0.055556,18.0
69,1707523200,110.742790,106.933662,106.933662,1836561570,101.102345,95.894477,1,72.030741,5.025592,0.283728,1.907257e+09,0.962933,5,2,0.450000,20.0
70,1708819200,104.329941,102.171028,104.132668,909662462,104.897385,104.089418,1,44.645616,5.890808,0.196550,1.729959e+09,0.525829,6,2,0.200000,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1871,1755475200,191.354156,180.414108,191.164490,6328028844,190.825165,181.363770,1,57.748735,10.851792,0.266098,8.151281e+09,0.776323,0,8,,
1872,1756080000,213.006134,186.025055,205.855438,13048248034,191.670600,181.850797,1,54.886504,14.074930,0.225180,8.608558e+09,1.515730,0,8,,
1873,1760140800,190.879135,173.754791,188.638901,12850012618,214.058609,223.326278,0,39.618874,14.593786,0.289290,9.818237e+09,1.308790,5,10,,
1874,1760832000,192.200607,183.458252,187.660294,4863227045,192.433105,210.549244,0,35.218743,16.847148,0.377950,9.193431e+09,0.528989,6,10,,


In [22]:
# 'timestamp' is only the join/split key; remove it from both partitions so
# the model is trained and evaluated on the same set of input columns.
NON_FEATURE_COLUMNS = ['timestamp']
X_features = X_train.drop(columns=NON_FEATURE_COLUMNS)
X_test_features = X_test.drop(columns=NON_FEATURE_COLUMNS)

In [23]:
# Display the training labels (the `close` column declared as the feature view's label).
y_train

Unnamed: 0,close
0,2.255355
1,1.662304
2,2.161752
3,6.426065
4,8.297867
...,...
1837,61.902958
1838,75.002434
1839,72.368988
1840,112.676735


In [25]:

# Gradient-boosted regressor with library-default hyperparameters.
XGBoost_model = XGBRegressor()

# Train on the timestamp-free features against the training labels; the fit
# call is left as the last expression so the notebook renders its result.
XGBoost_model.fit(X=X_features, y=y_train)

In [26]:

# Predict on the test features, then score against the first (only) label column.
y_pred = XGBoost_model.predict(X_test_features)
y_true = y_test.iloc[:, 0]

# Mean squared error between actual and predicted values.
mse = mean_squared_error(y_true, y_pred)
print("MSE:", mse)

# Coefficient of determination (R squared) for the same predictions.
r2 = r2_score(y_true, y_pred)
print("R squared:", r2)



MSE: 35.08982708510918


R squared: 0.9742682210412474


In [27]:
# Local directory that collects the model artifacts before they are uploaded.
model_dir = "crypto_price_model"

# Create the directory if needed. `exist_ok=True` makes re-running this cell
# safe without a separate existence check (no check-then-create race), and a
# FileExistsError is still raised if the path exists but is not a directory,
# instead of silently skipping creation.
os.makedirs(model_dir, exist_ok=True)

In [28]:
# Display the fitted regressor's representation.
XGBoost_model

In [29]:
# Serialize the trained model as JSON inside the artifact directory.
model_path = model_dir + "/model.json"
XGBoost_model.save_model(model_path)

In [31]:
# Handle to the project's model registry.
mr = project.get_model_registry()

# Evaluation metrics recorded with the model; values are stored as strings.
res_dict = {
    metric_name: str(metric_value)
    for metric_name, metric_value in (("MSE", mse), ("R squared", r2))
}

# Create a Python model entry named crypto_price_model in the registry,
# linked to the feature view it was trained from.
model = mr.python.create_model(
    name="crypto_price_model",
    metrics=res_dict,
    feature_view=feature_view,
    description="Cryptocurrency Price Predictor",
)

# Upload everything in the local artifact directory to the model registry.
model.save(model_dir)

  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\samyz\ID2223_project\src\utils\training\crypto_price_model/model.json: 0.000%|          | 0…

Uploading c:\Users\samyz\ID2223_project\src\utils\training\model_schema.json: 0.000%|          | 0/1525 elapse…

Model created, explore it at https://c.app.hopsworks.ai:443/p/1279131/models/crypto_price_model/2


Model(name: 'crypto_price_model', version: 2)