## <span style="color:#ff5f27">📝 Imports </span>

In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import os
import joblib
from features.price import plot_prediction_test
from functions import predict_id

## <span style="color:#ff5f27">🔮 Connect to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in to Hopsworks and keep a handle to the project; the model registry
# is obtained from this same handle further down.
project = hopsworks.login()

# Handle to the project's feature store, used by the cells below to look up
# feature groups, transformation functions and the feature view.
fs = project.get_feature_store()

In [None]:
# Look up version 1 of the two feature groups that feed the model.
# The lookups run in list order: `averages` first, then `prices`.
averages_fg, prices_fg = [
    fs.get_or_create_feature_group(name=group_name, version=1)
    for group_name in ("averages", "prices")
]

## <span style="color:#ff5f27">🔪 Feature Selection </span>

In [None]:
# Every column of the prices feature group ...
prices_query = prices_fg.select_all()
# ... joined with every column of the averages feature group except `date`.
averages_query = averages_fg.select_except(["date"])

query = prices_query.join(averages_query)

# Preview a handful (5) of the joined rows.
query.show(5)

## <span style="color:#ff5f27">🤖 Transformation Functions </span>

In [None]:
# Columns that get a transformation function attached.
feature_names = [
    "ma_7",
    "ma_14",
    "ma_30",
    "daily_rate_of_change",
    "volatility_30_day",
    "ema_02",
    "ema_05",
    "rsi",
]

# Fetch the transformation function registered under the name `min_max_scaler`.
min_max_scaler = fs.get_transformation_function(name="min_max_scaler")

# Every listed feature maps to that same scaler object.
transformation_functions = dict.fromkeys(feature_names, min_max_scaler)

# Show the mapping as the cell output.
transformation_functions

## <span style="color:#ff5f27">⚙️ Feature View Creation </span>

In [None]:
# Get (or create) version 1 of the `price_fv` feature view on top of the joined
# query. `price` is declared as the label column, and the per-feature
# transformation functions from the previous section are attached to the view.
feature_view = fs.get_or_create_feature_view(
    name="price_fv",
    version=1,
    labels=["price"],
    query=query,
    transformation_functions=transformation_functions,
)

## <span style="color:#ff5f27">🏋️ Training Dataset Creation </span>

In [None]:
# `datetime` is needed for the end of the test window below and is not
# imported anywhere earlier in the notebook, so bring it into scope here.
from datetime import datetime

# Create a train/test split of the feature view, stored as CSV.
# Training window: 2022-09-01 .. 2023-07-01; test window: 2023-07-01 .. today.
# NOTE(review): the train end and test start are the same date - confirm how
# the boundary day is assigned so it cannot land in both sets.
td_version, td_job = feature_view.create_train_test_split(
    description='Prices Dataset',
    train_start='2022-09-01',
    train_end='2023-07-01',
    test_start='2023-07-01',
    # Today's date rendered as YYYY-MM-DD.
    test_end=datetime.today().strftime("%Y-%m-%d"),
    data_format='csv',
    # Block until the materialisation job has finished, so the split can be
    # read back immediately afterwards.
    write_options={'wait_for_job': True},
)

In [None]:
# Read back the split identified by `td_version`: features (X_*) and labels
# (y_*) for the train and test windows.
X_train, X_test, y_train, y_test = feature_view.get_train_test_split(td_version)

In [None]:
# Peek at the first rows of the training features.
X_train.head()

In [None]:
# Peek at the first rows of the training labels.
y_train.head()

In [None]:
def _order_by_date(features, labels):
    """Sort one split chronologically and detach its `date` column.

    Returns a 3-tuple: the features sorted by `date` (with that column
    removed), the labels re-indexed to the sorted feature index, and a
    one-column DataFrame holding the removed dates.
    """
    ordered = features.sort_values("date")
    aligned = labels.reindex(ordered.index)
    # `pop` removes the column from `ordered` in place and returns it.
    dates = pd.DataFrame(ordered.pop("date"))
    return ordered, aligned, dates


X_train, y_train, train_date = _order_by_date(X_train, y_train)
X_test, y_test, test_date = _order_by_date(X_test, y_test)

## <span style="color:#ff5f27">🧬 Modeling </span>

In [None]:
# Initialize the XGBoost regressor with its default hyperparameters
model = xgb.XGBRegressor()

# Train the model on the training data
model.fit(X_train, y_train)

# Make predictions on the held-out test set
y_test_pred = model.predict(X_test)

# `mse` is printed as "MSE" here and later registered under the "MSE" metric
# key, so it has to hold the mean squared error itself. Do not pass
# `squared=False`: that returns the *root* MSE instead, and the parameter was
# deprecated in scikit-learn 1.4 and removed in 1.6.
mse = mean_squared_error(y_test, y_test_pred)
print(f"Mean Squared Error (MSE): {mse}")

# Also report the square root of that value (RMSE).
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse}")

In [None]:
# Identifier handed to both project helpers below.
# NOTE(review): presumably selects which id's series is predicted and
# plotted - confirm against `predict_id` / `plot_prediction_test`.
selected_id = 1

prediction_for_id = predict_id(selected_id, X_test, model)

plot_prediction_test(
    selected_id,
    X_train,
    X_test,
    y_train,
    y_test,
    train_date,
    test_date,
    prediction_for_id,
)

## <span style="color:#ff5f27">⚙️ Model Schema </span>

In [None]:
from hsml.schema import Schema
from hsml.model_schema import ModelSchema

# Describe the model's inputs from the training features.
# NOTE(review): `.values` hands Schema a bare array (column names are dropped),
# while the output schema below receives `y_train` as-is - confirm that this
# asymmetry is intended.
input_schema = Schema(X_train.values)
# Describe the model's outputs from the training labels.
output_schema = Schema(y_train)
# Bundle both; this object is passed as `model_schema` when the model is registered.
model_schema = ModelSchema(input_schema=input_schema, output_schema=output_schema)

# Show the schema as a dictionary in the cell output.
model_schema.to_dict()

## <span style="color:#ff5f27">📝 Register model </span>

In [None]:
# Local directory holding the artifacts that are later saved to the model registry.
model_dir = "price_model"

# `exist_ok=True` makes re-running the cell safe and avoids the
# check-then-create race of testing for the directory first.
os.makedirs(model_dir, exist_ok=True)

# Serialize the trained model into that directory.
joblib.dump(model, os.path.join(model_dir, 'xgboost_price_model.pkl'))

In [None]:
# Handle to the project's model registry.
mr = project.get_model_registry()

# One randomly sampled training row, stored with the model as its input example.
input_example = X_train.sample()

# Create the registry entry, carrying the evaluation metric and the schema.
price_model = mr.python.create_model(
    name="xgboost_price_model",
    description="Price Predictor",
    metrics={"MSE": mse},
    model_schema=model_schema,
    input_example=input_example,
)

# Save the entry together with the local artifacts directory.
price_model.save(model_dir)

---