# <span style='color:#ff5f27'> Initialization </span>

### Hopsworks Settings

In [10]:
import sys
from pathlib import Path
import os

def is_google_colab() -> bool:
    """Return True when the active IPython shell's string form mentions ``google.colab``."""
    shell_repr = str(get_ipython())
    return "google.colab" in shell_repr

def clone_repository() -> None:
    """Clone the mlfs-book repository from GitHub and change into the checkout.

    Uses IPython shell/line magics, so it only works inside a notebook kernel.
    """
    !git clone https://github.com/featurestorebook/mlfs-book.git
    %cd mlfs-book

def install_dependencies() -> None:
    """Upgrade ``uv`` with pip, then install the requirements from ``pyproject.toml`` with uv.

    The uv call requests all extras and targets the system environment
    (``--all-extras --system``). It reads ``pyproject.toml`` from the current
    working directory. Uses IPython shell magics, so it only works inside a
    notebook kernel.
    """
    !pip install --upgrade uv
    !uv pip install --all-extras --system --requirement pyproject.toml

# Resolve the project root directory. On Google Colab the repository is cloned
# and its dependencies installed first; locally the current working directory
# is used, walking up if the notebook was started from a subdirectory.
if is_google_colab():
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("Google Colab environment")
else:
    root_dir = Path().absolute()
    # Strip a trailing `airquality` and then a trailing `notebooks` component so
    # that root_dir is the project root when the notebook was started from one
    # of these subdirectories.
    if root_dir.name == 'airquality':
        root_dir = root_dir.parent
    if root_dir.name == 'notebooks':
        root_dir = root_dir.parent
    root_dir = str(root_dir)
    print("Local environment")

# Add the root directory to the `PYTHONPATH` to use the `mlfs` Python module from the notebook.
if root_dir not in sys.path:
    sys.path.append(root_dir)
print(f"Added the following directory to the PYTHONPATH: {root_dir}")

# Load the settings, preferring the file <root_dir>/.env when it exists.
# The import happens here, after the sys.path update, so that a project-local
# `mlfs` package can be found.
from mlfs import config
env_file = f"{root_dir}/.env"
if os.path.exists(env_file):
    settings = config.HopsworksSettings(_env_file=env_file)
else:
    # Always define `settings`: later cells read `settings.HOPSWORKS_API_KEY`
    # and would otherwise fail with a NameError when no .env file is present.
    # NOTE(review): assumes HopsworksSettings can be constructed without an
    # env file (values taken from environment variables/defaults) — confirm.
    settings = config.HopsworksSettings()

Local environment
Added the following directory to the PYTHONPATH: c:\Users\nives\AllFolders\SML\sml-bike-sharing
HopsworksSettings initialized!


### Imports

In [11]:
from datetime import date, datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn.metrics import mean_squared_error, r2_score
import hopsworks
from mlfs import util
import json

import warnings
warnings.filterwarnings("ignore")

# <span style='color:#ff5f27'> Retrieve Data </span>

### Retrieve Metadata

In [12]:
# If the settings carry an API key (e.g. loaded from <root_dir>/.env), export it
# as the HOPSWORKS_API_KEY environment variable before logging in.
if settings.HOPSWORKS_API_KEY is not None:
    api_key = settings.HOPSWORKS_API_KEY.get_secret_value()
    os.environ['HOPSWORKS_API_KEY'] = api_key
project = hopsworks.login()
fs = project.get_feature_store()

# The location is read from the secret BIKES_LOCATION_JSON and parsed as JSON;
# it must provide the keys `country`, `city`, `latitude` and `longitude`.
secrets = hopsworks.get_secrets_api()
location_str = secrets.get_secret("BIKES_LOCATION_JSON").value
location = json.loads(location_str)

country = location['country']
city = location['city']
latitude = location['latitude']
longitude = location['longitude']
today = date.today()
yesterday = today - timedelta(days=1)

print(f"City: {city}")
print(f"Country: {country}")
print(f"Latitude: {latitude}")
print(f"Longitude: {longitude}")
print(f"Day: {today}")

2026-01-05 19:18:06,256 INFO: Closing external client and cleaning up certificates.
2026-01-05 19:18:06,259 INFO: Connection closed.
2026-01-05 19:18:06,261 INFO: Initializing external client
2026-01-05 19:18:06,261 INFO: Base URL: https://c.app.hopsworks.ai:443






2026-01-05 19:18:07,712 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1279175
City: Trento
Contry: Italy
Latitude: 46.07
Longitude: 11.12
Day: 2026-01-05


### Connect to Feature Groups

In [14]:
# Handles to version 1 of the bikes and weather feature groups.
bikes_fg = fs.get_feature_group(name='bikes_trento', version=1)
weather_fg = fs.get_feature_group(name='weather_trento', version=1)

### Create Feature Views

In [15]:
# Select `id`, `bikes` and `date` from the bikes feature group and join the
# weather feature group's features onto them using `city` as the join key.
selected_features = (
    bikes_fg
    .select(['id', 'bikes', 'date'])
    .join(weather_fg.select_features(), on=['city'])
)

2026-01-05 19:19:52,116 INFO: Using ['weather_code', 'apparent_temperature_mean', 'daylight_duration', 'precipitation_sum', 'wind_speed_10m_max'] from feature group `weather_trento` as features for the query. To include primary key and event time use `select_all`.


In [16]:
# Fetch version 1 of the `bikes_fv` feature view, creating it from the joined
# query (with `bikes` as the label column) if it does not exist yet.
feature_view = fs.get_or_create_feature_view(
    name='bikes_fv',
    version=1,
    query=selected_features,
    labels=['bikes'],
    description="Features selected for bike predictions.",
)

In [17]:
# Look the feature view up again by name/version and display its schema.
feature_view = fs.get_feature_view(
    name="bikes_fv",
    version=1,
)
feature_view.schema

[Training Dataset Feature('id', 'string', 0, False, id, 1911123, None),
 Training Dataset Feature('bikes', 'int', 1, True, bikes, 1911123, None),
 Training Dataset Feature('date', 'timestamp', 2, False, date, 1911123, None),
 Training Dataset Feature('weather_code', 'int', 3, False, weather_code, 1908081, None),
 Training Dataset Feature('apparent_temperature_mean', 'float', 4, False, apparent_temperature_mean, 1908081, None),
 Training Dataset Feature('daylight_duration', 'float', 5, False, daylight_duration, 1908081, None),
 Training Dataset Feature('precipitation_sum', 'float', 6, False, precipitation_sum, 1908081, None),
 Training Dataset Feature('wind_speed_10m_max', 'float', 7, False, wind_speed_10m_max, 1908081, None)]

# <span style='color:#ff5f27'> Create Train/Test Datasets </span>

In [21]:
# Start date of the test split as an ISO `YYYY-MM-DD` string ...
start_date_test_data = "2025-09-01"
# ... and the same date parsed into a datetime (midnight of that day).
test_start = datetime.strptime(
    start_date_test_data,
    "%Y-%m-%d",
)

In [24]:
# get_data = feature_view.get_batch_data()

In [25]:
# Split the feature view's data into train and test sets, passing `test_start`
# as the start of the test split.
# NOTE(review): the recorded run of this cell failed with a Binder Error in the
# Hopsworks Query Service (the event-time column `date` of `weather_trento` was
# rendered as a dict in the generated SQL) — verify the feature group's
# event_time definition and the client/backend versions before relying on it.
X_train, X_test, y_train, y_test = feature_view.train_test_split(test_start=test_start)

2026-01-05 19:33:42,202 ERROR: Binder Error: Referenced column "{'name': 'date', 'type': 'timestamp'}" not found in FROM clause!
Candidate bindings: "read_parquet.date"
LINE 1: ...dictor_fl.weather_trento_1" as (select "{'name': 'date', 'type': 'timestamp'}"...
                                                  ^. Detail: Python exception: Traceback (most recent call last):
  File "/usr/src/app/src/server.py", line 142, in wrapper
    result = func(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/src/app/src/server.py", line 166, in wrapper
    result = func(instance, *args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/src/app/src/server.py", line 196, in do_get
    return self._read_query(context, path, command)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/src/app/src/server.py", line 123, in wrapper
    return func(instance, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/src/app/src/server.py", line 1

FeatureStoreException: Could not read data using Hopsworks Query Service.