# <span style='color:#ff5f27'> Initialization </span>

### Hopsworks Settings

In [None]:
import sys
from pathlib import Path
import os

def is_google_colab() -> bool:
    """Return True when the code is running inside a Google Colab kernel.

    Detection checks whether the string form of the shell object returned
    by ``get_ipython()`` contains ``"google.colab"``.

    Returns:
        True for a Colab kernel, False otherwise. When ``get_ipython`` is
        not defined at all (i.e. outside an IPython session) the function
        returns False instead of raising ``NameError``.
    """
    try:
        shell = get_ipython()
    except NameError:
        # No IPython shell available, so this cannot be Colab.
        return False
    return "google.colab" in str(shell)

def clone_repository() -> None:
    """Clone the mlfs-book repository and change into the checkout.

    Both statements are IPython shell escapes/magics (``!git``, ``%cd``),
    so this function only works when executed by an IPython/Jupyter kernel.
    After it returns, the working directory is ``mlfs-book``.
    """
    !git clone https://github.com/featurestorebook/mlfs-book.git
    %cd mlfs-book

def install_dependencies() -> None:
    """Install the project's dependencies using ``uv``.

    First installs/upgrades ``uv`` through pip, then asks ``uv`` to install
    the requirements from ``pyproject.toml`` (with all extras) into the
    system environment. The ``pyproject.toml`` path is relative, so it is
    resolved against the current working directory. Relies on IPython ``!``
    shell escapes.
    """
    !pip install --upgrade uv
    !uv pip install --all-extras --system --requirement pyproject.toml


if is_google_colab():
    # On Colab the repository has to be fetched first. clone_repository()
    # also changes into the checkout, so the current directory is used as
    # the root afterwards.
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("Google Colab environment")
else:
    root_dir = Path().absolute()
    # Strip a trailing 'airquality' and then a trailing 'notebooks' component
    # so that root_dir is the same whether the notebook was started in
    # <root>, <root>/notebooks or <root>/notebooks/airquality.
    if root_dir.name == 'airquality':
        root_dir = root_dir.parent
    if root_dir.name == 'notebooks':
        root_dir = root_dir.parent
    root_dir = str(root_dir)
    print("Local environment")

# Add the root directory to `sys.path` so that the `mlfs` Python package can
# be imported from the notebook.
if root_dir not in sys.path:
    sys.path.append(root_dir)
print(f"Added the following directory to the PYTHONPATH: {root_dir}")

# Read the API keys and configuration variables from the file <root_dir>/.env
from mlfs import config
if os.path.exists(f"{root_dir}/.env"):
    settings = config.HopsworksSettings(_env_file=f"{root_dir}/.env")
else:
    # Always bind `settings`: later cells read settings.HOPSWORKS_API_KEY and
    # would otherwise fail with NameError when no .env file is present.
    # NOTE(review): assumes HopsworksSettings can be constructed without an
    # explicit env file (e.g. from environment variables) - confirm against
    # mlfs.config.
    settings = config.HopsworksSettings()

### Imports

In [None]:
import datetime
import pandas as pd
from xgboost import XGBRegressor
import hopsworks
import json
from mlfs import util

# <span style='color:#ff5f27'> Retrieve Data </span>

### Retrieve Metadata

In [None]:
# Reference timestamps for this run. The zero-length offset leaves "now"
# unchanged (presumably a placeholder for shifting the reference date).
today = datetime.datetime.now() - datetime.timedelta(days=0)
tomorrow = today + datetime.timedelta(days=1)
today

In [None]:
# If the settings (loaded from <root_dir>/.env) carry an API key, export it as
# the HOPSWORKS_API_KEY environment variable before logging in.
if settings.HOPSWORKS_API_KEY is not None:
    api_key = settings.HOPSWORKS_API_KEY.get_secret_value()
    os.environ['HOPSWORKS_API_KEY'] = api_key
project = hopsworks.login()
fs = project.get_feature_store()

# The location is stored as a JSON string in the BIKES_LOCATION_JSON secret.
secrets = hopsworks.get_secrets_api()
location_str = secrets.get_secret("BIKES_LOCATION_JSON").value
location = json.loads(location_str)

country = location['country']
city = location['city']
latitude = location['latitude']
longitude = location['longitude']

# From here on `today` is a plain date; it replaces any datetime value bound
# to the same name by an earlier cell.
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)

print(f"City: {city}")
print(f"Country: {country}")
print(f"Latitude: {latitude}")
print(f"Longitude: {longitude}")
print(f"Day: {today}")

# <span style='color:#ff5f27'> Retrieve the Model </span>

### Download the Model From Model Registry

In [None]:
# Look up version 1 of the bikes model in the project's model registry.
mr = project.get_model_registry()
retrieved_model = mr.get_model(name="trento_bikes_xgboost_model", version=1)

# Feature view associated with the retrieved model.
fv = retrieved_model.get_feature_view()

# Fetch the saved model artifacts; the returned value is the local directory
# they were downloaded to.
saved_model_dir = retrieved_model.download()

### Loading the XGBoost Regressor Model

In [None]:
# Create a regressor with categorical support enabled, then load the trained
# model from the JSON file inside the downloaded artifact directory.
retrieved_xgboost_model = XGBRegressor(enable_categorical=True)
retrieved_xgboost_model.load_model(f"{saved_model_dir}/trento_bikes_xgboost_model.json")

# Show the loaded regressor as the cell output
retrieved_xgboost_model

# <span style='color:#ff5f27'> Retrieve Data </span>

### Get Weather Forecast

In [None]:
weather_fg = fs.get_feature_group(name='weather_trento', version=1)

# Keep rows dated strictly after yesterday (rather than >= today) to avoid
# hourly discrepancies, then order them by date with a fresh 0..n-1 index.
weather_data = (
    weather_fg
    .filter(weather_fg.date > today - datetime.timedelta(days=1))
    .read()
    .sort_values(by=['date'], ignore_index=True)
)

# Number the rows 1..n in date order.
weather_data['days_before_forecast'] = range(1, len(weather_data) + 1)
weather_data.head(10)

### Get Today's Bikes

In [None]:
bikes_fg = fs.get_feature_group(name='bikes_trento', version=1)

# Read the bike rows dated strictly after yesterday.
bikes_today = bikes_fg.filter(
    bikes_fg.date > (today - datetime.timedelta(days=1))
).read()
bikes_today.head()

In [None]:
# Print a summary of the retrieved bikes data for a quick sanity check.
bikes_today.info()

# <span style='color:#ff5f27'> Making the Predictions </span>

### Prepare the Data to Feed to the Model

In [None]:
# Newest rows first, in case more rows than expected were retrieved.
bikes_today = bikes_today.sort_values(by=['date'], ascending=False)

# Pair each bike row with the weather rows of the same city, drop the join
# key, and mark the two categorical columns as such.
joint_df = (
    bikes_today[['id', 'city']]
    .merge(
        weather_data[[
            'date',
            'weather_code',
            'apparent_temperature_mean',
            'daylight_duration',
            'precipitation_sum',
            'wind_speed_10m_max',
            'city',
            'days_before_forecast',
        ]],
        on='city',
        how='left',
    )
    .drop(columns=['city'])
    .astype({'id': 'category', 'weather_code': 'category'})
)

joint_df.head(10)


In [None]:
# Print a summary of the joined frame before it is passed to the model.
joint_df.info()

### Make the Predictions

In [None]:
# Run the loaded regressor on the selected feature columns and store its
# output in a new 'predicted_bikes' column.
joint_df['predicted_bikes'] = retrieved_xgboost_model.predict(
    joint_df[[
        'id',
        'weather_code',
        'apparent_temperature_mean',
        'daylight_duration',
        'precipitation_sum',
        'wind_speed_10m_max',
    ]]
)
joint_df.head(10)

In [None]:
# Print the frame summary again, now including the 'predicted_bikes' column.
joint_df.info()