In [1]:
import sys
from pathlib import Path
import warnings
warnings.filterwarnings("ignore", module="IPython")

def is_google_colab() -> bool:
    """Report whether this notebook is executing inside Google Colab.

    The check looks for the substring ``google.colab`` in the string form of
    the active IPython shell object returned by ``get_ipython()``.

    Returns:
        True when the shell's string representation mentions ``google.colab``,
        False otherwise.
    """
    shell_description = str(get_ipython())
    return "google.colab" in shell_description

def clone_repository() -> None:
    """Clone the mlfs-book repository and make it the working directory.

    Uses IPython syntax (``!`` shell escape and the ``%cd`` magic), so this
    function only works when run inside an IPython/Jupyter kernel.
    """
    # Shell escape: clone the book's repository into the current directory.
    !git clone https://github.com/featurestorebook/mlfs-book.git
    # %cd changes the kernel's working directory to the freshly cloned repo.
    %cd mlfs-book

def install_dependencies() -> None:
    """Install the project's Python dependencies using ``uv``.

    Uses IPython ``!`` shell escapes, so it only works inside an
    IPython/Jupyter kernel. In this notebook it is called only from the
    Google Colab branch, right after the repository has been cloned.
    """
    # Install (or upgrade) the uv package manager itself.
    !pip install --upgrade uv
    # Install everything listed in pyproject.toml, including all extras,
    # into the system interpreter (--system) rather than a virtualenv.
    !uv pip install --all-extras --system --requirement pyproject.toml

# Work out the repository root for the current runtime.
if not is_google_colab():
    root_dir = Path().absolute()
    # If the notebook was started from <root>/notebooks/aurora (or from
    # <root>/notebooks), climb back up so root_dir is the repository root.
    if root_dir.name == "aurora":
        root_dir = root_dir.parent
    if root_dir.name == "notebooks":
        root_dir = root_dir.parent
    root_dir = str(root_dir)
    print("Local environment")
else:
    # On Colab the repository is cloned first; clone_repository() also
    # changes into it, so the current directory is the repository root.
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("Google Colab environment")

print(f"Root dir: {root_dir}")

# Make the repository root importable (needed for the `mlfs` package below).
if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

# Load the project settings from the file <root_dir>/.env.
# The import is placed here because `mlfs` is only importable once the
# repository root has been added to sys.path above.
from mlfs import config
settings = config.HopsworksSettings(_env_file=f"{root_dir}/.env")

Local environment
Root dir: C:\Users\lppap\Documents\master\scalable_ML\id2223-project
Added the following directory to the PYTHONPATH: C:\Users\lppap\Documents\master\scalable_ML\id2223-project
HopsworksSettings initialized!


## Imports & Constants

In [2]:
import datetime as datetime
import pandas as pd
from xgboost import XGBClassifier
import hopsworks
import json
from mlfs.aurora import util
import os
import joblib

In [34]:
# Feature groups holding the raw daily inputs (name + version, unpacked
# as keyword arguments into the feature store getters).
KP_FG = dict(name="geomagnetic_daily_final", version=1)
WEATHER_FG = dict(name="sweden_weather_daily_final", version=1)

# Feature view the inference batch is read from.
AURORA_FV = dict(name="aurora_fv_final", version=1)

# (name, version) of the feature group that stores the predictions.
PRED_FG = ("aurora_predictions", 1)

# Model-name prefix used at training time; the per-horizon models are
# registered as f"{MODEL_NAME}_h{h}".
MODEL_NAME = "aurora_xgboost"
# Largest forecast horizon: one model is run for each h in 1..MAX_HORIZON.
MAX_HORIZON = 5
# Threshold that defines the event.
# NOTE(review): presumably applied to the Ap index - confirm against training.
AP_THRESHOLD = 15

# Directory (relative to the notebook) where prediction files are written.
DATA_PATH = "../../data"
# Yesterday's date in UTC. A timezone-aware "now" is used because
# datetime.utcnow() is deprecated since Python 3.12.
RUN_DATE = datetime.datetime.now(datetime.timezone.utc).date() - datetime.timedelta(days=1)

## Hopsworks Login

In [4]:
# Log in to Hopsworks with the Python engine; `project` is the entry point
# for every other Hopsworks handle used in this notebook.
project = hopsworks.login(engine="python")

# Feature Store handle (feature groups and feature views).
fs = project.get_feature_store()

# Model Registry handle (trained model artifacts).
mr = project.get_model_registry()

2026-01-05 12:53:59,875 INFO: Initializing external client
2026-01-05 12:53:59,876 INFO: Base URL: https://c.app.hopsworks.ai:443






To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'

2026-01-05 12:54:01,702 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1279154


## 1. Fetch the Inference Data and Add it to the Feature Store

In [5]:
def fetch_kp():
    """Download the latest Kp data via ``util``.

    Returns:
        Whatever ``util.get_latest_complete_kp_from_nowcast()`` returns; the
        caller in this notebook reads a ``"date"`` column from it.

    Note:
        Writing the result to the Kp feature group is currently disabled
        (the insert call below is commented out), so nothing is stored.
    """
    # Feature store insert, currently disabled:
    #   kp_fg = fs.get_feature_group(**KP_FG)
    #   kp_fg.insert(df, wait=True)
    return util.get_latest_complete_kp_from_nowcast()

In [6]:
def fetch_weather(run_date, latitude: float = 62.0, longitude: float = 15.0,
                  lookback_days: int = 7):
    """Download historical weather for the days leading up to ``run_date``.

    Args:
        run_date: Last day of the window; anything ``pd.to_datetime`` accepts
            (string, ``date``, ``Timestamp``). Only the date part is used.
        latitude: Latitude of the location. Defaults to the value that was
            previously hard-coded (62.0).
        longitude: Longitude of the location. Defaults to the value that was
            previously hard-coded (15.0).
        lookback_days: Number of days before ``run_date`` at which the window
            starts. Defaults to 7.

    Returns:
        Whatever ``util.get_historical_weather_sweden`` returns for the window.

    Note:
        Writing the result to the weather feature group is currently disabled
        (the insert call below is commented out).
    """
    date_format = "%Y-%m-%d"

    window_end = pd.to_datetime(run_date).normalize()
    window_start = window_end - pd.Timedelta(days=lookback_days)

    # Both bounds are passed as "YYYY-MM-DD" strings. Previously start_date was
    # a datetime.date while end_date was a string, an inconsistent contract.
    df = util.get_historical_weather_sweden(
        start_date=window_start.strftime(date_format),
        end_date=window_end.strftime(date_format),
        latitude=latitude,
        longitude=longitude,
    )

    # Feature store insert, currently disabled:
    #   weather_fg = fs.get_feature_group(**WEATHER_FG)
    #   weather_fg.insert(df, wait=True)

    return df

In [7]:
def build_features(raw: dict) -> pd.DataFrame:
    """Build the feature frame from raw inputs and keep only its last row.

    Args:
        raw: Mapping with the keys ``"kp"`` and ``"weather"``; both values are
            forwarded to ``build_feature_dataframe``.

    Returns:
        A one-row DataFrame: the row with the greatest index value.

    NOTE(review): ``build_feature_dataframe`` is neither defined nor imported
    in the visible part of this notebook, and nothing visible calls this
    function - confirm it is still needed.
    """
    features = build_feature_dataframe(kp=raw["kp"], weather=raw["weather"])

    # Last available timestamp: order by index, then keep the final row only.
    ordered = features.sort_index()
    return ordered.tail(1)

In [8]:
def load_model(model_name: str):
    """Download the "champion" model registered under ``model_name``.

    Args:
        model_name: Name of the model in the Hopsworks model registry.

    Returns:
        The value returned by the model's ``download()`` call.

    NOTE(review): confirm that the installed Hopsworks client accepts the
    ``alias`` keyword on ``get_model``; ``run_models`` selects by ``version``
    instead. Nothing visible in this notebook calls this function.
    """
    registry = project.get_model_registry()
    champion = registry.get_model(name=model_name, alias="champion")
    return champion.download()

### Steps of the Main Loop

In [9]:
# Step 1
def fetch_latest_raw_data():
    """Fetch the newest Kp data and the matching weather window.

    Returns:
        The ``"date"`` value of the last Kp row, converted to a pandas
        Timestamp and normalized to midnight.
    """
    kp_frame = fetch_kp()

    newest_raw = kp_frame["date"].iloc[-1]
    latest_day = pd.to_datetime(newest_raw).normalize()
    print("last data from kp: ", latest_day)

    # The weather frame is fetched for the same day; its return value is
    # not used here.
    fetch_weather(latest_day)

    return latest_day

In [10]:
# Step 2

def obtain_data_fv(date, lookback_days: int = 7):
    """Read the most recent days of features from the aurora feature view.

    Args:
        date: End of the window; anything ``pd.to_datetime`` accepts. It is
            converted to UTC and normalized to midnight.
        lookback_days: Length of the window in days and the minimum number of
            rows required. Defaults to 7.

    Returns:
        The batch DataFrame returned by the feature view for
        ``[date - lookback_days, date]``.
        NOTE(review): whether ``end_time`` is inclusive depends on the
        Hopsworks client - confirm.

    Raises:
        RuntimeError: If fewer than ``lookback_days`` rows are returned.
    """
    window_end = pd.to_datetime(date, utc=True).normalize()
    window_start = window_end - pd.Timedelta(days=lookback_days)

    fv = fs.get_feature_view(**AURORA_FV)

    X = fv.get_batch_data(
        start_time=window_start,
        end_time=window_end,
    )

    print(X.columns)

    print(f"Number of days retrieved from FV: {len(X)}")

    # The check is "fewer than", so the message says "at least".
    if len(X) < lookback_days:
        raise RuntimeError(
            f"Expected at least {lookback_days} rows from Feature View, got {len(X)}"
        )

    return X

In [35]:
# Step 3
def run_models(X: pd.DataFrame, date, model_version: int = 4) -> pd.DataFrame:
    """Score ``X`` with every per-horizon model and persist the predictions.

    For each horizon ``h`` in ``1..MAX_HORIZON`` the model named
    ``f"{MODEL_NAME}_h{h}"`` is downloaded from the model registry, loaded
    from ``model.pkl`` and used to predict the positive-class probability.

    Args:
        X: Feature frame passed to ``predict_proba``. Only the prediction for
            its first row is kept, so it is expected to hold a single row.
        date: Reference date of the features; the prediction timestamp for
            horizon ``h`` is ``date + h`` days.
        model_version: Registry version to fetch for every horizon model.
            Defaults to 4, the value that was previously hard-coded.

    Returns:
        DataFrame with one row per horizon and the columns ``timestamp``,
        ``horizon_days`` and ``probability``. The same records are written
        to ``<DATA_PATH>/predictions.json``.
    """
    registry = project.get_model_registry()

    results = []

    for h in range(1, MAX_HORIZON + 1):
        model_name = f"{MODEL_NAME}_h{h}"
        print(f"Running {model_name}")

        model = registry.get_model(name=model_name, version=model_version)

        # The feature view attached to the model is not fetched: it was
        # never used and cost one remote round trip per horizon.
        model_dir = model.download()

        # SECURITY NOTE: joblib.load unpickles the artifact, which can run
        # arbitrary code - only load models from a trusted registry.
        clf = joblib.load(f"{model_dir}/model.pkl")

        # Column 1 of predict_proba is the positive class.
        y_proba = clf.predict_proba(X)[:, 1]

        results.append(
            {
                "timestamp": date + pd.Timedelta(days=h),
                "horizon_days": h,
                "probability": float(y_proba[0]),
            }
        )

    # Build the frame once and reuse it for both the file and the return value.
    predictions = pd.DataFrame(results)

    out_dir = Path(DATA_PATH)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / "predictions.json"

    predictions.to_json(out_file, orient="records", indent=2)

    print(f"Saved {len(results)} predictions to {out_file}")

    return predictions


In [19]:
# Step 4
def save_predictions(df: pd.DataFrame):
    """Insert a predictions frame into the ``aurora_predictions`` feature group.

    The feature group (version 1, keyed on ``timestamp`` and
    ``horizon_days``) is fetched, or created if it does not exist yet.

    Args:
        df: Predictions to insert; it needs at least the two primary-key
            columns named above.
    """
    feature_group_spec = {
        "name": "aurora_predictions",
        "version": 1,
        "primary_key": ["timestamp", "horizon_days"],
        "description": "Daily batch aurora predictions",
    }
    predictions_fg = fs.get_or_create_feature_group(**feature_group_spec)
    predictions_fg.insert(df)

## Main Loop

In [36]:
def run_daily_inference():
    """Run the daily batch inference pipeline end to end.

    Steps:
        1. Fetch the latest raw data and get its date.
        2. Read the recent feature window from the feature view.
        3. Apply ``util.geomagnetic_feature_engineering`` and keep the row
           with the latest ``date``.
        4. Score that row with every horizon model and print the result.

    Raises:
        RuntimeError: If feature engineering leaves no rows to score.
    """
    date = fetch_latest_raw_data()

    print("Date used one the fetch data raw", date)
    X = obtain_data_fv(date)

    X_engi = util.geomagnetic_feature_engineering(X)

    # Keep only the most recent day; the models score a single row.
    latest_row = X_engi.sort_values("date").tail(1).reset_index(drop=True)
    if latest_row.empty:
        # Without this guard an empty frame surfaces as an opaque KeyError
        # from the .loc lookup below.
        raise RuntimeError("Feature engineering returned no rows; cannot run inference")

    # Predictions are anchored to the date of the features actually used,
    # which can differ from the raw-data date fetched above.
    feature_date = latest_row.loc[0, "date"]

    # `date` is not a model input; drop it without mutating in place.
    model_input = latest_row.drop(columns=["date"])

    predictions = run_models(model_input, feature_date)

    print(predictions)
    # Writing to the feature store is currently disabled:
    #   save_predictions(predictions)

# Execute the full daily inference pipeline once when this cell is run.
run_daily_inference()



last data from kp:  2026-01-04 00:00:00
Date used one the fetch data raw 2026-01-04 00:00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.35s) 
Index(['date', 'kp1', 'kp2', 'kp3', 'kp4', 'kp5', 'kp6', 'kp7', 'kp8', 'ap1',
       'ap2', 'ap3', 'ap4', 'ap5', 'ap6', 'ap7', 'ap8', 'cloud_cover_mean',
       'precipitation_sum', 'sunshine_duration'],
      dtype='object')
Number of days retrieved from FV: 7
Running aurora_xgboost_h1
2026-01-05 13:15:43,064 INFO: Initializing for batch retrieval of feature vectors


Downloading: 0.000%|          | 0/454328 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/450484 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/33761 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... 

Downloading: 0.000%|          | 0/11117 elapsed<00:00 remaining<?

Downloading model artifact (1 dirs, 4 files)... 

Downloading: 0.000%|          | 0/32832 elapsed<00:00 remaining<?

Running aurora_xgboost_h2t (1 dirs, 5 files)... DONE
2026-01-05 13:15:51,489 INFO: Initializing for batch retrieval of feature vectors


Downloading: 0.000%|          | 0/455823 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/455992 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/32260 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... 

Downloading: 0.000%|          | 0/11326 elapsed<00:00 remaining<?

Downloading model artifact (1 dirs, 4 files)... 

Downloading: 0.000%|          | 0/35070 elapsed<00:00 remaining<?

Running aurora_xgboost_h3t (1 dirs, 5 files)... DONE
2026-01-05 13:15:59,407 INFO: Initializing for batch retrieval of feature vectors


Downloading: 0.000%|          | 0/456843 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/455652 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/35533 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... 

Downloading: 0.000%|          | 0/11358 elapsed<00:00 remaining<?

Downloading model artifact (1 dirs, 4 files)... 

Downloading: 0.000%|          | 0/35190 elapsed<00:00 remaining<?

Running aurora_xgboost_h4t (1 dirs, 5 files)... DONE
2026-01-05 13:16:07,922 INFO: Initializing for batch retrieval of feature vectors


Downloading: 0.000%|          | 0/462691 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/456672 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/34379 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... 

Downloading: 0.000%|          | 0/11185 elapsed<00:00 remaining<?

Downloading model artifact (1 dirs, 4 files)... 

Downloading: 0.000%|          | 0/34964 elapsed<00:00 remaining<?

Running aurora_xgboost_h5t (1 dirs, 5 files)... DONE
2026-01-05 13:16:15,504 INFO: Initializing for batch retrieval of feature vectors


Downloading: 0.000%|          | 0/456707 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/462656 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/33100 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 3 files)... 

Downloading: 0.000%|          | 0/11387 elapsed<00:00 remaining<?

Downloading model artifact (1 dirs, 4 files)... 

Downloading: 0.000%|          | 0/35011 elapsed<00:00 remaining<?

Saved 5 predictions to ..\..\data\predictions.jsonNE
                  timestamp  horizon_days  probability
0 2026-01-04 00:00:00+00:00             1     0.282169
1 2026-01-05 00:00:00+00:00             2     0.160848
2 2026-01-06 00:00:00+00:00             3     0.081385
3 2026-01-07 00:00:00+00:00             4     0.219318
4 2026-01-08 00:00:00+00:00             5     0.298334
