In [1]:
import sys
from pathlib import Path
import warnings
warnings.filterwarnings("ignore", module="IPython")

def is_google_colab() -> bool:
    if "google.colab" in str(get_ipython()):
        return True
    return False

def clone_repository() -> None:
    !git clone https://github.com/featurestorebook/mlfs-book.git
    %cd mlfs-book

def install_dependencies() -> None:
    !pip install --upgrade uv
    !uv pip install --all-extras --system --requirement pyproject.toml

if is_google_colab():
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("Google Colab environment")
else:
    root_dir = Path().absolute()
    # Strip ~/notebooks/ccfraud from PYTHON_PATH if notebook started in one of these subdirectories
    if root_dir.parts[-1:] == ('aurora',):
        root_dir = Path(*root_dir.parts[:-1])
    if root_dir.parts[-1:] == ('notebooks',):
        root_dir = Path(*root_dir.parts[:-1])
    root_dir = str(root_dir) 
    print("Local environment")

print(f"Root dir: {root_dir}")

# Add the root directory to the `PYTHONPATH` 
if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

# Set the environment variables from the file <root_dir>/.env
from mlfs import config
settings = config.HopsworksSettings(_env_file=f"{root_dir}/.env")

Local environment
Root dir: /Users/appbites/Desktop/id2223-project
Added the following directory to the PYTHONPATH: /Users/appbites/Desktop/id2223-project
HopsworksSettings initialized!


In [2]:
import datetime
import time
import requests
import pandas as pd
import hopsworks
from mlfs.aurora import util
from mlfs import config
import json
import os
import warnings
warnings.filterwarnings("ignore")

In [3]:
project = hopsworks.login(engine="python")
fs = project.get_feature_store()

today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)


2025-12-30 22:00:49,499 INFO: Initializing external client
2025-12-30 22:00:49,499 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-12-30 22:00:51,813 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1289364


## Get references to the feature groups

In [4]:
# Retrieve feature groups
geomagnetic_fg = fs.get_feature_group(
    name="geomagnetic_daily",
    version=1,
)

weather_fg = fs.get_feature_group(
    name="sweden_weather_daily",
    version=1,
)

## Retrieve latest geomagnetic data

In [5]:
# Fetch latest complete geomagnetic day from GFZ nowcast
kp_today_df = util.get_latest_complete_kp_from_nowcast()

kp_today_df


Unnamed: 0,date,kp1,kp2,kp3,kp4,kp5,kp6,kp7,kp8,ap1,ap2,ap3,ap4,ap5,ap6,ap7,ap8,ap
0,2025-12-29,2.667,2.667,2.333,2.333,1.667,1.0,0.333,1.667,12.0,12.0,9.0,9.0,6.0,4.0,2.0,6.0,8.0


In [6]:
kp_today_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 18 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1 non-null      datetime64[ns]
 1   kp1     1 non-null      float32       
 2   kp2     1 non-null      float32       
 3   kp3     1 non-null      float32       
 4   kp4     1 non-null      float32       
 5   kp5     1 non-null      float32       
 6   kp6     1 non-null      float32       
 7   kp7     1 non-null      float32       
 8   kp8     1 non-null      float32       
 9   ap1     1 non-null      float32       
 10  ap2     1 non-null      float32       
 11  ap3     1 non-null      float32       
 12  ap4     1 non-null      float32       
 13  ap5     1 non-null      float32       
 14  ap6     1 non-null      float32       
 15  ap7     1 non-null      float32       
 16  ap8     1 non-null      float32       
 17  ap      1 non-null      float32       
dtypes: datetime64[

In [7]:
latest_date = kp_today_df["date"].iloc[0].date()

latitude = 62.0
longitude = 15.0

weather_today_df = util.get_historical_weather_sweden(
    start_date=latest_date.isoformat(),
    end_date=latest_date.isoformat(),
    latitude=latitude,
    longitude=longitude,
)

weather_today_df


Unnamed: 0,date,cloud_cover_mean,precipitation_sum,sunshine_duration
0,2025-12-29,55.875,1.3,3553.68457


## Insert the data to the feature store

In [8]:
# Insert latest geomagnetic features
geomagnetic_fg.insert(
    kp_today_df,
    wait=True
)

# Insert corresponding weather features
weather_fg.insert(
    weather_today_df,
    wait=True
)


2025-12-30 22:01:07,284 INFO: 	17 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1289364/fs/1278019/fg/1876508


Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:02 | Remaining Time: 00:00


Launching job: geomagnetic_daily_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1289364/jobs/named/geomagnetic_daily_1_offline_fg_materialization/executions
2025-12-30 22:01:32,766 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2025-12-30 22:01:35,988 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-12-30 22:03:00,154 INFO: Waiting for execution to finish. Current state: SUCCEEDING. Final status: UNDEFINED
2025-12-30 22:03:03,379 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-12-30 22:03:03,580 INFO: Waiting for log aggregation to finish.
2025-12-30 22:03:12,320 INFO: Execution finished successfully.
2025-12-30 22:03:12,580 INFO: 	3 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hop

Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:03 | Remaining Time: 00:00


Launching job: sweden_weather_daily_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1289364/jobs/named/sweden_weather_daily_1_offline_fg_materialization/executions
2025-12-30 22:03:34,605 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-12-30 22:03:41,070 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-12-30 22:05:02,861 INFO: Waiting for execution to finish. Current state: SUCCEEDING. Final status: SUCCEEDED
2025-12-30 22:05:03,056 INFO: Waiting for log aggregation to finish.
2025-12-30 22:05:11,819 INFO: Execution finished successfully.


(Job('sweden_weather_daily_1_offline_fg_materialization', 'SPARK'),
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_values_to_be_between",
         "kwargs": {
           "column": "precipitation_sum",
           "min_value": 0.0
         },
         "meta": {
           "expectationId": 799791
         }
       },
       "result": {
         "element_count": 1,
         "missing_count": 0,
         "missing_percent": 0.0,
         "unexpected_count": 0,
         "unexpected_percent": 0.0,
         "unexpected_percent_total": 0.0,
         "unexpected_percent_nonmissing": 0.0,
         "partial_unexpected_list": []
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2025-12-30T09:03:12.000579Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
       }
 