In [1]:
import sys
from pathlib import Path
import warnings
warnings.filterwarnings("ignore", module="IPython")

def is_google_colab() -> bool:
    """Report whether the notebook is running inside Google Colab.

    The check looks for the substring ``google.colab`` in the string form of
    the object returned by ``get_ipython()``.

    Returns:
        True when the substring is present, False otherwise.
    """
    shell_repr = str(get_ipython())
    return "google.colab" in shell_repr

def clone_repository() -> None:
    """Clone the mlfs-book repository and change into the cloned directory.

    Uses IPython-only syntax (``!`` shell escape and the ``%cd`` magic), so it
    works only inside an IPython/Jupyter kernel. In the visible setup code it is
    called only when ``is_google_colab()`` returns True.
    """
    !git clone https://github.com/featurestorebook/mlfs-book.git
    %cd mlfs-book

def install_dependencies() -> None:
    """Install the project's dependencies with ``uv``.

    First upgrades ``uv`` via pip, then runs ``uv pip install`` with
    ``--all-extras`` and ``--system`` against ``pyproject.toml``, which is
    resolved relative to the current working directory. Relies on IPython
    ``!`` shell escapes, so it works only inside an IPython/Jupyter kernel.
    """
    !pip install --upgrade uv
    !uv pip install --all-extras --system --requirement pyproject.toml

# Work out the project root directory (as a string) for the current environment.
if is_google_colab():
    # clone_repository() changes into the cloned repository, so the current
    # working directory is used as the root afterwards.
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("Google Colab environment")
else:
    root_dir = Path().absolute()
    # If the notebook was started from a directory named `aurora` and/or
    # `notebooks` (e.g. <root>/notebooks/aurora or <root>/notebooks), drop those
    # trailing components so root_dir points at the directory above them.
    if root_dir.name == "aurora":
        root_dir = root_dir.parent
    if root_dir.name == "notebooks":
        root_dir = root_dir.parent
    root_dir = str(root_dir)
    print("Local environment")

print(f"Root dir: {root_dir}")

# Add the root directory to sys.path (once) so the `mlfs` package below can be imported
if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

# Build the settings object from the file <root_dir>/.env
from mlfs import config
settings = config.HopsworksSettings(_env_file=f"{root_dir}/.env")

Local environment
Root dir: /Users/appbites/Desktop/id2223-project
Added the following directory to the PYTHONPATH: /Users/appbites/Desktop/id2223-project
HopsworksSettings initialized!


In [2]:
import datetime
import time
import requests
import pandas as pd
import hopsworks
from mlfs.aurora import util
from mlfs import config
import json
import os
import warnings
# Suppress every warning from here on (broader than the IPython-only filter
# installed in the first cell). NOTE(review): this also hides pandas/hopsworks
# deprecation and copy warnings — confirm that is intended.
warnings.filterwarnings("ignore")

In [3]:
# Log in to Hopsworks with the Python engine and get a handle to the project's feature store.
project = hopsworks.login(engine="python")
fs = project.get_feature_store()

# Reference dates for this run, taken from the local system clock.
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)


2026-01-05 20:25:13,624 INFO: Initializing external client
2026-01-05 20:25:13,625 INFO: Base URL: https://c.app.hopsworks.ai:443






2026-01-05 20:25:15,407 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1289364


## Get references to the feature groups

In [4]:
# Look up version 1 of the geomagnetic and weather feature groups by name.
geomagnetic_fg = fs.get_feature_group(name="geomagnetic_daily_final", version=1)
weather_fg = fs.get_feature_group(name="sweden_weather_daily_final", version=1)

## Retrieve latest geomagnetic data

In [5]:
# Fetch the latest complete geomagnetic (Kp/ap) data from the GFZ nowcast.
# NOTE(review): despite "today" in the variable name, the recorded output below
# contains several daily rows rather than a single day — confirm what the
# helper is expected to return.
kp_today_df = util.get_latest_complete_kp_from_nowcast()

kp_today_df


Unnamed: 0,date,kp1,kp2,kp3,kp4,kp5,kp6,kp7,kp8,ap1,ap2,ap3,ap4,ap5,ap6,ap7,ap8,ap
23,2025-12-30,1.333,1.333,1.667,1.333,2.667,2.667,1.667,1.0,5.0,5.0,6.0,5.0,12.0,12.0,6.0,4.0,7.0
24,2025-12-31,2.0,1.333,2.0,1.667,3.333,1.0,1.0,1.333,7.0,5.0,7.0,6.0,18.0,4.0,4.0,5.0,7.0
25,2026-01-01,2.0,3.0,2.667,2.333,3.333,2.333,2.333,2.667,7.0,15.0,12.0,9.0,18.0,9.0,9.0,12.0,11.0
26,2026-01-02,3.0,4.0,3.333,2.333,3.667,3.333,5.0,4.333,15.0,27.0,18.0,9.0,22.0,18.0,48.0,32.0,24.0
27,2026-01-03,3.0,3.0,2.0,2.0,3.0,2.667,2.333,1.0,15.0,15.0,7.0,7.0,15.0,12.0,9.0,4.0,10.0
28,2026-01-04,0.333,0.0,0.0,0.667,0.333,1.0,1.667,3.0,2.0,0.0,0.0,3.0,2.0,4.0,6.0,15.0,4.0


In [6]:
# Inspect column dtypes and non-null counts of the fetched frame before ingestion.
kp_today_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 23 to 28
Data columns (total 18 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    6 non-null      datetime64[ns]
 1   kp1     6 non-null      float32       
 2   kp2     6 non-null      float32       
 3   kp3     6 non-null      float32       
 4   kp4     6 non-null      float32       
 5   kp5     6 non-null      float32       
 6   kp6     6 non-null      float32       
 7   kp7     6 non-null      float32       
 8   kp8     6 non-null      float32       
 9   ap1     6 non-null      float32       
 10  ap2     6 non-null      float32       
 11  ap3     6 non-null      float32       
 12  ap4     6 non-null      float32       
 13  ap5     6 non-null      float32       
 14  ap6     6 non-null      float32       
 15  ap7     6 non-null      float32       
 16  ap8     6 non-null      float32       
 17  ap      6 non-null      float32       
dtypes: datetime64[ns]

In [11]:
# Ensure the date column is datetime-typed. A new frame is built with assign()
# instead of writing into the existing one, which may be a slice of a larger
# frame (its index does not start at 0); this keeps the cell idempotent.
kp_today_df = kp_today_df.assign(date=pd.to_datetime(kp_today_df["date"]))

if kp_today_df.empty:
    # With no rows, max() yields NaT and strftime would fail with an opaque error.
    raise ValueError("No geomagnetic rows were returned; cannot determine the latest date.")

# Most recent date present in the geomagnetic data, formatted as YYYY-MM-DD
# for the weather helper's start/end arguments.
latest_date = kp_today_df["date"].max()
latest_date_str = latest_date.strftime("%Y-%m-%d")

# Coordinates forwarded to the weather helper.
# NOTE(review): presumably a representative point in Sweden — confirm.
latitude = 62.0
longitude = 15.0

# NOTE(review): weather is requested only for the latest geomagnetic date,
# while kp_today_df can hold several days — confirm that earlier days are
# already present in the weather feature group.
weather_today_df = util.get_historical_weather_sweden(
    start_date=latest_date_str,
    end_date=latest_date_str,
    latitude=latitude,
    longitude=longitude,
)

weather_today_df


Unnamed: 0,date,cloud_cover_mean,precipitation_sum,sunshine_duration
0,2026-01-04,89.75,0.6,585.581116


## Insert the data into the feature store

In [12]:
# Write the fetched geomagnetic rows to their feature group.
geomagnetic_fg.insert(kp_today_df, wait=True)

# Write the weather rows for the same latest date. This is the cell's last
# expression, so its return value is what the notebook displays.
weather_fg.insert(weather_today_df, wait=True)


2026-01-05 20:31:30,292 INFO: 	17 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1289364/fs/1278019/fg/1893812


Uploading Dataframe: 100.00% |██████████| Rows 6/6 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: geomagnetic_daily_final_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1289364/jobs/named/geomagnetic_daily_final_1_offline_fg_materialization/executions
2026-01-05 20:32:20,978 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2026-01-05 20:32:24,193 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2026-01-05 20:32:27,382 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2026-01-05 20:34:48,816 INFO: Waiting for log aggregation to finish.
2026-01-05 20:35:14,419 INFO: Execution finished successfully.
2026-01-05 20:35:14,668 INFO: 	3 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1289364/fs/1278019/fg/1893813


Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: sweden_weather_daily_final_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1289364/jobs/named/sweden_weather_daily_final_1_offline_fg_materialization/executions
2026-01-05 20:35:31,654 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2026-01-05 20:35:34,871 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2026-01-05 20:35:41,295 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2026-01-05 20:37:20,522 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2026-01-05 20:37:20,704 INFO: Waiting for log aggregation to finish.
2026-01-05 20:37:43,008 INFO: Execution finished successfully.


(Job('sweden_weather_daily_final_1_offline_fg_materialization', 'SPARK'),
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_values_to_be_between",
         "kwargs": {
           "column": "sunshine_duration",
           "min_value": 0.0
         },
         "meta": {
           "expectationId": 804953
         }
       },
       "result": {
         "element_count": 1,
         "missing_count": 0,
         "missing_percent": 0.0,
         "unexpected_count": 0,
         "unexpected_percent": 0.0,
         "unexpected_percent_total": 0.0,
         "unexpected_percent_nonmissing": 0.0,
         "partial_unexpected_list": []
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2026-01-05T07:35:14.000667Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
    