In [None]:
Aurora Forecasting - Part 01: Feature Backfill

🗒️ This notebook is divided into the following sections:
Initialize Hopsworks connection.

Fetch historical Solar Wind & Kp index data using spacepy (OMNI dataset).

Fetch historical Cloud Cover for Stockholm, Luleå, and Kiruna using Open-Meteo.

Create and Insert data into Feature Groups in the Hopsworks Feature Store.

📝 Imports

In [None]:
# Standard library
import datetime

# Third-party
import hopsworks
import pandas as pd
# SpacePy exposes its OMNI access as the `spacepy.omni` submodule;
# there is no `spacepy.omindata`, so the previous import failed on a fresh kernel.
from spacepy import omni

# Project-local
from config import HopsworksSettings
import util

# Load project settings (Hopsworks project name, API key, target cities)
settings = HopsworksSettings()

# Login to Hopsworks; the API key is unwrapped only at the call site so it is
# never held in a plain notebook variable.
project = hopsworks.login(
    project=settings.HOPSWORKS_PROJECT,
    api_key_value=settings.HOPSWORKS_API_KEY.get_secret_value()
)
fs = project.get_feature_store()

🛰️ Step 1: Historical Solar Wind & Kp Data (The Label)

We use the OMNI dataset via spacepy to get hourly historical solar wind data. This includes the interplanetary magnetic field components and the proton density and speed used as features, together with the Kp index that we want to predict.

In [None]:
# Define time range for backfill (last 2 years)
end_date = datetime.datetime.now()
start_date = end_date - datetime.timedelta(days=730)

print(f"Fetching OMNI data from {start_date.date()} to {end_date.date()}...")

# get_omni() takes the time stamps to evaluate at (not a start/end pair) and
# interpolates the hourly database onto them, so request one tick per whole hour.
# NOTE(review): the ticks are naive local-time datetimes while OMNI is UTC-based —
# confirm whether the few-hour offset at the window edges matters.
first_tick = start_date.replace(minute=0, second=0, microsecond=0)
hourly_ticks = pd.date_range(start=first_tick, end=end_date, freq=pd.Timedelta(hours=1))
data = omni.get_omni(list(hourly_ticks.to_pydatetime()))

# Output column -> candidate OMNI variable names; the first one present wins.
# The first candidate is the name this notebook originally used, the second is
# the name listed in the SpacePy docs for the default Qin-Denton database.
# NOTE(review): that listing shows no Bx variable — confirm a source for
# 'bx_gsm'; until then the lookup below fails with a message listing what is available.
OMNI_SOURCE_KEYS = {
    'time_tag': ('UTC',),
    'bx_gsm': ('Bx',),
    'by_gsm': ('By', 'ByIMF'),
    'bz_gsm': ('Bz', 'BzIMF'),
    'density': ('n', 'dens'),
    'speed': ('v', 'velo'),
    'kp_index': ('Kp',),
}


def _pick_omni_variable(omni_data, column, candidates):
    """Return the first candidate variable found in omni_data.

    Raises:
        KeyError: if none of the candidates exists; the message lists the
            variables that are actually available.
    """
    for key in candidates:
        if key in omni_data:
            return omni_data[key]
    raise KeyError(
        f"No OMNI variable found for '{column}' (tried {list(candidates)}); "
        f"available variables: {sorted(omni_data.keys())}"
    )


# Create DataFrame
solar_wind_raw_df = pd.DataFrame({
    column: _pick_omni_variable(data, column, candidates)
    for column, candidates in OMNI_SOURCE_KEYS.items()
})

# Data cleaning: keep only valid Kp values (0-9) and drop rows with missing values.
# NOTE(review): only Kp is range-checked; fill values such as 99.9 / 999.9 in the
# other columns are not filtered here — confirm whether they can occur.
solar_wind_df = solar_wind_raw_df[solar_wind_raw_df['kp_index'] < 10].dropna()

# Cast time to string/ISO format for Hopsworks compatibility
solar_wind_df = solar_wind_df.assign(
    time_tag=pd.to_datetime(solar_wind_df['time_tag']).dt.strftime('%Y-%m-%d %H:%M:%S')
)

print(f"Successfully fetched {len(solar_wind_df)} historical records.")
solar_wind_df.head()

☁️ Step 2: Historical City Weather (The Visibility Constraint)

We fetch historical cloud cover for our three target cities. In the final system, the Aurora is only "Visible" if the cloud cover is low.

In [None]:
# Collect one cloud-cover frame per configured city, then stack them.
weather_backfill_list = []

for city, coords in settings.CITIES.items():
    print(f"Fetching historical cloud cover for {city}...")

    # Historical weather helper from util.py; same date window as the solar wind backfill.
    df_city = util.get_historical_weather(
        city=city,
        start_date=start_date.strftime("%Y-%m-%d"),
        end_date=end_date.strftime("%Y-%m-%d"),
        latitude=coords['lat'],
        longitude=coords['lon']
    )

    # Fallback if the util function uses different naming like cloud_cover_mean
    if 'cloud_cover' not in df_city.columns:
        df_city = df_city.rename(columns={'cloud_cover_mean': 'cloud_cover'})

    # Fail with a message naming the city and the returned columns instead of an
    # opaque KeyError from the column selection below.
    missing_columns = {'date', 'cloud_cover'} - set(df_city.columns)
    if missing_columns:
        raise KeyError(
            f"Weather data for {city} is missing columns {sorted(missing_columns)}; "
            f"got {list(df_city.columns)}"
        )

    # Standardize columns (assign() avoids mutating the frame returned by util)
    weather_backfill_list.append(
        df_city.assign(city=city)[['city', 'date', 'cloud_cover']]
    )

if not weather_backfill_list:
    raise ValueError("settings.CITIES is empty; no cities to backfill.")

# ignore_index gives a clean 0..n-1 index instead of repeating each city's labels.
weather_df = pd.concat(weather_backfill_list, ignore_index=True)
# NOTE(review): formatting to a calendar day assumes one row per city per day —
# confirm util.get_historical_weather returns daily data, otherwise the
# (city, date) primary key will contain duplicates.
weather_df['date'] = pd.to_datetime(weather_df['date']).dt.strftime('%Y-%m-%d')
weather_df.head()

🗄️ Step 3: Create Feature Groups and Insert Data

Now we register these datasets in the Hopsworks Feature Store.

In [None]:
# 1. Create (or fetch, if it already exists) the Solar Wind Feature Group,
#    keyed by the observation timestamp string.
solar_wind_fg = fs.get_or_create_feature_group(
    name="solar_wind_fg",
    version=1,
    primary_key=['time_tag'],
    description="Satellite measurements (Bz, speed, density) and Kp index labels",
    online_enabled=True,
    statistics_config={"enabled": True, "histograms": True, "correlations": True}
)

# 2. Create (or fetch) the City Weather Feature Group, keyed by city and date.
#    The description previously contained a mis-encoded "Luleå".
city_weather_fg = fs.get_or_create_feature_group(
    name="city_weather_fg",
    version=1,
    primary_key=['city', 'date'],
    description="Historical cloud cover for Stockholm, Luleå, and Kiruna",
    online_enabled=True,
    statistics_config={"enabled": True, "histograms": True}
)

# Insert data. Block until each ingestion job has finished so the completion
# message below is only printed once the data is actually in the feature store.
solar_wind_fg.insert(solar_wind_df, write_options={"wait_for_job": True})
city_weather_fg.insert(weather_df, write_options={"wait_for_job": True})

print("Backfill Complete! Data is now in the Hopsworks Feature Store.")