Aurora Forecasting - Part 04: Actionable Batch Inference

In [None]:
🗒️ This notebook is divided into the following sections:
Load the trained Kp prediction model from the Hopsworks Model Registry.

Retrieve the most recent solar wind features.

Predict the current Kp index.

Retrieve the latest cloud cover for Stockholm, Luleå, and Kiruna.

Apply city-specific logic to determine if an Aurora is truly visible.

In [None]:
📝 Imports and Setup

In [None]:
import hopsworks
import joblib
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from config import HopsworksSettings
import util
import os

# Project configuration object; later cells read the project name, API key,
# model name/version and the per-city settings from it.
settings = HopsworksSettings()

# Authenticate against Hopsworks. The secret is unwrapped only at the call site
# so the plain-text key is never bound to a notebook variable.
project = hopsworks.login(
    api_key_value=settings.HOPSWORKS_API_KEY.get_secret_value(),
    project=settings.HOPSWORKS_PROJECT,
)

# Handles reused by the cells below.
mr = project.get_model_registry()
fs = project.get_feature_store()

📂 Step 1: Load the Registered Model

We retrieve the configured version (`settings.MODEL_VERSION`) of our Random Forest model from the Model Registry to make our geomagnetic predictions.

In [None]:
# Look up the registry entry pinned in the settings, download its artifacts to a
# local directory, and deserialize the estimator stored there as "model.pkl".
model_obj = mr.get_model(
    name=settings.MODEL_NAME,
    version=settings.MODEL_VERSION,
)
model_dir = model_obj.download()
model_path = os.path.join(model_dir, "model.pkl")
model = joblib.load(model_path)
print(f"Successfully loaded {settings.MODEL_NAME} version {model_obj.version}")

🛰️ Step 2: Retrieve Latest Features and Predict Kp

We read the most recent solar wind records (minute-by-minute NOAA data) from the `solar_wind_fg` feature group. In a production environment, you would typically pull the latest data from the Online Feature Store.

In [None]:
# Handle to version 1 of the solar wind feature group.
solar_wind_fg = fs.get_feature_group(name="solar_wind_fg", version=1)

# Read the whole group, order it by 'time', and keep the 24 newest rows; this
# window drives both the prediction and the trend panel of the dashboard.
recent_data = (
    solar_wind_fg
    .read()
    .sort_values(by='time')
    .tail(24)
)

In [None]:
# Prepare the model input from the recent window.
# Besides 'time' and the target 'kp_index', also drop 'predicted_kp': the
# prediction cell adds that column to recent_data, so without this a re-run of
# this cell would feed the model its own previous output as a feature.
features_df = recent_data.drop(
    columns=['time', 'kp_index', 'predicted_kp'],
    errors='ignore',
)

# If the estimator recorded its training column names, select exactly those
# columns in the training order so the input matches what the model was fit on.
expected_features = getattr(model, "feature_names_in_", None)
if expected_features is not None:
    missing_features = [col for col in expected_features if col not in features_df.columns]
    if missing_features:
        raise ValueError(
            f"Feature group is missing columns the model was trained on: {missing_features}"
        )
    features_df = features_df[list(expected_features)]

In [None]:
# 4. Predict Kp Index for the whole window
# One prediction per row of the window; stored on recent_data because the
# dashboard cell plots recent_data['predicted_kp'] against recent_data['time'].
kp_window = model.predict(features_df)
recent_data['predicted_kp'] = kp_window

# The last row of the time-sorted window is the most recent observation.
latest_prediction = recent_data.iloc[-1]
predicted_kp = latest_prediction['predicted_kp']

print(f"\n>>> Current Predicted Global Kp Index: {predicted_kp:.2f}")

In [None]:
# 5. Local Visibility and Actionable Decisions
# One shared timestamp for every row produced by this run. It is taken in UTC
# (the dashboard labels its time axis "Time (UTC)"), then made naive so the
# column keeps the same timezone-free dtype as before.
# NOTE(review): assumes the feature store interprets naive timestamps as UTC — confirm.
current_time = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

results = []
for city, city_conf in settings.CITIES.items():
    # Cloud cover at the city's coordinates, as returned by the project helper.
    cloud_cover = util.get_city_weather_forecast(city_conf['lat'], city_conf['lon'])

    # Combine the global Kp prediction with the city's own Kp threshold and
    # its cloud cover into a single status value.
    status = util.aurora_visibility_logic(
        pred_kp=predicted_kp,
        kp_threshold=city_conf['kp_threshold'],
        cloud_cover=cloud_cover
    )

    results.append({
        "city": city,
        "time": current_time,
        "predicted_kp": round(predicted_kp, 2),
        "cloud_cover": cloud_cover,
        "status": status
    })

# Build the frame once from the collected records (one row per city).
inference_df = pd.DataFrame(results)
print("\n--- Current Visibility Status ---")
print(inference_df[['city', 'predicted_kp', 'cloud_cover', 'status']])

In [None]:
# 6. Update Monitoring Feature Group
# Fetch the monitoring feature group, creating it on first use. Rows are keyed
# by (city, time) and 'time' doubles as the event time.
monitor_fg = fs.get_or_create_feature_group(
    name="aurora_monitoring_fg",
    version=1,
    primary_key=['city', 'time'],
    event_time="time",
    description="Actual vs Predicted visibility signals for monitoring"
)

# Block until the ingestion job has finished: the dashboard cell reads this
# feature group back right away and would otherwise miss the rows from this run.
monitor_fg.insert(inference_df, write_options={"wait_for_job": True})

Dashboard


In [None]:
# 7. VISUAL DASHBOARD GENERATION
# Pull every stored monitoring row, oldest first, to draw each city's timeline.
history_df = monitor_fg.read().sort_values(by='time')

# Two stacked panels sharing one time axis: Kp trend on top, per-city GO signal below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), sharex=True)

# Plot 1: Kp Index Trend
ax1.plot(recent_data['time'], recent_data['predicted_kp'], color='blue', linewidth=2, label='Predicted Kp Trend')
# Dashed reference line at the most recent prediction.
ax1.axhline(y=predicted_kp, color='red', linestyle='--', alpha=0.5, label='Current Level')
ax1.set_ylabel('Kp Index (Intensity)', fontsize=12)
ax1.set_ylim(0, 9)
ax1.set_title('Global Geomagnetic Activity Trend (Last 24h)', fontsize=14)
ax1.legend(loc='upper left')
ax1.grid(True, alpha=0.3)

# Plot 2: City Visibility Timeline (The "Go" signal)
# Preferred colors for the known cities. Any other configured city falls back to
# matplotlib's default color cycle (color=None) instead of raising a KeyError.
colors = {'Kiruna': 'forestgreen', 'Luleå': 'orange', 'Stockholm': 'purple'}
for i, city in enumerate(settings.CITIES.keys()):
    # Last 24 monitoring rows for this city; copy so the helper column below
    # is added to an independent frame.
    city_hist = history_df[history_df['city'] == city].tail(24).copy()
    # Map "GO" to 1, others to 0 for plotting
    city_hist['go_val'] = (city_hist['status'] == "GO").astype(int)

    # Offset the city lines slightly so they are all visible
    ax2.step(
        city_hist['time'],
        city_hist['go_val'] + (i * 0.05),
        label=f'{city} Visibility',
        color=colors.get(city),
        where='post',
    )

ax2.set_ylabel('Visibility (GO = High Probability)', fontsize=12)
ax2.set_yticks([0, 1])
ax2.set_yticklabels(['No Activity', 'GO Signal'])
ax2.set_title('Actionable Visibility per City', fontsize=14)
ax2.legend(loc='upper left')
ax2.grid(True, alpha=0.2)

# Label the shared x axis on the bottom panel explicitly rather than relying on
# pyplot's "current axes".
ax2.set_xlabel('Time (UTC)', fontsize=12)
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d %H:%M'))
fig.autofmt_xdate()

# Save the dashboard through the figure object, before it is shown, so the
# written file is always this figure.
dashboard_path = "aurora_dashboard.png"
fig.savefig(dashboard_path, bbox_inches='tight')
print(f"\nDashboard saved as {dashboard_path}")
plt.show()

# 8. Upload to Hopsworks Resources
dataset_api = project.get_dataset_api()

# Make sure the target folder exists before uploading into it.
aurora_dir = "Resources/aurora"
if not dataset_api.exists(aurora_dir):
    dataset_api.mkdir(aurora_dir)

# NOTE(review): only Resources/aurora is created here. Confirm that upload()
# creates the history/ subfolder and that its second argument is interpreted
# as intended (destination file vs. destination directory).

# Upload the latest dashboard for public viewing
latest_target = "Resources/aurora/latest_dashboard.png"
dataset_api.upload(dashboard_path, latest_target, overwrite=True)

# Upload a timestamped version for history (minute resolution, from this run's timestamp)
timestamp = current_time.strftime('%Y%m%d_%H%M')
history_target = f"Resources/aurora/history/dashboard_{timestamp}.png"
dataset_api.upload(dashboard_path, history_target, overwrite=True)

print("Dashboard uploaded to Hopsworks: Resources/aurora/latest_dashboard.png")

# Save the dashboard into the docs folder for GitHub Pages.
# This runs after plt.show(), which can leave pyplot without a current figure, so
# plt.savefig() here could write a blank image. Saving through the `fig` object
# always writes the dashboard itself.
dashboard_path = "docs/aurora/assets/img/latest_dashboard.png"
# Create the target folder if it is missing so the save does not fail on a fresh checkout.
os.makedirs(os.path.dirname(dashboard_path), exist_ok=True)
fig.savefig(dashboard_path, bbox_inches='tight')