In [None]:
Aurora Forecasting - Part 03: Training Pipeline

🗒️ This notebook is divided into the following sections:
Initialize Hopsworks connection and retrieve Feature Groups.

Create a Feature View and Training Dataset.

Train a Random Forest model to predict the Kp index from solar wind features.

Evaluate model performance.

Register the model in the Hopsworks Model Registry.

Import and setup


In [None]:
import pandas as pd
import hopsworks
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import os
from config import HopsworksSettings

# Load the Hopsworks connection settings class imported from config.py
settings = HopsworksSettings()

# Authenticate against Hopsworks. The API key is unwrapped with
# get_secret_value() directly at the call site, so the raw key is never
# bound to a notebook variable.
project = hopsworks.login(
    api_key_value=settings.HOPSWORKS_API_KEY.get_secret_value(),
    project=settings.HOPSWORKS_PROJECT,
)

# Handle to the project's feature store, used by the cells below
fs = project.get_feature_store()

🏗️ Step 1: Create Feature View

The Feature View acts as a metadata layer over our Feature Group, allowing us to select specific features and labels for training. We will use the solar wind parameters (bx_gsm, by_gsm, bz_gsm, density, speed) as features and the kp_index as our target label.

In [None]:
# Columns used for training, as described in the Step 1 text above:
# five solar wind parameters as model inputs and the Kp index as the target.
FEATURE_COLUMNS = ["bx_gsm", "by_gsm", "bz_gsm", "density", "speed"]
LABEL_COLUMN = "kp_index"

# Get the solar wind feature group
solar_wind_fg = fs.get_feature_group(name="solar_wind_fg", version=1)

# Select only the model inputs and the label. select_all() would also pull in
# any other columns the feature group has (e.g. keys or timestamps — TODO
# confirm schema), and those would then be handed to the regressor as features.
query = solar_wind_fg.select(FEATURE_COLUMNS + [LABEL_COLUMN])

# Create or retrieve the Feature View
# Note: Weather data is used for visibility logic in inference,
# while Kp is predicted solely from solar wind data.
# NOTE(review): if "aurora_kp_view" version 1 already exists, it is presumably
# returned as-is and `query` is not re-applied — bump the version (or delete
# the old view) to pick up the narrower column selection.
feature_view = fs.get_or_create_feature_view(
    name="aurora_kp_view",
    version=1,
    description="Predicting the Kp index from solar wind parameters",
    labels=[LABEL_COLUMN],
    query=query,
)

print("Feature View created/retrieved successfully.")

📊 Step 2: Create Training Dataset

We split our historical data into training and testing sets to ensure the model generalizes well to unseen solar wind conditions.

In [None]:
# Ask the Feature View for a train/test split, holding out 20% of rows for testing.
# NOTE(review): the split is expected to be recorded in Hopsworks as a
# training dataset version for reproducibility — confirm in the UI.
X_train, X_test, y_train, y_test = feature_view.train_test_split(
    description="Aurora Kp prediction training dataset",
    test_size=0.2,
)

# Report how many rows ended up on each side of the split
print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

# Preview the first training rows (last expression -> rich display)
X_train.head()

🤖 Step 3: Train the Model

As outlined in the project description, we train a Random Forest Regressor. This model is well-suited for mapping the complex, non-linear relationships between solar wind plasma parameters and geomagnetic activity.

In [None]:
print("Training Random Forest Regressor...")

# Hyperparameters gathered in one place so they are easy to tune
rf_params = {
    "n_estimators": 100,
    "max_depth": 10,
    "random_state": 42,  # fixed seed for the forest's internal randomness
}
rf_model = RandomForestRegressor(**rf_params)

# Flatten the labels to a 1-D array before fitting
y_train_1d = y_train.values.ravel()
rf_model.fit(X_train, y_train_1d)

print("Model training complete.")

📉 Step 4: Model Evaluation

We evaluate the model using Mean Squared Error (MSE) and R-squared to determine how accurately it predicts the geomagnetic Kp index.

In [None]:
# Score the held-out test features with the trained forest
y_pred = rf_model.predict(X_test)

# Compare the predictions against the true test labels
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Collected in a dict so both scores can be passed along as one object
metrics = dict(mse=mse, r2=r2)

print(f"Model MSE: {mse:.4f}")
print(f"Model R2 Score: {r2:.4f}")

🗃️ Step 5: Register Model to Hopsworks

Once satisfied with the performance, we save the model artifacts and register them in the Hopsworks Model Registry so they can be retrieved by the Batch Inference pipeline.

In [None]:
# Create a local directory for model artifacts.
# exist_ok=True makes re-running the cell safe and avoids the
# check-then-create race of os.path.exists() followed by os.makedirs().
model_dir = "aurora_model"
os.makedirs(model_dir, exist_ok=True)

# Save the model artifact
model_path = os.path.join(model_dir, "model.pkl")
joblib.dump(rf_model, model_path)

# Get the Model Registry
mr = project.get_model_registry()

# Create the model entry
aurora_model = mr.python.create_model(
    name=settings.MODEL_NAME,  # "aurora_kp_rf_model" from config.py
    metrics=metrics,
    description="Random Forest Regressor for predicting Kp index based on solar wind features.",
    # Seeded so that, for the same training frame, the registered input
    # example is the same row on every run.
    input_example=X_train.sample(1, random_state=42),
    feature_view=feature_view,
)

# Upload the contents of the local artifact directory to the registry
aurora_model.save(model_dir)

print(f"Model '{settings.MODEL_NAME}' version {aurora_model.version} registered successfully.")