In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [9]:
# Load the processed dataset and split it into predictors (X) and the two
# damage targets (y), then hold out a test partition for evaluation.

# Settings a reader may want to tune, named once so that the feature/target
# definitions below cannot drift apart.
DATA_PATH = "../data/processed/StormEvents_features.csv"
TARGET_COLS = ["DAMAGE_PROPERTY", "DAMAGE_CROPS"]
TEST_SIZE = 0.2
SEED = 42

df = pd.read_csv(DATA_PATH)

# Fail fast with a readable message if a target is absent. Previously the drop
# used errors="ignore" while the very next line indexed the same columns
# strictly, so the lenient flag only obscured where the failure came from.
missing_targets = [col for col in TARGET_COLS if col not in df.columns]
if missing_targets:
    raise KeyError(f"Target column(s) missing from {DATA_PATH}: {missing_targets}")

X = df.drop(columns=TARGET_COLS)
y = df[TARGET_COLS]

# Train-test split (fixed seed so the partition is the same on every re-run)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

print("Features:", X.shape[1])
print("Targets:", y.shape[1])


Features: 40
Targets: 2


In [10]:
# Candidate regressors, keyed by the label used in the printed report and in
# `results`. Each base estimator is wrapped in MultiOutputRegressor so it can
# be fit on the two-column target (one fitted estimator per target, per the
# scikit-learn documentation).
models = {}
models["LinearRegression"] = MultiOutputRegressor(LinearRegression())
models["GradientBoosting"] = MultiOutputRegressor(
    GradientBoostingRegressor(n_estimators=300, learning_rate=0.05, random_state=42)
)

# results[model_name][target_name] -> {"RMSE": ..., "MAE": ..., "R2": ...}
results = {}

# NOTE(review): the saved output of this cell shows identical R² (to three
# decimals) for both targets under each model, with property/crop RMSE and MAE
# ratios close to 3. That would be consistent with the two targets being nearly
# proportional to each other — TODO confirm against the feature-engineering
# step before reading the two rows as independent results.
for name, model in models.items():
    print(f"\n🔹 Training {name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Score every target column separately on the held-out partition.
    metrics = {}
    for i, target in enumerate(y.columns):
        actual, predicted = y_test.iloc[:, i], y_pred[:, i]
        rmse = np.sqrt(mean_squared_error(actual, predicted))
        mae = mean_absolute_error(actual, predicted)
        r2 = r2_score(actual, predicted)
        metrics[target] = dict(RMSE=rmse, MAE=mae, R2=r2)
        print(f"{name} - {target}: RMSE={rmse:.2f}, MAE={mae:.2f}, R²={r2:.3f}")

    results[name] = metrics



🔹 Training LinearRegression...
LinearRegression - DAMAGE_PROPERTY: RMSE=170255.73, MAE=134799.00, R²=0.697
LinearRegression - DAMAGE_CROPS: RMSE=56756.99, MAE=44928.96, R²=0.697

🔹 Training GradientBoosting...
GradientBoosting - DAMAGE_PROPERTY: RMSE=17213.35, MAE=4791.03, R²=0.997
GradientBoosting - DAMAGE_CROPS: RMSE=5909.98, MAE=1646.29, R²=0.997
