In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score
)

# ---------------------------------------
# Configuration: input CSVs and the columns to predict
# (paths are relative to the notebook; adjust if needed)
# ---------------------------------------
TRAIN_PATH = "../dataset/scaled_data/energy_efficiency_train_processed.csv"
TEST_PATH = "../dataset/scaled_data/energy_efficiency_test_processed.csv"
TARGET_COLS = ["Heating Load", "Cooling Load"]

# ---------------------------------------
# Read both splits (train first, then test)
# ---------------------------------------
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_PATH, TEST_PATH)
)

# ---------------------------------------
# Keep only the target columns of each split
# ---------------------------------------
y_train = train_df.loc[:, TARGET_COLS]
y_test = test_df.loc[:, TARGET_COLS]

# ---------------------------------------
# Naive mean baseline
# ---------------------------------------
# One mean per target column, computed on the training targets only.
target_means = y_train.mean()

# Every test sample gets the same prediction: the row of training means,
# repeated once per test row -> array of shape (len(y_test), n_targets).
n_test_rows = len(y_test)
y_pred = np.repeat(
    target_means.to_numpy()[np.newaxis, :],
    n_test_rows,
    axis=0,
)

# ---------------------------------------
# Error metrics, each computed jointly over both target columns
# ---------------------------------------

# MAE
mae = mean_absolute_error(y_test, y_pred)

# RMSE = square root of the mean squared error
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# RMSLE: negative values are floored at 0 first so that log1p is defined
# for every entry, then the RMSE is taken on the log1p-transformed values.
y_test_clipped = np.clip(y_test.to_numpy(), 0, None)
y_pred_clipped = np.clip(y_pred, 0, None)

log_mse = mean_squared_error(
    np.log1p(y_test_clipped),
    np.log1p(y_pred_clipped),
)
rmsle = np.sqrt(log_mse)

# R² score
r2 = r2_score(y_test, y_pred)

# ---------------------------------------
# Multi-Target Correlation (Pearson)
# ---------------------------------------
def _pearson_or_zero(actual, predicted):
    """Return the Pearson correlation of two 1-D samples, or 0.0 if undefined.

    Pearson's r divides by the standard deviation of each input, so it is
    mathematically undefined when either input is constant (or has fewer than
    two points). The mean baseline predicts one constant per target, which
    always hits this case: ``np.corrcoef`` then yields either NaN (with a
    RuntimeWarning) or sign-arbitrary floating-point noise such as ``-0.000``.
    Here that case is reported as exactly 0.0, i.e. "no linear association",
    so the stored metric is deterministic.

    Parameters
    ----------
    actual, predicted : array-like
        Equal-length 1-D sequences of numbers.

    Returns
    -------
    float
        Pearson's r, or 0.0 when it cannot be computed.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    # Peak-to-peak == 0 means every value is identical, i.e. zero variance.
    if actual.size < 2 or np.ptp(actual) == 0 or np.ptp(predicted) == 0:
        return 0.0

    return float(np.corrcoef(actual, predicted)[0, 1])


target_correlations = {}

# Column i of y_pred corresponds to TARGET_COLS[i] (same order as y_test).
for i, target in enumerate(TARGET_COLS):
    corr = _pearson_or_zero(
        y_test.iloc[:, i].values,
        y_pred[:, i]
    )
    target_correlations[target] = corr

# Unweighted average of the per-target correlations.
mean_target_correlation = np.mean(list(target_correlations.values()))

# ---------------------------------------
# Report: aggregate metrics first, then one correlation line per target
# ---------------------------------------
summary_lines = [
    "Regression Metrics (Multi-Output)",
    "----------------------------------",
    f"MAE        : {mae:.3f}",
    f"RMSE       : {rmse:.3f}",
    f"RMSLE      : {rmsle:.3f}",
    f"R² Score   : {r2:.3f}",
    f"Avg Corr   : {mean_target_correlation:.3f}",
]
print("\n".join(summary_lines))

print("\nPer-Target Correlation:")
for target_name in target_correlations:
    print(f"{target_name}: {target_correlations[target_name]:.3f}")


Regression Metrics (Multi-Output)
----------------------------------
MAE        : 9.007
RMSE       : 9.956
RMSLE      : 0.426
R² Score   : -0.007
Avg Corr   : -0.000

Per-Target Correlation:
Heating Load: 0.000
Cooling Load: -0.000


In [2]:
import os
import pandas as pd

# ---------------------------------------
# Identifier stored in the "Model" column of the results table
# ---------------------------------------
MODEL_NAME = "Naive_Mean_Baseline"

# ---------------------------------------
# One results row: model name followed by every metric,
# each rounded to 3 decimals
# ---------------------------------------
raw_metrics = {
    "MAE": mae,
    "RMSE": rmse,
    "RMSLE": rmsle,
    "R2": r2,
    "Avg_Correlation": mean_target_correlation,
    "Corr_Heating_Load": target_correlations["Heating Load"],
    "Corr_Cooling_Load": target_correlations["Cooling Load"],
}

new_row = {"Model": MODEL_NAME}
for metric_name, metric_value in raw_metrics.items():
    new_row[metric_name] = round(metric_value, 3)

# ---------------------------------------
# Destination of the shared metrics table
# ---------------------------------------
OUTPUT_DIR = "../results"
OUTPUT_PATH = os.path.join(OUTPUT_DIR, "metrics_results.csv")

# ---------------------------------------
# Insert or replace this model's row in the results CSV
# ---------------------------------------
# Create the results directory up front; to_csv does not create it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

new_row_df = pd.DataFrame([new_row])

# A zero-byte file would make read_csv raise, so treat it like a missing file.
if os.path.exists(OUTPUT_PATH) and os.path.getsize(OUTPUT_PATH) > 0:
    existing_df = pd.read_csv(OUTPUT_PATH)

    # Drop any earlier row for this model so that re-running the cell
    # replaces the entry instead of appending a duplicate.
    if "Model" in existing_df.columns:
        existing_df = existing_df[existing_df["Model"] != MODEL_NAME]

    if existing_df.empty:
        # Skip concat with an empty frame: it adds nothing and is one known
        # trigger of pandas' FutureWarning about empty/all-NA entries.
        updated_df = new_row_df
    else:
        updated_df = pd.concat(
            [existing_df, new_row_df],
            ignore_index=True
        )
else:
    updated_df = new_row_df

# ---------------------------------------
# Write Back to CSV
# ---------------------------------------
updated_df.to_csv(OUTPUT_PATH, index=False)

print(f"Metrics for '{MODEL_NAME}' appended to: {OUTPUT_PATH}")


Metrics for 'Naive_Mean_Baseline' appended to: ../results\metrics_results.csv


  updated_df = pd.concat(
