In [1]:
import os
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score
)

# ---------------------------------------
# Configuration
# ---------------------------------------
# Input CSVs and the shared results file; all three paths are relative.
TRAIN_PATH = "../dataset/scaled_data/energy_efficiency_train_processed.csv"
TEST_PATH  = "../dataset/scaled_data/energy_efficiency_test_processed.csv"
RESULTS_PATH = "../results/metrics_results.csv"

# Columns treated as regression targets; every other column is a feature.
TARGET_COLS = ["Heating Load", "Cooling Load"]
# Label written to the "Model" column of the results row.
MODEL_NAME = "Linear_Regression"

# ---------------------------------------
# Read the train / test CSVs
# ---------------------------------------
# The generator is consumed left to right, so the train file is read first.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_PATH, TEST_PATH)
)

# ---------------------------------------
# Separate predictors from the target columns
# ---------------------------------------
X_train, y_train = train_df.drop(columns=TARGET_COLS), train_df[TARGET_COLS]
X_test, y_test = test_df.drop(columns=TARGET_COLS), test_df[TARGET_COLS]

# ---------------------------------------
# Train Linear Regression
# ---------------------------------------
# Build and fit in a single expression; fit() hands back the fitted estimator.
model = LinearRegression().fit(X_train, y_train)

# ---------------------------------------
# Predictions
# ---------------------------------------
# The result is indexed below as y_pred[:, i], one column per TARGET_COLS entry.
y_pred = model.predict(X_test)

# ---------------------------------------
# Metrics
# ---------------------------------------
# Aggregate scores over both targets (scikit-learn's default multi-output
# behaviour is a uniform average of the per-target values).
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# RMSLE: floor both arrays at zero first so log1p never receives a
# negative argument.
y_test_clipped = y_test.values.clip(min=0)
y_pred_clipped = y_pred.clip(min=0)

log_space_mse = mean_squared_error(
    np.log1p(y_test_clipped),
    np.log1p(y_pred_clipped)
)
rmsle = np.sqrt(log_space_mse)

# ---------------------------------------
# Multi-Target Correlation
# ---------------------------------------
# Pearson correlation between actual and predicted values, per target column.
# corrcoef returns a 2x2 matrix; the off-diagonal entry [0, 1] is the one wanted.
target_correlations = {
    target_name: np.corrcoef(
        y_test.iloc[:, col_idx].values,
        y_pred[:, col_idx]
    )[0, 1]
    for col_idx, target_name in enumerate(TARGET_COLS)
}

mean_target_correlation = np.mean([*target_correlations.values()])

# ---------------------------------------
# Prepare Metrics Row (3-decimal precision)
# ---------------------------------------
# Every metric is rounded to three decimals before it is stored or printed.
new_row = {
    "Model": MODEL_NAME,
    "MAE": round(mae, 3),
    "RMSE": round(rmse, 3),
    "RMSLE": round(rmsle, 3),
    "R2": round(r2, 3),
    "Avg_Correlation": round(mean_target_correlation, 3),
}
# One correlation column per configured target, derived from TARGET_COLS so
# the row stays in sync with the config ("Heating Load" -> "Corr_Heating_Load").
for target_name in TARGET_COLS:
    corr_key = f"Corr_{target_name.replace(' ', '_')}"
    new_row[corr_key] = round(target_correlations[target_name], 3)

# ---------------------------------------
# Append Metrics to CSV
# ---------------------------------------
# dirname() is "" for a bare filename, and makedirs("") raises, so only
# create the directory when the path actually has one.
results_dir = os.path.dirname(RESULTS_PATH)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)

# Write the header when the file is missing *or* empty; checking existence
# alone would leave a zero-byte file without column names.
results_has_rows = (
    os.path.isfile(RESULTS_PATH) and os.path.getsize(RESULTS_PATH) > 0
)

# NOTE: this appends, so re-running the cell adds another row for MODEL_NAME.
pd.DataFrame([new_row]).to_csv(
    RESULTS_PATH,
    mode="a",
    header=not results_has_rows,
    index=False
)

# ---------------------------------------
# Console Output
# ---------------------------------------
# Title, rule, then one "name: value" line per entry of the metrics row,
# emitted with a single print call.
summary_lines = [
    "Linear Regression Training Completed",
    "------------------------------------",
]
summary_lines.extend(
    f"{metric}: {value}" for metric, value in new_row.items()
)
print("\n".join(summary_lines))


Linear Regression Training Completed
------------------------------------
Model: Linear_Regression
MAE: 2.189
RMSE: 3.086
RMSLE: 0.115
R2: 0.903
Avg_Correlation: 0.951
Corr_Heating_Load: 0.955
Corr_Cooling_Load: 0.946


In [2]:
import pickle

# ---------------------------------------
# Where the fitted model is stored
# ---------------------------------------
MODEL_DIR = "../models"
MODEL_PATH = os.path.join(MODEL_DIR, "regression_model.pkl")

# Make sure the target directory exists; a no-op when it is already there.
os.makedirs(MODEL_DIR, exist_ok=True)

# ---------------------------------------
# Serialise the estimator with pickle
# ---------------------------------------
# `model` is the estimator fitted in the previous cell.
with open(MODEL_PATH, "wb") as model_file:
    pickle.dump(model, model_file)

print(f"Model successfully saved to: {MODEL_PATH}")

Model successfully saved to: ../models\regression_model.pkl
