In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor

from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# --- Run configuration -------------------------------------------------------
# Single source of truth for the input file, so the path cannot drift between
# separate read calls.
DATA_PATH = 'heat_transfer_multilayer_dataset.csv'
target = "Q"          # name of the column the models are trained to predict
features = None       # None -> numeric feature columns are auto-selected
TEST_SIZE = 0.2       # fraction of rows held out for evaluation
RANDOM_STATE = 42     # shared seed for the split and the seeded estimators
save_pred_csv = True  # NOTE(review): not read in this part of the file - confirm usage

# --- Load data ---------------------------------------------------------------
# Read the CSV once instead of parsing the same file twice.
df = pd.read_csv(DATA_PATH)
# `data` stays bound for any code that still refers to it; it is an
# independent copy, matching the separate frame the second read used to give.
data = df.copy()
print("Data Shape: ", df.shape)
print("Columns: ", df.columns.tolist())

# Fail fast when the configured target column is absent from the loaded data.
if target not in df.columns:
    raise ValueError(f"Target columns '{target}' is not found in the dataset.")

if features is None:
    # No explicit feature list: use every numeric column except the target.
    numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
    feature_columns = [c for c in numeric_columns if c != target]
    print("Auto Selected numeric feature columns:", feature_columns)
else:
    feature_columns = features
    # A requested column is "missing" only when the dataset does not contain
    # it. Comparing against the target name instead would flag every ordinary
    # feature and make this branch raise for any explicit feature list.
    missing = [c for c in feature_columns if c not in df.columns]
    if missing:
        raise ValueError(f"Missing feature columns: {missing}")

# Feature matrix and target vector, copied so `df` itself is never modified.
X = df[feature_columns].copy()
y = df[target].copy()

# Split BEFORE imputing: filling gaps with means computed over the whole
# dataset would leak information about the held-out rows into training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Impute missing feature values using statistics from the training rows only,
# and apply those same training means to the test rows.
# NOTE: `X` is left un-imputed; only the train/test partitions are filled.
train_means = X_train.mean()
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Standardize features: the scaler is fitted on the training partition and the
# fitted transform is reused for the test partition. Both become NumPy arrays.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Candidate regressors keyed by the display name used in the printed report
# and in the results table. Every estimator that accepts a seed is given
# RANDOM_STATE.
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(random_state=RANDOM_STATE),
    "Random Forest": RandomForestRegressor(
        n_estimators=100,
        random_state=RANDOM_STATE,
    ),
    "XGBoost": XGBRegressor(
        n_estimators=100,
        learning_rate=0.1,
        random_state=RANDOM_STATE,
        verbosity=0,
    ),
    "Neural Network (MLP)": MLPRegressor(
        random_state=RANDOM_STATE,
        max_iter=500,
    ),
}

# One metrics record per model, appended in the order the models are fitted.
results = []
for name in models:
    model = models[name]

    # Fit on the training partition, then predict the held-out partition.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Error metrics on the held-out partition; RMSE is derived from MSE.
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    record = {"model": name, "MSE": mse, "MAE": mae, "R²": r2, "RMSE": rmse}
    results.append(record)
    print(f"{name} -> MSE: {mse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}, RMSE: {rmse:.4f}")


# Collect the per-model metric records into a table indexed by model name.
results_df = pd.DataFrame(results)
results_df = results_df.set_index("model")

# Print the table ordered by ascending RMSE.
ranked_by_rmse = results_df.sort_values(by="RMSE")
print("\n---Summary---")
print(ranked_by_rmse)

# Persist the table. The saved file keeps the original row order, not the
# RMSE-sorted order used for the printed view.
results_df.to_csv("models_performance_summary.csv")
print("\nSaved Performance Summary to models_performance_summary.csv")

Data Shape:  (1000, 17)
Columns:  ['delta_T', 'Mat1', 'L1', 'k1', 'Mat2', 'L2', 'k2', 'Mat3', 'L3', 'k3', 'Mat4', 'L4', 'k4', 'Mat5', 'L5', 'k5', 'Q']
Auto Selected numeric feature columns: ['delta_T', 'L1', 'k1', 'L2', 'k2', 'L3', 'k3', 'L4', 'k4', 'L5', 'k5']
Linear Regression -> MSE: 1227420.8949, MAE: 562.1715, R²: 0.1963, RMSE: 1107.8903
Decision Tree -> MSE: 465909.5189, MAE: 163.1172, R²: 0.6949, RMSE: 682.5757
Random Forest -> MSE: 259879.1074, MAE: 142.2110, R²: 0.8298, RMSE: 509.7834
XGBoost -> MSE: 232190.4839, MAE: 124.1431, R²: 0.8480, RMSE: 481.8615
Neural Network (MLP) -> MSE: 1167751.7062, MAE: 480.0311, R²: 0.2354, RMSE: 1080.6256

---Summary---
                               MSE         MAE        R²         RMSE
model                                                                
XGBoost               2.321905e+05  124.143094  0.847969   481.861478
Random Forest         2.598791e+05  142.211007  0.829839   509.783393
Decision Tree         4.659095e+05  163.117250  0

