In [2]:
import numpy as np
import pandas as pd

# Load Data (replace this with actual CSV file or DataFrame)
data = pd.read_csv('merged_properties.csv')  # Replace 'your_file.csv' with your actual file path
# Features and targets
# X = data.drop(columns=['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']).values
X = data[['vf', 'c55e', 'e15e', 'q15e', 'ϵ11e', 'μ11e', 'α11e', 'ρe', 'c55f', 'e15f', 'q15f', 'ϵ11f', 'μ11f', 'α11f', 'ρf']].values
y = data[['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']].values


In [None]:
from sklearn.preprocessing import RobustScaler, PowerTransformer, PolynomialFeatures

# Feature scaling
feature_robust_scaler = RobustScaler()
X_robust_scaled = feature_robust_scaler.fit_transform(X)

feature_power_scaler = PowerTransformer(method='yeo-johnson')
X_power_scaled = feature_power_scaler.fit_transform(X)

poly = PolynomialFeatures(degree=2, include_bias=False)
feature_poly_robust_scaler = RobustScaler()
X_poly = poly.fit_transform(X)
X_poly_robust_scaled = feature_poly_robust_scaler.fit_transform(X_poly)


# Target Scaling
target_robust_scaler = RobustScaler()
y_robust = target_robust_scaler.fit_transform(y)

target_power_scaler = PowerTransformer(method='yeo-johnson')
y_power_scaled = target_power_scaler.fit_transform(y)

target_multi_robust_scalers = [RobustScaler() for _ in range(y.shape[1])]
y_multi_robust_scaled = np.zeros_like(y)
for i in range(y.shape[1]):
    y_multi_robust_scaled[:, i] = target_multi_robust_scalers[i].fit_transform(y[:, i].reshape(-1, 1)).ravel()

target_multi_power_scaler = {i: PowerTransformer(method='yeo-johnson') for i in range(y.shape[1])}
y_multi_power_scaled = np.zeros_like(y)
for i in range(y.shape[1]):
    y_multi_power_scaled[:, i] = target_multi_power_scaler[i].fit_transform(y[:, i].reshape(-1, 1)).ravel()


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, PowerTransformer, PolynomialFeatures

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling for training and testing sets
feature_robust_scaler = RobustScaler()
X_train_robust_scaled = feature_robust_scaler.fit_transform(X_train)
X_test_robust_scaled = feature_robust_scaler.transform(X_test)

feature_power_scaler = PowerTransformer(method='yeo-johnson')
X_train_power_scaled = feature_power_scaler.fit_transform(X_train)
X_test_power_scaled = feature_power_scaler.transform(X_test)

poly = PolynomialFeatures(degree=2, include_bias=False)
feature_poly_robust_scaler = RobustScaler()
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
X_train_poly_robust_scaled = feature_poly_robust_scaler.fit_transform(X_train_poly)
X_test_poly_robust_scaled = feature_poly_robust_scaler.transform(X_test_poly)

# Target scaling for training and testing sets
target_robust_scaler = RobustScaler()
y_train_robust_scaled = target_robust_scaler.fit_transform(y_train)
y_test_robust_scaled = target_robust_scaler.transform(y_test)

target_power_scaler = PowerTransformer(method='yeo-johnson')
y_train_power_scaled = target_power_scaler.fit_transform(y_train)
y_test_power_scaled = target_power_scaler.transform(y_test)

target_multi_robust_scalers = [RobustScaler() for _ in range(y.shape[1])]
y_train_multi_robust_scaled = np.zeros_like(y_train)
y_test_multi_robust_scaled = np.zeros_like(y_test)
for i in range(y.shape[1]):
    y_train_multi_robust_scaled[:, i] = target_multi_robust_scalers[i].fit_transform(y_train[:, i].reshape(-1, 1)).ravel()
    y_test_multi_robust_scaled[:, i] = target_multi_robust_scalers[i].transform(y_test[:, i].reshape(-1, 1)).ravel()

target_multi_power_scaler = {i: PowerTransformer(method='yeo-johnson') for i in range(y.shape[1])}
y_train_multi_power_scaled = np.zeros_like(y_train)
y_test_multi_power_scaled = np.zeros_like(y_test)
for i in range(y.shape[1]):
    y_train_multi_power_scaled[:, i] = target_multi_power_scaler[i].fit_transform(y_train[:, i].reshape(-1, 1)).ravel()
    y_test_multi_power_scaled[:, i] = target_multi_power_scaler[i].transform(y_test[:, i].reshape(-1, 1)).ravel()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

def model_evaluation(y_true, y_pred, model_name=None, debug=False):
    """
    Evaluate the model using MAE, MSE, and R² metrics.
    
    Parameters:
    - y_true: Actual target values
    - y_pred: Predicted target values
    - model_name: Name of the model (optional)
    Returns:
    - mae: Mean Absolute Error
    - mse: Mean Squared Error
    - r2: R² Score
    """

    # Calculate evaluation metrics for each target variable
    mae_scores = []
    mse_scores = []
    r2_scores = []

    if debug:
        for i in range(y_true.shape[1]):
            mae = mean_absolute_error(y_true[:, i], y_pred[:, i])
            mse = mean_squared_error(y_true[:, i], y_pred[:, i])
            r2 = r2_score(y_true[:, i], y_pred[:, i])

            mae_scores.append(mae)
            mse_scores.append(mse)
            r2_scores.append(r2)

            print(f"Target {i+1}:")
            print(f"  MAE: {mae}")
            print(f"  MSE: {mse}")
            print(f"  R²: {r2}")

    # Calculate average scores across all target variables
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print(f"{model_name} Performance:")
    print(f"MAE: {mae:.4f}, MSE: {mse:.4f}, R²: {r2:.4f}")
    print("-" * 50)
    return {
        "Model": model_name, "MAE": mae, "MSE": mse, "R2": r2
    }

def print_predictions(y_pred, y, num=0):
    np.set_printoptions(linewidth=np.inf)  # Set the print options to avoid line breaks
    for i in range(num):
        print(f"Predicted: {y_pred[i]}")
        print(f"Actual:    {y[i]}")
        print("-" * 50)

In [27]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor

# Linear Regression
linear_model = MultiOutputRegressor(LinearRegression())
linear_model.fit(X_robust_scaled, y_multi_power_scaled)
y_pred_linear = linear_model.predict(X_robust_scaled)
y_pred_linear_inverse = np.zeros_like(y_pred_linear)
for i in range(y.shape[1]):
    y_pred_linear_inverse[:, i] = target_multi_power_scaler[i].inverse_transform(y_pred_linear[:, i].reshape(-1, 1)).ravel()
print_predictions(y, y_pred_linear_inverse)
result_linear = pd.DataFrame(model_evaluation(y, y_pred_linear_inverse, model_name='Linear Regression'))

# Decision Tree Regressor
decision_tree_model = MultiOutputRegressor(DecisionTreeRegressor(random_state=42))
decision_tree_model.fit(X_robust_scaled, y_multi_power_scaled)
y_pred_tree = decision_tree_model.predict(X_robust_scaled)
y_pred_tree_inverse = np.zeros_like(y_pred_tree)
for i in range(y.shape[1]):
    y_pred_tree_inverse[:, i] = target_multi_power_scaler[i].inverse_transform(y_pred_tree[:, i].reshape(-1, 1)).ravel()
print_predictions(y, y_pred_tree_inverse)
result_tree = pd.DataFrame(model_evaluation(y, y_pred_tree_inverse, model_name='Decision Tree Regressor'))

# Random Forest Regressor
random_forest_model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
random_forest_model.fit(X_robust_scaled, y_multi_power_scaled)
y_pred_rf = random_forest_model.predict(X_robust_scaled)
y_pred_rf_inverse = np.zeros_like(y_pred_rf)
for i in range(y.shape[1]):
    y_pred_rf_inverse[:, i] = target_multi_power_scaler[i].inverse_transform(y_pred_rf[:, i].reshape(-1, 1)).ravel()
print_predictions(y, y_pred_rf_inverse)
result_rf = pd.DataFrame(model_evaluation(y, y_pred_rf_inverse, model_name='Random Forest Regressor'))

# Gradient Boosting Regressor
gradient_boosting_model = MultiOutputRegressor(GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42))
gradient_boosting_model.fit(X_robust_scaled, y_multi_power_scaled)
y_pred_gb = gradient_boosting_model.predict(X_robust_scaled)
y_pred_gb_inverse = np.zeros_like(y_pred_gb)
for i in range(y.shape[1]):
    y_pred_gb_inverse[:, i] = target_multi_power_scaler[i].inverse_transform(y_pred_gb[:, i].reshape(-1, 1)).ravel()
print_predictions(y, y_pred_gb_inverse)
result_gb = pd.DataFrame(model_evaluation(y, y_pred_gb_inverse, model_name='Gradient Boosting Regressor'))


Average Scores:
  Model: Linear Regression
  Average MAE: 9462221827.06768
  Average MSE: 1.0566172586499425e+22
  Average R²: 0.25785807270131944
--------------------------------------------------
--------------------------------------------------

Average Scores:
  Model: Decision Tree Regressor
  Average MAE: 336403592.679377
  Average MSE: 2.2266554471050408e+19
  Average R²: 0.9912597305466812
--------------------------------------------------
--------------------------------------------------

Average Scores:
  Model: Random Forest Regressor
  Average MAE: 1793554839.0025222
  Average MSE: 2.2505204451944689e+21
  Average R²: 0.9276987351377376
--------------------------------------------------
--------------------------------------------------

Average Scores:
  Model: Gradient Boosting Regressor
  Average MAE: 502529528.06820476
  Average MSE: 1.7807977793253884e+19
  Average R²: 0.9929411331266785
--------------------------------------------------
----------------------------

In [None]:
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.neural_network import MLPRegressor
from catboost import CatBoostRegressor
import pandas as pd
import numpy as np

# Function to evaluate model performance

# Linear Regression
linear_model = MultiOutputRegressor(LinearRegression())
linear_model.fit(X_train_robust_scaled, y_train_multi_power_scaled)
y_pred_linear = linear_model.predict(X_test_robust_scaled)
y_pred_linear_inverse = np.zeros_like(y_pred_linear)
for i in range(y_test.shape[1]):
    y_pred_linear_inverse[:, i] = target_multi_power_scaler[i].inverse_transform(y_pred_linear[:, i].reshape(-1, 1)).ravel()
model_evaluation(y_test, y_pred_linear_inverse, "Linear Regression")

# Decision Tree Regressor
decision_tree_model = MultiOutputRegressor(DecisionTreeRegressor(random_state=42))
decision_tree_model.fit(X_train_robust_scaled, y_train_multi_power_scaled)
y_pred_tree = decision_tree_model.predict(X_test_robust_scaled)
y_pred_tree_inverse = np.zeros_like(y_pred_tree)
for i in range(y_test.shape[1]):
    y_pred_tree_inverse[:, i] = target_multi_power_scaler[i].inverse_transform(y_pred_tree[:, i].reshape(-1, 1)).ravel()
model_evaluation(y_test, y_pred_tree_inverse, "Decision Tree Regressor")

# Random Forest Regressor
random_forest_model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
random_forest_model.fit(X_train_robust_scaled, y_train_multi_power_scaled)
y_pred_rf = random_forest_model.predict(X_test_robust_scaled)
y_pred_rf_inverse = np.zeros_like(y_pred_rf)
for i in range(y_test.shape[1]):
    y_pred_rf_inverse[:, i] = target_multi_power_scaler[i].inverse_transform(y_pred_rf[:, i].reshape(-1, 1)).ravel()
model_evaluation(y_test, y_pred_rf_inverse, "Random Forest Regressor")

# Gradient Boosting Regressor
gradient_boosting_model = MultiOutputRegressor(GradientBoostingRegressor(n_estimators=500, learning_rate=0.1, max_depth=4, random_state=42))
gradient_boosting_model.fit(X_train_robust_scaled, y_train_multi_power_scaled)
y_pred_gb = gradient_boosting_model.predict(X_test_robust_scaled)
y_pred_gb_inverse = np.zeros_like(y_pred_gb)
for i in range(y_test.shape[1]):
    y_pred_gb_inverse[:, i] = target_multi_power_scaler[i].inverse_transform(y_pred_gb[:, i].reshape(-1, 1)).ravel()
model_evaluation(y_test, y_pred_gb_inverse, "Gradient Boosting Regressor")




Average Scores:
  Model: Linear Regression
  Average MAE: 9299407598.948652
  Average MSE: 7.217289587774969e+21
  Average R²: 0.26545276520420286
--------------------------------------------------
--------------------------------------------------

Average Scores:
  Model: Decision Tree Regressor
  Average MAE: 4227153307.7195926
  Average MSE: 1.0916194733214086e+22
  Average R²: 0.4594239423832113
--------------------------------------------------
--------------------------------------------------

Average Scores:
  Model: Random Forest Regressor
  Average MAE: 2383434685.3320704
  Average MSE: 2.3522872233063639e+21
  Average R²: 0.768215331241982
--------------------------------------------------
--------------------------------------------------

Average Scores:
  Model: Gradient Boosting Regressor
  Average MAE: 2480369773.3216805
  Average MSE: 1.8087715797271097e+21
  Average R²: 0.8400063846900897
--------------------------------------------------
---------------------------

{'Model': ['Gradient Boosting Regressor'],
 'MAE': [2480369773.3216805],
 'MSE': [1.8087715797271097e+21],
 'R²': [0.8400063846900897]}