**Gradient Boosting**

In [27]:
model_results = []

In [28]:
from sklearn.preprocessing import RobustScaler, PowerTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.pipeline import Pipeline
import numpy as np
import pandas as pd

# Load Data (replace this with actual CSV file or DataFrame)
data = pd.read_csv('merged_properties.csv')  # Replace 'your_file.csv' with your actual file path
# Features and targets
# X = data.drop(columns=['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']).values
X = data[['vf', 'c55e', 'e15e', 'q15e', 'ϵ11e', 'μ11e', 'α11e', 'ρe', 'c55f', 'e15f', 'q15f', 'ϵ11f', 'μ11f', 'α11f', 'ρf']].values
y = data[['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']].values

# Feature scaling
feature_scaler = RobustScaler()

# Target transformations
target_transformer = PowerTransformer(method='yeo-johnson')

# For each target variable
output_transformers = {}
for i in range(y.shape[1]):
    output_transformers[i] = PowerTransformer(method='yeo-johnson')

# Preprocessing
X_scaled = feature_scaler.fit_transform(X)

# Transform each target separately
y_transformed = np.zeros_like(y)
for i in range(y.shape[1]):
    y_transformed[:, i] = output_transformers[i].fit_transform(y[:, i].reshape(-1, 1)).ravel()

# Model pipeline
gb_model = MultiOutputRegressor(GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42
))

# Train
gb_model.fit(X_scaled, y_transformed)

# Predict and inverse transform
y_pred_transformed = gb_model.predict(X_scaled)
y_pred = np.zeros_like(y_pred_transformed)
for i in range(y.shape[1]):
    y_pred[:, i] = output_transformers[i].inverse_transform(y_pred_transformed[:, i].reshape(-1, 1)).ravel()

# Print predictions

np.set_printoptions(linewidth=np.inf)  # Set the print options to avoid line breaks
for i in range(5):
    print(f"Predicted: {y_pred[i]}")
    print(f"Actual:    {y[i]}")
    print("-" * 50)

Predicted: [ 4.95252513e+10  2.12031114e-02  4.95931889e+02  1.41678144e-04 -6.53959781e-06 -4.79261075e-12  5.32096364e+03]
Actual:    [ 4.76657088e+10  1.58911049e-02  4.96356224e+02  1.42281371e-04  9.72737333e-11 -4.04666773e-12  5.32000000e+03]
--------------------------------------------------
Predicted: [ 4.95252513e+10  2.80453750e-02  4.52765860e+02  1.29243572e-04 -3.32601440e-05 -7.16893211e-12  5.35031710e+03]
Actual:    [ 4.82809038e+10  2.72979275e-02  4.49900677e+02  1.29409565e-04  1.07791406e-10 -6.69366364e-12  5.34000000e+03]
--------------------------------------------------
Predicted: [ 4.95060958e+10  3.73409941e-02  4.10603427e+02  1.17683767e-04 -2.28150797e-07 -8.69437855e-12  5.35971859e+03]
Actual:    [ 4.89392674e+10  3.92584247e-02  4.09485667e+02  1.18222423e-04  1.18891373e-10 -9.06587901e-12  5.36000000e+03]
--------------------------------------------------
Predicted: [ 4.97738272e+10  5.43008004e-02  3.69471113e+02  1.06802338e-04 -3.97881342e-06 -9.98

In [29]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Calculate evaluation metrics for each target variable
mae_scores = []
mse_scores = []
r2_scores = []

for i in range(y.shape[1]):
    mae = mean_absolute_error(y[:, i], y_pred[:, i])
    mse = mean_squared_error(y[:, i], y_pred[:, i])
    r2 = r2_score(y[:, i], y_pred[:, i])

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Target {i+1}:")
    print(f"  MAE: {mae}")
    print(f"  MSE: {mse}")
    print(f"  R²: {r2}")

# Calculate average scores across all target variables
average_mae = np.mean(mae_scores)
average_mse = np.mean(mse_scores)
average_r2 = np.mean(r2_scores)

print("\nAverage Scores:")
print(f"  Average MAE: {average_mae}")
print(f"  Average MSE: {average_mse}")
print(f"  Average R²: {average_r2}")


model_name = 'GradientBoostingRegressor'
# Save the results 
results = pd.DataFrame({
    'Model': [model_name],
    'MAE': [average_mae],
    'MSE': [average_mse],
    'R²': [average_r2]
})

model_results.append(results)

Target 1:
  MAE: 2538691730.975251
  MSE: 6.275206071783603e+19
  R²: 0.9991881156970343
Target 2:
  MAE: 0.15953151412712088
  MSE: 0.2545383955859068
  R²: 0.9999697528600925
Target 3:
  MAE: 3.7912886551165443
  MSE: 183.0489944073925
  R²: 0.9989915695636539
Target 4:
  MAE: 2.051707835873977e-06
  MSE: 2.8144161104170293e-11
  R²: 0.9999768638506548
Target 5:
  MAE: 0.0020339600760367047
  MSE: 0.0009571000968851482
  R²: 0.9926139110506323
Target 6:
  MAE: 9.32641161408819e-11
  MSE: 9.140428866049495e-20
  R²: 0.9998612196511251
Target 7:
  MAE: 5.335547741434522
  MSE: 57.13443436161579
  R²: 0.9999728684379342

Average Scores:
  Average MAE: 362670248.60909367
  Average MSE: 8.964580102548005e+18
  Average R²: 0.998653471587304


**Neural Network**

In [30]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Load Data (replace this with actual CSV file or DataFrame)
data = pd.read_csv('merged_properties.csv')  # Replace 'your_file.csv' with your actual file path
# Features and targets
# X = data.drop(columns=['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']).values
X = data[['vf', 'c55e', 'e15e', 'q15e', 'ϵ11e', 'μ11e', 'α11e', 'ρe', 'c55f', 'e15f', 'q15f', 'ϵ11f', 'μ11f', 'α11f', 'ρf']].values
y = data[['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']].values

# Feature scaling
feature_scaler = RobustScaler()
target_scaler = RobustScaler()

# Preprocessing
X_scaled = feature_scaler.fit_transform(X)
y_scaled = target_scaler.fit_transform(y)

# Define model
nn_model = Sequential([
    BatchNormalization(),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(y.shape[1], activation='linear')
])

# Compile
nn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Early stopping
early_stopping = EarlyStopping(patience=20, restore_best_weights=True)

# Train
nn_model.fit(
    X_scaled, y_scaled,
    epochs=200,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=0
)

# Predict and inverse transform
y_pred_scaled = nn_model.predict(X_scaled)
y_pred = target_scaler.inverse_transform(y_pred_scaled)



# Print predictions
np.set_printoptions(linewidth=np.inf)  # Set the print options to avoid line breaks
for i in range(5):
    print(f"Predicted: {y_pred[i]}")
    print(f"Actual:    {y[i]}")
    print("-" * 50)

[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Predicted: [ 4.3764990e+10  2.5326817e+00 -4.7346306e+00  6.7365303e-04  7.4888994e-06 -3.0149292e-09  6.0475752e+03]
Actual:    [ 4.76657088e+10  1.58911049e-02  4.96356224e+02  1.42281371e-04  9.72737333e-11 -4.04666773e-12  5.32000000e+03]
--------------------------------------------------
Predicted: [ 4.3703927e+10  2.4387929e+00 -1.7985158e+00  6.7344279e-04  6.6845955e-06 -2.8861264e-09  6.0328984e+03]
Actual:    [ 4.82809038e+10  2.72979275e-02  4.49900677e+02  1.29409565e-04  1.07791406e-10 -6.69366364e-12  5.34000000e+03]
--------------------------------------------------
Predicted: [ 4.3751674e+10  2.3712888e+00  1.2550834e+00  6.7452498e-04  6.1288920e-06 -2.7927856e-09  6.0115527e+03]
Actual:    [ 4.89392674e+10  3.92584247e-02  4.09485667e+02  1.18222423e-04  1.18891373e-10 -9.06587901e-12  5.36000000e+03]
--------------------------------------------------
Predicted: [ 4.3800134e+10  2.3044260e+00  4

In [31]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Calculate evaluation metrics for each target variable
mae_scores = []
mse_scores = []
r2_scores = []

for i in range(y.shape[1]):
    mae = mean_absolute_error(y[:, i], y_pred[:, i])
    mse = mean_squared_error(y[:, i], y_pred[:, i])
    r2 = r2_score(y[:, i], y_pred[:, i])

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Target {i+1}:")
    print(f"  MAE: {mae}")
    print(f"  MSE: {mse}")
    print(f"  R²: {r2}")

# Calculate average scores across all target variables
average_mae = np.mean(mae_scores)
average_mse = np.mean(mse_scores)
average_r2 = np.mean(r2_scores)

print("\nAverage Scores:")
print(f"  Average MAE: {average_mae}")
print(f"  Average MSE: {average_mse}")
print(f"  Average R²: {average_r2}")


model_name = 'NeuralNetwork'
# Save the results 
results = pd.DataFrame({
    'Model': [model_name],
    'MAE': [average_mae],
    'MSE': [average_mse],
    'R²': [average_r2]
})

model_results.append(results)

Target 1:
  MAE: 81958763796.03078
  MSE: 8.611194351637371e+22
  R²: -0.11411377473616247
Target 2:
  MAE: 14.755620079050608
  MSE: 8375.15507233397
  R²: 0.004769057978232016
Target 3:
  MAE: 173.85156307678778
  MSE: 456771.6884196564
  R²: -1.516388984024759
Target 4:
  MAE: 0.0008796226837980667
  MSE: 1.4341710482853959e-06
  R²: -0.17897262728714036
Target 5:
  MAE: 0.05532649217292052
  MSE: 0.12974732706775927
  R²: -0.0012801187503812539
Target 6:
  MAE: 8.254639116797784e-09
  MSE: 6.202432183551563e-16
  R²: 0.05827646063368086
Target 7:
  MAE: 1176.131694826555
  MSE: 3485253.81387252
  R²: -0.6550506051667022

Average Scores:
  Average MAE: 11708395022.975124
  Average MSE: 1.2301706216624817e+22
  Average R²: -0.34325151305046175


**Random Forest with Feature Engineering**

In [32]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures

# Load Data (replace this with actual CSV file or DataFrame)
data = pd.read_csv('merged_properties.csv')  # Replace 'your_file.csv' with your actual file path
# Features and targets
# X = data.drop(columns=['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']).values
X = data[['vf', 'c55e', 'e15e', 'q15e', 'ϵ11e', 'μ11e', 'α11e', 'ρe', 'c55f', 'e15f', 'q15f', 'ϵ11f', 'μ11f', 'α11f', 'ρf']].values
y = data[['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']].values


# Feature engineering
poly = PolynomialFeatures(degree=2, include_bias=False)
feature_scaler = RobustScaler()

# Target transformations
target_transformer = {}
for i in range(y.shape[1]):
    target_transformer[i] = PowerTransformer(method='yeo-johnson')

# Preprocessing
X_poly = poly.fit_transform(X)
X_scaled = feature_scaler.fit_transform(X_poly)

# Transform targets
y_transformed = np.zeros_like(y)
for i in range(y.shape[1]):
    y_transformed[:, i] = target_transformer[i].fit_transform(y[:, i].reshape(-1, 1)).ravel()

# Model
rf_model = MultiOutputRegressor(RandomForestRegressor(
    n_estimators=100,
    max_depth=15,
    min_samples_split=5,
    random_state=42
))

# Train
rf_model.fit(X_scaled, y_transformed)

# Predict and inverse transform
y_pred_transformed = rf_model.predict(X_scaled)
y_pred = np.zeros_like(y_pred_transformed)
for i in range(y.shape[1]):
    y_pred[:, i] = target_transformer[i].inverse_transform(y_pred_transformed[:, i].reshape(-1, 1)).ravel()



# Print predictions

np.set_printoptions(linewidth=np.inf)  # Set the print options to avoid line breaks
for i in range(5):
    print(f"Predicted: {y_pred[i]}")
    print(f"Actual:    {y[i]}")
    print("-" * 50)

Predicted: [ 4.76841837e+10  2.03740775e-02  4.95953663e+02  1.42297792e-04  6.57952359e-10 -5.94924110e-12  5.33318379e+03]
Actual:    [ 4.76657088e+10  1.58911049e-02  4.96356224e+02  1.42281371e-04  9.72737333e-11 -4.04666773e-12  5.32000000e+03]
--------------------------------------------------
Predicted: [ 4.81837559e+10  2.86249649e-02  4.49478256e+02  1.29391463e-04  6.57952359e-10 -7.53686002e-12  5.33772044e+03]
Actual:    [ 4.82809038e+10  2.72979275e-02  4.49900677e+02  1.29409565e-04  1.07791406e-10 -6.69366364e-12  5.34000000e+03]
--------------------------------------------------
Predicted: [ 4.87530239e+10  4.32669430e-02  4.08749136e+02  1.17968211e-04  6.57952359e-10 -1.01965103e-11  5.35543246e+03]
Actual:    [ 4.89392674e+10  3.92584247e-02  4.09485667e+02  1.18222423e-04  1.18891373e-10 -9.06587901e-12  5.36000000e+03]
--------------------------------------------------
Predicted: [ 4.96252325e+10  5.65690322e-02  3.71600082e+02  1.07761690e-04  6.57952359e-10 -1.14

In [33]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Calculate evaluation metrics for each target variable
mae_scores = []
mse_scores = []
r2_scores = []

for i in range(y.shape[1]):
    mae = mean_absolute_error(y[:, i], y_pred[:, i])
    mse = mean_squared_error(y[:, i], y_pred[:, i])
    r2 = r2_score(y[:, i], y_pred[:, i])

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Target {i+1}:")
    print(f"  MAE: {mae}")
    print(f"  MSE: {mse}")
    print(f"  R²: {r2}")

# Calculate average scores across all target variables
average_mae = np.mean(mae_scores)
average_mse = np.mean(mse_scores)
average_r2 = np.mean(r2_scores)

print("\nAverage Scores:")
print(f"  Average MAE: {average_mae}")
print(f"  Average MSE: {average_mse}")
print(f"  Average R²: {average_r2}")


model_name = 'RandomForestRegressor'
# Save the results 
results = pd.DataFrame({
    'Model': [model_name],
    'MAE': [average_mae],
    'MSE': [average_mse],
    'R²': [average_r2]
})

model_results.append(results)


Target 1:
  MAE: 17736060587.119816
  MSE: 2.7729074834363327e+22
  R²: 0.6412420510776202
Target 2:
  MAE: 4.947719460255993
  MSE: 1945.129652501506
  R²: 0.7688576271490972
Target 3:
  MAE: 11.885805841702211
  MSE: 8197.491960189553
  R²: 0.9548394110488289
Target 4:
  MAE: 1.7767391096939292e-06
  MSE: 1.4576658858053153e-10
  R²: 0.999880171323975
Target 5:
  MAE: 0.010821888736867648
  MSE: 0.016794749080189617
  R²: 0.8703923330565955
Target 6:
  MAE: 1.3093656265603795e-09
  MSE: 1.320879634603557e-16
  R²: 0.7994490858159641
Target 7:
  MAE: 20.970984796329358
  MSE: 2419.962606460625
  R²: 0.9988508267144386

Average Scores:
  Average MAE: 2533722946.419307
  Average MSE: 3.961296404909047e+21
  Average R²: 0.8619302151695027


**XGBoost with Custom Objective**

In [11]:
import xgboost as xgb
import numpy as np

# Load Data (replace this with actual CSV file or DataFrame)
data = pd.read_csv('merged_properties.csv')  # Replace 'your_file.csv' with your actual file path
# Features and targets
# X = data.drop(columns=['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']).values
X = data[['vf', 'c55e', 'e15e', 'q15e', 'ϵ11e', 'μ11e', 'α11e', 'ρe', 'c55f', 'e15f', 'q15f', 'ϵ11f', 'μ11f', 'α11f', 'ρf']].values
y = data[['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']].values

# Feature scaling
feature_scaler = RobustScaler()
target_scalers = [RobustScaler() for _ in range(y.shape[1])]

# Preprocess
X_scaled = feature_scaler.fit_transform(X)
y_scaled = np.zeros_like(y)
for i in range(y.shape[1]):
    y_scaled[:, i] = target_scalers[i].fit_transform(y[:, i].reshape(-1, 1)).ravel()

# Train separate models for each output
xgb_models = []
for i in range(y.shape[1]):
    dtrain = xgb.DMatrix(X_scaled, label=y_scaled[:, i])
    params = {
        'objective': 'reg:squarederror',
        'eta': 0.1,
        'max_depth': 6,
        'subsample': 0.8,
        'colsample_bytree': 0.8
    }
    model = xgb.train(params, dtrain, num_boost_round=100)
    xgb_models.append(model)

# Predict and inverse transform
y_pred_scaled = np.zeros_like(y)
for i in range(y.shape[1]):
    dtest = xgb.DMatrix(X_scaled)
    y_pred_scaled[:, i] = xgb_models[i].predict(dtest)

y_pred = np.zeros_like(y_pred_scaled)
for i in range(y.shape[1]):
    y_pred[:, i] = target_scalers[i].inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).ravel()




# Print predictions

np.set_printoptions(linewidth=np.inf)  # Set the print options to avoid line breaks
for i in range(5):
    print(f"Predicted: {y_pred[i]}")
    print(f"Actual:    {y[i]}")
    print("-" * 50)

XGBoostError: 
XGBoost Library (libxgboost.dylib) could not be loaded.
Likely causes:
  * OpenMP runtime is not installed
    - vcomp140.dll or libgomp-1.dll for Windows
    - libomp.dylib for Mac OSX
    - libgomp.so for Linux and other UNIX-like OSes
    Mac OSX users: Run `brew install libomp` to install OpenMP runtime.

  * You are running 32-bit Python on a 64-bit OS

Error message(s): ["dlopen(/opt/anaconda3/envs/Thesis/lib/python3.12/site-packages/xgboost/lib/libxgboost.dylib, 0x0006): Library not loaded: @rpath/libomp.dylib\n  Referenced from: <54A1AE05-1E14-3DA2-A8D0-062134694298> /opt/anaconda3/envs/Thesis/lib/python3.12/site-packages/xgboost/lib/libxgboost.dylib\n  Reason: tried: '/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/homebrew/opt/libomp/lib/libomp.dylib' (no such file), '/opt/anaconda3/envs/Thesis/lib/python3.12/lib-dynload/../../libomp.dylib' (no such file), '/opt/anaconda3/envs/Thesis/bin/../lib/libomp.dylib' (no such file)"]


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Calculate evaluation metrics for each target variable
mae_scores = []
mse_scores = []
r2_scores = []

for i in range(y.shape[1]):
    mae = mean_absolute_error(y[:, i], y_pred[:, i])
    mse = mean_squared_error(y[:, i], y_pred[:, i])
    r2 = r2_score(y[:, i], y_pred[:, i])

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Target {i+1}:")
    print(f"  MAE: {mae}")
    print(f"  MSE: {mse}")
    print(f"  R²: {r2}")

# Calculate average scores across all target variables
average_mae = np.mean(mae_scores)
average_mse = np.mean(mse_scores)
average_r2 = np.mean(r2_scores)

print("\nAverage Scores:")
print(f"  Average MAE: {average_mae}")
print(f"  Average MSE: {average_mse}")
print(f"  Average R²: {average_r2}")


model_name = 'XGBoost'
# Save the results 
results = pd.DataFrame({
    'Model': [model_name],
    'MAE': [average_mae],
    'MSE': [average_mse],
    'R²': [average_r2]
})

model_results.append(results)

Target 1:
  MAE: 17736060587.119816
  MSE: 2.7729074834363327e+22
  R²: 0.6412420510776202
Target 2:
  MAE: 4.947719460255993
  MSE: 1945.129652501506
  R²: 0.7688576271490972
Target 3:
  MAE: 11.885805841702211
  MSE: 8197.491960189553
  R²: 0.9548394110488289
Target 4:
  MAE: 1.7767391096939292e-06
  MSE: 1.4576658858053153e-10
  R²: 0.999880171323975
Target 5:
  MAE: 0.010821888736867648
  MSE: 0.016794749080189617
  R²: 0.8703923330565955
Target 6:
  MAE: 1.3093656265603795e-09
  MSE: 1.320879634603557e-16
  R²: 0.7994490858159641
Target 7:
  MAE: 20.970984796329358
  MSE: 2419.962606460625
  R²: 0.9988508267144386

Average Scores:
  Average MAE: 2533722946.419307
  Average MSE: 3.961296404909047e+21
  Average R²: 0.8619302151695027


**Transformer Model for Time Series Properties**

In [34]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Dense, MultiHeadAttention, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input

# Load Data (replace this with actual CSV file or DataFrame)
data = pd.read_csv('merged_properties.csv')  # Replace 'your_file.csv' with your actual file path
# Features and targets
# X = data.drop(columns=['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']).values
X = data[['vf', 'c55e', 'e15e', 'q15e', 'ϵ11e', 'μ11e', 'α11e', 'ρe', 'c55f', 'e15f', 'q15f', 'ϵ11f', 'μ11f', 'α11f', 'ρf']].values
y = data[['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']].values

# Feature scaling
feature_scaler = RobustScaler()
target_scaler = RobustScaler()

# Preprocess
X_scaled = feature_scaler.fit_transform(X)
y_scaled = target_scaler.fit_transform(y)

# Convert 2D data to 3D by treating each sample as a sequence of features
X_3d = X_scaled.reshape(X_scaled.shape[0], 1, X_scaled.shape[1])

# Define Transformer Model
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Multi-head attention
    attention_output = MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(inputs, inputs)
    attention_output = LayerNormalization(epsilon=1e-6)(inputs + attention_output)

    # Feed-forward network
    ffn_output = Dense(ff_dim, activation="relu")(attention_output)
    ffn_output = Dense(inputs.shape[-1])(ffn_output)
    ffn_output = LayerNormalization(epsilon=1e-6)(attention_output + ffn_output)

    return ffn_output

# Build the model
inputs = Input(shape=(1, X.shape[1]))
x = transformer_encoder(inputs, head_size=16, num_heads=2, ff_dim=32, dropout=0.1)
x = tf.keras.layers.Flatten()(x)
x = Dense(32, activation="relu")(x)
outputs = Dense(y.shape[1])(x)

transformer_model = Model(inputs=inputs, outputs=outputs)
transformer_model.compile(optimizer="adam", loss="mse")

# Train
transformer_model.fit(
    X_3d, y_scaled,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    verbose=0
)

# Predict and inverse transform
y_pred_scaled = transformer_model.predict(X_3d)
y_pred = target_scaler.inverse_transform(y_pred_scaled)



# Print predictions

np.set_printoptions(linewidth=np.inf)  # Set the print options to avoid line breaks
for i in range(5):
    print(f"Predicted: {y_pred[i]}")
    print(f"Actual:    {y[i]}")
    print("-" * 50)



[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Predicted: [5.4461436e+10 1.6134955e-01 4.2003253e+02 7.3577242e-04 7.9252681e-11 3.2005180e-11 5.2433730e+03]
Actual:    [ 4.76657088e+10  1.58911049e-02  4.96356224e+02  1.42281371e-04  9.72737333e-11 -4.04666773e-12  5.32000000e+03]
--------------------------------------------------
Predicted: [ 5.4870790e+10  1.4926365e-01  4.1573822e+02  6.1308179e-04 -6.8304357e-13  3.2176085e-11  5.2667866e+03]
Actual:    [ 4.82809038e+10  2.72979275e-02  4.49900677e+02  1.29409565e-04  1.07791406e-10 -6.69366364e-12  5.34000000e+03]
--------------------------------------------------
Predicted: [ 5.4857466e+10  1.3418195e-01  4.0726968e+02  5.0456583e-04 -7.0301605e-11  4.3088946e-11  5.2924146e+03]
Actual:    [ 4.89392674e+10  3.92584247e-02  4.09485667e+02  1.18222423e-04  1.18891373e-10 -9.06587901e-12  5.36000000e+03]
--------------------------------------------------
Predicted: [ 5.4664462e+10  1.1815179e-01  3.9697284



In [35]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Calculate evaluation metrics for each target variable
mae_scores = []
mse_scores = []
r2_scores = []

for i in range(y.shape[1]):
    mae = mean_absolute_error(y[:, i], y_pred[:, i])
    mse = mean_squared_error(y[:, i], y_pred[:, i])
    r2 = r2_score(y[:, i], y_pred[:, i])

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Target {i+1}:")
    print(f"  MAE: {mae}")
    print(f"  MSE: {mse}")
    print(f"  R²: {r2}")

# Calculate average scores across all target variables
average_mae = np.mean(mae_scores)
average_mse = np.mean(mse_scores)
average_r2 = np.mean(r2_scores)

print("\nAverage Scores:")
print(f"  Average MAE: {average_mae}")
print(f"  Average MSE: {average_mse}")
print(f"  Average R²: {average_r2}")


model_name = 'Transformer'
# Save the results 
results = pd.DataFrame({
    'Model': [model_name],
    'MAE': [average_mae],
    'MSE': [average_mse],
    'R²': [average_r2]
})

model_results.append(results)

Target 1:
  MAE: 42065830729.26874
  MSE: 7.165271866049694e+22
  R²: 0.07295924819330768
Target 2:
  MAE: 13.192160335660914
  MSE: 8368.218899405729
  R²: 0.005593292736607247
Target 3:
  MAE: 114.4721857119104
  MSE: 172435.03518631298
  R²: 0.05004264930688085
Target 4:
  MAE: 0.00021341387620981244
  MSE: 9.316860103498646e-08
  R²: 0.9234099513623518
Target 5:
  MAE: 0.05534829666124632
  MSE: 0.1297734788849429
  R²: -0.0014819363539257946
Target 6:
  MAE: 5.80512866967703e-09
  MSE: 5.747225875368768e-16
  R²: 0.1273910406883575
Target 7:
  MAE: 602.510003224058
  MSE: 787533.5878438124
  R²: 0.6260220888470188

Average Scores:
  Average MAE: 6009404494.214093
  Average MSE: 1.0236102665785278e+22
  Average R²: 0.25770519068294256


**Stacked Model with Different Base Learners**

In [36]:
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.ensemble import StackingRegressor

# Load Data (replace this with actual CSV file or DataFrame)
data = pd.read_csv('merged_properties.csv')  # Replace 'your_file.csv' with your actual file path
# Features and targets
# X = data.drop(columns=['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']).values
X = data[['vf', 'c55e', 'e15e', 'q15e', 'ϵ11e', 'μ11e', 'α11e', 'ρe', 'c55f', 'e15f', 'q15f', 'ϵ11f', 'μ11f', 'α11f', 'ρf']].values
y = data[['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']].values

# Feature scaling
feature_scaler = RobustScaler()
target_transformers = {}
for i in range(y.shape[1]):
    target_transformers[i] = PowerTransformer(method='yeo-johnson')

# Preprocess features
X_scaled = feature_scaler.fit_transform(X)

# Transform targets
y_transformed = np.zeros_like(y)
for i in range(y.shape[1]):
    y_transformed[:, i] = target_transformers[i].fit_transform(y[:, i].reshape(-1, 1)).ravel()

# Base models for stacking
estimators = [
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('gbr', GradientBoostingRegressor(n_estimators=100, random_state=42)),
    ('en', ElasticNet(random_state=42))
]

# Stacked models for each output
stacked_models = []
for i in range(y.shape[1]):
    # Create stacking regressor
    stacked_model = StackingRegressor(
        estimators=estimators,
        final_estimator=SVR(kernel='rbf'),
        cv=5
    )
    # Train
    stacked_model.fit(X_scaled, y_transformed[:, i])
    stacked_models.append(stacked_model)

# Predict and inverse transform
y_pred_transformed = np.zeros_like(y)
for i in range(y.shape[1]):
    y_pred_transformed[:, i] = stacked_models[i].predict(X_scaled)

y_pred = np.zeros_like(y_pred_transformed)
for i in range(y.shape[1]):
    y_pred[:, i] = target_transformers[i].inverse_transform(y_pred_transformed[:, i].reshape(-1, 1)).ravel()



# Print predictions

np.set_printoptions(linewidth=np.inf)  # Set the print options to avoid line breaks
for i in range(5):
    print(f"Predicted: {y_pred[i]}")
    print(f"Actual:    {y[i]}")
    print("-" * 50)

Predicted: [ 4.16594797e+10  3.53714626e+00  4.78233025e+02  1.01545074e-04  9.00985268e-03 -1.18999988e-09  5.28776962e+03]
Actual:    [ 4.76657088e+10  1.58911049e-02  4.96356224e+02  1.42281371e-04  9.72737333e-11 -4.04666773e-12  5.32000000e+03]
--------------------------------------------------
Predicted: [ 4.20502945e+10  3.54207461e+00  4.54047537e+02  9.04697414e-05  9.01032790e-03 -1.17589360e-09  5.27513785e+03]
Actual:    [ 4.82809038e+10  2.72979275e-02  4.49900677e+02  1.29409565e-04  1.07791406e-10 -6.69366364e-12  5.34000000e+03]
--------------------------------------------------
Predicted: [ 4.26179428e+10  3.54704423e+00  4.13781048e+02  7.84136474e-05  9.01353744e-03 -1.17348509e-09  5.28300131e+03]
Actual:    [ 4.89392674e+10  3.92584247e-02  4.09485667e+02  1.18222423e-04  1.18891373e-10 -9.06587901e-12  5.36000000e+03]
--------------------------------------------------
Predicted: [ 4.30917955e+10  3.55150843e+00  3.83007637e+02  6.88954746e-05  9.03175899e-03 -1.17

In [37]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Calculate evaluation metrics for each target variable
mae_scores = []
mse_scores = []
r2_scores = []

for i in range(y.shape[1]):
    mae = mean_absolute_error(y[:, i], y_pred[:, i])
    mse = mean_squared_error(y[:, i], y_pred[:, i])
    r2 = r2_score(y[:, i], y_pred[:, i])

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Target {i+1}:")
    print(f"  MAE: {mae}")
    print(f"  MSE: {mse}")
    print(f"  R²: {r2}")

# Calculate average scores across all target variables
average_mae = np.mean(mae_scores)
average_mse = np.mean(mse_scores)
average_r2 = np.mean(r2_scores)

print("\nAverage Scores:")
print(f"  Average MAE: {average_mae}")
print(f"  Average MSE: {average_mse}")
print(f"  Average R²: {average_r2}")


model_name = 'StackingRegressor'
# Save the results 
results = pd.DataFrame({
    'Model': [model_name],
    'MAE': [average_mae],
    'MSE': [average_mse],
    'R²': [average_r2]
})

model_results.append(results)

Target 1:
  MAE: 31046276415.322464
  MSE: 6.704429072309565e+22
  R²: 0.13258295235417683
Target 2:
  MAE: 12.191703856724741
  MSE: 7901.68363715561
  R²: 0.06103230545045357
Target 3:
  MAE: 66.6666481734622
  MSE: 180994.4175596139
  R²: 0.002888379328480739
Target 4:
  MAE: 8.444626148340269e-05
  MSE: 3.202069394203956e-08
  R²: 0.9736771135426718
Target 5:
  MAE: 0.046902841993351076
  MSE: 0.13247394678586036
  R²: -0.022321863322904312
Target 6:
  MAE: 3.9919456879571465e-09
  MSE: 4.3241901830208627e-16
  R²: 0.3434524451800248
Target 7:
  MAE: 212.71456872399685
  MSE: 514123.5889203161
  R²: 0.7558569325464428

Average Scores:
  Average MAE: 4435182386.706053
  Average MSE: 9.577755817585093e+21
  Average R²: 0.32102403786847805


**Support Vector Regression with RBF Kernel**

In [38]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import MinMaxScaler

# Load Data (replace this with actual CSV file or DataFrame)
data = pd.read_csv('merged_properties.csv')  # Replace 'your_file.csv' with your actual file path
# Features and targets
# X = data.drop(columns=['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']).values
X = data[['vf', 'c55e', 'e15e', 'q15e', 'ϵ11e', 'μ11e', 'α11e', 'ρe', 'c55f', 'e15f', 'q15f', 'ϵ11f', 'μ11f', 'α11f', 'ρf']].values
y = data[['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']].values

# Feature scaling
feature_scaler = MinMaxScaler()  # SVR works better with [0,1] scaling
target_scalers = [MinMaxScaler() for _ in range(y.shape[1])]

# Preprocess
X_scaled = feature_scaler.fit_transform(X)
y_scaled = np.zeros_like(y)
for i in range(y.shape[1]):
    y_scaled[:, i] = target_scalers[i].fit_transform(y[:, i].reshape(-1, 1)).ravel()

# Create SVR model
svr_model = MultiOutputRegressor(SVR(
    kernel='rbf',
    C=10.0,
    epsilon=0.1,
    gamma='scale'
))

# Train
svr_model.fit(X_scaled, y_scaled)

# Predict and inverse transform
y_pred_scaled = svr_model.predict(X_scaled)
y_pred = np.zeros_like(y_pred_scaled)
for i in range(y.shape[1]):
    y_pred[:, i] = target_scalers[i].inverse_transform(y_pred_scaled[:, i].reshape(-1, 1)).ravel()



# Print predictions

np.set_printoptions(linewidth=np.inf)  # Set the print options to avoid line breaks
for i in range(5):
    print(f"Predicted: {y_pred[i]}")
    print(f"Actual:    {y[i]}")
    print("-" * 50)

Predicted: [ 4.33495997e+11 -6.67025712e+01  5.04721190e+02  6.04056778e-04  2.16787282e-02  2.77597574e-08  5.90822875e+03]
Actual:    [ 4.76657088e+10  1.58911049e-02  4.96356224e+02  1.42281371e-04  9.72737333e-11 -4.04666773e-12  5.32000000e+03]
--------------------------------------------------
Predicted: [ 4.53586095e+11 -5.35379107e+01  4.96742450e+02  5.58315678e-04  2.16345798e-02  2.43148182e-08  5.93303317e+03]
Actual:    [ 4.82809038e+10  2.72979275e-02  4.49900677e+02  1.29409565e-04  1.07791406e-10 -6.69366364e-12  5.34000000e+03]
--------------------------------------------------
Predicted: [ 4.76382278e+11 -4.04119919e+01  4.87892233e+02  5.13741178e-04  2.08741428e-02  2.07065233e-08  5.95871831e+03]
Actual:    [ 4.89392674e+10  3.92584247e-02  4.09485667e+02  1.18222423e-04  1.18891373e-10 -9.06587901e-12  5.36000000e+03]
--------------------------------------------------
Predicted: [ 5.01697281e+11 -2.75029682e+01  4.78128903e+02  4.70679718e-04  1.93004509e-02  1.69

In [39]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Calculate evaluation metrics for each target variable
mae_scores = []
mse_scores = []
r2_scores = []

for i in range(y.shape[1]):
    mae = mean_absolute_error(y[:, i], y_pred[:, i])
    mse = mean_squared_error(y[:, i], y_pred[:, i])
    r2 = r2_score(y[:, i], y_pred[:, i])

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Target {i+1}:")
    print(f"  MAE: {mae}")
    print(f"  MSE: {mse}")
    print(f"  R²: {r2}")

# Calculate average scores across all target variables
average_mae = np.mean(mae_scores)
average_mse = np.mean(mse_scores)
average_r2 = np.mean(r2_scores)

print("\nAverage Scores:")
print(f"  Average MAE: {average_mae}")
print(f"  Average MSE: {average_mse}")
print(f"  Average R²: {average_r2}")


model_name = 'SupportVectorRegressor'
# Save the results 
results = pd.DataFrame({
    'Model': [model_name],
    'MAE': [average_mae],
    'MSE': [average_mse],
    'R²': [average_r2]
})

model_results.append(results)

Target 1:
  MAE: 630200292725.166
  MSE: 4.6250869347332626e+23
  R²: -4.983923777494108
Target 2:
  MAE: 74.01372351936517
  MSE: 15388.788905032345
  R²: -0.8286704838602361
Target 3:
  MAE: 210.73074061308026
  MSE: 87601.25955055548
  R²: 0.5173981879604049
Target 4:
  MAE: 0.00025718312154098633
  MSE: 8.751368111816894e-08
  R²: 0.9280586268459348
Target 5:
  MAE: 0.24315491976164708
  MSE: 0.09239163492133329
  R²: 0.2869995145467462
Target 6:
  MAE: 1.601291689043629e-08
  MSE: 8.489888038282431e-16
  R²: -0.28903100842241414
Target 7:
  MAE: 347.2037367187923
  MSE: 165944.09465824167
  R²: 0.9211977408374743

Average Scores:
  Average MAE: 90028613336.76537
  Average MSE: 6.607267049618946e+22
  Average R²: -0.4925673142265999


**Multi-Task Lasso with Polynomial Features**

In [40]:
from sklearn.linear_model import MultiTaskLasso
from sklearn.preprocessing import PolynomialFeatures

# Load Data (replace this with actual CSV file or DataFrame)
data = pd.read_csv('merged_properties.csv')  # Replace 'your_file.csv' with your actual file path
# Features and targets
# X = data.drop(columns=['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']).values
X = data[['vf', 'c55e', 'e15e', 'q15e', 'ϵ11e', 'μ11e', 'α11e', 'ρe', 'c55f', 'e15f', 'q15f', 'ϵ11f', 'μ11f', 'α11f', 'ρf']].values
y = data[['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']].values


# Feature engineering
poly = PolynomialFeatures(degree=3, include_bias=False)
feature_scaler = RobustScaler()
target_scaler = RobustScaler()

# Preprocess
X_poly = poly.fit_transform(X)
X_scaled = feature_scaler.fit_transform(X_poly)
y_scaled = target_scaler.fit_transform(y)

# Define model
mtl_model = MultiTaskLasso(
    alpha=0.1,
    max_iter=10000,
    random_state=42
)

# Train
mtl_model.fit(X_scaled, y_scaled)

# Predict and inverse transform
y_pred_scaled = mtl_model.predict(X_scaled)
y_pred = target_scaler.inverse_transform(y_pred_scaled)



# Print predictions

np.set_printoptions(linewidth=np.inf)  # Set the print options to avoid line breaks
for i in range(5):
    print(f"Predicted: {y_pred[i]}")
    print(f"Actual:    {y[i]}")
    print("-" * 50)

Predicted: [-9.24547202e+09 -2.78102877e+01  4.87191778e+02  1.56044897e-04  9.23998293e-03  5.39206942e-09  5.31956805e+03]
Actual:    [ 4.76657088e+10  1.58911049e-02  4.96356224e+02  1.42281371e-04  9.72737333e-11 -4.04666773e-12  5.32000000e+03]
--------------------------------------------------
Predicted: [-9.30285938e+09 -2.76559042e+01  4.25421419e+02  1.30619796e-04 -8.86000078e-03  6.10405763e-09  5.33959574e+03]
Actual:    [ 4.82809038e+10  2.72979275e-02  4.49900677e+02  1.29409565e-04  1.07791406e-10 -6.69366364e-12  5.34000000e+03]
--------------------------------------------------
Predicted: [-6.81145156e+09 -2.61365766e+01  3.73479979e+02  1.10119240e-04 -2.06239138e-02  6.29579464e-09  5.35962731e+03]
Actual:    [ 4.89392674e+10  3.92584247e-02  4.09485667e+02  1.18222423e-04  1.18891373e-10 -9.06587901e-12  5.36000000e+03]
--------------------------------------------------
Predicted: [-2.09662135e+09 -2.34616391e+01  3.30302860e+02  9.39862246e-05 -2.68834109e-02  6.03

  ) = cd_fast.enet_coordinate_descent_multi_task(


In [41]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Calculate evaluation metrics for each target variable
mae_scores = []
mse_scores = []
r2_scores = []

for i in range(y.shape[1]):
    mae = mean_absolute_error(y[:, i], y_pred[:, i])
    mse = mean_squared_error(y[:, i], y_pred[:, i])
    r2 = r2_score(y[:, i], y_pred[:, i])

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Target {i+1}:")
    print(f"  MAE: {mae}")
    print(f"  MSE: {mse}")
    print(f"  R²: {r2}")

# Calculate average scores across all target variables
average_mae = np.mean(mae_scores)
average_mse = np.mean(mse_scores)
average_r2 = np.mean(r2_scores)

print("\nAverage Scores:")
print(f"  Average MAE: {average_mae}")
print(f"  Average MSE: {average_mse}")
print(f"  Average R²: {average_r2}")

model_name = 'MultiTaskLasso'
# Save the results 
results = pd.DataFrame({
    'Model': [model_name],
    'MAE': [average_mae],
    'MSE': [average_mse],
    'R²': [average_r2]
})

model_results.append(results)

Target 1:
  MAE: 70201893695.53142
  MSE: 6.366160124537224e+22
  R²: 0.17634808862787066
Target 2:
  MAE: 23.446047456457723
  MSE: 7662.550075493771
  R²: 0.0894488682228739
Target 3:
  MAE: 88.5219590301462
  MSE: 45525.703464258826
  R²: 0.7491955355556383
Target 4:
  MAE: 1.1381383163193892e-05
  MSE: 4.873936614224665e-10
  R²: 0.9995993338547606
Target 5:
  MAE: 0.07421471935489946
  MSE: 0.033059826267002344
  R²: 0.7448722257437812
Target 6:
  MAE: 5.41656006638848e-09
  MSE: 4.754836324536006e-16
  R²: 0.27806686794186
Target 7:
  MAE: 0.9956746762154437
  MSE: 3.489762368188492
  R²: 0.9999983428084072

Average Scores:
  Average MAE: 10028841972.652761
  Average MSE: 9.094514463624607e+21
  Average R²: 0.576789894679313


**Bayesian Ridge Regression with Custom Basis Functions**

In [42]:
from sklearn.linear_model import ARDRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import FunctionTransformer, RobustScaler, PowerTransformer
import numpy as np
import pandas as pd

# Load Data (replace this with actual CSV file or DataFrame)
data = pd.read_csv('merged_properties.csv')  # Replace 'your_file.csv' with your actual file path
# Features and targets
X = data[['vf', 'c55e', 'e15e', 'q15e', 'ϵ11e', 'μ11e', 'α11e', 'ρe', 'c55f', 'e15f', 'q15f', 'ϵ11f', 'μ11f', 'α11f', 'ρf']].values
y = data[['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']].values

# Custom feature transformations - using physics-inspired basis functions
def custom_basis_functions(X):
    # Original features
    features = X.copy()

    # Add squared volume fraction (vf^2) - important for composite properties
    features = np.column_stack((features, X[:, 0]**2))

    # Add exponential decay related to volume fraction
    features = np.column_stack((features, np.exp(-X[:, 0])))

    # Add interaction between volume fraction and each material property
    for i in range(1, X.shape[1]):
        features = np.column_stack((features, X[:, 0] * X[:, i]))

    return features

# Create transformer
basis_transformer = FunctionTransformer(custom_basis_functions)

# Feature scaling
feature_scaler = RobustScaler()
target_transformers = {}
for i in range(y.shape[1]):
    target_transformers[i] = PowerTransformer(method='yeo-johnson')

# Preprocess
X_basis = basis_transformer.fit_transform(X)
X_scaled = feature_scaler.fit_transform(X_basis)

# Transform targets
y_transformed = np.zeros_like(y)
for i in range(y.shape[1]):
    y_transformed[:, i] = target_transformers[i].fit_transform(y[:, i].reshape(-1, 1)).ravel()

# Model
ard_model = MultiOutputRegressor(ARDRegression(
    tol=1e-4,
    alpha_1=1e-6,
    alpha_2=1e-6,
    lambda_1=1e-6,
    lambda_2=1e-6
))

# Train
ard_model.fit(X_scaled, y_transformed)

# Predict and inverse transform
y_pred_transformed = ard_model.predict(X_scaled)
y_pred = np.zeros_like(y_pred_transformed)
for i in range(y.shape[1]):
    y_pred[:, i] = target_transformers[i].inverse_transform(y_pred_transformed[:, i].reshape(-1, 1)).ravel()



# Print predictions

np.set_printoptions(linewidth=np.inf)  # Set the print options to avoid line breaks
for i in range(5):
    print(f"Predicted: {y_pred[i]}")
    print(f"Actual:    {y[i]}")
    print("-" * 50)

Predicted: [ 8.46935102e+10  5.09327954e+00  8.23552117e+01  4.69812337e-04  1.95466541e-02 -4.40034253e-09  6.00444442e+03]
Actual:    [ 4.76657088e+10  1.58911049e-02  4.96356224e+02  1.42281371e-04  9.72737333e-11 -4.04666773e-12  5.32000000e+03]
--------------------------------------------------
Predicted: [ 8.46935102e+10  5.09327954e+00  8.23552117e+01  4.69812337e-04  1.95466541e-02 -4.40034253e-09  6.00444442e+03]
Actual:    [ 4.82809038e+10  2.72979275e-02  4.49900677e+02  1.29409565e-04  1.07791406e-10 -6.69366364e-12  5.34000000e+03]
--------------------------------------------------
Predicted: [ 8.46935102e+10  5.09327954e+00  8.23552117e+01  4.69812337e-04  1.95466541e-02 -4.40034253e-09  6.00444442e+03]
Actual:    [ 4.89392674e+10  3.92584247e-02  4.09485667e+02  1.18222423e-04  1.18891373e-10 -9.06587901e-12  5.36000000e+03]
--------------------------------------------------
Predicted: [ 8.46935102e+10  5.09327954e+00  8.23552117e+01  4.69812337e-04  1.95466541e-02 -4.40

In [43]:
# Evaluation
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Calculate evaluation metrics for each target variable
mae_scores = []
mse_scores = []
r2_scores = []

for i in range(y.shape[1]):
    mae = mean_absolute_error(y[:, i], y_pred[:, i])
    mse = mean_squared_error(y[:, i], y_pred[:, i])
    r2 = r2_score(y[:, i], y_pred[:, i])

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Target {i+1}:")
    print(f"  MAE: {mae}")
    print(f"  MSE: {mse}")
    print(f"  R²: {r2}")

# Calculate average scores across all target variables
average_mae = np.mean(mae_scores)
average_mse = np.mean(mse_scores)
average_r2 = np.mean(r2_scores)

print("\nAverage Scores:")
print(f"  Average MAE: {average_mae}")
print(f"  Average MSE: {average_mse}")
print(f"  Average R²: {average_r2}")

model_name = 'BayesianRegression'
# Save the results 
results = pd.DataFrame({
    'Model': [model_name],
    'MAE': [average_mae],
    'MSE': [average_mse],
    'R²': [average_r2]
})

model_results.append(results)

Target 1:
  MAE: 89230754914.82697
  MSE: 7.742998139515294e+22
  R²: -0.0017868059556882532
Target 2:
  MAE: 16.02361095370023
  MSE: 8416.77169307311
  R²: -0.00017630103951260523
Target 3:
  MAE: 144.79582829434295
  MSE: 183385.9375816095
  R²: -0.010286681190803115
Target 4:
  MAE: 0.0007734085581937731
  MSE: 1.332981008065628e-06
  R²: -0.09578848567737985
Target 5:
  MAE: 0.06801132940515982
  MSE: 0.13069737796699848
  R²: -0.008611807955140938
Target 6:
  MAE: 9.624209413854393e-09
  MSE: 6.586286880124984e-16
  R²: -4.709198073493681e-06
Target 7:
  MAE: 1173.4864399204766
  MSE: 2106282.1127194623
  R²: -0.00021509808916708728

Average Scores:
  Average MAE: 12747250892.743088
  Average MSE: 1.1061425913593277e+22
  Average R²: -0.016695698443680764


**CatBoost Regressor with Feature Interactions**

In [44]:
!pip install --upgrade numpy catboost

Collecting numpy
  Using cached numpy-2.2.4-cp312-cp312-macosx_14_0_arm64.whl.metadata (62 kB)


In [47]:
from catboost import CatBoostRegressor
import numpy as np

# Load Data (replace this with actual CSV file or DataFrame)
data = pd.read_csv('merged_properties.csv')  # Replace 'your_file.csv' with your actual file path
# Features and targets
# X = data.drop(columns=['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']).values
X = data[['vf', 'c55e', 'e15e', 'q15e', 'ϵ11e', 'μ11e', 'α11e', 'ρe', 'c55f', 'e15f', 'q15f', 'ϵ11f', 'μ11f', 'α11f', 'ρf']].values
y = data[['c44', 'e15', 'q15', 'μ11', 'ϵ11', 'α11', 'ρ']].values

# Feature scaling - CatBoost handles scaling internally
target_transformers = {}
for i in range(y.shape[1]):
    target_transformers[i] = PowerTransformer(method='yeo-johnson')

# Transform targets only
y_transformed = np.zeros_like(y)
for i in range(y.shape[1]):
    y_transformed[:, i] = target_transformers[i].fit_transform(y[:, i].reshape(-1, 1)).ravel()

# Train separate models for each output
catboost_models = []
for i in range(y.shape[1]):
    model = CatBoostRegressor(
        iterations=500,
        learning_rate=0.1,
        depth=6,
        loss_function='RMSE',
        #feature_interactions='Quadratic',  # Enable feature interactions
        verbose=False
    )

    model.fit(X, y_transformed[:, i])
    catboost_models.append(model)

# Predict and inverse transform
y_pred_transformed = np.zeros_like(y)
for i in range(y.shape[1]):
    y_pred_transformed[:, i] = catboost_models[i].predict(X)

y_pred = np.zeros_like(y_pred_transformed)
for i in range(y.shape[1]):
    y_pred[:, i] = target_transformers[i].inverse_transform(y_pred_transformed[:, i].reshape(-1, 1)).ravel()

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Calculate evaluation metrics for each target variable
mae_scores = []
mse_scores = []
r2_scores = []

for i in range(y.shape[1]):
    mae = mean_absolute_error(y[:, i], y_pred[:, i])
    mse = mean_squared_error(y[:, i], y_pred[:, i])
    r2 = r2_score(y[:, i], y_pred[:, i])

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Target {i+1}:")
    print(f"  MAE: {mae}")
    print(f"  MSE: {mse}")
    print(f"  R²: {r2}")

# Calculate average scores across all target variables
average_mae = np.mean(mae_scores)
average_mse = np.mean(mse_scores)
average_r2 = np.mean(r2_scores)

print("\nAverage Scores:")
print(f"  Average MAE: {average_mae}")
print(f"  Average MSE: {average_mse}")
print(f"  Average R²: {average_r2}")


model_name = 'CatBoost'
# Save the results 
results = pd.DataFrame({
    'Model': [model_name],
    'MAE': [average_mae],
    'MSE': [average_mse],
    'R²': [average_r2]
})

model_results.append(results)

Target 1:
  MAE: 89230754914.82697
  MSE: 7.742998139515294e+22
  R²: -0.0017868059556882532
Target 2:
  MAE: 16.02361095370023
  MSE: 8416.77169307311
  R²: -0.00017630103951260523
Target 3:
  MAE: 144.79582829434295
  MSE: 183385.9375816095
  R²: -0.010286681190803115
Target 4:
  MAE: 0.0007734085581937731
  MSE: 1.332981008065628e-06
  R²: -0.09578848567737985
Target 5:
  MAE: 0.06801132940515982
  MSE: 0.13069737796699848
  R²: -0.008611807955140938
Target 6:
  MAE: 9.624209413854393e-09
  MSE: 6.586286880124984e-16
  R²: -4.709198073493681e-06
Target 7:
  MAE: 1173.4864399204766
  MSE: 2106282.1127194623
  R²: -0.00021509808916708728

Average Scores:
  Average MAE: 12747250892.743088
  Average MSE: 1.1061425913593277e+22
  Average R²: -0.016695698443680764


NameError: name 'model_results' is not defined

In [46]:
model_results

[                       Model           MAE           MSE        R²
 0  GradientBoostingRegressor  3.626702e+08  8.964580e+18  0.998653,
            Model           MAE           MSE        R²
 0  NeuralNetwork  1.170840e+10  1.230171e+22 -0.343252,
                    Model           MAE           MSE       R²
 0  RandomForestRegressor  2.533723e+09  3.961296e+21  0.86193,
          Model           MAE           MSE        R²
 0  Transformer  6.009404e+09  1.023610e+22  0.257705,
                Model           MAE           MSE        R²
 0  StackingRegressor  4.435182e+09  9.577756e+21  0.321024,
                     Model           MAE           MSE        R²
 0  SupportVectorRegressor  9.002861e+10  6.607267e+22 -0.492567,
             Model           MAE           MSE       R²
 0  MultiTaskLasso  1.002884e+10  9.094514e+21  0.57679,
                 Model           MAE           MSE        R²
 0  BayesianRegression  1.274725e+10  1.106143e+22 -0.016696]

In [48]:
# Combine all results into a single DataFrame
final_results = pd.concat(model_results, ignore_index=True)

# Save to CSV
final_results.to_csv('model_results.csv', index=False)