# model_experiments

این نوت‌بوک شامل بخش‌های مربوط به ساخت، آموزش، و ارزیابی مدل‌ها برای پیش‌بینی انرژی بایندینگ است. محتوای این فایل از `BindingEnergy.ipynb` استخراج شده است.

In [None]:
import os
import json
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, PowerTransformer, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.utils.validation import check_is_fitted

import joblib
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, callbacks

# Seed NumPy and then Keras' global seeding helper so repeated runs start from
# the same random state.
np.random.seed(42)
tf.keras.utils.set_random_seed(42)

# Input data location and output folders; every generated file goes somewhere
# under ARTIFACT_DIR.
DATA_PATH = "/content/data.csv"
ARTIFACT_DIR = "./artifacts"
CKPT_DIR = os.path.join(ARTIFACT_DIR, "checkpoints")
PLOTS_DIR = os.path.join(ARTIFACT_DIR, "plots")

# Create the output folders up front; exist_ok keeps the cell re-runnable.
for _out_dir in (ARTIFACT_DIR, CKPT_DIR, PLOTS_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# Target column name (not referenced in the cells visible here; presumably
# used where the data is loaded -- confirm).
TARGET_COLUMN = "binding_energy"


## 3. Stratified Train/Test Split for Regression

In [None]:
# Stratification needs discrete classes, so the continuous target is bucketed
# first: up to 10 quantile bins, or 10 equal-width bins if quantile binning
# raises.
try:
    y_bins = pd.qcut(y, q=10, labels=False, duplicates="drop")
except Exception:
    y_bins = pd.cut(y, bins=10, labels=False)

# The bin labels are split alongside X and y (and also drive the
# stratification), so each subset comes back with its own labels.
(X_train, X_test,
 y_train, y_test,
 bins_train, bins_test) = train_test_split(
    X, y, y_bins, test_size=0.2, random_state=42, stratify=y_bins
)

print(f"\nTrain shape: {X_train.shape}, Test shape: {X_test.shape}")



Train shape: (2720, 15), Test shape: (680, 15)


In [None]:
def _to_dense(matrix):
    """Return ``matrix`` as a dense NumPy array.

    ``ColumnTransformer.transform`` hands back either a SciPy sparse matrix or
    a dense array, depending on the density of the stacked output relative to
    its ``sparse_threshold``. Calling ``.toarray()`` unconditionally therefore
    raises ``AttributeError`` whenever the result happens to be dense.
    """
    if hasattr(matrix, "toarray"):
        return matrix.toarray()
    return np.asarray(matrix)


# Categorical encoder: categories unseen during fit are ignored at transform
# time; min_frequency=0.01 is passed so that rare categories are grouped.
ohe = OneHotEncoder(
    handle_unknown="ignore",
    min_frequency=0.01
)

# Numeric columns: median imputation -> Yeo-Johnson transform (without its
# built-in standardization) -> explicit standard scaling.
numeric_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("power", PowerTransformer(method="yeo-johnson", standardize=False)),
    ("scaler", StandardScaler())
])

# Categorical columns: most-frequent imputation -> one-hot encoding.
categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", ohe)
])

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_cols),
        ("cat", categorical_transformer, categorical_cols)
    ],
    remainder="drop"  # keep only specified columns
)

# Fit preprocessing on TRAIN ONLY to avoid leakage
preprocessor.fit(X_train)

# Transform, then densify regardless of whether the transformer returned a
# sparse matrix or an ndarray.
X_train_proc = _to_dense(preprocessor.transform(X_train))
X_test_proc = _to_dense(preprocessor.transform(X_test))

# Feature names (for reference); fall back to positional names when the
# fitted transformer cannot report them.
try:
    feature_names = preprocessor.get_feature_names_out().tolist()
except Exception:
    feature_names = [f"f{i}" for i in range(X_train_proc.shape[1])]

print(f"\nProcessed feature matrix: train={X_train_proc.shape}, test={X_test_proc.shape}")

# Save preprocessor and metadata
joblib.dump(preprocessor, os.path.join(ARTIFACT_DIR, "preprocessor.pkl"))
with open(os.path.join(ARTIFACT_DIR, "feature_names.json"), "w") as f:
    json.dump(feature_names, f, indent=2)



Processed feature matrix: train=(2720, 67), test=(680, 67)


## 5. Optional Target Scaling (stabilizes training)

## 6. Model Architecture (Residual MLP with regularization)

In [None]:
def build_model(input_dim: int,
                width: int = 256,
                blocks: int = 3,
                dropout: float = 0.2,
                l2_reg: float = 1e-4,
                lr: float = 1e-3,
                weight_decay: float = 1e-5) -> tf.keras.Model:
    """Build and compile a residual MLP with a single linear output unit.

    Layout: a Dense/BatchNorm/GELU/Dropout stem, followed by ``blocks``
    residual blocks of constant ``width``, followed by a one-unit Dense head
    named ``"y_scaled"``.

    Args:
        input_dim: Number of input features per sample.
        width: Units in every hidden Dense layer. It is the same everywhere so
            the skip connections can be added without a projection.
        blocks: Number of residual blocks after the stem.
        dropout: Dropout rate used after the stem, inside each block, and
            after each block's activation.
        l2_reg: L2 kernel-regularization factor for the hidden Dense layers
            (the output layer is not regularized).
        lr: Optimizer learning rate.
        weight_decay: Weight decay passed to AdamW. Ignored when the installed
            Keras has no ``AdamW`` and plain Adam is used instead.

    Returns:
        A compiled ``tf.keras.Model`` with MSE loss and ``mae`` / ``rmse``
        metrics.
    """
    inp = layers.Input(shape=(input_dim,), name="features")

    # Stem
    x = layers.Dense(width, kernel_initializer="he_normal",
                     kernel_regularizer=regularizers.l2(l2_reg))(inp)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("gelu")(x)
    x = layers.Dropout(dropout)(x)

    # Residual blocks (keep same width to allow skip connections)
    for _ in range(blocks):
        shortcut = x
        branch = layers.Dense(width, kernel_initializer="he_normal",
                              kernel_regularizer=regularizers.l2(l2_reg))(x)
        branch = layers.BatchNormalization()(branch)
        branch = layers.Activation("gelu")(branch)
        branch = layers.Dropout(dropout)(branch)

        branch = layers.Dense(width, kernel_initializer="he_normal",
                              kernel_regularizer=regularizers.l2(l2_reg))(branch)
        branch = layers.BatchNormalization()(branch)
        # Post-activation residual: the skip is added first, then the
        # activation and dropout are applied to the sum.
        x = layers.Add()([shortcut, branch])
        x = layers.Activation("gelu")(x)
        x = layers.Dropout(dropout)(x)

    # Head
    out = layers.Dense(1, name="y_scaled")(x)

    # Optimizer: AdamW if this Keras build exposes it, else Adam. Only the
    # availability check falls back; an error raised while constructing the
    # optimizer (e.g. a bad hyperparameter) now surfaces instead of silently
    # dropping weight decay.
    adamw_cls = getattr(tf.keras.optimizers, "AdamW", None)
    if adamw_cls is not None:
        opt = adamw_cls(learning_rate=lr, weight_decay=weight_decay)
    else:
        opt = tf.keras.optimizers.Adam(learning_rate=lr)

    model = models.Model(inputs=inp, outputs=out, name="ResidualMLPRegressor")
    model.compile(
        optimizer=opt,
        loss="mse",
        metrics=[tf.keras.metrics.MeanAbsoluteError(name="mae"),
                 tf.keras.metrics.RootMeanSquaredError(name="rmse")]
    )
    return model

# Build the network for the preprocessed feature width. Dropout is set to 0.25
# here; all hyperparameters are spelled out explicitly for the run record.
model = build_model(
    input_dim=X_train_proc.shape[1],
    width=256, blocks=3, dropout=0.25,
    l2_reg=1e-4, lr=1e-3, weight_decay=1e-5,
)

# Print the layer-by-layer overview.
model.summary()

## 7. Training with Callbacks

In [None]:
# File that receives the best model seen so far during training.
ckpt_path = os.path.join(CKPT_DIR, "best_model.keras")

# All three callbacks monitor the validation loss.
# Checkpoint: overwrite the file only when val_loss improves.
model_ckpt = callbacks.ModelCheckpoint(
    filepath=ckpt_path, monitor="val_loss", save_best_only=True, verbose=1
)

# LR schedule: halve the learning rate after 10 epochs without improvement,
# never going below 1e-6.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.5, patience=10, min_lr=1e-6, verbose=1
)

# Early stopping: give up after 30 epochs without improvement and roll the
# in-memory model back to its best weights.
early_stop = callbacks.EarlyStopping(
    monitor="val_loss", patience=30, restore_best_weights=True, verbose=1
)

# The validation data is carved out of the training arrays only; the test set
# is not involved.
history = model.fit(
    x=X_train_proc,
    y=y_train_scaled,
    batch_size=64,
    epochs=500,
    validation_split=0.2,
    callbacks=[early_stop, reduce_lr, model_ckpt],
    verbose=1,
)

# Reload the checkpointed model from disk as a safeguard, independent of the
# weights restored by early stopping.
best_model = tf.keras.models.load_model(ckpt_path)
print("\nLoaded best model from checkpoint.")


Epoch 1/500
30/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.1178 - mae: 0.1273 - rmse: 0.2019
Epoch 1: val_loss improved from inf to 0.16864, saving model to ./artifacts/checkpoints/best_model.keras
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step - loss: 0.1197 - mae: 0.1282 - rmse: 0.2066 - val_loss: 0.1686 - val_mae: 0.1260 - val_rmse: 0.3044 - learning_rate: 6.2500e-05
Epoch 2/500
28/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.1162 - mae: 0.1248 - rmse: 0.1999
Epoch 2: val_loss improved from 0.16864 to 0.15359, saving model to ./artifacts/checkpoints/best_model.keras
34/34 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 0.1180 - mae: 0.1250 - rmse: 0.2043 - val_loss: 0.1536 - val_mae: 0.1168 - val_rmse: 0.2789 - learning_rate: 6.2500e-05
Epoch 3/500
28/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 0.1137 - mae: 0.1195 - rmse: 0. [output truncated]

In [None]:
# --- Metrics on the scaled target -------------------------------------------
# The labels printed below assume evaluate() returns loss, mae, rmse in that
# order.
eval_results = best_model.evaluate(X_test_proc, y_test_scaled, verbose=0)
print("\n========== Test Metrics (scaled target) ==========")
print(
    f"Loss (MSE): {eval_results[0]:.6f}\n"
    f"MAE:        {eval_results[1]:.6f}\n"
    f"RMSE:       {eval_results[2]:.6f}"
)

# --- Metrics in original units ----------------------------------------------
# Predict on the scaled target, then map the predictions back through the
# target scaler.
y_pred_scaled = best_model.predict(X_test_proc, verbose=0)
y_pred = y_scaler.inverse_transform(y_pred_scaled)

# y_test is reshaped to a column so the subtraction is element-wise against
# y_pred rather than broadcasting into a matrix.
mae = np.mean(np.abs(y_test.values.reshape(-1, 1) - y_pred))
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("\n========== Test Metrics (original target) ==========")
print(
    f"MAE:  {mae:.6f}\n"
    f"MSE:  {mse:.6f}\n"
    f"RMSE: {rmse:.6f}\n"
    f"R^2:  {r2:.6f}"
)



Loss (MSE): 0.163377
MAE:        0.125585
RMSE:       0.301150

MAE:  65.136266
MSE:  24396.891355
RMSE: 156.195043
R^2:  0.919825


In [None]:
# Persist the checkpoint-loaded model under a stable "final" name.
final_model_path = os.path.join(ARTIFACT_DIR, "final_model.keras")
best_model.save(final_model_path)
print(f"\nSaved final model to: {final_model_path}")

# Run metadata: feature layout, split sizes and the original-unit test
# metrics. Values are cast to built-in int/float/list so they serialize as
# plain JSON.
metadata = {
    "input_dim": int(X_train_proc.shape[1]),
    "numeric_cols": numeric_cols,
    "categorical_cols": categorical_cols,
    "train_shape": [*X_train.shape],
    "test_shape": [*X_test.shape],
    "metrics": {
        name: float(value)
        for name, value in (("mae", mae), ("mse", mse), ("rmse", rmse), ("r2", r2))
    },
}

run_metadata_path = os.path.join(ARTIFACT_DIR, "run_metadata.json")
with open(run_metadata_path, "w") as f:
    json.dump(metadata, f, indent=2)

print("\nTraining and evaluation completed successfully!")
print(f"Artifacts saved in: {ARTIFACT_DIR}")



Saved final model to: ./artifacts/final_model.keras

Training and evaluation completed successfully!
Artifacts saved in: ./artifacts
