# Surrogate 3D Model
Objectives:
- Train a neural-network surrogate model capable of predicting reservoir pressure from spatial coordinates and simulation features.
- Reduce computational cost by replacing full-physics simulations with a fast, data-driven approximation.
- Prepare model outputs (predictions and metrics) for downstream visualization and spatial analysis.
- Evaluate model performance using MSE and R² to ensure accuracy and stability before moving to the next notebook.
- Generate a saved version of the trained model for reuse in later steps of the workflow.

**Source:** Society of Petroleum Engineers (SPE)  
**Dataset:** SPE Comparative Solution Project - Model 11C (3D CO₂ Injection)

In [5]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Paths
PROCESSED = Path(r"C:\Users\tetec\Documents\Data Project Coding\.vscode\Project data\spe11c\data\processed")

X4d = np.load(PROCESSED / "x.npy")
y3d = np.load(PROCESSED / "y.npy")

NX, NY, NZ, n_features = X4d.shape

print("Loaded dataset:")
print(f"Grid size: {NX} * {NY} * {NZ}")
print(f"Features per cell: {n_features}") 

Loaded dataset:
Grid size: 20 * 20 * 10
Features per cell: 27


## 1. Train–Test Split and Standardization of Input–Output Variables

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_flat = X4d.reshape(-1, n_features)
y_flat = y3d.reshape(-1)

X_train, X_test, y_train, y_test = train_test_split(
    X_flat, y_flat, test_size=0.2, random_state=42
)

scaler_x = StandardScaler()
X_train_scaled = scaler_x.fit_transform(X_train)
X_test_scaled = scaler_x.transform(X_test)

scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1)).flatten()

print("Train/Test shapes:", X_train_scaled.shape, X_test_scaled.shape)

Train/Test shapes: (3200, 27) (800, 27)


## 2. Implementation and Training of the Surrogate Multilayer Perceptron (MLP)

In [7]:
# Define surrogate MLP
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping

def build_mlp(input_dim):
    return keras.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(64, activation="relu"),
        layers.Dense(1, activation="linear")
    ])

mlp = build_mlp(n_features)

mlp.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="mse",
    metrics=["mse"]
)

In [8]:
es = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True
)

history = mlp.fit(
    X_train_scaled,
    y_train_scaled,
    validation_split=0.2,
    epochs=300,
    batch_size=32,
    callbacks=[es],
    verbose=1
)

Epoch 1/300
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0527 - mse: 0.0527 - val_loss: 0.0023 - val_mse: 0.0023
Epoch 2/300
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0011 - mse: 0.0011 - val_loss: 8.6083e-04 - val_mse: 8.6083e-04
Epoch 3/300
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 7.1731e-04 - mse: 7.1731e-04 - val_loss: 8.3084e-04 - val_mse: 8.3084e-04
Epoch 4/300
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5.8716e-04 - mse: 5.8716e-04 - val_loss: 0.0010 - val_mse: 0.0010
Epoch 5/300
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0012 - mse: 0.0012 - val_loss: 0.0068 - val_mse: 0.0068
Epoch 6/300
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0015 - mse: 0.0015 - val_loss: 5.4914e-04 - val_mse: 5.4914e-04
Epoch 7/300
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0

## 3. Performance Assessment and Model Serialization

In [9]:
from sklearn.metrics import mean_squared_error, r2_score

y_pred_scaled = mlp.predict(X_test_scaled).flatten()

y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1,1)).flatten()

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Surrogate MLP - MSE: {mse:.4f}")
print(f"Surrogate MLP - R²: {r2:.4f}")

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Surrogate MLP - MSE: 1364424320.0000
Surrogate MLP - R²: 0.9999


In [10]:
models_dir = Path(r"C:\Users\tetec\Documents\Data Project Coding\.vscode\Project data\spe11c\models")
models_dir.mkdir(parents=True, exist_ok=True)

mlp.save(models_dir / "Surrogate_MLP.keras")

import joblib
joblib.dump(scaler_y, models_dir / "y_scaler.pkl")

print("Model and scaler saved.")


Model and scaler saved.


In [11]:
print("y_train.shape:", y_train.shape)
print("y_test.shape:", y_test.shape)
print("y_pred.shape:", y_pred.shape)

y_train.shape: (3200,)
y_test.shape: (800,)
y_pred.shape: (800,)
