# CNN Benchmark for MT Inversion – 100 Layers, 100,000 Samples

Configuration uses 90 frequencies, 100 depth outputs, Gaussian noise tests (1%, 3%, 5%), and a Conv1D architecture with final Dense(100).

## Scientific Background: Magnetotellurics (MT) Inversion

**What is Magnetotellurics?**
Magnetotellurics is a geophysical method that uses natural electromagnetic fields from the Earth's magnetosphere and lightning to probe the electrical conductivity structure of the subsurface. MT measurements record time-varying electric (E) and magnetic (H) fields at the Earth's surface.

**The MT Transfer Function:**
The relationship between E and H fields is described by the impedance tensor Z(ω), where ω is the angular frequency:
- E(ω) = Z(ω) · H(ω)

**Observable Quantities:**
From the impedance Z, we derive two key observables:
1. **Apparent Resistivity (ρₐ)**: ρₐ = |Z|²/(μ₀ω), measured in Ohm·m
2. **Phase (φ)**: φ = arg(Z), measured in degrees

**The Inverse Problem:**
The goal of MT inversion is to recover the true subsurface resistivity structure ρ(z) from the measured apparent resistivity and phase. This is a classic **ill-posed inverse problem** because:
- Multiple resistivity models can produce similar surface measurements
- Noise in measurements adds uncertainty
- The problem is non-linear

**CNN Approach:**
This notebook uses Convolutional Neural Networks (CNNs) to solve the MT inverse problem by:
1. Training on 100,000 synthetic forward models
2. Learning the mapping from (ρₐ, φ) → ρ(z)
3. Testing robustness against measurement noise (1%, 3%, 5%)


In [None]:
# ============================================================================
# IMPORT LIBRARIES
# ============================================================================

import os, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Scikit-learn utilities for data preprocessing and splitting
from sklearn.preprocessing import MinMaxScaler  # Scales features to [0,1] range
from sklearn.model_selection import train_test_split  # Splits data into train/val/test sets
from scipy.interpolate import make_interp_spline  # Creates smooth spline interpolation for resistivity profiles

# TensorFlow and Keras for deep learning
import tensorflow as tf
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# ============================================================================
# REPRODUCIBILITY SETUP
# ============================================================================
# Set random seeds for all libraries to ensure reproducible results across runs

# A single seed shared by every random number generator used in this notebook.
SEED = 42

# NOTE: PYTHONHASHSEED is read when the interpreter starts, so assigning it here
# does not change hashing in the already-running process; it is only inherited
# by child processes started later.
os.environ["PYTHONHASHSEED"] = str(SEED)

# Seed Python's `random`, NumPy's global RNG and TensorFlow, in that order.
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(SEED)

# ============================================================================
# GPU CONFIGURATION
# ============================================================================
# Configure TensorFlow to use GPU if available and enable dynamic memory growth
# to prevent TensorFlow from allocating all GPU memory at once

# Look up the visible GPUs once and reuse the list for reporting and setup.
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available:", len(gpus))
if gpus:
    try:
        # Ask TensorFlow to grow GPU memory on demand instead of reserving
        # all of it up front.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        print("Using GPU:", gpus[0])
    except RuntimeError as err:
        # Report the configuration failure and carry on with defaults.
        print(err)

# ============================================================================
# CUSTOM METRICS: CORRELATION COEFFICIENT
# ============================================================================

def corrcoef_batch(y_true, y_pred):
    """
    Keras metric: per-sample Pearson correlation, averaged over the batch.

    Each row of `y_true` / `y_pred` is treated as one profile. The correlation
    is computed along axis 1 and the per-row values are averaged into a scalar.

    Args:
        y_true: Reference profiles, indexed as (batch, depth).
        y_pred: Predicted profiles, same layout as `y_true`.

    Returns:
        Scalar tensor holding the mean correlation coefficient of the batch.
    """
    eps = 1e-12  # guards against division by zero for constant profiles

    truth = tf.cast(y_true, tf.float32)
    pred = tf.cast(y_pred, tf.float32)

    # Remove each profile's own mean (computed along the depth axis).
    truth_centered = truth - tf.reduce_mean(truth, axis=1, keepdims=True)
    pred_centered = pred - tf.reduce_mean(pred, axis=1, keepdims=True)

    # r = Σ(x-x̄)(y-ȳ) / √(Σ(x-x̄)² · Σ(y-ȳ)²), evaluated row by row.
    covariance = tf.reduce_sum(truth_centered * pred_centered, axis=1)
    truth_ss = tf.reduce_sum(truth_centered**2, axis=1)
    pred_ss = tf.reduce_sum(pred_centered**2, axis=1)
    denominator = tf.sqrt(truth_ss * pred_ss + eps)
    per_sample_r = covariance / (denominator + eps)

    return tf.reduce_mean(per_sample_r)

def numpy_corrcoef(y_true, y_pred):
    """
    Mean per-sample Pearson correlation between two sets of profiles (NumPy).

    Each row is one profile; the correlation is computed along the last axis
    for every row and the per-row values are averaged. The computation is
    vectorized and accumulated in float64, so it stays fast and accurate for
    large float32 evaluation sets. A 1-D input is treated as a single profile.

    Args:
        y_true: True profiles, shape (N_samples, num_layers).
        y_pred: Predicted profiles, same shape as `y_true`.

    Returns:
        Mean correlation coefficient (float). Rows with zero variance
        contribute 0 because of the epsilon in the denominator.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    eps = 1e-12  # same stabiliser (inside and outside the sqrt) as corrcoef_batch

    truth = np.atleast_2d(np.asarray(y_true, dtype=np.float64))
    pred = np.atleast_2d(np.asarray(y_pred, dtype=np.float64))
    if truth.shape != pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {truth.shape} and {pred.shape}"
        )

    # Centre every profile on its own mean.
    truth_centered = truth - truth.mean(axis=1, keepdims=True)
    pred_centered = pred - pred.mean(axis=1, keepdims=True)

    num = np.sum(truth_centered * pred_centered, axis=1)
    den = np.sqrt(
        np.sum(truth_centered**2, axis=1) * np.sum(pred_centered**2, axis=1) + eps
    )
    per_sample_r = num / (den + eps)
    return float(np.mean(per_sample_r))


### Forward Modeling Physics

**Key Physical Principle - Maxwell's Equations:**
MT forward modeling solves the 1D diffusion equation derived from Maxwell's equations in a layered medium:

∂²E/∂z² = iωμ₀σE

where:
- σ = 1/ρ (conductivity = inverse of resistivity)
- ω = 2π/T (angular frequency)
- μ₀ = 4π×10⁻⁷ H/m (magnetic permeability of free space)

**Recursive Impedance Algorithm:**
The code implements Wait's recursion (1954) to efficiently compute impedance:
1. Start at the bottom layer (half-space)
2. Propagate impedance upward through each layer using:
   - Wave number: K = √(σμ₀ωi)
   - Layer response with hyperbolic tangent
3. Surface impedance gives apparent resistivity and phase

**Why 100,000 Samples?**
Deep learning requires large datasets to:
- Learn complex non-linear relationships
- Generalize across diverse resistivity structures
- Handle measurement noise robustly


## Data Generation: Forward MT Modeling

**Physical Parameters:**
The forward problem calculates how electromagnetic waves propagate through a layered Earth based on:
- **Skin depth (δ)**: The depth at which EM field amplitude decays to 1/e (~37%) of surface value
  - δ ≈ 500√(ρT) meters, where ρ is resistivity (Ohm·m) and T is period (seconds)
  - Longer periods penetrate deeper into the Earth

**Frequency Range:**
Using 90 logarithmically-spaced periods from 0.001s to 1000s:
- Short periods (0.001-1s): Probe shallow depths (δ ≈ 0.16-5 km for ρ = 100 Ohm·m)
- Long periods (1-1000s): Probe deep structures (δ ≈ 5-160 km for ρ = 100 Ohm·m)

**Layer Discretization:**
The subsurface is divided into 100 layers with exponentially increasing thickness:
- Thin layers near surface for high resolution
- Thick layers at depth where resolution decreases
- Layer thicknesses scale with skin depth to capture physical behavior

**Synthetic Data Generation:**
1. Create smooth resistivity profiles using spline interpolation (realistic geological structures)
2. Solve Maxwell's equations using recursive impedance formulation
3. Generate apparent resistivity and phase curves
4. Add realistic Gaussian noise to test robustness

### Why Data Normalization?

**MinMax Scaling to [0,1]:**
MT data spans many orders of magnitude:
- Resistivity: 1 to 10,000 Ohm·m (4 orders)
- Phase: 0° to 90°
- Depths: exponential spacing

Normalization ensures:
1. **Numerical stability**: Prevents gradient vanishing/explosion
2. **Fair feature weighting**: ρₐ and φ contribute equally
3. **Faster convergence**: Optimization works better on [0,1] scale
4. **Better generalization**: Network learns relative patterns, not absolute values

All predictions are inverse-transformed back to physical units for evaluation.


In [None]:

# --- Physical constant and sounding configuration ---------------------------
mu_0 = 4 * np.pi * 1e-7               # magnetic permeability of free space (H/m)
N_FREQ = 90                           # number of periods per sounding
periods = np.logspace(-3, 3, N_FREQ)  # 1e-3 s ... 1e3 s, log-spaced
resistivity_range = [1.0, 1e4]        # bounds for the synthetic models
rho_ref = 100.0                       # reference resistivity used to size the layers

# --- Layer grid derived from the skin-depth rule δ ≈ 500·sqrt(ρ·T) ----------
min_period, max_period = periods.min(), periods.max()
max_skin_depth = 500 * np.sqrt(rho_ref * max_period)
# The thinnest layer is a quarter of the shortest-period skin depth.
min_skin_depth = 500 * np.sqrt(rho_ref * min_period) / 4

M_OUTPUT = 100
# Common ratio of the geometric progression from min_skin_depth to max_skin_depth.
r = (max_skin_depth / min_skin_depth) ** (1.0 / (M_OUTPUT - 1))
layer_thicknesses = np.fromiter(
    (min_skin_depth * (r ** i) for i in range(M_OUTPUT)),
    dtype=float,
    count=M_OUTPUT,
)
num_layers = layer_thicknesses.size
print("Layers:", num_layers)

def generate_smooth_resistivity_profile(num_points, resistivity_range, n_ctrl=10, seed=None):
    """
    Draw a random, smooth resistivity profile.

    `n_ctrl` control values are sampled uniformly between the two bounds,
    placed at evenly spaced positions over the index range
    [0, num_points - 1], joined by a cubic interpolating spline and evaluated
    at `num_points` evenly spaced positions. Spline overshoot is clipped back
    into the bounds.

    Args:
        num_points: Number of samples in the returned profile.
        resistivity_range: Two-element sequence (lower bound, upper bound).
        n_ctrl: Number of random control points fed to the spline.
        seed: Anything accepted by `np.random.default_rng`.

    Returns:
        1-D array of length `num_points` with values inside the bounds.
    """
    rho_min, rho_max = resistivity_range[0], resistivity_range[1]
    last_index = num_points - 1
    generator = np.random.default_rng(seed)

    # Random control values at evenly spaced knots.
    knot_positions = np.linspace(0, last_index, num=n_ctrl)
    knot_values = generator.uniform(rho_min, rho_max, size=n_ctrl)

    # Cubic spline through the knots, sampled on the output grid.
    cubic = make_interp_spline(knot_positions, knot_values, k=3)
    sample_positions = np.linspace(0, last_index, num_points)

    # The spline may overshoot between knots; keep values inside the bounds.
    return np.clip(cubic(sample_positions), rho_min, rho_max)

def compute_apparent_resistivity_and_phase(thicknesses, conductivities, periods):
    """
    1-D layered-earth MT forward response, vectorized over all periods.

    The recursion starts in the bottom half-space with the response 1/K and
    propagates it upward layer by layer using tanh(K·h), where
    K = sqrt(i·ω·μ₀·σ). Apparent resistivity is μ₀·ω·|c|² and phase is
    arg(c) + 90° for the surface value `c`, so a uniform half-space gives
    ρₐ = 1/σ and φ = 45°.

    All periods are processed at once as NumPy arrays rather than in a
    Python loop per period; the arithmetic for each period is unchanged.

    Args:
        thicknesses: Layer thicknesses (m). Only the first N-1 entries are
            used, because the last layer is a half-space. May have N or N-1
            entries, where N = len(conductivities).
        conductivities: Layer conductivities (S/m), top to bottom, length N.
        periods: Periods (s); a scalar is treated as a one-element array.

    Returns:
        Tuple (apparent_resistivity, phase): two float arrays with one value
        per period, phase in degrees.

    Raises:
        ValueError: If fewer than N-1 thicknesses are supplied.
    """
    thicknesses = np.asarray(thicknesses, dtype=float)
    conductivities = np.asarray(conductivities, dtype=float)
    omega = 2 * np.pi / np.atleast_1d(np.asarray(periods, dtype=float))

    n_layers = conductivities.shape[0]
    if thicknesses.shape[0] < n_layers - 1:
        raise ValueError(
            f"need at least {n_layers - 1} layer thicknesses for "
            f"{n_layers} conductivities, got {thicknesses.shape[0]}"
        )

    # Bottom half-space: response is 1/K for every frequency.
    response = 1 / np.sqrt(mu_0 * omega * conductivities[-1] * 1j)

    # Walk upward through the finite layers (the half-space has no thickness).
    for j in range(n_layers - 2, -1, -1):
        K = np.sqrt(conductivities[j] * mu_0 * omega * 1j)
        tanh_kh = np.tanh(K * thicknesses[j])
        response = (1 / K) * ((K * response + tanh_kh) / (1 + K * response * tanh_kh))

    apparent_resistivity = np.abs(response) ** 2 * (mu_0 * omega)
    phase = np.degrees(np.angle(response)) + 90.0
    return apparent_resistivity, phase

def resample_to_M(profile_native, M=M_OUTPUT):
    """
    Linearly resample a 1-D profile onto `M` evenly spaced points.

    Input and output are both placed on a normalized [0, 1] axis, so the
    first and last samples are preserved and the interior is interpolated
    with `np.interp`.

    Args:
        profile_native: 1-D sequence of profile values.
        M: Number of samples in the output.

    Returns:
        1-D array of length `M`.
    """
    n_native = len(profile_native)
    source_grid = np.linspace(0, 1, num=n_native)
    target_grid = np.linspace(0, 1, num=M)
    resampled = np.interp(target_grid, source_grid, profile_native)
    return resampled

# Number of synthetic soundings ("stations") to generate.
N_STATIONS = 100000

X_clean_list = []
y_list = []
# One seeded generator drives every profile so the dataset is reproducible.
rng = np.random.default_rng(SEED)

for _ in range(N_STATIONS):
    # Pass the shared generator: np.random.default_rng returns an existing
    # Generator unchanged, so successive profiles continue one seeded stream.
    # Without it each profile would be drawn from fresh OS entropy.
    resistivities_native = generate_smooth_resistivity_profile(
        M_OUTPUT, resistivity_range, seed=rng
    )
    conductivities_native = 1.0 / resistivities_native

    # Forward model: apparent resistivity and phase at every period.
    rho_a, phi = compute_apparent_resistivity_and_phase(
        layer_thicknesses, conductivities_native, periods
    )

    # The native profile already has M_OUTPUT samples, so this is an identity
    # resampling here; it is kept so the target length is always M_OUTPUT.
    resistivities_M = resample_to_M(resistivities_native, M_OUTPUT)

    # Input sample: (n_periods, 2) with columns [rho_a, phi].
    X_pair = np.stack([rho_a, phi], axis=1)
    X_clean_list.append(X_pair.astype(np.float32))
    y_list.append(resistivities_M.astype(np.float32))

X_clean = np.stack(X_clean_list, axis=0)
y = np.stack(y_list, axis=0)
print("X_clean:", X_clean.shape, "| y:", y.shape)

# One global MinMax scaler per physical quantity. Each quantity is flattened
# to a single column so that one min/max pair is shared by all periods (or
# all depths), then reshaped back to its per-sample layout.
# NOTE(review): the scalers are fit on the full dataset, before any
# train/validation/test split is made.
scaler_rho, scaler_phi, scaler_y = MinMaxScaler(), MinMaxScaler(), MinMaxScaler()

X_rho = X_clean[..., 0].reshape(-1, 1)   # apparent resistivity channel
X_phi = X_clean[..., 1].reshape(-1, 1)   # phase channel
y_vec = y.reshape(-1, 1)                 # target resistivities

X_rho_scaled_all = scaler_rho.fit_transform(X_rho).reshape(X_clean.shape[:2])
X_phi_scaled_all = scaler_phi.fit_transform(X_phi).reshape(X_clean.shape[:2])
y_scaled_all = scaler_y.fit_transform(y_vec).reshape(y.shape)

# Re-assemble the two scaled channels into (samples, periods, 2).
X_scaled = np.stack((X_rho_scaled_all, X_phi_scaled_all), axis=-1)
y_scaled = y_scaled_all

def add_gaussian_noise(x, std):
    """
    Return `x` with zero-mean Gaussian noise added, clipped to [0, 1].

    The noise is drawn from NumPy's global RNG (`np.random.normal`) with the
    same shape as `x` and cast to float32 before being added.

    Args:
        x: Array of scaled inputs.
        std: Standard deviation of the noise, in the same units as `x`.

    Returns:
        Array with the shape of `x`, values limited to the range [0, 1].
    """
    perturbation = np.random.normal(0.0, std, x.shape).astype(np.float32)
    perturbed = x + perturbation
    return np.clip(perturbed, 0.0, 1.0)

# Noisy copies of the scaled inputs at σ = 0.01, 0.03 and 0.05 (scaled units).
# The three realisations are drawn in this order from the global NumPy RNG.
X_noise_1, X_noise_3, X_noise_5 = (
    add_gaussian_noise(X_scaled, sigma) for sigma in (0.01, 0.03, 0.05)
)


## Data Splitting Strategy

**Train-Validation-Test Split:**
- **Training set (64%)**: 64,000 samples to learn the inverse mapping
- **Validation set (16%)**: 16,000 samples for hyperparameter tuning and preventing overfitting
- **Test set (20%)**: 20,000 samples for final unbiased performance evaluation

**Noise Augmentation for Robustness:**
Real MT measurements contain noise from:
- Atmospheric electromagnetic interference
- Cultural noise (power lines, railways)
- Instrument drift and digitization errors

We test three noise levels on scaled data [0,1]:
- **1% noise (σ=0.01)**: Excellent field conditions
- **3% noise (σ=0.03)**: Typical field conditions
- **5% noise (σ=0.05)**: Poor field conditions or distant/weak signals

Gaussian noise is added to both apparent resistivity and phase after normalization.

### Deep Learning vs Traditional MT Inversion

**Traditional Methods:**
- **Occam's inversion**: Seeks smoothest model fitting data
- **Iterative linearization**: Slow, requires good initial guess
- **Computationally expensive**: Hours to days per inversion

**CNN Advantages:**
1. **Speed**: Real-time inversion (<1ms per station after training)
2. **No initial model needed**: Direct mapping from data to structure
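3. **Handles noise naturally**: Noise robustness can be learned when noisy examples are included in training (this notebook trains on clean data and adds noise only at test time)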
4. **Batch processing**: Can invert thousands of stations simultaneously

**Trade-offs:**
- Requires large training dataset (expensive forward modeling once)
- Limited to distribution of training models
- Less interpretable than physics-based inversions


In [None]:

# Split by sample index so that every derived array (clean inputs, noisy
# inputs, targets) can be sliced with exactly the same rows.
all_indices = np.arange(X_scaled.shape[0])

# 80/20 split into train+val and test, then 80/20 again into train and val.
idx_trainval, idx_test = train_test_split(
    all_indices, test_size=0.2, random_state=SEED, shuffle=True)
idx_train, idx_val = train_test_split(
    idx_trainval, test_size=0.2, random_state=SEED, shuffle=True)

X_trainval, y_trainval = X_scaled[idx_trainval], y_scaled[idx_trainval]
X_train, y_train = X_scaled[idx_train], y_scaled[idx_train]
X_val, y_val = X_scaled[idx_val], y_scaled[idx_val]
X_test, y_test = X_scaled[idx_test], y_scaled[idx_test]

# Noisy test inputs must be the SAME (shuffled) samples as X_test / y_test.
# Taking the last 20% of rows of the noisy arrays would pair each noisy input
# with the target of a different sample.
X_test_n1 = X_noise_1[idx_test]
X_test_n3 = X_noise_3[idx_test]
X_test_n5 = X_noise_5[idx_test]

print("Train:", X_train.shape, y_train.shape)
print("Val:", X_val.shape, y_val.shape)
print("Test:", X_test.shape, y_test.shape)


### Training on Large Dataset

**Why 100,000 Samples?**
- MT inverse problem is highly non-unique (many models fit data)
- Need to explore vast resistivity model space
- CNN must learn which features distinguish different structures
- Rare but important geological structures need representation

**GPU Acceleration:**
- Training on 100k samples would take days on CPU
- GPU parallelizes convolution operations across:
  - Batch dimension (128 samples at once)
  - Filter operations (thousands of multiplications)
  - Frequency channels
- Typical speedup: 10-50x over CPU


## CNN Architecture for MT Inversion

**Why CNNs for MT Data?**
Convolutional Neural Networks are ideal because:
1. **Local patterns**: Nearby frequencies are physically related (skin depth smoothly varies)
2. **Translation invariance**: Same patterns at different frequency bands
3. **Hierarchical learning**: Early layers detect local features, deeper layers integrate global structure
4. **Parameter efficiency**: Shared weights across frequency reduce overfitting

**Architecture Design:**
```
Input: (90 frequencies, 2 features [ρₐ, φ])
├─ Conv1D(32 filters, kernel=5) → Learn local frequency patterns
├─ MaxPool(2) → Downsample, extract dominant features
├─ Conv1D(64 filters, kernel=3) → Learn mid-scale patterns
├─ MaxPool(2) → Further abstraction
├─ Conv1D(128 filters, kernel=3) → Learn global frequency relationships
├─ MaxPool(2) → Final feature extraction
├─ Flatten → Convert to 1D feature vector
├─ Dense(128) + Dropout(0.05) → High-level reasoning
└─ Dense(100) → Output: 100-layer resistivity profile
```

**Regularization:**
- **L2 regularization (1e-5)**: Prevents overfitting by penalizing large weights (applied to the two Dense layers)
- **Dropout (5%)**: Randomly disables neurons during training for robustness
- **Early stopping**: Halts training when validation loss stops improving

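**Loss Function and Metric:**
- **MSE (Mean Squared Error)**: The training loss; measures the average squared difference between predicted and true (scaled) resistivity
- **Correlation coefficient (R)**: Tracked as a metric only (not part of the loss); measures how well the predicted shape matches the true profile (more geophysically meaningful)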

In [None]:

def build_cnn(input_shape, l2_reg=1e-5, dropout_rate=0.05, n_outputs=100):
    """
    Build and compile the 1-D CNN used for the inversion.

    Architecture: three Conv1D + MaxPooling1D stages (32, 64, 128 filters;
    kernel sizes 5, 3, 3; 'same' padding; ReLU), then Flatten, Dense(128,
    ReLU), Dropout and a linear Dense output layer. L2 regularization is
    applied to the kernels of the two Dense layers only.

    Args:
        input_shape: Shape of one input sample, without the batch dimension.
        l2_reg: L2 penalty factor for the Dense layer kernels.
        dropout_rate: Dropout rate applied after the hidden Dense layer.
        n_outputs: Number of values predicted per sample. Defaults to 100,
            the previously hard-coded output width.

    Returns:
        A compiled Keras `Model` (Adam with learning rate 1e-3, MSE loss,
        `corrcoef_batch` as metric).
    """
    inputs = Input(shape=input_shape)

    # Convolutional feature extractor; each pooling stage halves the length.
    x = Conv1D(filters=32, kernel_size=5, activation='relu', padding='same')(inputs)
    x = MaxPooling1D(pool_size=2)(x)
    x = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(x)
    x = MaxPooling1D(pool_size=2)(x)
    x = Conv1D(filters=128, kernel_size=3, activation='relu', padding='same')(x)
    x = MaxPooling1D(pool_size=2)(x)

    # Regression head.
    x = Flatten()(x)
    x = Dense(128, activation='relu', kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(n_outputs, activation='linear',
                    kernel_regularizer=regularizers.l2(l2_reg))(x)

    model = Model(inputs, outputs, name='CNN_MT_Inversion')
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
                  loss='mse',
                  metrics=[corrcoef_batch])
    return model

# Instantiate the network for one sample's shape (batch axis dropped) and
# print the layer-by-layer summary.
cnn = build_cnn(X_train.shape[1:])
cnn.summary()


## Model Training

**Training Strategy:**
The network learns by minimizing the difference between predicted and true resistivity profiles over 64,000 training examples.

**Callbacks:**
1. **Early Stopping (patience=10)**: 
   - Monitors validation loss
   - Stops if no improvement for 10 epochs
   - Restores best weights to prevent overfitting
   - Prevents wasting computation on diminishing returns

2. **ReduceLROnPlateau (patience=5)**:
   - Reduces learning rate by 50% when validation loss plateaus
   - Helps escape local minima
   - Fine-tunes weights in later epochs
   - Minimum LR: 1e-5

**Batch Size = 128:**
- Balances memory usage and convergence speed
- Provides stable gradient estimates
- Allows parallel GPU processing

**Expected Behavior:**
- Training loss should decrease steadily
- Validation loss should follow training loss closely
- Correlation coefficient R should approach 0.95+ for clean data
- Typical convergence: 30-50 epochs

In [None]:

# Stop when validation loss has not improved for 10 epochs and roll back to
# the best weights seen so far.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Halve the learning rate after 5 stagnant epochs, never going below 1e-5.
lr_on_plateau = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=5, min_lr=1e-5, verbose=1)

callbacks = [early_stopping, lr_on_plateau]

# Train on the clean training split, validating on the clean validation split.
fit_options = dict(epochs=100, batch_size=128, callbacks=callbacks, verbose=1)
history = cnn.fit(X_train, y_train, validation_data=(X_val, y_val), **fit_options)


## Model Evaluation and Noise Robustness

**Evaluation Metrics:**

1. **Mean Squared Error (MSE)**:
   - MSE = (1/N)Σ(ρ_true - ρ_pred)²
   - Measures average squared difference in resistivity (Ohm·m)²
   - Sensitive to outliers
   - Lower is better

2. **Correlation Coefficient (R)**:
   - R = Pearson correlation between true and predicted profiles
   - Ranges from -1 (perfect negative correlation) to +1 (perfect positive correlation)
   - **R > 0.9**: Excellent inversion quality
   - **R > 0.8**: Good for practical applications
   - **R < 0.7**: Poor, unreliable inversion
   - More geophysically meaningful than MSE

**Noise Robustness Test:**
Real MT data always contains noise. We evaluate on:
1. **Clean data**: Best-case performance baseline
2. **1% noise**: High-quality field data
3. **3% noise**: Typical field conditions
4. **5% noise**: Challenging conditions (requires noise filtering)

**Expected Results:**
- Clean data: R ≈ 0.95-0.98, very low MSE
- Noise degrades both metrics progressively
- Good model should maintain R > 0.85 even at 5% noise
- MSE increases quadratically with noise level

In [None]:

from sklearn.metrics import mean_squared_error

def _inverse_transform_profiles(scaler, profiles_scaled):
    """Undo scaling of (N, M) profiles, honouring a scaler fit on one column."""
    profiles_scaled = np.asarray(profiles_scaled)
    n_fitted = getattr(scaler, "n_features_in_", profiles_scaled.shape[-1])
    if n_fitted == 1:
        # The scaler was fit on a single flattened column: present the data in
        # that layout instead of relying on broadcasting, then restore shape.
        flat = scaler.inverse_transform(profiles_scaled.reshape(-1, 1))
        return flat.reshape(profiles_scaled.shape)
    return scaler.inverse_transform(profiles_scaled)


def evaluate_model(model, X, y_true_scaled, scaler_y):
    """
    Evaluate a trained model on one set of inputs.

    Steps:
    1. Predict scaled profiles with `model.predict`.
    2. Undo the target scaling for both predictions and references.
    3. Compute the MSE over all values and the mean per-sample correlation.

    Args:
        model: Trained model exposing `predict(X, verbose=0)`.
        X: Model inputs (clean or noisy), one row per sample.
        y_true_scaled: Scaled reference profiles, shape (N, M).
        scaler_y: Fitted target scaler exposing `inverse_transform`.

    Returns:
        Tuple (mse, R, y_true, y_pred), where `y_true` and `y_pred` are the
        profiles after inverse scaling and the metrics are computed on them.
    """
    y_pred_scaled = model.predict(X, verbose=0)
    y_true = _inverse_transform_profiles(scaler_y, y_true_scaled)
    y_pred = _inverse_transform_profiles(scaler_y, y_pred_scaled)
    mse = mean_squared_error(y_true.reshape(-1), y_pred.reshape(-1))
    R = numpy_corrcoef(y_true, y_pred)
    return mse, R, y_true, y_pred

# Evaluate the same targets against clean inputs and each noisy variant,
# in order of increasing noise.
evaluation_inputs = {
    'Clean': X_test,
    'Noise 1%': X_test_n1,
    'Noise 3%': X_test_n3,
    'Noise 5%': X_test_n5,
}

results = []
for name, X_eval in evaluation_inputs.items():
    mse, R, y_true_inv, y_pred_inv = evaluate_model(cnn, X_eval, y_test, scaler_y)
    results.append((name, mse, R))

# One row per condition; the bare name displays the table in the notebook.
df_results = pd.DataFrame(results, columns=['Condition', 'MSE', 'R'])
df_results


## Results Visualization

**Plot 1: Training History**
- Shows how loss (MSE) decreases during training
- Validation loss should track training loss (no overfitting)
- Early stopping prevents divergence

**Plot 2: Correlation Evolution**
- Shows R metric improving during training
- Should plateau near 0.95+ for good inversion
- Indicates model is learning physical relationships

**Plot 3: Individual Station Examples**
- Compares true vs predicted resistivity profiles
- X-axis: Normalized depth index (0=shallowest layer, 1=deepest layer)
- Y-axis: Resistivity in Ohm·m (plotted on a linear axis here; a log scale can be easier to read)
- Good predictions follow true profile shape and amplitude

**Plot 4: Noise Robustness**
- MSE vs noise level
- Should show gradual degradation, not collapse
- Demonstrates model generalization

**Geophysical Interpretation:**
- **Conductive layers** (low ρ): Sediments, water-saturated zones, clay-rich rocks
- **Resistive layers** (high ρ): Crystalline basement, dry rocks, carbonates
- **Sharp boundaries**: Geological contacts (unconformities, faults)
- **Smooth transitions**: Gradational lithology changes

In [None]:

# Plot 1: training and validation loss per epoch.
fig_loss, ax_loss = plt.subplots(figsize=(8, 5), dpi=150)
ax_loss.plot(history.history['loss'], label='Train Loss')
ax_loss.plot(history.history['val_loss'], label='Val Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('MSE')
ax_loss.set_title('Training/Validation Loss')
ax_loss.legend()
fig_loss.tight_layout()
plt.show()

# Plot 2: correlation metric per epoch, drawn only if Keras recorded it
# under the expected history keys.
tr = history.history.get('corrcoef_batch', None)
va = history.history.get('val_corrcoef_batch', None)
if not (tr is None or va is None):
    fig_r, ax_r = plt.subplots(figsize=(8, 5), dpi=150)
    ax_r.plot(tr, label='Train R')
    ax_r.plot(va, label='Val R')
    ax_r.set_xlabel('Epoch')
    ax_r.set_ylabel('R')
    ax_r.set_title('Training/Validation Correlation')
    ax_r.legend()
    fig_r.tight_layout()
    plt.show()

# Plot 3: true vs. predicted profiles for a few randomly chosen test stations.
n_show = 5
idxs = np.random.choice(X_test.shape[0], size=n_show, replace=False)
y_pred_scaled = cnn.predict(X_test[idxs], verbose=0)

# scaler_y was fit on a single flattened column, so present the data in that
# layout for the inverse transform and restore the (n_show, depth) shape after.
y_pred = scaler_y.inverse_transform(
    y_pred_scaled.reshape(-1, 1)).reshape(y_pred_scaled.shape)
y_true = scaler_y.inverse_transform(
    y_test[idxs].reshape(-1, 1)).reshape(y_test[idxs].shape)

# Normalized depth axis sized from the data instead of a hard-coded 100.
depths = np.linspace(0, 1, y_true.shape[1])

fig, axes = plt.subplots(n_show, 1, figsize=(7, 2.5*n_show), dpi=150, sharex=True)
# plt.subplots returns a bare Axes when n_show == 1; always work with an array.
axes = np.atleast_1d(axes)
for i, ax in enumerate(axes):
    ax.plot(depths, y_true[i], label='True', lw=2)
    ax.plot(depths, y_pred[i], label='Pred', lw=1.5)
    ax.set_ylabel('Resistivity (Ohm·m)')
    ax.set_title(f'Test Station Example {i+1}')
    ax.grid(True, alpha=0.3)
axes[-1].set_xlabel('Normalized Depth')
axes[0].legend()
plt.tight_layout()
plt.show()

# Plot 4: MSE for each evaluation condition (clean and the three noise levels).
fig_noise, ax_noise = plt.subplots(figsize=(6,4), dpi=150)
ax_noise.plot(df_results['Condition'], df_results['MSE'], marker='o')
ax_noise.set_xlabel('Condition')
ax_noise.set_ylabel('MSE')
ax_noise.set_title('Noise Robustness (CNN)')
fig_noise.tight_layout()
plt.show()

# Display the results table again below the figure.
df_results
