In [1]:
# ===================================================================
# FINAL OUT-OF-SAMPLE TEST SCRIPT FOR 2D ZONAL-MEAN MODELS (with PCA)
# ===================================================================
# This script is specifically designed to test models that were
# trained by applying PCA to the target variable (y).
# It correctly loads the saved PCA transformer for each fold.

import os
import gc
import joblib # <-- Required to load the PCA model
import numpy as np
import tensorflow as tf
from sklearn.metrics import r2_score
from scipy.stats import pearsonr
from keras.models import load_model
import xarray as xr
import scipy.io as sio
import matplotlib.pyplot as plt
from tensorflow.keras import backend as K

# --- 1. Configuration: Set Correct Paths for the PCA Model Run ---
print("--- Setting up for Zonal-Mean Out-of-Sample Test (PCA version) ---")

# **FIX**: Define separate paths for models vs. pca/log files
# path_model_dir = '/ocean/projects/ees250004p/ezhu3/data/CESM2/trained_model_2D_PCA'
# path_results_dir = '/ocean/projects/ees250004p/ezhu3/data/CESM2/trained_model_2D_PCA/NeuralNet/CNN_Neur32x32_BS32_5foldCV_Reg0Drop0.25_gelu+PReLU/TOA_anom'

#C1:
path_model_dir = "/ocean/projects/ees250004p/ezhu3/data/CESM1/trained_model_2D_PCA"
path_results_dir = "/ocean/projects/ees250004p/ezhu3/data/CESM1/trained_model_2D_PCA/NeuralNet/CNN_Neur32x32_BS32_5foldCV_Reg0Drop0.25_gelu+PReLU/TOA_anom"


# Define variable names for your test files
In_name = "TS"
Out_name = "TOA_anom"

# Define paths to your out-of-sample 4xCO2 test files
# file_4xCO2_input = "/ocean/projects/ees250004p/ezhu3/data/CESM2/test/test.CESM2-4xCO2.ANN.nc"
# file_4xCO2_output = "/ocean/projects/ees250004p/ezhu3/data/CESM2/test/test.CESM2-4xCO2.zmean.ANN.nc"
file_4xCO2_input = "/ocean/projects/ees250004p/ezhu3/data/CESM1/test/test.4xCO2.ANN.new.nc"
file_4xCO2_output = "/ocean/projects/ees250004p/ezhu3/data/CESM1/test/test.4xCO2.zmean.ANN.new.nc"


# --- 2. Load Normalization Data from the Training Run ---
print("Loading normalization data...")
# **FIX**: Normalization file is in the main model directory
normalization_path = os.path.join(path_model_dir, 'Normalization_zonal.mat')
normalization = sio.loadmat(normalization_path)
X_mean = normalization['X_mean']
X_std = normalization['X_std']
y_mean = normalization['y_mean']
y_std = normalization['y_std']
print("✅ Normalization data loaded successfully.")

# --- 3. Load and Preprocess 4xCO2 Test Data ---
print("\nLoading and preprocessing 4xCO2 test data...")
ds_4xCO2_X = xr.open_dataset(file_4xCO2_input)
ds_4xCO2_y = xr.open_dataset(file_4xCO2_output)

TS_4xCO2_raw = ds_4xCO2_X[In_name]
TOA_4xCO2_truth = ds_4xCO2_y[Out_name].values
lat = ds_4xCO2_X['lat'].values
time_4xCO2 = ds_4xCO2_X['year'].values if 'year' in ds_4xCO2_X else ds_4xCO2_X['time'].values

TS_4xCO2_norm = (TS_4xCO2_raw.values[..., np.newaxis] - X_mean) / X_std
print("✅ Test data preprocessed.")

# --- 4. Prediction Loop with PCA Inverse Transform ---
print("\n--- Running Ensemble Predictions for 4xCO2 ---")
n_folds = 5
predictions_from_folds = []

for fold_no in range(1, n_folds + 1):
    K.clear_session(); gc.collect()
    
    # **FIX**: Construct the full path to the .h5 file and the .pkl file
    model_path = os.path.join(path_model_dir, f'model_fold{fold_no}_ens1.h5')
    pca_path = os.path.join(path_results_dir, f'pca_y_fold{fold_no}.pkl')
    
    print(f"    Loading model: {model_path}")
    print(f"    Loading PCA transformer: {pca_path}")
    
    # Load the trained model from its .h5 file path and the PCA transformer
    model = load_model(model_path, compile=False)
    pca_y = joblib.load(pca_path)
    
    # 1. Predict the PCA components
    pred_4xco2_pca = model.predict(TS_4xCO2_norm)
    print(pred_4xco2_pca.shape)
    
    # 2. Inverse transform from PCA space to normalized data space
    pred_4xco2_norm = pca_y.inverse_transform(pred_4xco2_pca)
    
    # 3. Un-normalize the data
    pred_4xco2_unnorm = pred_4xco2_norm * y_std + y_mean
    
    predictions_from_folds.append(pred_4xco2_unnorm)

# Average predictions across the folds
Model_pred_4xco2 = np.mean(np.stack(predictions_from_folds), axis=0)
print("\n✅ Prediction complete.")


# =================================================================
# Out-of-Sample Analysis and Visualization (No changes needed here)
# =================================================================
print("\n--- Starting Out-of-Sample Analysis ---")

# --- Task 1: Calculate Overall Pattern Correlation ---
print("\n    Calculating Overall Pattern Correlation...")
truth_flat = TOA_4xCO2_truth.flatten()
pred_flat = Model_pred_4xco2.flatten()
pattern_r, _ = pearsonr(truth_flat, pred_flat)
print(f"✅ Overall Pattern Correlation (r) = {pattern_r:.4f}")

# --- Task 2: Plot R-squared as a Function of Latitude ---
print("\n    Calculating and plotting R-squared per latitude...")
r2_by_latitude = [r2_score(TOA_4xCO2_truth[:, i], Model_pred_4xco2[:, i]) for i in range(len(lat))]

plt.figure(figsize=(10, 6))
plt.plot(lat, r2_by_latitude, marker='o', linestyle='-')
plt.title('Out-of-Sample Performance (R²) by Latitude - 4xCO2', fontsize=16)
plt.xlabel('Latitude', fontsize=12)
plt.ylabel('R-squared Score', fontsize=12)
plt.grid(True, linestyle='--'); plt.ylim(0, 1)
plt.show()

# --- Task 3: Plot Truth vs. Prediction as a 2D Contour Map ---
print("\n    Plotting Truth vs. Prediction as contour maps...")

time_axis_for_plot = np.arange(TOA_4xCO2_truth.shape[0])

fig, axes = plt.subplots(1, 2, figsize=(18, 6), sharey=True)
vmax = np.percentile(np.abs(TOA_4xCO2_truth), 98)
vmin = -vmax

axes[0].set_title('Ground Truth TOA Zonal Mean', fontsize=16)
cf1 = axes[0].contourf(time_axis_for_plot, lat, TOA_4xCO2_truth.T, levels=20, cmap='RdBu_r', vmin=vmin, vmax=vmax)
axes[0].set_xlabel('Time (Model Years)', fontsize=12)
axes[0].set_ylabel('Latitude', fontsize=12)

axes[1].set_title('Predicted TOA Zonal Mean', fontsize=16)
cf2 = axes[1].contourf(time_axis_for_plot, lat, Model_pred_4xco2.T, levels=20, cmap='RdBu_r', vmin=vmin, vmax=vmax)
axes[1].set_xlabel('Time (Model Years)', fontsize=12)

fig.colorbar(cf1, ax=axes.ravel().tolist(), shrink=0.8, label='TOA Anomaly (W/m²)')
fig.suptitle("Out-of-Sample Results - 4xCO2 Scenario", fontsize=18, fontweight='bold')
plt.show()

2025-07-24 16:31:58.026375: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-07-24 16:31:58.216031: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-24 16:31:58.254650: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


--- Setting up for Zonal-Mean Out-of-Sample Test (PCA version) ---
Loading normalization data...
✅ Normalization data loaded successfully.

Loading and preprocessing 4xCO2 test data...
✅ Test data preprocessed.

--- Running Ensemble Predictions for 4xCO2 ---
    Loading model: /ocean/projects/ees250004p/ezhu3/data/CESM2/trained_model_2D_PCA/model_fold1_ens1.h5
    Loading PCA transformer: /ocean/projects/ees250004p/ezhu3/data/CESM1/trained_model_2D_PCA/NeuralNet/CNN_Neur32x32_BS32_5foldCV_Reg0Drop0.25_gelu+PReLU/TOA_anom/pca_y_fold1.pkl


2025-07-24 16:32:02.952013: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-07-24 16:32:03.984315: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 223 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:3a:00.0, compute capability: 7.0
2025-07-24 16:32:14.742291: W tensorflow/core/common_runtime/bfc_allocator.cc:479] Allocator (GPU_0_bfc) ran out of memory trying to allocate 216.00MiB (rounded to 226492416)requested by op model/conv2d/Conv2D
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation

ResourceExhaustedError: Graph execution error:

Detected at node 'model/conv2d/Conv2D' defined at (most recent call last):
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/traitlets/config/application.py", line 992, in launch_instance
      app.start()
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 711, in start
      self.io_loop.start()
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 411, in do_execute
      res = shell.run_cell(
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 531, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2961, in run_cell
      result = self._run_cell(
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3016, in _run_cell
      result = runner(coro)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3221, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3400, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3460, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/tmp/ipykernel_34548/4228956120.py", line 88, in <module>
      pred_4xco2_pca = model.predict(TS_4xCO2_norm)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/engine/training.py", line 2253, in predict
      tmp_batch_outputs = self.predict_function(iterator)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/engine/training.py", line 2041, in predict_function
      return step_function(self, iterator)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/engine/training.py", line 2027, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/engine/training.py", line 2015, in run_step
      outputs = model.predict_step(data)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/engine/training.py", line 1983, in predict_step
      return self(x, training=False)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/engine/training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/engine/functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/engine/functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/layers/convolutional/base_conv.py", line 283, in call
      outputs = self.convolution_op(inputs, self.kernel)
    File "/jet/home/ezhu3/.conda/envs/tf210/lib/python3.8/site-packages/keras/layers/convolutional/base_conv.py", line 255, in convolution_op
      return tf.nn.convolution(
Node: 'model/conv2d/Conv2D'
OOM when allocating tensor with shape[32,32,192,288] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node model/conv2d/Conv2D}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_predict_function_352]