# ENSO_pattern: zonal structure of boreal winter SST anomalies in the equatorial Pacific

Computes the zonal root mean square error (RMSE) of boreal winter (December) sea surface temperature anomalies (SSTA) along the equatorial Pacific (150°E–90°W), averaged meridionally over 5°S–5°N. Observations and model outputs are compared after processing, including smoothing with a 5-month triangular-weighted moving average.

## Datasets Used for SSTA Comparison
The following reference datasets are employed to evaluate the ENSO-related SST patterns in model simulations:

- **TropFlux** (1979–2018): The primary dataset for comparison, providing high-quality estimates of surface fluxes and anomalies designed for tropical ocean and climate studies, combining in-situ measurements with satellite-derived data.

Additional datasets include:

- **20CRv2** (1871–2012): A century-scale reanalysis leveraging surface pressure observations to reconstruct global atmospheric conditions.
- **ERA-Interim** (1979–2018): A widely used reanalysis dataset offering high-resolution estimates of atmospheric and surface variables with robust data assimilation techniques.
- **ERSSTv5** (1854–2018): A globally gridded dataset of historical SSTs derived from in-situ measurements and optimised for climate monitoring and ENSO studies.
- **HadISST** (1870–2018): A long-term SST and sea ice dataset combining ship and buoy observations with historical data reconstruction.
- **NCEP2** (1979–2018): The second-generation reanalysis dataset from NCEP, improving upon the earlier version for global atmospheric and oceanic variability analysis.

In [1]:
from esmvalcore.dataset import Dataset

In [2]:
# Facets describing the single CMIP6 model evaluated in this notebook.
access_esm_facets = {
    "short_name": "tos",
    "project": "CMIP6",
    "mip": "Omon",
    "exp": "historical",
    "ensemble": "r1i1p1f1",
    "timerange": "19790101/20190101",
    "dataset": "ACCESS-ESM1-5",
    "grid": "gn",
}

model_datasets = {"ACCESS-ESM1-5": Dataset(**access_esm_facets)}

# Attach the ocean cell-area field as a supplementary variable of the model dataset.
model_datasets["ACCESS-ESM1-5"].add_supplementary(
    short_name="areacello",
    mip="Ofx",
)

# Facets of each reference (observational) SST product, keyed by display label.
obs_facets = {
    "HadISST": dict(
        short_name="tos",
        dataset="HadISST",
        mip="Omon",
        project="OBS",
        type="reanaly",
        tier=2,
    ),
    "ERSSTv5": dict(
        short_name="tos",
        dataset="NOAA-ERSSTv5",
        mip="Omon",
        project="OBS6",
        type="reanaly",
        tier=2,
    ),
    # Disabled entry, kept for reference (#kj13):
    # "ERA-Interim": dict(
    #     short_name="tos",
    #     dataset="ERA-Interim",
    #     mip="Omon",
    #     project="OBS6",
    #     type="reanaly",
    #     timerange="19790101/20190101",
    #     tier=3,
    # ),
}

obs_datasets = {label: Dataset(**facets) for label, facets in obs_facets.items()}

In [3]:
# Replace every Dataset entry by the object returned from its load() call,
# keeping the same dictionary keys.
model_datasets = {label: entry.load() for label, entry in model_datasets.items()}
obs_datasets = {label: entry.load() for label, entry in obs_datasets.items()}



In [4]:
from esmvalcore.preprocessor import anomalies
from esmvalcore.preprocessor import area_statistics
# from esmvalcore.preprocessor import climate_statistics
from esmvalcore.preprocessor import rolling_window_statistics
from esmvalcore.preprocessor import convert_units
from esmvalcore.preprocessor import extract_region
from esmvalcore.preprocessor import extract_month
from esmvalcore.preprocessor import regrid
from esmvalcore.preprocessor import detrend
from esmvalcore.preprocessor import meridional_statistics
from esmvalcore.preprocessor import mask_landsea
import iris

import matplotlib.pyplot as plt
import iris.quickplot as qplt
import numpy as np
import scipy.stats


In [5]:
## pattern enso, eq
def sst_enso(cube):
    """Build the December SST anomaly series for the 190°E–240°E, 5°S–5°N box.

    The cube is converted to degC, cut to the box, passed twice through a
    rolling mean along time (window_length=5), averaged over the area,
    reduced to month 12 and turned into anomalies with period='monthly'.
    No land masking or detrending is applied.
    """
    nino34_box = {
        "start_longitude": 190.0,
        "end_longitude": 240.0,
        "start_latitude": -5.0,
        "end_latitude": 5.0,
    }

    sst = convert_units(cube, units="degC")
    sst = extract_region(sst, **nino34_box)

    # Two consecutive passes of the same running mean along time.
    for _ in range(2):
        sst = rolling_window_statistics(
            sst, coordinate="time", operator="mean", window_length=5
        )

    box_mean = area_statistics(sst, operator="mean")
    december = extract_month(box_mean, 12)

    # Anomalies of the December series (removes its time mean).
    return anomalies(december, period="monthly")

def sst_eq(cube):
    """Build December SST anomalies along 150°E–270°E, averaged meridionally.

    The cube is regridded to a 1x1 grid (linear scheme), converted to degC,
    cut to 150°E–270°E / 5°S–5°N, smoothed by two passes of a rolling mean
    along time (window_length=5), reduced to month 12, turned into
    anomalies with period='monthly' and finally averaged with
    meridional_statistics. No land masking is applied.
    """
    equatorial_band = dict(
        start_longitude=150.0,
        end_longitude=270.0,
        start_latitude=-5.0,
        end_latitude=5.0,
    )

    field = regrid(cube, target_grid="1x1", scheme="linear")
    field = convert_units(field, units="degC")
    field = extract_region(field, **equatorial_band)

    # Same running mean applied twice along time.
    smoothing = dict(coordinate="time", operator="mean", window_length=5)
    field = rolling_window_statistics(field, **smoothing)
    field = rolling_window_statistics(field, **smoothing)

    december = extract_month(field, 12)
    december_anomalies = anomalies(december, period="monthly")

    return meridional_statistics(december_anomalies, "mean")

#linear regression of sst_enso on sst_eq
def lin_regress(cube_ssta, cube_nino34):
    """Regress each time series of ``cube_ssta`` on ``cube_nino34``.

    Every slice of ``cube_ssta`` along 'time' is fitted with
    ``scipy.stats.linregress`` using ``cube_nino34.data`` as the
    independent variable; only the slope of each fit is kept.

    Returns
    -------
    tuple
        ``(longitude points of cube_ssta, list of slopes)``.
    """
    slope_ls = [
        scipy.stats.linregress(cube_nino34.data, series.data).slope
        for series in cube_ssta.slices(['time'])
    ]
    longitudes = cube_ssta.coord('longitude').points
    return longitudes, slope_ls

# rmse = np.sqrt(np.mean((obs_regressed - model_regressed) ** 2))

In [6]:
def sst_eq2(cube):
    """Build December SST anomaly maps over 150°E–270°E, 15°S–15°N.

    Same chain as the 5°S–5°N version but with a wider latitude band and
    without the final meridional mean, so the latitude dimension is kept:
    regrid to 1x1 (linear), convert to degC, extract the region, apply a
    rolling mean along time twice (window_length=5), keep month 12 and
    compute anomalies with period='monthly'.
    """
    bounds = {
        "start_longitude": 150.0,
        "end_longitude": 270.0,
        "start_latitude": -15.0,
        "end_latitude": 15.0,
    }

    sst = convert_units(
        regrid(cube, target_grid="1x1", scheme="linear"),
        units="degC",
    )
    sst = extract_region(sst, **bounds)

    # Two passes of the running mean along time.
    for _ in range(2):
        sst = rolling_window_statistics(
            sst, coordinate="time", operator="mean", window_length=5
        )

    return anomalies(extract_month(sst, 12), period="monthly")
    
# iterate over lat/lon for 2d
def lin_regress_2(cube_ssta, cube_nino34):
    """Regress every grid-point time series of ``cube_ssta`` on ``cube_nino34``.

    Each slice of ``cube_ssta`` along 'time' is fitted with
    ``scipy.stats.linregress`` against ``cube_nino34.data`` and the slope
    is kept. The slopes are reshaped to ``(shape[1], shape[2])`` of
    ``cube_ssta`` and wrapped in a new cube carrying its latitude and
    longitude coordinates.

    NOTE(review): the reshape assumes ``cube_ssta`` is ordered
    (time, latitude, longitude) — confirm for new inputs.
    """
    slopes = [
        scipy.stats.linregress(cube_nino34.data, point_series.data).slope
        for point_series in cube_ssta.slices(['time'])
    ]

    n_rows, n_cols = cube_ssta.shape[1], cube_ssta.shape[2]
    slope_map = np.array(slopes).reshape(n_rows, n_cols)

    horizontal_coords = [
        (cube_ssta.coord('latitude'), 0),
        (cube_ssta.coord('longitude'), 1),
    ]
    return iris.cube.Cube(
        slope_map,
        long_name='regression ENSO SSTA',
        dim_coords_and_dims=horizontal_coords,
    )

In [7]:
# prep1: area-mean December anomaly series (sst_enso)
# prep2: meridionally averaged December anomalies, 5°S–5°N (sst_eq)
# prep3: December anomaly maps, 15°S–15°N (sst_eq2)
model_datasets_prep1 = {label: sst_enso(cube) for label, cube in model_datasets.items()}
model_datasets_prep2 = {label: sst_eq(cube) for label, cube in model_datasets.items()}
model_datasets_prep3 = {label: sst_eq2(cube) for label, cube in model_datasets.items()}

# Same three products for the reference datasets.
obs_datasets_prep1 = {label: sst_enso(cube) for label, cube in obs_datasets.items()}
obs_datasets_prep2 = {label: sst_eq(cube) for label, cube in obs_datasets.items()}
obs_datasets_prep3 = {label: sst_eq2(cube) for label, cube in obs_datasets.items()}



## Diagnostic Level 1

The first level shows the diagnostic used to compute the metric and highlights the main differences between the model and the reference.

In [None]:
## linear regression sst_eq on sst_enso

def format_longitude(x, pos):
    """Format a longitude in degrees east as a tick label such as '150°E' or '90°W'.

    Parameters
    ----------
    x : number
        Longitude in degrees east. Any value is accepted; it is wrapped
        into [0, 360) before formatting.
    pos : object
        Tick position supplied by matplotlib's ``FuncFormatter``; unused.

    Returns
    -------
    str
        '<n>°E' for the eastern hemisphere, '<n>°W' for the western one,
        and a bare '0°' / '180°' on the two meridians where E/W is ambiguous.
    """
    # Wrap so that negative values and values >= 360 get a sensible label
    # instead of e.g. '-30°E' or '-10°W'.
    lon = x % 360
    if lon == 0:
        return '0°'
    if lon == 180:
        return '180°'
    if lon > 180:
        return f'{int(360 - lon)}°W'
    return f'{int(lon)}°E'


# Slopes of the equatorial SSTA profile regressed on the area-mean index,
# for the model and for the reference. Each result is (longitudes, slopes).
reg_mod = lin_regress(model_datasets_prep2["ACCESS-ESM1-5"], model_datasets_prep1["ACCESS-ESM1-5"])
reg = lin_regress(obs_datasets_prep2["HadISST"], obs_datasets_prep1["HadISST"])

ax = plt.gca()

# Model first (default colour), then the reference in black.
ax.plot(reg_mod[0], reg_mod[1], label="ACCESS-ESM1-5")
ax.plot(reg[0], reg[1], color='black', label='ref: HadISST')

# Axis cosmetics.
ax.xaxis.set_major_formatter(plt.FuncFormatter(format_longitude))
ax.set_yticks(np.arange(-2, 3, step=1))
ax.axhline(y=0, color='black', linewidth=1)
ax.set_ylabel("reg(ENSO SSTA, SSTA)")
ax.set_title('ENSO pattern')
ax.legend()
ax.grid(linestyle='--')

# Metric: root mean square difference between reference and model slopes.
slope_difference = np.array(reg[1]) - np.array(reg_mod[1])
rmse = np.sqrt(np.mean(slope_difference ** 2))

ax.text(
    0.5,
    0.95,
    f"RMSE: {rmse:.2f} ",
    fontsize=12,
    ha='center',
    transform=ax.transAxes,
    bbox=dict(facecolor='white', alpha=0.8, edgecolor='none'),
)

**Figure 1**: Zonal structure of sea surface temperature anomalies (SSTA) in the equatorial Pacific (averaged between 5°S and 5°N). The figure highlights the zonal distribution of SSTA associated with ENSO, which is typically overestimated west of the dateline; in this case, the anomalies are too strong in the central Pacific. The black curve represents the reference data, while the blue curve corresponds to the model output. The derived metric is the zonal root mean square error (RMSE) between the model and reference curves.

## Diagnostic Level 2

The second level shows the broader picture to better understand the spatial pattern of ENSO: the map of the anomalies in the equatorial Pacific.


In [None]:
model_name, ref_name = "ACCESS-ESM1-5", "HadISST"

# 2D regression maps (slope at every grid point) for the model and the reference.
reg2_mod = lin_regress_2(model_datasets_prep3[model_name], model_datasets_prep1[model_name])
reg2_obs = lin_regress_2(obs_datasets_prep3[ref_name], obs_datasets_prep1[ref_name])

# Panels to draw, in plotting order.
process = {
    model_name: reg2_mod,
    ref_name: reg2_obs,
}

In [None]:
# 
import iris.plot as iplt
import matplotlib.colors as mcolors
import cartopy.feature as cfeature
import cartopy.crs as ccrs

fig = plt.figure(figsize=(20, 7))
proj = ccrs.Orthographic(central_longitude=210.0)

# One set of contour levels shared by every panel and by the colorbar.
levels = np.arange(-1.5, 2, 0.1)

# One panel per entry of `process`, laid out left to right in insertion order.
for panel, (label, cube) in enumerate(process.items(), start=1):
    ax1 = plt.subplot(1, len(process), panel, projection=proj)
    ax1.add_feature(cfeature.LAND, facecolor='gray')  # land drawn in gray
    ax1.coastlines()

    # `extend` has to be given to contourf: a colorbar built from a contour
    # set takes its extension from the contour set, and without it values
    # outside `levels` are left unfilled on the map.
    cf1 = iplt.contourf(cube, levels=levels, cmap='RdBu_r', extend='both')

    ax1.set_extent([130, 290, -20, 20], crs=ccrs.PlateCarree())
    ax1.set_title(label)

    # Latitude/longitude gridlines, labelled on the left and bottom only.
    gl1 = ax1.gridlines(draw_labels=True, linestyle='--')
    gl1.top_labels = False
    gl1.right_labels = False

# Single colorbar at the bottom, built from the last panel's contour set
# (all panels share the same levels and colormap).
cax = plt.axes([0.15, 0.08, 0.7, 0.05])
cbar = fig.colorbar(cf1, cax=cax, orientation='horizontal', ticks=np.arange(-2, 2.5, 0.5))
cbar.set_label('regression(ENSO SSTA, SSTA) (°C/°C)')


**Figure 2**: Sea surface temperature anomalies (SSTA) associated with ENSO in the equatorial Pacific. Models usually place the SSTA too far west (here they are too strong in the central Pacific). The left and right maps show the model and the reference, respectively.

In [None]:
import accessvis
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
import os
import pandas as pd
import os
import xarray as xr
import lavavu

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.colors as mcolors
import cmocean
from skimage.transform import resize

from tqdm import tqdm
from scipy.ndimage import gaussian_filter

In [None]:
# Short aliases for the reference and model regression maps used below.
obs_data, model_data = reg2_obs, reg2_mod

def generate_rgba(data, cmap, vmin, vmax, lon=None, lat=None):
    """Render a 2D field as filled contours and return the image as RGBA pixels.

    Parameters
    ----------
    data : 2D array
        Field to draw. (Previously this argument was overwritten by the
        global ``model_data.data`` and therefore ignored.)
    cmap : matplotlib colormap
        Colormap passed to ``contourf``.
    vmin, vmax : number
        Bounds of the linear colour normalisation.
    lon, lat : 1D array, optional
        Coordinates of the columns and rows of ``data``. When omitted they
        are read from the longitude/latitude coordinates of the
        module-level ``model_data`` cube, as before.

    Returns
    -------
    numpy.ndarray
        uint8 array of shape (height, width, 4) in R, G, B, A channel order,
        taken from a 6x3 inch figure rendered at 100 dpi.
    """
    if lon is None:
        lon = model_data.coord('longitude').points
    if lat is None:
        lat = model_data.coord('latitude').points

    lon2d, lat2d = np.meshgrid(lon, lat)
    norm = mcolors.Normalize(vmin=vmin, vmax=vmax)

    fig, ax = plt.subplots(figsize=(6, 3), dpi=100)
    try:
        ax.contourf(lon2d, lat2d, data, cmap=cmap, norm=norm, levels=20)
        # No axes decoration and no margins: the contours fill the whole canvas.
        ax.axis('off')
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)

        fig.canvas.draw()
        argb = np.frombuffer(fig.canvas.tostring_argb(), dtype=np.uint8)
        argb = argb.reshape(fig.canvas.get_width_height()[::-1] + (4,))  # (H, W, 4)

        # Reorder channels from A, R, G, B to R, G, B, A (this also copies the buffer).
        rgba = argb[:, :, [1, 2, 3, 0]]
    finally:
        # Always release the temporary figure, even if rendering failed.
        plt.close(fig)
    return rgba

def resize_rgba(data, width, height):
    """Resize a uint8 RGBA image and return it as uint8.

    Parameters
    ----------
    data : numpy.ndarray
        Image of shape (rows, cols, 4) with values in 0–255. (Previously the
        function read the global ``rgba`` instead of this argument.)
    width, height : int
        Output size. They are passed to ``resize`` as the output shape
        ``(width, height, 4)``, so ``width`` is the number of output rows
        (axis 0) and ``height`` the number of output columns (axis 1).

    Returns
    -------
    numpy.ndarray
        uint8 array of shape (width, height, 4).
    """
    # Work in float [0, 1] for the interpolation.
    rgba_float = data.astype(np.float32) / 255.0

    # Resize while keeping the four channels.
    rgba_resized = resize(rgba_float, (width, height, 4), preserve_range=True, anti_aliasing=True)

    # Back to uint8, clipping any interpolation overshoot.
    return np.clip(rgba_resized * 255, 0, 255).astype(np.uint8)

def pad_rgba(data, pad_width,pad_height,pad_depth=None,constant_values=255):
    """Pad an array with a constant value along its first two or three axes.

    ``pad_width`` and ``pad_height`` are the (before, after) amounts for
    axis 0 and axis 1. When ``pad_depth`` is truthy it is used as the
    (before, after) amounts for a third axis; otherwise only two axes are
    padded. The new cells are filled with ``constant_values``.
    """
    if pad_depth:
        per_axis_padding = (pad_width, pad_height, pad_depth)
    else:
        per_axis_padding = (pad_width, pad_height)

    return np.pad(
        data,
        pad_width=per_axis_padding,
        mode='constant',
        constant_values=constant_values,
    )

def normalise_array(values, minimum=None, maximum=None):
    """
    Normalize an array to the range [0,1]

    Parameters
    ----------
    values : numpy.ndarray
        Values to convert, numpy array
    minimum: number
        Use a fixed minimum bound, default is to use the data minimum
    maximum: number
        Use a fixed maximum bound, default is to use the data maximum

    Returns
    -------
    numpy.ndarray
        ``(values - minimum) / (maximum - minimum)`` clipped to [0, 1].
        NaN entries stay NaN. A zero range (maximum == minimum) is not
        guarded against.
    """

    # Fall back to the data range only when a bound was not given at all.
    # Testing `is None` (rather than truthiness) keeps an explicit bound of 0.
    # NaNs are ignored when taking the data min/max.
    if minimum is None:
        minimum = np.nanmin(values)
    if maximum is None:
        maximum = np.nanmax(values)

    # Normalise
    array = (values - minimum) / (maximum - minimum)
    # Clip out of [0,1] range - in case defined range is not the global minima/maxima
    array = np.clip(array, 0, 1)

    return array

def opacity_rgba(padded_array, opacity_array):
    """Write an opacity field into the alpha channel of an RGBA array.

    ``opacity_array`` is normalised with ``normalise_array`` (data min/max),
    NaNs are replaced via ``np.nan_to_num``, and the result is scaled by
    255 and cast to uint8. It then overwrites channel index 3 of
    ``padded_array`` in place; the same array object is returned.
    """
    alpha = np.nan_to_num(normalise_array(opacity_array))
    padded_array[:, :, 3] = (alpha * 255).astype(np.uint8)
    return padded_array

In [None]:
# Build an RGBA overlay from the model regression map and show it with accessvis.
cmap = plt.get_cmap('RdBu_r')

# Render the model field to an RGBA image.
# NOTE(review): vmin=0, vmax=64 is far from the -1.5..2 contour levels used for
# the same regression field in the map cell above — confirm the intended range.
rgba=generate_rgba(model_data.data, cmap = cmap, vmin=0, vmax=64)
# Resize to 150 rows x 600 columns (width/height are passed as rows/columns).
resized_rgba=resize_rgba(rgba,width=150,height=600)
# Pad axis 0 by 375 on both sides and axis 1 by 750/450, i.e. (150, 600) -> (900, 1800);
# presumably this places the regional image inside a global texture — TODO confirm.
padded_rgba=pad_rgba(resized_rgba,pad_width=(375, 375),pad_height=(750, 450),pad_depth=(0, 0))
# Same resize and padding for the field that drives the opacity (padded with 0).
opacity_array = resize(model_data.data, (150, 600), order=1, preserve_range=True, anti_aliasing=True)
padded_opacity_array=pad_rgba(opacity_array,pad_width=(375, 375),pad_height=(750, 450),constant_values=0)
# Overwrite the alpha channel of the padded image with the normalised opacity field.
opacitied_rgba=opacity_rgba(padded_rgba, padded_opacity_array)

# Create the earth view, set its orientation and lighting, then pass it the RGBA overlay.
lv = accessvis.plot_earth(texture='bluemarble', background="white", vertical_exaggeration=20)
lv.rotation(15.0, -180.0, 0.0) #Rotate to Australia
lv.set_properties(diffuse=0.8, ambient=0.1, specular=0.35, shininess=0.03, light=[1,1,0.98]) # make pretty
lv.brightness_contrast_saturation(0.5, 0.5, 0.65)
accessvis.update_earth_values(lv, dataMode=0, data=opacitied_rgba)

# Open the viewer window at 700x700.
lv.window(resolution=(700,700))