In [2]:
import xarray as xr
e1set1 = xr.open_dataset('b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.185001-185912.nc')

# Print the dataset summary to see the variables, dimensions, etc.
#print(e1set1)

# Print the first few lines of the dataset
print(e1set1.isel(time=slice(0, 5)))

<xarray.Dataset>
Dimensions:                 (nlat: 384, nlon: 320, time: 5, z_t: 1, z_w: 60,
                             lat_aux_grid: 395, moc_comp: 3, moc_z: 61, d2: 2,
                             transport_comp: 5, transport_reg: 2, z_t_150m: 15,
                             z_w_bot: 60, z_w_top: 60)
Coordinates:
    TLAT                    (nlat, nlon) float64 ...
    TLONG                   (nlat, nlon) float64 ...
    ULAT                    (nlat, nlon) float64 ...
    ULONG                   (nlat, nlon) float64 ...
  * lat_aux_grid            (lat_aux_grid) float32 -79.49 -78.95 ... 89.47 90.0
  * moc_z                   (moc_z) float32 0.0 1e+03 2e+03 ... 5.25e+05 5.5e+05
  * time                    (time) object 1850-02-01 00:00:00 ... 1850-06-01 ...
  * z_t                     (z_t) float32 500.0
  * z_t_150m                (z_t_150m) float32 500.0 1.5e+03 ... 1.45e+04
  * z_w                     (z_w) float32 0.0 1e+03 2e+03 ... 5e+05 5.25e+05
  * z_w_bot               

In [3]:
import numpy as np
import pandas as pd
import xarray as xr

# Function to calculate the UCEI
def calculate_ucei(data):
    real_part = data['Nino3'] + data['Nino4']
    imag_part = data['Nino3'] - data['Nino4']
    r = np.sqrt(real_part**2 + imag_part**2)
    
    # Calculating theta based on conditions 
    theta = np.zeros_like(real_part)
    for i in range(len(theta)):
        if real_part[i] > 0:
            theta[i] = np.arctan((data['Nino3'][i] - data['Nino4'][i]) / (data['Nino3'][i] + data['Nino4'][i]))
        elif real_part[i] < 0 and imag_part[i] > 0:
            theta[i] = np.arctan2((data['Nino3'][i] - data['Nino4'][i]), (data['Nino3'][i] + data['Nino4'][i])) + np.pi
        elif real_part[i] < 0 and imag_part[i] < 0:
            theta[i] = np.arctan2((data['Nino3'][i] - data['Nino4'][i]), (data['Nino3'][i] + data['Nino4'][i])) - np.pi
    
    ucei = r * np.exp(1j * theta)
    
    # Adding r and theta to the DataFrame
    data['r'] = r
    data['theta'] = theta
    
    return ucei, r, theta

# Function to classify El Niño events
def classify_el_nino(data):
    ucei, r, theta = calculate_ucei(data)
    
    # Defining El Niño types based on theta
    data['El_Niño_Type'] = np.where((theta > np.radians(15)) & (theta < np.radians(90)), 'EP-type',
                                    np.where((theta > np.radians(-90)) & (theta < np.radians(-15)), 'CP-type',
                                             np.where((theta > np.radians(-15)) & (theta < np.radians(15)), 'Mixed-type', 'None')))
    
    return data

# List of model files
model_files = [
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.185001-185912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.186001-186912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.187001-187912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.188001-188912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.189001-189912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.190001-190912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.191001-191912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.192001-192912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.193001-193912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.194001-194912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.195001-195912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.196001-196912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.197001-197912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.198001-198912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.199001-199912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.200001-200912.nc',
    'b.e21.BHISTcmip6.f09_g17.LE2-1001.001.pop.h.SST.201001-201412.nc'
]

# Process each model file
for file in model_files:
    model_sst_data = xr.open_dataset(file, chunks={'time': 10})
    
    # Check if the dataset has valid latitude and longitude coordinates
    if 'TLAT' in model_sst_data.variables and 'TLONG' in model_sst_data.variables:
        lat_coord = 'TLAT'
        lon_coord = 'TLONG'
    else:
        raise KeyError("The dataset does not have valid latitude and longitude coordinates.")
    
    # Longitude slicing for the Nino3 and Nino4 regions
    model_nino3_region = model_sst_data.where((model_sst_data[lat_coord] >= -5) & (model_sst_data[lat_coord] <= 5) &
                                              (model_sst_data[lon_coord] >= 210) & (model_sst_data[lon_coord] <= 270), drop=True)
    
    model_nino4_region_1 = model_sst_data.where((model_sst_data[lat_coord] >= -5) & (model_sst_data[lat_coord] <= 5) &
                                                (model_sst_data[lon_coord] >= 160) & (model_sst_data[lon_coord] <= 210), drop=True)
    
    model_nino4_region_2 = model_sst_data.where((model_sst_data[lat_coord] >= -5) & (model_sst_data[lat_coord] <= 5) &
                                                ((model_sst_data[lon_coord] >= -180) & (model_sst_data[lon_coord] <= -150)), drop=True)
    
    model_nino4_region_2 = model_nino4_region_2.assign_coords({lon_coord: (model_nino4_region_2[lon_coord] + 360) % 360})
    
    model_nino4_region = xr.concat([model_nino4_region_1, model_nino4_region_2], dim=lon_coord)

    # Calculate the mean SST for Nino3 and Nino4 regions
    model_nino3_index = model_nino3_region['SST'].mean(dim=[lat_coord, lon_coord])
    model_nino4_index = model_nino4_region['SST'].mean(dim=[lat_coord, lon_coord])

    # Create a DataFrame with the extracted indices
    model_data = pd.DataFrame({
        'Date': pd.to_datetime(model_nino3_index['time'].values),
        'Nino3': model_nino3_index.values,
        'Nino4': model_nino4_index.values
    })

    # Handle missing values using interpolation
    model_data['Nino3'].interpolate(method='linear', inplace=True)
    model_data['Nino4'].interpolate(method='linear', inplace=True)

    # Normalize Nino3 and Nino4 using the equation X' = (X - mean) / stdv
    model_data['Nino3'] = (model_data['Nino3'] - model_data['Nino3'].mean()) / model_data['Nino3'].std()
    model_data['Nino4'] = (model_data['Nino4'] - model_data['Nino4'].mean()) / model_data['Nino4'].std()

    # Classify El Niño events
    model_data = classify_el_nino(model_data)
    
    # Display the classified El Niño events
    print(f"Results for {file}:")
    print(model_data[['Date', 'El_Niño_Type']])

KeyError: 'Indexing with a boolean dask array is not allowed. This will result in a dask array of unknown shape. Such arrays are unsupported by Xarray.Please compute the indexer first using .compute()'