Bin NCEP Reanalysis air temperature data by distance from NSIDC sea ice edges in a selected Arctic region

In [29]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from scipy.interpolate import griddata
import regionmask
import glob
import os

In [30]:
# Latitude band (degrees north) retained for the analysis
lat0, lat1 = 60, 90

# Load the NCEP Reanalysis monthly-mean air temperature and keep 2007-2014
f = xr.open_dataset('/home/rcostell/Observation_Files/NCEP_Reanalysis/Air_Temperature/Monthly/air.mon.mean.nc')
f = f.sel(time=slice("2007-01-01", "2014-12-31"))

# Make sure the time coordinate holds CF-decoded datetime values
f['time'] = xr.decode_cf(f)['time']

# Monthly-mean resampling so that the time axis is uniformly spaced
f = f.resample(time='1M').mean()

# Wrap longitudes from [0, 360] into [-180, 180], then put both horizontal
# axes in ascending order (west -> east, south -> north)
f = f.assign_coords(lon=(f.coords['lon'] + 180) % 360 - 180)
f = f.sortby('lon').sortby('lat')

# Keep only the requested latitude band
f = f.sel(lat=slice(lat0, lat1))

# Horizontal grid of the temperature data (used later as the interpolation target)
lon_plot, lat_plot = f.lon.values, f.lat.values

  self.index_grouper = pd.Grouper(


In [31]:
# Location and glob pattern of the regridded NSIDC monthly sea ice concentration files (NH)
path = '/home/rcostell/Observation_Files/NSIDC/Regridded_Sea_Ice_Concentration/Monthly/Northern_Hemisphere/'
file_pattern = '*_nh_*regridded*.nc'

# Collect the matching files in sorted order and open them as one dataset,
# concatenated along 'tdim'
files = sorted(glob.glob(path + file_pattern))
f0 = xr.open_mfdataset(files, combine='nested', concat_dim='tdim')

# Call the concatenation dimension 'time' and interpolate onto the temperature grid
f0 = f0.rename({'tdim': 'time'})
f0 = f0.interp(lat=lat_plot, lon=lon_plot)

# Pair every air-temperature time stamp (f) with the first sea ice time stamp (f0)
# that lies within `tolerance` of it; stamps without a partner are dropped.
tolerance = np.timedelta64(1, 's')  # acceptable time difference
valid_times, ind = [], []

sea_ice_times = f0.time.values
for stamp in f.time.values:
    if not np.isfinite(stamp):
        continue
    hits = np.where(np.abs(sea_ice_times - stamp) <= tolerance)[0]
    if hits.size > 0:
        valid_times.append(stamp)
        ind.append(hits[0])

# Restrict both datasets to the common time stamps
valid_times = np.array(valid_times)
f0 = f0.isel(time=ind).cdr_seaice_conc_gridded
f = f.sel(time=valid_times)

# Give the sea ice field the temperature time axis and attach it to the temperature dataset
f0['time'] = f.time
f = f.assign(seaice_conc=f0)

f

  f0 = f0.rename({'tdim': 'time'}).interp(lat=lat_plot, lon=lon_plot)


Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,1.37 MiB
Shape,"(13, 144, 96)","(13, 144, 96)"
Dask graph,1 chunks in 37 graph layers,1 chunks in 37 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.37 MiB 1.37 MiB Shape (13, 144, 96) (13, 144, 96) Dask graph 1 chunks in 37 graph layers Data type float64 numpy.ndarray",96  144  13,

Unnamed: 0,Array,Chunk
Bytes,1.37 MiB,1.37 MiB
Shape,"(13, 144, 96)","(13, 144, 96)"
Dask graph,1 chunks in 37 graph layers,1 chunks in 37 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [32]:
# Restrict to the study region: latitudes lat0..lat1 and longitudes -15..55 degrees east.
f = f.sel(lat=slice(lat0, lat1)).sel(lon=slice(-15, 55))

# Extract plain NumPy arrays with an explicit axis order instead of relying on
# whatever order the datasets happen to carry.  Binning_distance_from_ice_edge
# (called with model=False) indexes the ice field as [lat, lon, time] and the
# variable as [time, level, lat, lon].
ice_achv = f.seaice_conc.transpose('lat', 'lon', 'time').values
air_achv = f.air.transpose('time', 'level', 'lat', 'lon').values

# Coordinate vectors and their lengths
lat = f.lat.values
lon = f.lon.values
time = f.time.values
level = f.level.values
nlat = len(lat)
nlon = len(lon)
ntime = len(time)
nlevel = len(level)

In [33]:
def _split_at_ice_edge(ice_column, hem):
    """Return the index arrays placed left and right of the ice edge for one longitude.

    Points with concentration <= 0.5 are treated as open ocean and points with
    concentration > 0.5 as ice; NaN concentrations satisfy neither comparison
    and are therefore left out.  For ``hem == 'nh'`` the ocean points go on the
    left (negative-distance) side and the ice points on the right; for any
    other value of ``hem`` the two sides are swapped.
    """
    ocean = np.where(ice_column <= 0.5)[0]
    ice = np.where(ice_column > 0.5)[0]
    return (ocean, ice) if hem == 'nh' else (ice, ocean)


def Binning_distance_from_ice_edge(ice_achv, var_achv, ntime, nlat, nlon, hem, outputcount=False, model=False):
    """
    Bin a 2D or 3D variable (e.g., cloud fraction) by distance from the sea ice edge.

    For every time step and every longitude, the latitude points are split into
    an "ocean" group (concentration <= 0.5) and an "ice" group (concentration
    > 0.5).  The two groups are written into a vector of ``2 * nlat`` bins so
    that they meet at the centre (index ``nlat``): one group is right-aligned
    against the centre, the other left-aligned from it, each keeping its
    original latitude order.  The aligned columns are then averaged over
    longitude with ``np.nanmean``.

    Parameters
    ----------
    ice_achv : array
        Sea ice concentration field, with time as first or last dimension.
    var_achv : array
        Cloud or other atmospheric variable (time, lat, lon) or (time, height, lat, lon).
    ntime, nlat, nlon : int
        Time, latitude, and longitude sizes.
    hem : str
        'nh' for Northern Hemisphere, 'sh' for Southern Hemisphere.  With 'nh'
        the ocean points fill bins ``[0, nlat)`` and the ice points bins
        ``[nlat, 2 * nlat)``; with any other value the sides are swapped.
    outputcount : bool, optional
        If True, returns pixel counts for each bin.  Only honoured for 2D
        variables; for 3D (height-resolved) variables the counts are never
        returned.
    model : bool, optional
        If True, expects time as first dimension in ice_achv.

    Returns
    -------
    var_out : ndarray
        Binned variable, shape ``(2 * nlat, ntime)`` for 2D input or
        ``(height, 2 * nlat, ntime)`` for 3D input.  Empty bins are NaN.
    ice_out : ndarray
        Binned sea ice concentration, shape ``(2 * nlat, ntime)``.
    count_out : ndarray
        Only for 2D input with ``outputcount=True``: number of longitudes that
        contributed a non-NaN variable value to each bin, shape
        ``(2 * nlat, ntime)``.

    Raises
    ------
    ValueError
        If ``ntime`` or ``nlon`` is smaller than 1.

    Notes
    -----
    Values exactly equal to -999 in either input are treated as missing
    (converted to NaN) before averaging.  The outputs always carry a trailing
    time axis, including when ``ntime == 1``.
    """
    import warnings  # local import: only needed to silence the expected empty-bin warning

    if ntime < 1 or nlon < 1:
        raise ValueError("ntime and nlon must both be at least 1")

    fill = -999.  # legacy missing-value marker, mapped to NaN below
    nbins = 2 * nlat

    # Preallocate the per-time outputs instead of growing them with np.dstack.
    ice_out = np.full((nbins, ntime), np.nan)
    count_out = np.zeros((nbins, ntime))
    var_out = None  # allocated once the per-time shape of the variable is known
    is_profile = False

    for j in range(ntime):
        # Select the ice field for this time step according to the input layout.
        ice = ice_achv[j, :, :] if model else ice_achv[:, :, j]
        var = var_achv[j]
        is_profile = var.ndim == 3  # 3D = (height, lat, lon); otherwise (lat, lon)

        # Edge-aligned columns for this time step; the last axis is longitude.
        ice_bins = np.full((nbins, nlon), np.nan)
        leading = var.shape[:1] if is_profile else ()
        var_bins = np.full(leading + (nbins, nlon), np.nan)

        for i in range(nlon):
            ice_col = ice[:, i]
            var_col = var[..., i]
            left, right = _split_at_ice_edge(ice_col, hem)
            lo, hi = nlat - left.size, nlat + right.size

            # Left group ends at the centre bin, right group starts at it.
            ice_bins[lo:nlat, i] = ice_col[left]
            ice_bins[nlat:hi, i] = ice_col[right]
            var_bins[..., lo:nlat, i] = var_col[..., left]
            var_bins[..., nlat:hi, i] = var_col[..., right]

        # Treat the legacy fill value as missing data.
        ice_bins[ice_bins == fill] = np.nan
        var_bins[var_bins == fill] = np.nan

        if is_profile:
            # Counts are not masked for height-resolved data: every longitude counts.
            count_t = np.full(nbins, float(nlon))
        else:
            count_t = (~np.isnan(var_bins)).sum(axis=1).astype(float)

        # Bins that no longitude reaches are all-NaN; their mean is NaN by design,
        # so the accompanying "Mean of empty slice" warning is suppressed.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message="Mean of empty slice", category=RuntimeWarning)
            ice_mean = np.nanmean(ice_bins, axis=1)
            var_mean = np.nanmean(var_bins, axis=-1)

        if var_out is None:
            var_out = np.full(var_mean.shape + (ntime,), np.nan)

        ice_out[:, j] = ice_mean
        var_out[..., j] = var_mean
        count_out[:, j] = count_t

    # Counts are only part of the result for 2D variables.
    if outputcount and not is_profile:
        return var_out, ice_out, count_out
    return var_out, ice_out

In [34]:
print("Start extracting")
print(air_achv.shape)

air_out, ice_out = Binning_distance_from_ice_edge(ice_achv, air_achv, ntime, nlat, nlon, 'nh')

print("Binning done")

# --- Wrap the binned outputs in xarray DataArrays ---
# Coordinates are "distance" (bin index relative to the ice edge) and "time".
# With hem='nh' the binning function writes points with concentration <= 0.5
# (open ocean) into the first nlat bins, i.e. negative distances, and points
# with concentration > 0.5 (sea ice) into the last nlat bins, i.e. positive distances.
distance_bins = np.arange(-nlat + 0.5, nlat + 0.5, 1.)

ds_tmp1 = xr.DataArray(
    ice_out,
    coords=[distance_bins, f0.time.values],
    dims=["distance", "time"],
    name='seaice_conc',
)

ds_tmp2 = xr.DataArray(
    air_out,
    coords=[level, distance_bins, f0.time.values],
    dims=["level", "distance", "time"],
    name='air',
)

# --- Merge both variables into one dataset ---
ds_out = xr.merge([ds_tmp1, ds_tmp2])

#ds_out.to_netcdf("/home/rcostell/Observation_Files/Binned/Binned_Observed_3D_Ta.nc")
print('NH Job accomplished')

Start extracting
(96, 17, 13, 29)
Binning done
NH Job accomplished


  tmp00 = np.nanmean(tmp0[0], axis=1)
  var_tmp00 = np.nanmean(var_tmp0, axis=2)
