Regrid CMIP6 raw sea ice concentration files into a dataset more compatible with CMIP6 CALIPSO cloud data

In [None]:
import xarray as xr
import numpy as np
import numpy.ma as ma
from scipy import spatial
import glob
import os

In [None]:
# Wrap the longitudes of a set of monthly sea ice concentration files to
# [-180, 180), mask out invalid coordinates, and write the 2007-2014 period
# to a single NetCDF file.

# File pattern for sea ice data
file_pattern = 'many_monthly_sea_ice_conc_files_from_the_same_model*.nc'

# To exclude files that are part of 'file_pattern' if needed
files = sorted(f for f in glob.glob(file_pattern) if 'example_text' not in f)
if not files:
    raise FileNotFoundError('No input files match pattern: ' + file_pattern)

# Open multiple files and combine along the time dimension
f_selected = xr.open_mfdataset(files, combine='by_coords')
f_selected = f_selected.sel(time=slice('2007-01-01', '2014-12-31'))
if f_selected.sizes['time'] == 0:
    raise ValueError('No time steps found between 2007-01-01 and 2014-12-31')

# Adjust longitude values to be between -180 and 180 and sort them.
# This must happen ONCE, before any time slice is extracted, so that every
# slice and the mask below share exactly the same longitude coordinate
# (xarray aligns by coordinate label, so a slice left on the original
# longitudes would lose every point whose label changed).
# Assumes 'lon' and 'lat' are 1-D dimension coordinates.
wrapped_lon = (f_selected['lon'].values + 180) % 360 - 180
f_selected = f_selected.assign_coords(lon=wrapped_lon).sortby('lon')

lon = f_selected['lon'].values  # 'lon' instead of 'longitude'
lat = f_selected['lat'].values  # 'lat' instead of 'latitude'

# 2-D (lat, lon) mask built by xarray broadcasting of the 1-D coordinates.
# Coordinates >= 1e36 (or NaN) are treated as invalid -- presumably fill
# values; confirm against the input files.
valid_mask = (f_selected['lat'] < 1e+36) & (f_selected['lon'] < 1e+36)

# Prepare a list to store processed time slices
processed_data = []

# Loop through the filtered dataset one time step at a time
for i in range(f_selected.sizes['time']):
    ice = f_selected['siconc'].isel(time=i)

    # Keep only cells with valid coordinates; rows/columns that are
    # entirely invalid are dropped. Force (lat, lon) order so the raw
    # values below match the dims they are labelled with.
    ice_filtered = ice.where(valid_mask, drop=True).transpose('lat', 'lon')

    # Create a DataArray for the processed time slice; 'time' is attached
    # as a scalar coordinate so that concat can turn it into a dimension.
    processed_slice = xr.DataArray(
        data=ice_filtered.values,
        coords={
            'time': f_selected['time'].isel(time=i),
            'lat': ice_filtered['lat'],
            'lon': ice_filtered['lon'],
        },
        dims=['lat', 'lon'],
    )

    # Append the processed slice to the list
    processed_data.append(processed_slice)

# Combine all processed time slices into a single dataset
final_ice_data = xr.concat(processed_data, dim='time')

# Save the combined dataset to a new file
output_filename = 'example_regridded_sea_ice_dataset.nc'
final_ice_data.to_netcdf(output_filename)
print('Finish processing:' + output_filename)