In [2]:
#!/usr/bin/env python3
"""
Regrid multiple ocean datasets to a common 0.10° spatial resolution.

This script processes:
- AQUA/MODIS Chlor_a (0.04° → 0.10°) - upscaling using coarsen
- OSTIA SST (0.05° → 0.10°) - upscaling using coarsen  
- SMAP SSS (0.25° → 0.10°) - downscaling using interpolation

All datasets should be monthly data from 2015-05 to 2025-05 on the same spatial bounds.
"""

import xarray as xr
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Configuration
TARGET_RESOLUTION = 0.10  # degrees

# File paths
file_paths = {
    'chlor_a': 'study_data/CHL_MO_subset_combined.nc4',           # 0.04° resolution
    'sst': 'study_data/OSTIA_MO_subset_combined.nc',             # 0.05° resolution
    'sss': 'study_data/SMAP_MO_subset_combined_OISSSfilled.nc4'  # 0.25° resolution
}

# Output file paths
output_paths = {
    'chlor_a': 'chlor_a_010deg.nc4',
    'sst': 'sst_010deg.nc4', 
    'sss': 'sss_010deg.nc4'
}

def get_grid_resolution(dataset):
    """Calculate the grid resolution of a dataset."""
    # Handle different longitude coordinate names
    if 'longitude' in dataset.coords:
        lon_coord = 'longitude'
    elif 'lon' in dataset.coords:
        lon_coord = 'lon'
    else:
        raise ValueError("No longitude coordinate found. Expected 'longitude' or 'lon'")
    
    coords = dataset[lon_coord].values
    return float(coords[1] - coords[0])

def coarsen_dataset(dataset, input_res, target_res, data_vars=None):
    """
    Coarsen (reduce resolution) of a dataset using xarray.coarsen().
    
    Parameters:
    -----------
    dataset : xarray.Dataset
        Input dataset to coarsen
    input_res : float
        Input grid resolution in degrees
    target_res : float  
        Target grid resolution in degrees
    data_vars : list, optional
        List of data variables to process. If None, processes all data variables.
    
    Returns:
    --------
    xarray.Dataset
        Coarsened dataset
    """
    # Calculate coarsening weight
    weight = int(np.ceil(target_res / input_res))
    
    print(f"  Coarsening with weight: {weight}")
    print(f"  From {input_res:.4f}° to ~{input_res * weight:.4f}°")
    
    # Handle different coordinate names
    lon_coord = 'longitude' if 'longitude' in dataset.coords else 'lon'
    lat_coord = 'latitude' if 'latitude' in dataset.coords else 'lat'
    
    # Apply coarsening to longitude and latitude dimensions
    coarsened = dataset.coarsen(**{lon_coord: weight}, boundary="pad").mean() \
                      .coarsen(**{lat_coord: weight}, boundary="pad").mean()
    
    return coarsened

def interpolate_dataset(dataset, input_res, target_res, data_vars=None):
    """
    Interpolate (increase resolution) of a dataset using xarray.interp().
    
    Parameters:
    -----------
    dataset : xarray.Dataset
        Input dataset to interpolate
    input_res : float
        Input grid resolution in degrees
    target_res : float
        Target grid resolution in degrees
    data_vars : list, optional
        List of data variables to process. If None, processes all data variables.
    
    Returns:
    --------
    xarray.Dataset
        Interpolated dataset
    """
    # Calculate increase factor
    increase_factor = int(input_res / target_res)
    
    print(f"  Interpolating with factor: {increase_factor}")
    print(f"  From {input_res:.4f}° to {target_res:.4f}°")
    
    # Handle different coordinate names
    lon_coord = 'longitude' if 'longitude' in dataset.coords else 'lon'
    lat_coord = 'latitude' if 'latitude' in dataset.coords else 'lat'
    
    # Create new coordinate arrays
    new_lon = np.linspace(
        dataset[lon_coord][0], 
        dataset[lon_coord][-1], 
        dataset.dims[lon_coord] * increase_factor
    )
    new_lat = np.linspace(
        dataset[lat_coord][0], 
        dataset[lat_coord][-1], 
        dataset.dims[lat_coord] * increase_factor
    )
    
    # Perform interpolation using proper coordinate names
    interp_coords = {lon_coord: new_lon, lat_coord: new_lat}
    interpolated = dataset.interp(**interp_coords)
    
    return interpolated

def validate_regridding(original, regridded, dataset_name):
    """Validate the regridding results."""
    print(f"\n--- Validation for {dataset_name} ---")
    
    # Handle different coordinate names
    lon_coord = 'longitude' if 'longitude' in regridded.coords else 'lon'
    lat_coord = 'latitude' if 'latitude' in regridded.coords else 'lat'
    
    # Check resolution
    original_res = get_grid_resolution(original)
    new_res = get_grid_resolution(regridded)
    
    print(f"Original resolution: {original_res:.6f}°")
    print(f"New resolution: {new_res:.6f}°")
    print(f"Target resolution: {TARGET_RESOLUTION}°")
    print(f"Resolution error: {abs(new_res - TARGET_RESOLUTION):.6f}°")
    
    # Check dimensions
    print(f"Original shape: {dict(original.dims)}")
    print(f"New shape: {dict(regridded.dims)}")
    
    # Check coordinate ranges
    print(f"Longitude range: [{regridded[lon_coord].min().values:.3f}, {regridded[lon_coord].max().values:.3f}]")
    print(f"Latitude range: [{regridded[lat_coord].min().values:.3f}, {regridded[lat_coord].max().values:.3f}]")
    
    # Check time dimension (should be unchanged)
    if 'time' in original.dims and 'time' in regridded.dims:
        print(f"Time dimension: {original.dims['time']} → {regridded.dims['time']}")
        print(f"Time range: {regridded.time.min().values} to {regridded.time.max().values}")

def process_dataset(file_path, dataset_name, target_resolution):
    """Process a single dataset file."""
    print(f"\n{'='*50}")
    print(f"Processing {dataset_name.upper()}")
    print(f"{'='*50}")
    
    # Load dataset
    print("Loading dataset...")
    try:
        ds = xr.open_dataset(file_path)
        print(f"Successfully loaded: {file_path}")
        print(f"Dataset dimensions: {dict(ds.dims)}")
        print(f"Data variables: {list(ds.data_vars)}")
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None
    
    # Get current resolution
    current_res = get_grid_resolution(ds)
    print(f"Current resolution: {current_res:.6f}°")
    print(f"Target resolution: {target_resolution}°")
    
    # Determine if we need to coarsen or interpolate
    if current_res < target_resolution:
        print("→ Upscaling (coarsening) required")
        regridded = coarsen_dataset(ds, current_res, target_resolution)
    elif current_res > target_resolution:
        print("→ Downscaling (interpolation) required")
        regridded = interpolate_dataset(ds, current_res, target_resolution)
    else:
        print("→ No regridding needed, resolution already matches target")
        regridded = ds
    
    # Validate results
    validate_regridding(ds, regridded, dataset_name)
    
    # Close original dataset
    ds.close()
    
    return regridded

def main():
    """Main processing function."""
    print("Starting regridding process...")
    print(f"Target resolution: {TARGET_RESOLUTION}°")
    
    # Process each dataset
    regridded_datasets = {}
    
    for dataset_name, file_path in file_paths.items():
        regridded = process_dataset(file_path, dataset_name, TARGET_RESOLUTION)
        if regridded is not None:
            regridded_datasets[dataset_name] = regridded
    
    # Save regridded datasets
    print(f"\n{'='*50}")
    print("SAVING REGRIDDED DATASETS")
    print(f"{'='*50}")
    
    for dataset_name, regridded in regridded_datasets.items():
        output_file = output_paths[dataset_name]
        print(f"Saving {dataset_name} to {output_file}...")
        
        try:
            # Add attributes to document the regridding
            regridded.attrs['regridding_info'] = f'Regridded to {TARGET_RESOLUTION}° resolution'
            regridded.attrs['regridding_method'] = 'xarray coarsen/interp'
            regridded.attrs['processing_date'] = str(np.datetime64('now'))
            
            # Save with compression
            encoding = {}
            for var in regridded.data_vars:
                encoding[var] = {'zlib': True, 'complevel': 4}
            
            regridded.to_netcdf(output_file, encoding=encoding)
            print(f"✓ Successfully saved {output_file}")
            
            # Print file info
            file_size = regridded.nbytes / (1024**2)  # MB
            print(f"  File size: ~{file_size:.1f} MB")
            
        except Exception as e:
            print(f"✗ Error saving {output_file}: {e}")
        
        # Close dataset
        regridded.close()
    
    print(f"\n{'='*50}")
    print("REGRIDDING COMPLETE!")
    print(f"{'='*50}")
    print("All datasets have been regridded to 0.10° resolution.")
    print("You can now use these files for comparative analysis.")

if __name__ == "__main__":
    # Example of how to customize for specific variable names
    # Uncomment and modify if your datasets have different variable names
    
    # For MODIS Chlor_a - common variable names: 'chlor_a', 'CHL', 'chl'
    # For OSTIA SST - common variable names: 'analysed_sst', 'sst', 'SST'  
    # For SMAP SSS - common variable names: 'sss', 'SSS', 'salinity'
    
    main()

    # Optional: Quick validation plot (uncomment if you want visual validation)
    """
    import matplotlib.pyplot as plt
    
    # Load one of the regridded files for quick visual check
    test_file = output_paths['sst']  # or whichever you want to check
    ds_test = xr.open_dataset(test_file)
    
    # Plot first time step
    plt.figure(figsize=(12, 5))
    ds_test.isel(time=0).plot()
    plt.title(f'Regridded data - First time step')
    plt.show()
    
    ds_test.close()
    """

Starting regridding process...
Target resolution: 0.1°

Processing CHLOR_A
Loading dataset...
Successfully loaded: study_data/CHL_MO_subset_combined.nc4
Dataset dimensions: {'time': 121, 'lat': 2040, 'lon': 3120, 'rgb': 3, 'eightbitcolor': 256}
Data variables: ['chlor_a', 'palette']
Current resolution: 0.041672°
Target resolution: 0.1°
→ Upscaling (coarsening) required
  Coarsening with weight: 3
  From 0.0417° to ~0.1250°

--- Validation for chlor_a ---
Original resolution: 0.041672°
New resolution: 0.125000°
Target resolution: 0.1°
Resolution error: 0.025000°
Original shape: {'time': 121, 'lat': 2040, 'lon': 3120, 'rgb': 3, 'eightbitcolor': 256}
New shape: {'time': 121, 'lat': 680, 'lon': 1040, 'rgb': 3, 'eightbitcolor': 256}
Longitude range: [-179.938, -50.062]
Latitude range: [-4.938, 79.938]
Time dimension: 121 → 121
Time range: 2015-05-01T00:00:00.000000000 to 2025-05-01T00:00:00.000000000

Processing SST
Loading dataset...
Successfully loaded: study_data/OSTIA_MO_subset_combined

In [3]:
#!/usr/bin/env python3
"""
Regrid multiple ocean datasets to a common 0.10° spatial resolution.

This script processes:
- AQUA/MODIS Chlor_a (0.04° → 0.10°) - upscaling using coarsen
- OSTIA SST (0.05° → 0.10°) - upscaling using coarsen  
- SMAP SSS (0.25° → 0.10°) - downscaling using interpolation

All datasets should be monthly data from 2015-05 to 2025-05 on the same spatial bounds.
"""

import xarray as xr
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Configuration
TARGET_RESOLUTION = 0.10  # degrees

# File paths
file_paths = {
    'chlor_a': 'study_data/CHL_MO_subset_combined.nc4',           # 0.04° resolution
    'sst': 'study_data/OSTIA_MO_subset_combined.nc',             # 0.05° resolution
    'sss': 'study_data/SMAP_MO_subset_combined_OISSSfilled.nc4'  # 0.25° resolution
}

# Output file paths
output_paths = {
    'chlor_a': 'chlor_a_010deg.nc4',
    'sst': 'sst_010deg.nc4', 
    'sss': 'sss_010deg.nc4'
}

def get_grid_resolution(dataset):
    """Calculate the grid resolution of a dataset."""
    # Handle different longitude coordinate names
    if 'longitude' in dataset.coords:
        lon_coord = 'longitude'
    elif 'lon' in dataset.coords:
        lon_coord = 'lon'
    else:
        raise ValueError("No longitude coordinate found. Expected 'longitude' or 'lon'")
    
    coords = dataset[lon_coord].values
    return float(coords[1] - coords[0])

def coarsen_dataset(dataset, input_res, target_res, data_vars=None):
    """
    Coarsen (reduce resolution) of a dataset using xarray.coarsen().
    
    Parameters:
    -----------
    dataset : xarray.Dataset
        Input dataset to coarsen
    input_res : float
        Input grid resolution in degrees
    target_res : float  
        Target grid resolution in degrees
    data_vars : list, optional
        List of data variables to process. If None, processes all data variables.
    
    Returns:
    --------
    xarray.Dataset
        Coarsened dataset
    """
    # Calculate coarsening weight
    weight = int(np.ceil(target_res / input_res))
    
    print(f"  Coarsening with weight: {weight}")
    print(f"  From {input_res:.4f}° to ~{input_res * weight:.4f}°")
    
    # Handle different coordinate names
    lon_coord = 'longitude' if 'longitude' in dataset.coords else 'lon'
    lat_coord = 'latitude' if 'latitude' in dataset.coords else 'lat'
    
    # Apply coarsening to longitude and latitude dimensions
    coarsened = dataset.coarsen(**{lon_coord: weight}, boundary="pad").mean() \
                      .coarsen(**{lat_coord: weight}, boundary="pad").mean()
    
    return coarsened

def interpolate_dataset(dataset, input_res, target_res, data_vars=None):
    """
    Interpolate (increase resolution) of a dataset using xarray.interp().
    
    Parameters:
    -----------
    dataset : xarray.Dataset
        Input dataset to interpolate
    input_res : float
        Input grid resolution in degrees
    target_res : float
        Target grid resolution in degrees
    data_vars : list, optional
        List of data variables to process. If None, processes all data variables.
    
    Returns:
    --------
    xarray.Dataset
        Interpolated dataset
    """
    # Calculate increase factor
    increase_factor = int(input_res / target_res)
    
    print(f"  Interpolating with factor: {increase_factor}")
    print(f"  From {input_res:.4f}° to {target_res:.4f}°")
    
    # Handle different coordinate names
    lon_coord = 'longitude' if 'longitude' in dataset.coords else 'lon'
    lat_coord = 'latitude' if 'latitude' in dataset.coords else 'lat'
    
    # Create new coordinate arrays
    new_lon = np.linspace(
        dataset[lon_coord][0], 
        dataset[lon_coord][-1], 
        dataset.dims[lon_coord] * increase_factor
    )
    new_lat = np.linspace(
        dataset[lat_coord][0], 
        dataset[lat_coord][-1], 
        dataset.dims[lat_coord] * increase_factor
    )
    
    # Perform interpolation using proper coordinate names
    interp_coords = {lon_coord: new_lon, lat_coord: new_lat}
    interpolated = dataset.interp(**interp_coords)
    
    return interpolated

def validate_regridding(original, regridded, dataset_name):
    """Validate the regridding results."""
    print(f"\n--- Validation for {dataset_name} ---")
    
    # Handle different coordinate names
    lon_coord = 'longitude' if 'longitude' in regridded.coords else 'lon'
    lat_coord = 'latitude' if 'latitude' in regridded.coords else 'lat'
    
    # Check resolution
    original_res = get_grid_resolution(original)
    new_res = get_grid_resolution(regridded)
    
    print(f"Original resolution: {original_res:.6f}°")
    print(f"New resolution: {new_res:.6f}°")
    print(f"Target resolution: {TARGET_RESOLUTION}°")
    print(f"Resolution error: {abs(new_res - TARGET_RESOLUTION):.6f}°")
    
    # Check dimensions
    print(f"Original shape: {dict(original.dims)}")
    print(f"New shape: {dict(regridded.dims)}")
    
    # Check coordinate ranges
    print(f"Longitude range: [{regridded[lon_coord].min().values:.3f}, {regridded[lon_coord].max().values:.3f}]")
    print(f"Latitude range: [{regridded[lat_coord].min().values:.3f}, {regridded[lat_coord].max().values:.3f}]")
    
    # Check time dimension (should be unchanged)
    if 'time' in original.dims and 'time' in regridded.dims:
        print(f"Time dimension: {original.dims['time']} → {regridded.dims['time']}")
        print(f"Time range: {regridded.time.min().values} to {regridded.time.max().values}")

def final_interpolation_to_target(dataset, target_resolution):
    """
    Final interpolation step to ensure exact target resolution.
    
    Parameters:
    -----------
    dataset : xarray.Dataset
        Dataset to interpolate to exact target resolution
    target_resolution : float
        Exact target resolution in degrees
    
    Returns:
    --------
    xarray.Dataset
        Dataset interpolated to exact target resolution
    """
    # Handle different coordinate names
    lon_coord = 'longitude' if 'longitude' in dataset.coords else 'lon'
    lat_coord = 'latitude' if 'latitude' in dataset.coords else 'lat'
    
    # Get current coordinate ranges
    lon_min = float(dataset[lon_coord].min())
    lon_max = float(dataset[lon_coord].max())
    lat_min = float(dataset[lat_coord].min())
    lat_max = float(dataset[lat_coord].max())
    
    # Create new coordinate arrays with exact target resolution
    # Calculate number of points to maintain coverage
    n_lon = int(np.round((lon_max - lon_min) / target_resolution)) + 1
    n_lat = int(np.round((lat_max - lat_min) / target_resolution)) + 1
    
    new_lon = np.linspace(lon_min, lon_max, n_lon)
    new_lat = np.linspace(lat_min, lat_max, n_lat)
    
    # Verify the resolution is exactly what we want
    actual_lon_res = new_lon[1] - new_lon[0]
    actual_lat_res = new_lat[1] - new_lat[0]
    
    print(f"  Final interpolation - Longitude resolution: {actual_lon_res:.6f}°")
    print(f"  Final interpolation - Latitude resolution: {actual_lat_res:.6f}°")
    
    # Perform final interpolation
    interp_coords = {lon_coord: new_lon, lat_coord: new_lat}
    final_dataset = dataset.interp(**interp_coords)
    
    return final_dataset

def process_dataset(file_path, dataset_name, target_resolution):
    """Process a single dataset file."""
    print(f"\n{'='*50}")
    print(f"Processing {dataset_name.upper()}")
    print(f"{'='*50}")
    
    # Load dataset
    print("Loading dataset...")
    try:
        ds = xr.open_dataset(file_path)
        print(f"Successfully loaded: {file_path}")
        print(f"Dataset dimensions: {dict(ds.dims)}")
        print(f"Data variables: {list(ds.data_vars)}")
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None
    
    # Get current resolution
    current_res = get_grid_resolution(ds)
    print(f"Current resolution: {current_res:.6f}°")
    print(f"Target resolution: {target_resolution}°")
    
    # Step 1: Initial regridding (coarsen or interpolate to get close to target)
    if current_res < target_resolution:
        print("→ Step 1: Upscaling (coarsening) required")
        regridded = coarsen_dataset(ds, current_res, target_resolution)
    elif current_res > target_resolution:
        print("→ Step 1: Downscaling (interpolation) required")
        regridded = interpolate_dataset(ds, current_res, target_resolution)
    else:
        print("→ Step 1: No initial regridding needed")
        regridded = ds
    
    # Step 2: Final interpolation to exact target resolution
    intermediate_res = get_grid_resolution(regridded)
    print(f"→ Step 2: Final interpolation to exact {target_resolution}°")
    print(f"  Intermediate resolution: {intermediate_res:.6f}°")
    
    final_regridded = final_interpolation_to_target(regridded, target_resolution)
    
    # Validate results
    validate_regridding(ds, final_regridded, dataset_name)
    
    # Close datasets
    ds.close()
    if regridded is not ds:
        regridded.close()
    
    return final_regridded

def main():
    """Main processing function."""
    print("Starting regridding process...")
    print(f"Target resolution: {TARGET_RESOLUTION}°")
    
    # Process each dataset
    regridded_datasets = {}
    
    for dataset_name, file_path in file_paths.items():
        regridded = process_dataset(file_path, dataset_name, TARGET_RESOLUTION)
        if regridded is not None:
            regridded_datasets[dataset_name] = regridded
    
    # Save regridded datasets
    print(f"\n{'='*50}")
    print("SAVING REGRIDDED DATASETS")
    print(f"{'='*50}")
    
    for dataset_name, regridded in regridded_datasets.items():
        output_file = output_paths[dataset_name]
        print(f"Saving {dataset_name} to {output_file}...")
        
        try:
            # Add attributes to document the regridding
            regridded.attrs['regridding_info'] = f'Regridded to {TARGET_RESOLUTION}° resolution'
            regridded.attrs['regridding_method'] = 'xarray coarsen/interp'
            regridded.attrs['processing_date'] = str(np.datetime64('now'))
            
            # Save with compression
            encoding = {}
            for var in regridded.data_vars:
                encoding[var] = {'zlib': True, 'complevel': 4}
            
            regridded.to_netcdf(output_file, encoding=encoding)
            print(f"✓ Successfully saved {output_file}")
            
            # Print file info
            file_size = regridded.nbytes / (1024**2)  # MB
            print(f"  File size: ~{file_size:.1f} MB")
            
        except Exception as e:
            print(f"✗ Error saving {output_file}: {e}")
        
        # Close dataset
        regridded.close()
    
    print(f"\n{'='*50}")
    print("REGRIDDING COMPLETE!")
    print(f"{'='*50}")
    print("All datasets have been regridded to 0.10° resolution.")
    print("You can now use these files for comparative analysis.")

if __name__ == "__main__":
    # Example of how to customize for specific variable names
    # Uncomment and modify if your datasets have different variable names
    
    # For MODIS Chlor_a - common variable names: 'chlor_a', 'CHL', 'chl'
    # For OSTIA SST - common variable names: 'analysed_sst', 'sst', 'SST'  
    # For SMAP SSS - common variable names: 'sss', 'SSS', 'salinity'
    
    main()

    # Optional: Quick validation plot (uncomment if you want visual validation)
    """
    import matplotlib.pyplot as plt
    
    # Load one of the regridded files for quick visual check
    test_file = output_paths['sst']  # or whichever you want to check
    ds_test = xr.open_dataset(test_file)
    
    # Plot first time step
    plt.figure(figsize=(12, 5))
    ds_test.isel(time=0).plot()
    plt.title(f'Regridded data - First time step')
    plt.show()
    
    ds_test.close()
    """

Starting regridding process...
Target resolution: 0.1°

Processing CHLOR_A
Loading dataset...
Successfully loaded: study_data/CHL_MO_subset_combined.nc4
Dataset dimensions: {'time': 121, 'lat': 2040, 'lon': 3120, 'rgb': 3, 'eightbitcolor': 256}
Data variables: ['chlor_a', 'palette']
Current resolution: 0.041672°
Target resolution: 0.1°
→ Step 1: Upscaling (coarsening) required
  Coarsening with weight: 3
  From 0.0417° to ~0.1250°
→ Step 2: Final interpolation to exact 0.1°
  Intermediate resolution: 0.125000°
  Final interpolation - Longitude resolution: 0.099981°
  Final interpolation - Latitude resolution: 0.099971°

--- Validation for chlor_a ---
Original resolution: 0.041672°
New resolution: 0.099981°
Target resolution: 0.1°
Resolution error: 0.000019°
Original shape: {'time': 121, 'lat': 2040, 'lon': 3120, 'rgb': 3, 'eightbitcolor': 256}
New shape: {'time': 121, 'lat': 850, 'lon': 1300, 'rgb': 3, 'eightbitcolor': 256}
Longitude range: [-179.938, -50.062]
Latitude range: [-4.938, 

NameError: name 'install' is not defined