## WICCI Downscaled Data: Tmax and Tmin - 20 year - seasonal - Modeled (CMIP6 Source)

Objectives
* aggregate data for temperature (max and min), e.g. mean, standard deviation, for WICCI's modeled climate data
* run for 20 year model window
* calculate averages by seasons (Winter: DJF, Spring: MAM, Summer: JJA, Fall: SON)
* create new netcdf file(s) for aggregate data for 20-year timeframe
* calculate intermodel standard deviation differently -- find average of each model within the current time window and
  then calculate standard deviation across models

Eric Compas, compase@uww.edu 11/17/2021, 1/19/2022, 1/25/2022, 2/28/2022, 12/4/2024, 1/31/2025, 2/6/2025, 2/13/2025

In [1]:
import netCDF4
import numpy as np
import os
import datetime
import gc
from netCDF4 import Dataset,num2date,date2num
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

Model run settings. Change these settings to change source and output folders, models used, years run, etc.

In [2]:
# Which folder are we getting the downscaled climate data from? Needs to be the data from WICCI Climate Working Group with folder structure as provided. This level should contain the 'rcp45' and 'rcp85' folders to work correctly.
base_folder = r"X:\CMIP6_Data"


# Folder to store the resulting netCDF files in. Can be anywhere on your file system. Note that this script will not overwrite existing files.
out_folder = r"C:\Users\compase\Dropbox\Spring_2025\WICCI\CMIP6_WICCI_Output"


# models to calculate summaries for (new file created for each)
models = ["ssp126","ssp245","ssp370","ssp585"]

In [3]:
# Check inputs
if not os.path.isdir(base_folder):
    print("Base folder not valid")
    quit()
if not os.path.isdir(out_folder):
    print("Out folder not valid")
    quit()

# Define seasons and their date ranges
seasons = [
    ('Winter', ([335,366], [0,59])),    # Dec-Feb
    ('Spring', ([60,151], None)),       # Mar-May
    ('Summer', ([152,243], None)),      # Jun-Aug
    ('Fall', ([244,334], None))         # Sep-Nov
]

def process_season_chunk(data_array, season_range1, season_range2=None):
    """Process a chunk of data for a given season"""
    if season_range2 is None:
        # Regular season
        season_data = data_array[season_range1[0]:season_range1[1]+1, :, :]
    else:
        # Winter season (spans year boundary)
        part1 = data_array[season_range1[0]:season_range1[1], :, :]
        part2 = data_array[season_range2[0]:season_range2[1]+1, :, :]
        season_data = np.ma.concatenate([part1, part2], axis=0)
    return season_data

def calculate_seasonal_stats(data_array, mask):
    """Calculate seasonal statistics for a data array"""
    return np.ma.masked_array(np.ma.mean(data_array, axis=0), mask)

# The BIG LOOP with memory-efficient processing
for m in models:
    print("Processing climate scenario "+m)
    
    GCMs = os.listdir(os.path.join(base_folder,m))
    #GCMs = GCMs[:5]  # for testing only
    
    for y in range(2030,2091,20):    
        print("  Processing base year "+str(y))
        
        # Initialize seasonal arrays
        tmax_seasons_mean = np.ma.empty([4, 89, 91], dtype=np.float32)
        tmax_seasons_std = np.ma.empty([4, 89, 91], dtype=np.float32)
        tmin_seasons_mean = np.ma.empty([4, 89, 91], dtype=np.float32)
        tmin_seasons_std = np.ma.empty([4, 89, 91], dtype=np.float32)
        
        # Process each GCM
        for gcm_idx, gcm in enumerate(GCMs):
            print(f"    Processing GCM {gcm} ({gcm_idx+1}/{len(GCMs)})")
            
            # Process each season
            for season_idx, (season_name, (range1, range2)) in enumerate(seasons):
                print(f"      Processing {season_name}")
                
                # Initialize seasonal accumulators for this GCM
                season_tmax = []
                season_tmin = []
                
                # Process each year
                for yr in range(y-9, y+11):
                    for r in ["01","02","03"]:
                        nf = os.path.join(base_folder,m,gcm,"r1i1p1",f"temp_{r}_{yr}.nc")
                        try:
                            with netCDF4.Dataset(nf) as nds:
                                # Convert to Fahrenheit
                                tmax = nds.variables['tmax'][:] * 9/5 + 32
                                tmin = nds.variables['tmin'][:] * 9/5 + 32
                                
                                if tmax.shape[0] == 366:
                                    # Average Feb 28-29
                                    tmax_leap = tmax[58:60,:,:]
                                    tmax_mean_leap = np.ma.mean(tmax_leap,axis=0)
                                    tmax = np.ma.concatenate([
                                        tmax[0:58],
                                        tmax_mean_leap[np.newaxis],
                                        tmax[60:]
                                    ])
                                    
                                    tmin_leap = tmin[58:60,:,:]
                                    tmin_mean_leap = np.ma.mean(tmin_leap,axis=0)
                                    tmin = np.ma.concatenate([
                                        tmin[0:58],
                                        tmin_mean_leap[np.newaxis],
                                        tmin[60:]
                                    ])
                                
                                # Get seasonal chunks
                                tmax_season = process_season_chunk(tmax, range1, range2)
                                tmin_season = process_season_chunk(tmin, range1, range2)
                                
                                # Save mask from first file
                                if len(season_tmax) == 0:
                                    mask = np.ma.getmask(tmax[0])
                                
                                # Append seasonal data
                                season_tmax.append(tmax_season)
                                season_tmin.append(tmin_season)
                                
                        except:
                            print(f"        Can't locate netCDF file for year: {yr}")
                            continue
                        
                        # Free memory
                        gc.collect()
                
                # Calculate seasonal statistics if we have data
                if len(season_tmax) > 0:
                    # Stack all data for this season
                    tmax_data = np.ma.concatenate(season_tmax, axis=0)
                    tmin_data = np.ma.concatenate(season_tmin, axis=0)
                    
                    # Calculate means
                    tmax_seasons_mean[season_idx] = calculate_seasonal_stats(tmax_data, mask)
                    tmin_seasons_mean[season_idx] = calculate_seasonal_stats(tmin_data, mask)
                    
                    # Calculate standard deviations
                    tmax_seasons_std[season_idx] = np.ma.std(tmax_data, axis=0)
                    tmin_seasons_std[season_idx] = np.ma.std(tmin_data, axis=0)
                    
                    # Free memory
                    del tmax_data, tmin_data
                    gc.collect()
        
        # Write results to netCDF file
        start_year = y - 9
        end_year = y + 10
        filename = f"temp_{m}_{start_year}-{end_year}_seasonal.nc"
        newfile = os.path.join(out_folder,filename)
        with Dataset(newfile, mode='w', format='NETCDF4_CLASSIC') as ncfile:
            # Create dimensions
            lat_dim = ncfile.createDimension('lat', 89)
            lon_dim = ncfile.createDimension('lon', 91)
            time_dim = ncfile.createDimension('time', None)
            
            # Add metadata
            ncfile.title = f'Aggregate seasonal temperature values for WICCI downscaled climate data (CMIP6) for all GCMs for {m} and 20-year window around year {y}'
            ncfile.subtitle = "Data source: UW-Madison WICCI; Data aggregation: Eric Compas, compase@uww.edu"
            
            # Create variables
            lat = ncfile.createVariable('lat', np.float32, ('lat',))
            lon = ncfile.createVariable('lon', np.float32, ('lon',))
            time = ncfile.createVariable('time', np.float32, ('time',))
            
            # Add variable metadata
            lat.units = 'degrees_north'
            lat.long_name = 'latitude'
            lon.units = 'degrees_east'
            lon.long_name = 'longitude'
            time.units = f'days since {y}-01-01'
            time.long_name = 'time'
            
            # Create temperature variables
            var_defs = [
                ('tmax_mean', tmax_seasons_mean, 'mean maximum temperature (F) per season'),
                ('tmax_std', tmax_seasons_std, 'standard deviation of maximum temperature (F) per season'),
                ('tmin_mean', tmin_seasons_mean, 'mean minimum temperature (F) per season'),
                ('tmin_std', tmin_seasons_std, 'standard deviation of minimum temperature (F) per season')
            ]
            
            for name, data, desc in var_defs:
                var = ncfile.createVariable(name, np.float32, ('time','lat','lon'))
                var.units = 'degrees_F'
                var.standard_name = desc
                var.missing_value = -32768
                var[:] = data
            
            # Write coordinates
            lat[:] = nds.variables['lat'][:]
            lon[:] = nds.variables['lon'][:]
            
            # Write time - middle of each season
            season_midpoints = [
                datetime.datetime(y,1,15,0),  # Winter (Jan 15)
                datetime.datetime(y,4,15,0),  # Spring (Apr 15)
                datetime.datetime(y,7,15,0),  # Summer (Jul 15)
                datetime.datetime(y,10,15,0)  # Fall (Oct 15)
            ]
            time[:] = date2num(season_midpoints, time.units)
        
        print(f"Wrote: {filename}")

Processing climate scenario ssp126
  Processing base year 2030
    Processing GCM ACCESS-CM2 (1/26)
      Processing Winter
      Processing Spring
      Processing Summer
      Processing Fall
    Processing GCM CanESM5 (2/26)
      Processing Winter
      Processing Spring
      Processing Summer
      Processing Fall
    Processing GCM CMCC-ESM2 (3/26)
      Processing Winter
      Processing Spring
      Processing Summer
      Processing Fall
    Processing GCM CNRM-CM6-1 (4/26)
      Processing Winter
      Processing Spring
      Processing Summer
      Processing Fall
    Processing GCM CNRM-CM6-1-HR (5/26)
      Processing Winter
      Processing Spring
      Processing Summer
      Processing Fall
    Processing GCM CNRM-ESM2-1 (6/26)
      Processing Winter
      Processing Spring
      Processing Summer
      Processing Fall
    Processing GCM EC-Earth3 (7/26)
      Processing Winter
      Processing Spring
      Processing Summer
      Processing Fall
    Processing GCM EC

  lat[:] = nds.variables['lat'][:]


RuntimeError: NetCDF: Start+count exceeds dimension bound