## WICCI Downscaled Data: Tmax and Tmin - 20 year - seasonal - Modeled (CMIP6 Source)

Objectives
* aggregate data for temperature (max and min), e.g. mean, standard deviation, for WICCI's modeled climate data
* run for a 20-year model window
* calculate averages by seasons (Winter: DJF, Spring: MAM, Summer: JJA, Fall: SON)
* create new netcdf file(s) for aggregate data for 20-year timeframe
* calculate the intermodel standard deviation differently: first average each model over the current time window,
  then calculate the standard deviation of those per-model averages across models

Eric Compas, compase@uww.edu 11/17/2021, 1/19/2022, 1/25/2022, 2/28/2022, 12/4/2024, 1/31/2025, 2/6/2025

In [None]:
import netCDF4
import numpy as np
import os
import datetime
import gc
from netCDF4 import Dataset,num2date,date2num
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
# Which folder are we getting the downscaled climate data from? Needs to be the data from the
# WICCI Climate Working Group with the folder structure as provided. This level should contain
# the 'rcp45' and 'rcp85' folders to work correctly; input paths are built further down as
# <base_folder>/<scenario>/<GCM>/r1i1p1/tmax_<run>_<year>.nc (and the matching tmin_ file).
base_folder = "C:/CMIP6/wisconsin"
if not os.path.isdir(base_folder):
    # Warning only: nothing is raised here, so later cells still run and fail on their own.
    print("Base folder not valid")

In [None]:
# Folder to store the resulting netCDF files in. Can be anywhere on your file system.
# NOTE(review): the output step opens its file with mode='w', which is expected to replace an
# existing file of the same name -- move or rename earlier results you want to keep (confirm
# against the netCDF4 Dataset `clobber` default).
# The commented-out assignments are alternative locations kept for other machines.
#out_folder = "Z:/Climate_Data/aggregate_files"
out_folder = r"C:\Users\compase\Dropbox\Fall_2024\WICCI\CMIP6_Output"
#out_folder  = "/Users/ericcompas/Climate_Data/output"
if not os.path.isdir(out_folder):
    # Warning only: nothing is raised here, so a bad path surfaces later when the file is written.
    print("Out folder not valid")

In [None]:
# Define seasons as day-of-year index ranges on a 365-day axis.
#
# Indices are 0-based, matching how the daily arrays are indexed elsewhere in
# this notebook (index 0 = Jan 1, 58 = Feb 28, 59 = Mar 1, 364 = Dec 31).
# The previous values were 1-based day-of-year numbers, which shifted every
# season one day late (e.g. Mar 1 was counted as winter, Dec 1 as fall).
#
# Each entry is (name, (range1, range2)). range2 is None except for winter,
# which wraps the year boundary: range1 is December and range2 is Jan-Feb of
# the same calendar-year file. Winter's range1 stops at 365 so that the slice
# reaches Dec 31 (index 364) whether the slicing code treats that stop as
# inclusive or exclusive; slicing clips it to the end of the axis.
seasons = [
    ('Winter', ([334,365], [0,58])),    # Dec-Feb
    ('Spring', ([59,150], None)),       # Mar-May
    ('Summer', ([151,242], None)),      # Jun-Aug
    ('Fall', ([243,333], None))         # Sep-Nov
]

def process_season_chunk(data_array, season_range1, season_range2=None):
    """Extract the days belonging to one season from a daily data array.

    The day axis is taken to be the third-from-last axis, so both
    (day, lat, lon) arrays and (n, day, lat, lon) arrays are accepted.
    Previously the indexing was hard-coded for four dimensions and raised
    IndexError on a single year's (day, lat, lon) array.

    Args:
        data_array: array whose last three axes are (day, lat, lon).
        season_range1: [first, last] day indices, both inclusive.
        season_range2: optional second [first, last] pair (inclusive) for a
            season that wraps the year boundary, e.g. winter. Its days are
            appended after those of ``season_range1``.

    Returns:
        The selected days, with every axis other than the day axis unchanged.
        A ``last`` index beyond the end of the day axis is clipped by slicing.
    """
    def _select_days(day_range):
        # Ellipsis keeps any leading axes, so the slice always lands on the
        # day axis regardless of whether the input is 3-D or 4-D.
        return data_array[..., day_range[0]:day_range[1] + 1, :, :]

    if season_range2 is None:
        # Regular season: one contiguous run of days
        return _select_days(season_range1)

    # Season spanning the year boundary: join the two runs along the day axis
    return np.ma.concatenate(
        [_select_days(season_range1), _select_days(season_range2)],
        axis=-3,
    )

def calculate_seasonal_stats(data_array, mask):
    """Return the masked mean of `data_array` over its first two axes.

    The mean collapses axes 0 and 1; `mask` is then applied to the
    remaining axes of the result.
    """
    seasonal_mean = np.ma.mean(data_array, axis=(0, 1))
    return np.ma.masked_array(seasonal_mean, mask=mask)

# The BIG LOOP with memory-efficient processing
#
# For every climate scenario and 20-year window:
#   1. For each GCM, read every yearly tmax/tmin file once and keep a running
#      sum of that file's seasonal means, giving one window-mean per season
#      per GCM (only one year of daily data is in memory at a time).
#   2. Across GCMs, write the mean of the per-model means and the standard
#      deviation of the per-model means (the inter-model spread).
#
# Fixes relative to the earlier version of this cell:
#   * results from each GCM no longer overwrite the previous GCM's results;
#   * lat/lon are captured while a source file is still open (they used to be
#     read from an already-closed dataset);
#   * a bare `except` no longer reports every error as a missing file;
#   * each file is read once instead of once per season.


def _collapse_leap_day(daily):
    """Return `daily` on a 365-day axis by averaging Feb 28 and Feb 29.

    Arrays whose first axis is not 366 long are returned unchanged.
    """
    if daily.shape[0] != 366:
        return daily
    feb_28_29 = np.ma.mean(daily[58:60], axis=0)
    return np.ma.concatenate([daily[0:58], feb_28_29[np.newaxis], daily[60:]])


models = ["rcp45", "rcp85"]
realizations = ["01", "02", "03"]

for m in models:
    print("Processing climate scenario "+m)

    scenario_folder = os.path.join(base_folder, m)
    # Only sub-folders are GCMs; stray files in the scenario folder are ignored.
    GCMs = sorted(
        entry for entry in os.listdir(scenario_folder)
        if os.path.isdir(os.path.join(scenario_folder, entry))
    )
    #GCMs = GCMs[:5]  # for testing only

    for y in range(2050,2051,20):
        print("  Processing base year "+str(y))
        first_year, last_year = y - 9, y + 10

        # One (season, lat, lon) array of window means per GCM
        tmax_model_means = []
        tmin_model_means = []

        # Grid coordinates, copied from the first file that opens successfully
        lat_values = None
        lon_values = None

        # Process each GCM
        for gcm_idx, gcm in enumerate(GCMs):
            print(f"    Processing GCM {gcm} ({gcm_idx+1}/{len(GCMs)})")

            # Running sums of per-file seasonal means, one slot per season
            tmax_sums = [None] * len(seasons)
            tmin_sums = [None] * len(seasons)
            files_used = 0

            # Process each year and realization
            for yr in range(first_year, last_year + 1):
                for r in realizations:
                    nf_tmax = os.path.join(base_folder,m,gcm,"r1i1p1",f"tmax_{r}_{yr}.nc")
                    nf_tmin = os.path.join(base_folder,m,gcm,"r1i1p1",f"tmin_{r}_{yr}.nc")
                    if not (os.path.isfile(nf_tmax) and os.path.isfile(nf_tmin)):
                        print(f"        Can't locate netCDF file for year: {yr}")
                        continue

                    try:
                        with netCDF4.Dataset(nf_tmax) as n_tmax, netCDF4.Dataset(nf_tmin) as n_tmin:
                            # Convert to Fahrenheit
                            tmax = n_tmax.variables['tmax'][:] * 9/5 + 32
                            tmin = n_tmin.variables['tmin'][:] * 9/5 + 32
                            if lat_values is None:
                                lat_values = n_tmax.variables['lat'][:]
                                lon_values = n_tmax.variables['lon'][:]
                    except (OSError, RuntimeError, KeyError) as err:
                        # Best effort: skip unreadable or malformed files, but say why
                        print(f"        Can't read netCDF file for year: {yr} ({err})")
                        continue

                    # Put leap years on the same 365-day axis as other years
                    tmax = _collapse_leap_day(tmax)
                    tmin = _collapse_leap_day(tmin)

                    # Mask taken from the first day of each file
                    tmax_mask = np.ma.getmask(tmax[0])
                    tmin_mask = np.ma.getmask(tmin[0])

                    for season_idx, (_season_name, (range1, range2)) in enumerate(seasons):
                        # A leading length-1 axis gives the helpers the
                        # (n, day, lat, lon) layout they index into.
                        tmax_season = process_season_chunk(tmax[np.newaxis], range1, range2)
                        tmin_season = process_season_chunk(tmin[np.newaxis], range1, range2)
                        tmax_file_mean = calculate_seasonal_stats(tmax_season, tmax_mask)
                        tmin_file_mean = calculate_seasonal_stats(tmin_season, tmin_mask)

                        if tmax_sums[season_idx] is None:
                            tmax_sums[season_idx] = tmax_file_mean
                            tmin_sums[season_idx] = tmin_file_mean
                        else:
                            tmax_sums[season_idx] = tmax_sums[season_idx] + tmax_file_mean
                            tmin_sums[season_idx] = tmin_sums[season_idx] + tmin_file_mean

                    files_used += 1

                    # Free memory before the next file is loaded
                    del tmax, tmin

            if files_used == 0:
                print(f"      No usable netCDF files for GCM {gcm}; skipping it")
                continue

            # Every file contributes the same number of days per season, so the
            # average of the per-file means is this model's window mean.
            tmax_model_means.append(np.ma.stack([total / files_used for total in tmax_sums]))
            tmin_model_means.append(np.ma.stack([total / files_used for total in tmin_sums]))
            gc.collect()

        if not tmax_model_means:
            print(f"  No data found for base year {y}; nothing written")
            continue

        # Stack to (model, season, lat, lon) and aggregate across models
        tmax_by_model = np.ma.stack(tmax_model_means)
        tmin_by_model = np.ma.stack(tmin_model_means)
        tmax_seasons_mean = np.ma.mean(tmax_by_model, axis=0)
        tmax_seasons_std = np.ma.std(tmax_by_model, axis=0)
        tmin_seasons_mean = np.ma.mean(tmin_by_model, axis=0)
        tmin_seasons_std = np.ma.std(tmin_by_model, axis=0)
        n_lat, n_lon = tmax_seasons_mean.shape[1:]

        # Write results to netCDF file
        filename = f"temp_{m}_{first_year}-{last_year}_seasonal.nc"
        newfile = os.path.join(out_folder,filename)
        with Dataset(newfile, mode='w', format='NETCDF4_CLASSIC') as ncfile:
            # Create dimensions (grid size comes from the data itself)
            lat_dim = ncfile.createDimension('lat', n_lat)
            lon_dim = ncfile.createDimension('lon', n_lon)
            time_dim = ncfile.createDimension('time', None)

            # Add metadata
            ncfile.title = f'Aggregate seasonal temperature values for WICCI downscaled climate data (CMIP6) for all GCMs for {m} and 20-year window around year {y}'
            ncfile.subtitle = "Data source: UW-Madison WICCI; Data aggregation: Eric Compas, compase@uww.edu"

            # Create variables
            lat = ncfile.createVariable('lat', np.float64, ('lat',))
            lon = ncfile.createVariable('lon', np.float64, ('lon',))
            time = ncfile.createVariable('time', np.float64, ('time',))

            # Add variable metadata
            lat.units = 'degrees_north'
            lat.long_name = 'latitude'
            lon.units = 'degrees_east'
            lon.long_name = 'longitude'
            time.units = f'days since {y}-01-01'
            time.long_name = 'time'

            # Create temperature variables
            var_defs = [
                ('tmax_mean', tmax_seasons_mean, 'mean maximum temperature (F) per season'),
                ('tmax_std', tmax_seasons_std, 'inter-model standard deviation of mean maximum temperature (F) per season'),
                ('tmin_mean', tmin_seasons_mean, 'mean minimum temperature (F) per season'),
                ('tmin_std', tmin_seasons_std, 'inter-model standard deviation of mean minimum temperature (F) per season')
            ]

            for name, data, desc in var_defs:
                var = ncfile.createVariable(name, np.float64, ('time','lat','lon'))
                var.units = 'degrees_F'
                var.standard_name = desc
                # Set before assigning data, as in the original ordering
                var.missing_value = -32768
                var[:] = data

            # Write coordinates captured while a source file was open
            if lat_values is not None:
                lat[:] = lat_values
                lon[:] = lon_values

            # Write time - a nominal mid-season date for each season
            season_midpoints = [
                datetime.datetime(y,1,15,0),  # Winter (Jan 15)
                datetime.datetime(y,4,15,0),  # Spring (Apr 15)
                datetime.datetime(y,7,15,0),  # Summer (Jul 15)
                datetime.datetime(y,10,15,0)  # Fall (Oct 15)
            ]
            time[:] = date2num(season_midpoints, time.units)

        print(f"Wrote: {filename}")