## WICCI Downscaled Data: Precip - 20 year - monthly

Objectives
* aggregate data for prcp, e.g. mean, standard deviation, days over 1",2",3", for RCP4.5 and RCP8.5
* run for every 20 year model window, e.g. 2021-2040 for 2030 average
* calculate averages by whole month increments
* create new netCDF file(s) for aggregate data for each 20-year timeframe
* calculate intermodel standard deviation differently -- find average of each model within the current time window and
  then calculate standard deviation across models

Eric Compas, compase@uww.edu 11/17/2021, 1/19/2022, 1/25/2022, 2/28/2022, 6/6/2022

In [1]:
import netCDF4
import numpy as np
import os
import datetime
import gc
from netCDF4 import Dataset,num2date,date2num

In [2]:
import warnings

# Silence DeprecationWarning messages for the rest of the notebook session so
# they do not clutter the (long) progress output of the cells below.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

In [3]:
# Root folder of the downscaled input data; the processing loop below reads
# one subfolder per climate scenario from it.
# Alternate locations used on other machines:
#   "Z:/Climate_Data"
#   "/Users/ericcompas/Climate_Data"
base_folder = "F:/Climate_Data"

# Only warn here -- the cell still completes so the path can be corrected and
# the cell re-run.
base_folder_found = os.path.isdir(base_folder)
if not base_folder_found:
    print("Base folder not valid")

In [4]:
# Destination folder for the aggregate netCDF files written by the loop below.
# Alternate locations used on other machines:
#   "Z:/Climate_Data/aggregate_files"
#   "/Users/ericcompas/Climate_Data/output"
out_folder = "C:/Users/Eric/Dropbox/Climate_Data/aggregate_files"

# Only warn here -- the cell still completes so the path can be corrected and
# the cell re-run.
out_folder_found = os.path.isdir(out_folder)
if not out_folder_found:
    print("Out folder not valid")

The BIG LOOP.

Loop across all climate scenarios, GCMs, and years, and create one aggregate file for each scenario and 20-year window.

In [5]:
# Aggregate daily precipitation into monthly statistics, one output netCDF
# file per climate scenario and 20-year window.
#
# For every scenario folder under base_folder and every base year
# (2030, 2050, 2070, 2090) this cell:
#   1. reads prcp_<realization>_<year>.nc for each GCM, each of the 20 years
#      y-9 .. y+10 and each realization, converting values to inches;
#   2. collapses leap years to 365 days (Feb 28/29 averaged);
#   3. computes, per month and grid cell: the total of the mean daily
#      precipitation, a standard deviation of the per-GCM daily means, and the
#      estimated number of days with >= 1", >= 2", >= 3" and exactly 0 precip;
#   4. writes the results to out_folder/prcp_<scenario>_<year>_20yr_monthly.nc.
#
# Files that cannot be opened are skipped; only the samples actually read are
# used in the statistics.

# climate scenarios (subfolders of base_folder) to process
models = ["rcp45","rcp85"]

# layout of the input data
REALIZATION_IDS = ["01","02","03"]   # file name infix, prcp_<id>_<year>.nc
WINDOW_YEARS = 20                    # years per aggregation window
GRID_SHAPE = (48, 62)                # (lat, lon) cells, as written to the output file
MM_TO_INCHES = 0.0393701

# day-of-year index where each month starts on a 365-day calendar, plus the
# end of December; month k covers MONTH_EDGES[k]:MONTH_EDGES[k+1]
MONTH_EDGES = [0,31,59,90,120,151,181,212,243,273,304,334,365]


def _collapse_leap_day(prcp):
    """Return *prcp* on a 365-day calendar.

    A 366-day array has Feb 28 and Feb 29 (day indices 58 and 59) replaced by
    their mean; an array of any other length is returned unchanged.
    """
    if prcp.shape[0] != 366:
        return prcp
    feb_mean = np.ma.mean(prcp[58:60,:,:],axis=0)
    first_part = np.ma.append(prcp[0:58,:,:],np.ma.expand_dims(feb_mean,axis=0),axis=0)
    return np.ma.append(first_part,prcp[60:,:,:],axis=0)


def _expected_days(hits, mask):
    """Estimate the number of days per month on which a condition holds.

    *hits* is a boolean array indexed (sample, day, lat, lon) for one month.
    The count of True values per grid cell is divided by the number of
    sample-days and scaled by the days in the month, then masked with *mask*.
    """
    num_days = hits.shape[1]
    num_samples = hits.shape[0] * num_days
    hit_total = hits.sum(axis=0).sum(axis=0)
    return np.ma.masked_array((hit_total / num_samples) * num_days,mask)


# loop across climate scenarios
for m in models:
    print("Processing climate scenario "+m)

    # get subfolders/GCMs for current scenario; stray files (e.g. .DS_Store)
    # are ignored because they cannot contain any input data
    scenario_dir = os.path.join(base_folder,m)
    GCMs = [g for g in os.listdir(scenario_dir)
            if os.path.isdir(os.path.join(scenario_dir,g))]

    # remove 'GFDL-CM3' from RCP45 scenario -- incorrect precip via WICCI Climate Working Group
    if m == "rcp45" and 'GFDL-CM3' in GCMs:
        GCMs.remove('GFDL-CM3')
        print("Removed GFDL-CM3")

    # loop across years in 20-year increments
    for y in range(2030,2091,20):
        print("  Processing base year "+str(y))

        # Pre-allocate for the maximum number of samples; rows that are never
        # filled (missing files) are trimmed off before any statistics.
        num_models = len(GCMs)
        max_samples = num_models*WINDOW_YEARS*len(REALIZATION_IDS)
        prcp_all_array = np.ma.empty((max_samples, 365) + GRID_SHAPE)
        prcp_means_array = np.ma.empty((num_models, 365) + GRID_SHAPE)

        n_samples = 0        # rows of prcp_all_array filled so far
        n_models_used = 0    # rows of prcp_means_array filled so far
        mask = None          # grid mask of the most recently read file
        lat_values = None    # coordinate values copied to the output file
        lon_values = None

        for gcm in GCMs:
            print("    Processing GCM "+gcm)
            prcp_gcm_array = []
            n_skipped = 0
            for yr in range(y-9,y+11):
                print("      Processing year "+str(yr))
                for r in REALIZATION_IDS:
                    nf = os.path.join(base_folder,m,gcm,"r1i1p1","prcp_"+r+"_"+str(yr)+".nc")
                    try:
                        ds = netCDF4.Dataset(nf)
                    except (OSError, RuntimeError):
                        # not every GCM/year/realization has a file; count it
                        # and move on instead of hiding every possible error
                        n_skipped += 1
                        continue

                    # read everything needed, then release the file handle
                    with ds:
                        # convert to inches
                        prcp = ds.variables['prcp'][:,:,:] * MM_TO_INCHES
                        if lat_values is None:
                            # assumes every input file shares one grid -- the
                            # fixed GRID_SHAPE already requires this
                            lat_values = ds.variables['lat'][:]
                            lon_values = ds.variables['lon'][:]

                    # get mask - reapplied to the derived monthly grids below
                    mask = np.ma.getmask(prcp[0])

                    # handle leap years -- average Feb 28 and 29
                    prcp = _collapse_leap_day(prcp)

                    # add to the pooled array and to the gcm-specific list
                    prcp_all_array[n_samples] = prcp
                    prcp_gcm_array.append(prcp)
                    n_samples += 1

            if n_skipped:
                print("      Skipped "+str(n_skipped)+" missing or unreadable file(s)")
            if not prcp_gcm_array:
                print("      No data found for GCM "+gcm+" -- excluded from this window")
                continue

            # mean precip for this gcm over the time window (for std calculation)
            prcp_means_array[n_models_used] = np.ma.mean(prcp_gcm_array,axis=0)
            n_models_used += 1

        if n_samples == 0:
            print("  No input files found for base year "+str(y)+" -- skipping")
            del prcp_all_array
            del prcp_means_array
            continue

        # keep only the rows that actually received data
        prcp_all_array = prcp_all_array[:n_samples]
        prcp_means_array = prcp_means_array[:n_models_used]

        ##################################################
        ## calculate metrics for current 20-year window ##
        ##################################################

        # calculate daily mean from total array
        prcp_mean = np.ma.mean(prcp_all_array,axis=0)

        # Plain-ndarray views of the underlying data, taken once here rather
        # than copying the full arrays for every month. The grid mask is
        # reapplied to each derived result.
        all_data = np.ma.getdata(prcp_all_array)
        model_means_data = np.ma.getdata(prcp_means_array)

        prcp_months_total = np.ma.empty((12,) + GRID_SHAPE)
        prcp_months_std = np.ma.empty((12,) + GRID_SHAPE)
        prcp_months_gt1 = np.ma.empty((12,) + GRID_SHAPE)
        prcp_months_gt2 = np.ma.empty((12,) + GRID_SHAPE)
        prcp_months_gt3 = np.ma.empty((12,) + GRID_SHAPE)
        prcp_months_norain = np.ma.empty((12,) + GRID_SHAPE)

        # loop through months
        for month in range(12):
            first_day = MONTH_EDGES[month]
            end_day = MONTH_EDGES[month+1]

            # month precip total: sum of the mean daily values
            prcp_months_total[month] = np.ma.sum(prcp_mean[first_day:end_day],axis=0)

            # standard deviation over the per-GCM daily means of this month,
            # pooled across models and days into one sample axis
            # NOTE(review): because days are pooled with models, this spread
            # includes day-to-day variation within the month as well as
            # differences between models -- confirm this is the intended
            # "intermodel" statistic.
            pooled_means = model_means_data[:,first_day:end_day].reshape((-1,) + GRID_SHAPE)
            prcp_months_std[month] = np.ma.masked_array(np.ma.std(pooled_means,axis=0),mask)

            # extreme estimates from every sample-day of this month
            month_data = all_data[:,first_day:end_day]
            prcp_months_gt1[month] = _expected_days(month_data >= 1,mask)     # 1" (25.4 mm)
            prcp_months_gt2[month] = _expected_days(month_data >= 2,mask)     # 2" (50.8 mm)
            prcp_months_gt3[month] = _expected_days(month_data >= 3,mask)     # 3" (76.2 mm)
            prcp_months_norain[month] = _expected_days(month_data == 0,mask)  # no precip

        # write netcdf file with results; the with-block closes the file even
        # if one of the writes fails
        filename = "prcp_"+m+"_"+str(y)+"_20yr_monthly.nc"
        newfile = os.path.join(out_folder,filename)
        timeunits = 'days since '+str(y)+'-01-01'

        # (variable name, units, description, monthly grid)
        outputs = [
            ('prcp_total','inches','total of mean of daily precipitation (inches) per month across 20-year window',prcp_months_total),
            ('prcp_std','inches','standard deviation of daily precipitation (inches) per month across 20-year window',prcp_months_std),
            ('prcp_gt1','days','estimated number of days 1" or more of rain (25.4 mm) per month across 20-year window',prcp_months_gt1),
            ('prcp_gt2','days','estimated number of days 2" or more of rain (50.8 mm) per month across 20-year window',prcp_months_gt2),
            ('prcp_gt3','days','estimated number of days 3" or more of rain (76.2 mm) per month across 20-year window',prcp_months_gt3),
            ('prcp_norain','days','estimated number of days with no rain (0 mm) per month across 20-year window',prcp_months_norain),
        ]

        with netCDF4.Dataset(newfile,mode='w',format='NETCDF4_CLASSIC') as ncfile:
            ncfile.createDimension('lat', GRID_SHAPE[0])    # latitude axis
            ncfile.createDimension('lon', GRID_SHAPE[1])    # longitude axis
            ncfile.createDimension('time', None)            # unlimited axis (can be appended to)

            ncfile.title='Aggregate monthly prcp values for WICCI downscaled climate data for all GCMs for '+m+' and 20-year window around year '+str(y)
            ncfile.subtitle="Data source: UW-Madison WICCI; Data aggregation: Eric Compas, compase@uww.edu"

            lat_var = ncfile.createVariable('lat', np.float64, ('lat',))
            lat_var.units = 'degrees_north'
            lat_var.long_name = 'latitude'
            lon_var = ncfile.createVariable('lon', np.float64, ('lon',))
            lon_var.units = 'degrees_east'
            lon_var.long_name = 'longitude'
            time_var = ncfile.createVariable('time', np.float64, ('time',))
            time_var.units = timeunits
            time_var.long_name = 'time'

            # define all data variables first, then write values
            # note: unlimited dimension is leftmost
            data_vars = {}
            for var_name, units, description, _ in outputs:
                data_var = ncfile.createVariable(var_name,np.float32,('time','lat','lon'))
                data_var.units = units
                data_var.standard_name = description
                data_var.missing_value = -32768
                data_vars[var_name] = data_var

            # Write latitudes, longitudes
            # Note: the ":" is necessary in these "write" statements
            lat_var[:] = lat_values
            lon_var[:] = lon_values

            # write monthly grids
            for var_name, _, _, values in outputs:
                data_vars[var_name][:,:,:] = values

            # write time: one stamp per month, on the 15th of base year y
            print("Writing time. Year = "+str(y))
            datesout = [datetime.datetime(y,mm,15,0) for mm in range(1,13)]
            time_var[:] = date2num(datesout,timeunits)

        # report progress
        print("Wrote: "+filename)

        # clear some memory, invoke Python garbage collector
        # (the views and the outputs list must go too, or the big arrays stay alive)
        del outputs
        del all_data
        del model_means_data
        del prcp_all_array
        del prcp_means_array
        del prcp_months_total
        del prcp_months_std
        del prcp_months_gt1
        del prcp_months_gt2
        del prcp_months_gt3
        del prcp_months_norain
        del prcp_mean
        gc.collect()
        print("Cleared some memory up...restarting loop")
        

Processing climate scenario rcp45
Removed GFDL-CM3
  Processing base year 2030
    Processing GCM ACCESS1-0
      Processing year 2021
      Processing year 2022
      Processing year 2023
      Processing year 2024
      Processing year 2025
      Processing year 2026
      Processing year 2027
      Processing year 2028
      Processing year 2029
      Processing year 2030
      Processing year 2031
      Processing year 2032
      Processing year 2033
      Processing year 2034
      Processing year 2035
      Processing year 2036
      Processing year 2037
      Processing year 2038
      Processing year 2039
      Processing year 2040
    Processing GCM ACCESS1-3
      Processing year 2021
      Processing year 2022
      Processing year 2023
      Processing year 2024
      Processing year 2025
      Processing year 2026
      Processing year 2027
      Processing year 2028
      Processing year 2029
      Processing year 2030
      Processing year 2031
      Processing year 2032

      Processing year 2027
      Processing year 2028
      Processing year 2029
      Processing year 2030
      Processing year 2031
      Processing year 2032
      Processing year 2033
      Processing year 2034
      Processing year 2035
      Processing year 2036
      Processing year 2037
      Processing year 2038
      Processing year 2039
      Processing year 2040
    Processing GCM MIROC-ESM-CHEM
      Processing year 2021
      Processing year 2022
      Processing year 2023
      Processing year 2024
      Processing year 2025
      Processing year 2026
      Processing year 2027
      Processing year 2028
      Processing year 2029
      Processing year 2030
      Processing year 2031
      Processing year 2032
      Processing year 2033
      Processing year 2034
      Processing year 2035
      Processing year 2036
      Processing year 2037
      Processing year 2038
      Processing year 2039
      Processing year 2040
    Processing GCM MIROC5
      Processing year 

      Processing year 2052
      Processing year 2053
      Processing year 2054
      Processing year 2055
      Processing year 2056
      Processing year 2057
      Processing year 2058
      Processing year 2059
      Processing year 2060
    Processing GCM GFDL-ESM2M
      Processing year 2041
      Processing year 2042
      Processing year 2043
      Processing year 2044
      Processing year 2045
      Processing year 2046
      Processing year 2047
      Processing year 2048
      Processing year 2049
      Processing year 2050
      Processing year 2051
      Processing year 2052
      Processing year 2053
      Processing year 2054
      Processing year 2055
      Processing year 2056
      Processing year 2057
      Processing year 2058
      Processing year 2059
      Processing year 2060
    Processing GCM HadGEM2-CC
      Processing year 2041
      Processing year 2042
      Processing year 2043
      Processing year 2044
      Processing year 2045
      Processing year 

      Processing year 2077
      Processing year 2078
      Processing year 2079
      Processing year 2080
    Processing GCM ACCESS1-3
      Processing year 2061
      Processing year 2062
      Processing year 2063
      Processing year 2064
      Processing year 2065
      Processing year 2066
      Processing year 2067
      Processing year 2068
      Processing year 2069
      Processing year 2070
      Processing year 2071
      Processing year 2072
      Processing year 2073
      Processing year 2074
      Processing year 2075
      Processing year 2076
      Processing year 2077
      Processing year 2078
      Processing year 2079
      Processing year 2080
    Processing GCM CanESM2
      Processing year 2061
      Processing year 2062
      Processing year 2063
      Processing year 2064
      Processing year 2065
      Processing year 2066
      Processing year 2067
      Processing year 2068
      Processing year 2069
      Processing year 2070
      Processing year 2071

      Processing year 2066
      Processing year 2067
      Processing year 2068
      Processing year 2069
      Processing year 2070
      Processing year 2071
      Processing year 2072
      Processing year 2073
      Processing year 2074
      Processing year 2075
      Processing year 2076
      Processing year 2077
      Processing year 2078
      Processing year 2079
      Processing year 2080
    Processing GCM MIROC5
      Processing year 2061
      Processing year 2062
      Processing year 2063
      Processing year 2064
      Processing year 2065
      Processing year 2066
      Processing year 2067
      Processing year 2068
      Processing year 2069
      Processing year 2070
      Processing year 2071
      Processing year 2072
      Processing year 2073
      Processing year 2074
      Processing year 2075
      Processing year 2076
      Processing year 2077
      Processing year 2078
      Processing year 2079
      Processing year 2080
    Processing GCM MPI-ESM-LR

      Processing year 2092
      Processing year 2093
      Processing year 2094
      Processing year 2095
      Processing year 2096
      Processing year 2097
      Processing year 2098
      Processing year 2099
      Processing year 2100
    Processing GCM HadGEM2-CC
      Processing year 2081
      Processing year 2082
      Processing year 2083
      Processing year 2084
      Processing year 2085
      Processing year 2086
      Processing year 2087
      Processing year 2088
      Processing year 2089
      Processing year 2090
      Processing year 2091
      Processing year 2092
      Processing year 2093
      Processing year 2094
      Processing year 2095
      Processing year 2096
      Processing year 2097
      Processing year 2098
      Processing year 2099
      Processing year 2100
    Processing GCM inmcm4
      Processing year 2081
      Processing year 2082
      Processing year 2083
      Processing year 2084
      Processing year 2085
      Processing year 2086

      Processing year 2034
      Processing year 2035
      Processing year 2036
      Processing year 2037
      Processing year 2038
      Processing year 2039
      Processing year 2040
    Processing GCM CanESM2
      Processing year 2021
      Processing year 2022
      Processing year 2023
      Processing year 2024
      Processing year 2025
      Processing year 2026
      Processing year 2027
      Processing year 2028
      Processing year 2029
      Processing year 2030
      Processing year 2031
      Processing year 2032
      Processing year 2033
      Processing year 2034
      Processing year 2035
      Processing year 2036
      Processing year 2037
      Processing year 2038
      Processing year 2039
      Processing year 2040
    Processing GCM CMCC-CESM
      Processing year 2021
      Processing year 2022
      Processing year 2023
      Processing year 2024
      Processing year 2025
      Processing year 2026
      Processing year 2027
      Processing year 2028

      Processing year 2023
      Processing year 2024
      Processing year 2025
      Processing year 2026
      Processing year 2027
      Processing year 2028
      Processing year 2029
      Processing year 2030
      Processing year 2031
      Processing year 2032
      Processing year 2033
      Processing year 2034
      Processing year 2035
      Processing year 2036
      Processing year 2037
      Processing year 2038
      Processing year 2039
      Processing year 2040
    Processing GCM MIROC-ESM-CHEM
      Processing year 2021
      Processing year 2022
      Processing year 2023
      Processing year 2024
      Processing year 2025
      Processing year 2026
      Processing year 2027
      Processing year 2028
      Processing year 2029
      Processing year 2030
      Processing year 2031
      Processing year 2032
      Processing year 2033
      Processing year 2034
      Processing year 2035
      Processing year 2036
      Processing year 2037
      Processing year

      Processing year 2049
      Processing year 2050
      Processing year 2051
      Processing year 2052
      Processing year 2053
      Processing year 2054
      Processing year 2055
      Processing year 2056
      Processing year 2057
      Processing year 2058
      Processing year 2059
      Processing year 2060
    Processing GCM CSIRO-Mk3-6-0
      Processing year 2041
      Processing year 2042
      Processing year 2043
      Processing year 2044
      Processing year 2045
      Processing year 2046
      Processing year 2047
      Processing year 2048
      Processing year 2049
      Processing year 2050
      Processing year 2051
      Processing year 2052
      Processing year 2053
      Processing year 2054
      Processing year 2055
      Processing year 2056
      Processing year 2057
      Processing year 2058
      Processing year 2059
      Processing year 2060
    Processing GCM GFDL-CM3
      Processing year 2041
      Processing year 2042
      Processing year

      Processing year 2059
      Processing year 2060
    Processing GCM MRI-CGCM3
      Processing year 2041
      Processing year 2042
      Processing year 2043
      Processing year 2044
      Processing year 2045
      Processing year 2046
      Processing year 2047
      Processing year 2048
      Processing year 2049
      Processing year 2050
      Processing year 2051
      Processing year 2052
      Processing year 2053
      Processing year 2054
      Processing year 2055
      Processing year 2056
      Processing year 2057
      Processing year 2058
      Processing year 2059
      Processing year 2060
    Processing GCM MRI-ESM1
      Processing year 2041
      Processing year 2042
      Processing year 2043
      Processing year 2044
      Processing year 2045
      Processing year 2046
      Processing year 2047
      Processing year 2048
      Processing year 2049
      Processing year 2050
      Processing year 2051
      Processing year 2052
      Processing year 205

      Processing year 2063
      Processing year 2064
      Processing year 2065
      Processing year 2066
      Processing year 2067
      Processing year 2068
      Processing year 2069
      Processing year 2070
      Processing year 2071
      Processing year 2072
      Processing year 2073
      Processing year 2074
      Processing year 2075
      Processing year 2076
      Processing year 2077
      Processing year 2078
      Processing year 2079
      Processing year 2080
    Processing GCM inmcm4
      Processing year 2061
      Processing year 2062
      Processing year 2063
      Processing year 2064
      Processing year 2065
      Processing year 2066
      Processing year 2067
      Processing year 2068
      Processing year 2069
      Processing year 2070
      Processing year 2071
      Processing year 2072
      Processing year 2073
      Processing year 2074
      Processing year 2075
      Processing year 2076
      Processing year 2077
      Processing year 2078
  

      Processing year 2087
      Processing year 2088
      Processing year 2089
      Processing year 2090
      Processing year 2091
      Processing year 2092
      Processing year 2093
      Processing year 2094
      Processing year 2095
      Processing year 2096
      Processing year 2097
      Processing year 2098
      Processing year 2099
      Processing year 2100
    Processing GCM CanESM2
      Processing year 2081
      Processing year 2082
      Processing year 2083
      Processing year 2084
      Processing year 2085
      Processing year 2086
      Processing year 2087
      Processing year 2088
      Processing year 2089
      Processing year 2090
      Processing year 2091
      Processing year 2092
      Processing year 2093
      Processing year 2094
      Processing year 2095
      Processing year 2096
      Processing year 2097
      Processing year 2098
      Processing year 2099
      Processing year 2100
    Processing GCM CMCC-CESM
      Processing year 2081

      Processing year 2096
      Processing year 2097
      Processing year 2098
      Processing year 2099
      Processing year 2100
    Processing GCM MIROC-ESM
      Processing year 2081
      Processing year 2082
      Processing year 2083
      Processing year 2084
      Processing year 2085
      Processing year 2086
      Processing year 2087
      Processing year 2088
      Processing year 2089
      Processing year 2090
      Processing year 2091
      Processing year 2092
      Processing year 2093
      Processing year 2094
      Processing year 2095
      Processing year 2096
      Processing year 2097
      Processing year 2098
      Processing year 2099
      Processing year 2100
    Processing GCM MIROC-ESM-CHEM
      Processing year 2081
      Processing year 2082
      Processing year 2083
      Processing year 2084
      Processing year 2085
      Processing year 2086
      Processing year 2087
      Processing year 2088
      Processing year 2089
      Processing ye