tgb - 15/02/2023 - The goal is to subsample the real-geography and aquaplanet datasets so that they can be easily used in tutorials and online courses.

# Imports

In [1]:
from cbrain.climate_invariant import *

import matplotlib as mpl
import matplotlib.pyplot as plt
import pickle
import xarray as xr

/nfspool-0/home/tbeucler/CBRAIN-CAM/notebooks/tbeucler_devlog


# Paths

In [2]:
# Registry that later cells fill with file paths, keyed by climate label,
# and the root directory that all data file names are appended to.
path_array = dict()
path_data = '/DFS-L/DATA/pritchard/tbeucler/SPCAM/SPCAM_PHYS/'

In [3]:
# Labels for the dataset splits, the training climates, and the test climates
# (the test climates are the training ones plus 'medium').
set_str = [
    'train',
    'valid',
    'test',
]
climate_str = [
    'cold',
    'hot',
    'both',
]
test_clim_str = climate_str + ['medium']

In [4]:
# For every climate label, store the full paths of its files.
# File names are listed in (TRAIN, VALID, TEST) order, so index 2 is the test set.
path_array.update({
    climate: [path_data + file_name for file_name in file_names]
    for climate, file_names in (
        ('cold', ('2021_03_18_O3_TRAIN_M4K_shuffle.nc',
                  '2021_03_18_O3_VALID_M4K.nc',
                  '2021_03_18_O3_TEST_M4K.nc')),
        ('hot', ('2021_03_18_O3_TRAIN_P4K_shuffle.nc',
                 '2021_03_18_O3_VALID_P4K.nc',
                 '2021_03_18_O3_TEST_P4K.nc')),
        ('both', ('2022_04_18_TRAIN_M4K_P4K_shuffle.nc',
                  '2022_04_18_VALID_M4K_P4K.nc',
                  '2022_04_18_TEST_M4K_P4K.nc')),
        ('medium', ('2021_01_24_O3_TRAIN_shuffle.nc',
                    '2021_01_24_O3_VALID.nc',
                    '2021_01_24_O3_TEST.nc')),
    )
})

In [5]:
# Paths to the normalization files / training files of each input rescaling,
# plus the dictionaries of output scaling factors.
path_input_norm = path_data + '2021_01_24_NORM_O3_small.nc'
# Use a context manager so the pickle file handle is closed right after loading.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(path_data+'009_Wm2_scaling.pkl','rb') as scaling_file:
    scale_dict = pickle.load(scaling_file)
path_norm_RH = path_data + '2021_02_01_NORM_O3_RH_small.nc'
scale_dict_RH = scale_dict.copy()
# No trailing comma here: `value,` would store a 1-element tuple instead of a float.
# Arbitrary scaling factor, as specific humidity is generally below 2%
# (the code uses 0.01 whereas the original note said 0.1 -- TODO confirm which is intended).
scale_dict_RH['RH'] = 0.01*L_S/G
path_train_RH = path_data + '2021_01_24_O3_small_shuffle.nc'
path_norm_BMSE = path_data + '2021_06_16_NORM_BMSE_small.nc'
path_train_BMSE = path_data + '2021_06_16_BMSE_small_shuffle.nc'
path_norm_LHF_nsDELQ = path_data + '2021_02_01_NORM_O3_LHF_nsDELQ_small.nc'
path_train_LHF_nsDELQ = path_data + '2021_02_01_O3_LHF_nsQ_small_shuffle.nc'

In [6]:
# Inputs: the large-scale climate state
in_vars = [
    'QBP',
    'TBP',
    'PS',
    'SOLIN',
    'SHFLX',
    'LHFLX',
]
# Outputs: the response of clouds/storms to these climate conditions
out_vars = [
    'PHQ',
    'TPHYSTND',
    'QRL',
    'QRS',
]

In [7]:
# Load the dictionary of output scaling factors; the context manager closes the
# file handle as soon as the pickle has been read.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(path_data+'009_Wm2_scaling.pkl','rb') as scaling_file:
    scale_dict = pickle.load(scaling_file)

In [8]:
# Default font size, line width and marker size for the figures
fz, lw, siz = 15, 2, 100

# Global matplotlib settings: no LaTeX rendering, STIX math fonts
mpl.rcParams['text.usetex'] = False
mpl.rcParams.update({
    'mathtext.fontset': 'stix',
    'font.family': 'STIXGeneral',
})
# This call sets font.family again, so 'serif' replaces the 'STIXGeneral' value set just above
plt.rc('font', family='serif', size=fz)
plt.rc('lines', linewidth=lw)

# Reduce cold and warm datasets by a factor of 20

## Aquaplanet

### Test -4K

In [12]:
# Open the test file of the 'cold' climate (index 2 of its path list)
test_m4k = xr.open_dataset(path_array['cold'][2])
# End_ind is one twentieth of the length of the first dimension of 'vars'
End_ind = int(len(test_m4k['vars']) / 20)

In [10]:
# Keep only the first End_ind entries along the first dimension of 'vars'
test_m4k_reduced = test_m4k['vars'][slice(End_ind), slice(None)]

In [11]:
# Display the reduced DataArray to inspect it before any conversion
test_m4k_reduced

In [12]:
# Downcast the reduced data to single precision
test_m4k_reduced.values = test_m4k_reduced.values.astype(np.float32)

In [13]:
# Check the element type of the original (unreduced) 'vars' array
type(test_m4k['vars'].values[0,0])

numpy.float64

In [14]:
# Check the element type of the reduced array after the float32 conversion
type(test_m4k_reduced.values[0,0])

numpy.float32

In [15]:
# Downcast the 'lat' and 'lon' entries of the reduced array to single precision as well
for coord_name in ('lat', 'lon'):
    test_m4k_reduced[coord_name].values = test_m4k_reduced[coord_name].values.astype(np.float32)

In [16]:
# Display the reduced DataArray again, after the float32 conversions
test_m4k_reduced

In [17]:
# Write the reduced -4K test array to a new netCDF file (overwriting any existing file)
test_m4k_reduced.to_netcdf(
    path=path_data + '2023_15_02_TEST_M4K_reduced.nc',
    mode='w',
)

### Test +4K

In [21]:
# Open the test file of the 'hot' climate (index 2 of its path list) and keep
# the first End_ind entries along the first dimension of 'vars'.
# End_ind is reused from an earlier cell.
test_p4k = xr.open_dataset(path_array['hot'][2])
test_p4k_reduced = test_p4k['vars'][slice(End_ind), slice(None)]

In [23]:
# Downcast the data, then its 'lat' and 'lon' entries, to single precision
test_p4k_reduced.values = test_p4k_reduced.values.astype(np.float32)
test_p4k_reduced['lat'].values = test_p4k_reduced['lat'].values.astype(np.float32)
test_p4k_reduced['lon'].values = test_p4k_reduced['lon'].values.astype(np.float32)

In [25]:
# Write the reduced +4K test array to a new netCDF file (overwriting any existing file)
test_p4k_reduced.to_netcdf(
    path=path_data + '2023_15_02_TEST_P4K_reduced.nc',
    mode='w',
)

## Real-geography

In [9]:
# Real-geography test files to read (M4K then P4K) ...
path_RG = [path_data + file_name
           for file_name in ('2021_04_18_RG_TEST_M4K.nc', '2021_04_18_RG_TEST_P4K.nc')]
# ... and, in the same order, the files the reduced copies are written to
path_tow = [path_data + file_name
            for file_name in ('2023_15_02_RG_TEST_M4K_red.nc', '2023_15_02_RG_TEST_P4K_red.nc')]

In [10]:
# Input files: real-geography test sets (M4K, P4K)
path_RG = [
    path_data + '2021_04_18_RG_TEST_M4K.nc',
    path_data + '2021_04_18_RG_TEST_P4K.nc',
]
# Output files for the reduced copies, listed in the same order as path_RG
path_tow = [
    path_data + '2023_15_02_RG_TEST_M4K_red.nc',
    path_data + '2023_15_02_RG_TEST_P4K_red.nc',
]

In [13]:
# Subsample each real-geography test file, convert it to float32 and write the
# result to the matching entry of path_tow.
for i,path in enumerate(path_RG):
    # The context manager closes each source file once its reduced copy has been
    # written, instead of leaving one open file handle per loop iteration.
    with xr.open_dataset(path) as xr_data:
        # Keep the first 2*End_ind entries along the first dimension
        # (End_ind is defined in an earlier cell)
        data_red = xr_data['vars'][:2*End_ind,:]
        # Assigning .values loads the data into memory as single precision
        data_red.values = np.float32(data_red.values)
        data_red['lat'].values = np.float32(data_red['lat'].values)
        data_red['lon'].values = np.float32(data_red['lon'].values)
        data_red.to_netcdf(path_tow[i],mode='w')

# Also reduce the data used to build the normalization data generators

In [28]:
# Root data directory, followed by the training files used for the
# RH, BMSE and LHF_nsDELQ normalization data generators
path_data = '/DFS-L/DATA/pritchard/tbeucler/SPCAM/SPCAM_PHYS/'
path_train_RH, path_train_BMSE, path_train_LHF_nsDELQ = (
    path_data + '2021_02_01_O3_RH_small_shuffle.nc',
    path_data + '2021_06_16_BMSE_small_shuffle.nc',
    path_data + '2021_02_01_O3_LHF_nsDELQ_small_shuffle.nc',
)

In [29]:
# Open the three training files (RH, BMSE, LHF_nsDELQ), in that order
RH_da, BMSE_da, LHFns_da = map(
    xr.open_dataset,
    (path_train_RH, path_train_BMSE, path_train_LHF_nsDELQ),
)

In [14]:
# Subsample by a factor of 50: End_ind is one fiftieth of the length of the
# first dimension of the RH 'vars' array (this overwrites any earlier End_ind)
End_ind = int(len(RH_da['vars']) / 50)

In [21]:
# Keep the first End_ind entries of the RH data, downcast them to single
# precision and write them to a new netCDF file
RH_red = RH_da['vars'][slice(End_ind), slice(None)]
RH_red.values = RH_red.values.astype(np.float32)
RH_red.to_netcdf(path=path_data + '2023_20_03_RH_training_reduced.nc', mode='w')

In [22]:
# Keep the first End_ind entries of the BMSE data, downcast them to single
# precision and write them to a new netCDF file
BMSE_red = BMSE_da['vars'][slice(End_ind), slice(None)]
BMSE_red.values = BMSE_red.values.astype(np.float32)
BMSE_red.to_netcdf(path=path_data + '2023_20_03_BMSE_training_reduced.nc', mode='w')

In [31]:
# Keep the first End_ind entries of the LHF_nsDELQ data, downcast them to single
# precision and write them to a new netCDF file
LHFns_red = LHFns_da['vars'][slice(End_ind), slice(None)]
LHFns_red.values = LHFns_red.values.astype(np.float32)
LHFns_red.to_netcdf(path=path_data + '2023_20_03_LHFnsDELQ_training_reduced.nc', mode='w')

In [30]:
# Print the variable name attached to each entry of 'var_names' in the LHF_nsDELQ dataset
print(LHFns_da['var_names'].values)

['QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP'
 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP'
 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'QBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP'
 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP'
 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP' 'TBP'
 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA'
 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA'
 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA'
 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA' 'O3_AQUA'
 'O3_AQUA' 'O3_AQUA' 'PS' 'SOLIN' 'SHFLX' 'LHF_nsDELQ' 'PHQ' 'PHQ' 'PHQ'
 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ'
 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ' 'PHQ'
 'PHQ' 'PHQ' 'PHQ' 'TPHYSTND' 'TPHYSTND' 'TPHYSTND' 'TPHYSTND' 'TPHYSTND'
 'TPHYSTND' 'TPHYSTND' 'TPHYSTND' 'TPHYSTND' 'TPHYSTND' 'T