diff --git a/CHANGELOG.md b/CHANGELOG.md index b9db747..2fa812e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Do not remap groups of restarts when it is not necessary, make copies instead. - Addition of SST and FRACI forecast (ocean) boundary conditions generation capability in `pre/prepare_ocnExtData` - Added EASE grid option for remapping of land restarts in remap_restarts.py package (facilitates use of package in GEOSldas setup script) - Added support for SLES15, NAS site and log for remap_lake_landice_saltwater in remap_restarts.py package diff --git a/pre/remap_restart/remap_base.py b/pre/remap_restart/remap_base.py index f9ee881..d018911 100755 --- a/pre/remap_restart/remap_base.py +++ b/pre/remap_restart/remap_base.py @@ -33,3 +33,59 @@ def remove_merra2(self): if self.config['input']['shared']['MERRA-2']: print(" remove temporary folder that contains MERRA-2 archived files ... 
def copy_without_remap(self, restarts_in, compared_file_in, compared_file_out, suffix, catch=False):
    """Copy a group of restart files to the output dir if no remapping is needed.

    Whether remapping is needed is decided from the following dependency table:

        restarts     | agrid/stretch | #levels | topo files | tile file | bcs version
        ------------------------------------------------------------------------------
        upper air    |       X       |    X    |     X      |           |
        catch/vegdyn |       X       |         |            |     X     |      X
        landice      |       X       |         |            |     X     |

    Args:
        restarts_in: paths of the input restart files that form one group.
        compared_file_in: input-side topo or tile file (see table) to compare.
        compared_file_out: output-side counterpart of ``compared_file_in``.
        suffix: text placed after ``_rst.`` in every output file name.
        catch: when true, the input and output ``bc_version`` must also match.

    Returns:
        True if the restarts were copied to ``out_dir`` (nothing left to remap),
        False if the group still has to be remapped; nothing is copied then.

    Raises:
        OSError: if a restart file cannot be copied.  Returning True after a
            failed copy would make the caller skip remapping and leave the
            output restarts missing.
    """
    # Function-scope imports: this block cannot see the module's import section.
    import filecmp
    import shutil

    config = self.config
    shared_in = config['input']['shared']
    shared_out = config['output']['shared']

    # Every restart group depends on the atmospheric grid and its stretching.
    if shared_in['agrid'] != shared_out['agrid'] or shared_in['stretch'] != shared_out['stretch']:
        return False

    # Upper air only: the number of levels in fvcore must match the request.
    for rst in restarts_in:
        if 'fvcore_internal' in rst:
            # Imported here so that the method does not depend on `nc` being
            # defined at module level in whichever file hosts this method.
            import netCDF4 as nc
            with nc.Dataset(rst) as fvrst:  # close the file handle when done
                in_levels = fvrst.dimensions['lev'].size
            if in_levels != int(config['output']['air']['nlevel']):
                return False

    # For catchment, even if the tile files are the same, a different bcs
    # version still requires remapping.
    if catch and shared_in['bc_version'] != shared_out['bc_version']:
        return False

    # Byte-wise comparison of the topo/tile files.  Done in-process instead of
    # spawning `diff -q`, so paths containing whitespace are handled correctly.
    print('\nCompare ' + compared_file_in + ' ' + compared_file_out)
    try:
        same = filecmp.cmp(compared_file_in, compared_file_out, shallow=False)
    except OSError:
        # `diff` reported an unreadable/missing file as a non-zero status,
        # which meant "remap"; keep that outcome.
        same = False
    if not same:
        return False

    expid = shared_out['expid']
    prefix = expid + '.' if expid else ''
    out_dir = shared_out['out_dir']
    # Not every caller has created the output directory at this point.
    os.makedirs(out_dir, exist_ok=True)

    print('\nCopy restart files from orignal restart files without remapping. \n')
    for rst in restarts_in:
        # "<anything>.<name>_rst<anything>" -> "<expid.><name>_rst.<suffix>"
        name = prefix + os.path.basename(rst).split('_rst')[0].split('.')[-1] + '_rst.' + suffix
        target = os.path.join(out_dir, name)
        print('\n/bin/cp ' + rst + ' ' + target)
        try:
            shutil.copy(rst, target)
        except shutil.SameFileError:
            # Input restart already is the output file; nothing to copy.
            pass

    return True
+ suffix out_rstfile = expid + os.path.basename(in_rstfile).split('_rst')[0].split('.')[-1]+suffix @@ -433,6 +437,8 @@ def remap_land_only(): catch = catchANDcn(params_file=config_yaml) catch.remap() + + if __name__ == '__main__' : remap_land_only() diff --git a/pre/remap_restart/remap_lake_landice_saltwater.py b/pre/remap_restart/remap_lake_landice_saltwater.py index 5603e1c..ca2f9cd 100755 --- a/pre/remap_restart/remap_lake_landice_saltwater.py +++ b/pre/remap_restart/remap_lake_landice_saltwater.py @@ -37,12 +37,45 @@ def remap(self): config = self.config cwdir = os.getcwd() bindir = os.path.dirname(os.path.realpath(__file__)) + in_bc_base = config['input']['shared']['bc_base'] in_bc_version = config['input']['shared']['bc_version'] out_bc_base = config['output']['shared']['bc_base'] out_bc_version= config['output']['shared']['bc_version'] + agrid = config['input']['shared']['agrid'] + ogrid = config['input']['shared']['ogrid'] + omodel = config['input']['shared']['omodel'] + stretch = config['input']['shared']['stretch'] + in_geomdir = get_geomdir(in_bc_base, in_bc_version, agrid=agrid, ogrid=ogrid, omodel=omodel, stretch=stretch) + in_tile_file = glob.glob(in_geomdir+ '/*-Pfafstetter.til')[0] + + agrid = config['output']['shared']['agrid'] + ogrid = config['output']['shared']['ogrid'] + omodel = config['output']['shared']['omodel'] + stretch = config['output']['shared']['stretch'] + out_geomdir = get_geomdir(out_bc_base, out_bc_version, agrid=agrid, ogrid=ogrid, omodel=omodel, stretch=stretch) + out_tile_file = glob.glob(out_geomdir+ '/*-Pfafstetter.til')[0] + + types = '.bin' + type_str = sp.check_output(['file','-b', os.path.realpath(restarts_in[0])]) + type_str = str(type_str) + if 'Hierarchical' in type_str: + types = '.nc4' + yyyymmddhh_ = str(config['input']['shared']['yyyymmddhh']) + + label = get_label(config) + suffix = yyyymmddhh_[0:8]+'_'+yyyymmddhh_[8:10] +'z' + types + label + out_dir = config['output']['shared']['out_dir'] + expid = 
config['output']['shared']['expid'] + if (expid) : + expid = expid + '.' + else: + expid = '' + + no_remap = self.copy_without_remap(restarts_in, in_tile_file, out_tile_file, suffix) + if no_remap : return if not os.path.exists(out_dir) : os.makedirs(out_dir) print( "cd " + out_dir) @@ -58,15 +91,6 @@ def remap(self): print ("mkdir " + OutData_dir) os.makedirs(OutData_dir) - types = '.bin' - type_str = sp.check_output(['file','-b', os.path.realpath(restarts_in[0])]) - type_str = str(type_str) - if 'Hierarchical' in type_str: - types = '.nc4' - yyyymmddhh_ = str(config['input']['shared']['yyyymmddhh']) - - label = get_label(config) - suffix = yyyymmddhh_[0:8]+'_'+yyyymmddhh_[8:10] +'z' + types + label saltwater = '' seaice = '' @@ -87,20 +111,6 @@ def remap(self): if 'roue' in f : route = f if 'openwater' in f : openwater = f - agrid = config['input']['shared']['agrid'] - ogrid = config['input']['shared']['ogrid'] - omodel = config['input']['shared']['omodel'] - stretch = config['input']['shared']['stretch'] - in_geomdir = get_geomdir(in_bc_base, in_bc_version, agrid=agrid, ogrid=ogrid, omodel=omodel, stretch=stretch) - in_tile_file = glob.glob(in_geomdir+ '/*-Pfafstetter.til')[0] - - agrid = config['output']['shared']['agrid'] - ogrid = config['output']['shared']['ogrid'] - omodel = config['output']['shared']['omodel'] - stretch = config['output']['shared']['stretch'] - out_geomdir = get_geomdir(out_bc_base, out_bc_version, agrid=agrid, ogrid=ogrid, omodel=omodel, stretch=stretch) - out_tile_file = glob.glob(out_geomdir+ '/*-Pfafstetter.til')[0] - in_til = InData_dir+'/' + os.path.basename(in_tile_file) out_til = OutData_dir+'/'+ os.path.basename(out_tile_file) @@ -152,11 +162,6 @@ def remap(self): cmd = route + out_til + ' ' + yyyymmddhh_[0:6] self.run_and_log(cmd, log_name) - expid = config['output']['shared']['expid'] - if (expid) : - expid = expid + '.' - else: - expid = '' suffix = '_rst.' 
+ suffix for out_rst in glob.glob("OutData/*_rst*"): filename = expid + os.path.basename(out_rst).split('_rst')[0].split('.')[-1]+suffix diff --git a/pre/remap_restart/remap_restarts.py b/pre/remap_restart/remap_restarts.py index 78ae59f..f2a380f 100755 --- a/pre/remap_restart/remap_restarts.py +++ b/pre/remap_restart/remap_restarts.py @@ -26,24 +26,28 @@ program_description = textwrap.dedent(f''' USAGE: - This script provides three options for remapping GEOS restart files: + This script provides four options for remapping GEOS restart files: - 1. Use the interactive questionary: + 1. Use the interactive questionnaire (recommended): ./remap_restarts.py - The questionary concludes with the option to submit the remapping job. + The questionnaire concludes with the option to submit the remapping job. It also creates a yaml configuration file (`remap_params.yaml`) and - a command line options string (`remap_restarts.CMD`), which can be edited - manually and used in the other two ways of running `remap_restarts.py`. + a matching command line argument string (`remap_restarts.CMD`), which can be edited + manually and used in the next two run options. - 2. Use an existing yaml config file: + 2. Use an existing yaml config file*: ./remap_restarts.py config_file -c my_config.yaml - 3. Use command line arguments: - ./remap_restarts.py command_line -ymdh 2004041421 .... + 3. Use command line arguments*: + ./remap_restarts.py command_line -ymdh 2004041421 ... 4. For GEOSldas: Remap land (catch[cn]) restart only; global domain only; ens0000 only: ./remap_restarts.py land_only + *NOTE: The yaml and command-line interfaces may not be backward compatible across + releases. If existing yaml files or command-line strings do not work, + use the questionnaire option to generate updated versions. 
+ Help commands: ./remap_restarts.py -h ./remap_restarts.py config_file -h diff --git a/pre/remap_restart/remap_upper.py b/pre/remap_restart/remap_upper.py index 1f28c8a..6704d8d 100755 --- a/pre/remap_restart/remap_upper.py +++ b/pre/remap_restart/remap_upper.py @@ -17,6 +17,7 @@ from remap_base import remap_base from remap_utils import * from remap_bin2nc import bin2nc +import netCDF4 as nc class upperair(remap_base): def __init__(self, **configs): @@ -59,18 +60,7 @@ def remap(self): out_dir = config['output']['shared']['out_dir'] if not os.path.exists(out_dir) : os.makedirs(out_dir) - print( "cd " + out_dir) - os.chdir(out_dir) - - tmpdir = out_dir+'/upper_data/' - if os.path.exists(tmpdir) : subprocess.call(['rm', '-rf',tmpdir]) - print ("mkdir " + tmpdir) - os.makedirs(tmpdir) - - print( "cd " + tmpdir) - os.chdir(tmpdir) - print('\nUpper air restart file names link from "_rst" to "_restart_in" \n') types = '.bin' type_str = subprocess.check_output(['file','-b', os.path.realpath(restarts_in[0])]) type_str = str(type_str) @@ -80,29 +70,18 @@ def remap(self): label = get_label(config) suffix = yyyymmddhh_[0:8]+'_'+yyyymmddhh_[8:10] +'z' + types + label - for rst in restarts_in : - f = os.path.basename(rst).split('_rst')[0].split('.')[-1]+'_restart_in' - cmd = '/bin/ln -s ' + rst + ' ' + f - print('\n'+cmd) - subprocess.call(shlex.split(cmd)) - + in_bc_base = config['input']['shared']['bc_base'] + if "gmao_SIteam/ModelData" in in_bc_base: + assert GEOS_SITE == "NAS", "wrong site to run the package" + in_bc_version = config['input']['shared']['bc_version'] agrid = config['input']['shared']['agrid'] ogrid = config['input']['shared']['ogrid'] omodel = config['input']['shared']['omodel'] stretch = config['input']['shared']['stretch'] topo_bcsdir = get_topodir(in_bc_base, in_bc_version, agrid=agrid, ogrid=ogrid, omodel=omodel, stretch=stretch) - - if "gmao_SIteam/ModelData" in in_bc_base: - assert GEOS_SITE == "NAS", "wrong site to run the package" - topoin = 
glob.glob(topo_bcsdir+'/topo_DYN_ave*.data')[0] - # link topo file - - cmd = '/bin/ln -s ' + topoin + ' .' - print('\n'+cmd) - subprocess.call(shlex.split(cmd)) out_bc_base = config['output']['shared']['bc_base'] out_bc_version = config['output']['shared']['bc_version'] @@ -111,15 +90,44 @@ def remap(self): omodel = config['output']['shared']['omodel'] stretch = config['output']['shared']['stretch'] topo_bcsdir = get_topodir(out_bc_base, out_bc_version, agrid=agrid, ogrid=ogrid, omodel=omodel, stretch=stretch) - topoout = glob.glob(topo_bcsdir+'/topo_DYN_ave*.data')[0] + + expid = config['output']['shared']['expid'] + if (expid) : + expid = expid + '.' + else: + expid = '' + + no_remap = self.copy_without_remap(restarts_in, topoin, topoout, suffix) + if (no_remap) : return + + print( "cd " + out_dir) + os.chdir(out_dir) + + tmpdir = out_dir+'/upper_data/' + if os.path.exists(tmpdir) : subprocess.call(['rm', '-rf',tmpdir]) + print ("mkdir " + tmpdir) + os.makedirs(tmpdir) + + print( "cd " + tmpdir) + os.chdir(tmpdir) + + print('\nUpper air restart file names link from "_rst" to "_restart_in" \n') + for rst in restarts_in : + f = os.path.basename(rst).split('_rst')[0].split('.')[-1]+'_restart_in' + cmd = '/bin/ln -s ' + rst + ' ' + f + print('\n'+cmd) + subprocess.call(shlex.split(cmd)) + + # link topo file + + cmd = '/bin/ln -s ' + topoin + ' .' + print('\n'+cmd) + subprocess.call(shlex.split(cmd)) + cmd = '/bin/ln -s ' + topoout + ' topo_dynave.data' print('\n'+cmd) subprocess.call(shlex.split(cmd)) - #fname = os.path.basename(topoout) - #cmd = '/bin/ln -s ' + fname + ' topo_dynave.data' - #print('\n'+cmd) - #subprocess.call(shlex.split(cmd)) agrid = config['output']['shared']['agrid'] if agrid[0].upper() == 'C': @@ -307,11 +315,6 @@ def remap(self): # # post process # - expid = config['output']['shared']['expid'] - if (expid) : - expid = expid + '.' - else: - expid = '' suffix = '_rst.' 
+ suffix for out_rst in glob.glob("*_rst*"): @@ -371,6 +374,7 @@ def copy_merra2(self): merra_2_rst_dir + expid+'.gocart_internal_rst.' + suffix, merra_2_rst_dir + expid+'.pchem_internal_rst.' + suffix, merra_2_rst_dir + expid+'.agcm_import_rst.' + suffix ] + bin2nc_yaml = ['bin2nc_merra2_fv.yaml', 'bin2nc_merra2_moist.yaml', 'bin2nc_merra2_gocart.yaml', 'bin2nc_merra2_pchem.yaml','bin2nc_merra2_agcm.yaml'] bin_path = os.path.dirname(os.path.realpath(__file__)) for (f, yf) in zip(upperin,bin2nc_yaml) :