From 5ae867d3c8430fb289e47cd033acb48fd27c97a4 Mon Sep 17 00:00:00 2001 From: alperaltuntas Date: Tue, 4 Mar 2025 13:29:05 -0700 Subject: [PATCH 1/4] update lbe.py to reflect changes in MOM6 source code --- cime_config/tools/lbe.py | 116 ++++++++++++++++++++++++++++++--------- 1 file changed, 89 insertions(+), 27 deletions(-) diff --git a/cime_config/tools/lbe.py b/cime_config/tools/lbe.py index ae032f7e..958fbc90 100755 --- a/cime_config/tools/lbe.py +++ b/cime_config/tools/lbe.py @@ -83,16 +83,19 @@ def gen_auto_mask_table( Output directory to write the mask table. """ - ds_topog = xr.open_dataset(topo_file_path) - ny, nx = ds_topog.mask.shape - ibuf = 2 jbuf = 2 num_masked_blocks = 0 - mask = np.zeros((ny + 2 * jbuf, nx + 2 * ibuf)) - - mask[jbuf : ny + jbuf, ibuf : nx + ibuf] = ds_topog.mask.data + ds_topog = xr.open_dataset(topo_file_path) + if 'mask' in ds_topog: + ny, nx = ds_topog.mask.shape + mask = np.zeros((ny + 2 * jbuf, nx + 2 * ibuf)) + mask[jbuf : ny + jbuf, ibuf : nx + ibuf] = ds_topog.mask.data + elif 'wet' in ds_topog: + ny, nx = ds_topog.wet.shape + mask = np.zeros((ny + 2 * jbuf, nx + 2 * ibuf)) + mask[jbuf : ny + jbuf, ibuf : nx + ibuf] = ds_topog.wet.data # fill in buffer cells if reentrant_x: @@ -123,29 +126,64 @@ def gen_auto_mask_table( # ratio of ocean cells to total number of cells glob_ocn_frac = mask[jbuf : ny + jbuf, ibuf : nx + ibuf].sum() / (ny * nx) + pfrac = 0.01 + max_feasible_p = 0 + target_io_pes = args.tiopes + found_feasible_layout = False + # Iteratively check for all possible division counts starting from the upper bound of npes/glob_ocn_frac, - # which is over-optimistic for realistic domains, but may be satisfied with idealized domains. - for p in range(int(np.ceil(npes / glob_ocn_frac)), npes, -1): - - # compute the layout for the current division count, p - idiv, jdiv = MOM_define_layout(nx, ny, p) - - # don't bother checking this p if the aspect ratio is extreme - r_p = (nx / idiv) / (ny / jdiv) - if r_p * r_extreme < 1.0 or r_extreme < r_p: - continue - - # Get the number of masked_blocks for this particular division count - mask_table = determine_land_blocks(mask, nx, ny, idiv, jdiv, ibuf, jbuf) - - # If we can eliminate enough blocks to reach the target npes, adopt - # this p (and the associated layout) and terminate the iteration. - num_masked_blocks = len(mask_table) - if p - num_masked_blocks <= npes: - print("Found the optimum layout for auto-masking. Terminating iteration...") - print(f"\t new ndivs: {p}, num_masked_blocks: {p-npes}") + # which is over-optimistic for realistic domains, but may be satisfied with idealized domains. The first encountered + # feasible division count is stored in max_feasible_p. If the target_io_pes is not achievable with this layout, + # the iteration continues until max_feasible_p * (1 - pfrac) is reached or the target_io_pes is satisfiable. + # If not, the target_io_pes is decremented and the iteration is re-done from max_feasible_p to max_feasible_p * (1 - pfrac). + + for i in range(target_io_pes, 0, -1): + + if found_feasible_layout: break + if (max_feasible_p == 0): # first iteration + p_up = int(np.ceil(npes / glob_ocn_frac)) + else: + p_up = max_feasible_p + + for p in range(p_up, npes, -1): + + # compute the layout for the current division count, p + idiv, jdiv = MOM_define_layout(nx, ny, p) + + # don't bother checking this p if the aspect ratio is extreme + ar = (nx / idiv) / (ny / jdiv) + if ar * r_extreme < 1.0 or r_extreme < ar: + continue + + # Get the number of masked_blocks for this particular division count + mask_table = determine_land_blocks(mask, nx, ny, idiv, jdiv, ibuf, jbuf) + + # If we can eliminate enough blocks to reach the target npes, adopt + # this p (and the associated layout) and terminate the iteration. + num_masked_blocks = len(mask_table) + + if p - num_masked_blocks <= npes: + print(f"ndivs: {p}, masked_blocks: {num_masked_blocks}", " idiv: ", idiv, "jdiv", jdiv) + + if max_feasible_p == 0: + print("^^^^^^^^^^^^^^^ first feasible layout ^^^^^^^^^^^^^^^") + max_feasible_p = p + if (idiv * jdiv) % i == 0: + idiv_io, jdiv_io = determine_io_layout(idiv, jdiv, i) + # if the io layout ratio is extreme, skip this layout + ar = (idiv / idiv_io) / (jdiv / jdiv_io) + if ar * r_extreme < 1.0 or r_extreme < ar: + continue + print(f"IO layout: {idiv_io} x {jdiv_io}") + print("Found the optimum layout for auto-masking. Terminating iteration.") + found_feasible_layout = True + break + + if p <= max_feasible_p * (1 - pfrac): + break + if num_masked_blocks == 0: raise RuntimeError( "Couldn't auto-eliminate any land blocks. Try to increase the number" @@ -153,7 +191,24 @@ def gen_auto_mask_table( # Call determine_land_blocks once again, this time to retrieve and write out the mask_table. mask_table = determine_land_blocks(mask, nx, ny, idiv, jdiv, ibuf, jbuf) - write_auto_mask_file(mask_table, idiv, jdiv, npes, output_dir) + +def determine_io_layout(idiv, jdiv, nio): + """Determines the optimal I/O layout given the number of partitions in x and y direction and the number of I/O PEs.""" + min_ratio_diff = float('inf') + best_idiv_io, best_jdiv_io = 1, nio + + for f in range(1, nio + 1): + if nio % f == 0: + idiv_io, jdiv_io = f, nio // f + + if idiv % idiv_io == 0 and jdiv % jdiv_io == 0: + ratio_diff = abs((idiv_io / jdiv_io) - (idiv / jdiv)) + + if ratio_diff < min_ratio_diff: + min_ratio_diff = ratio_diff + best_idiv_io, best_jdiv_io = idiv_io, jdiv_io + + return best_idiv_io, best_jdiv_io def write_auto_mask_file( @@ -209,6 +264,13 @@ def write_auto_mask_file( required=True, help="Number of MOM6 PEs (NTASKS_OCN)", ) + parser.add_argument( + "--tiopes", + default=1, + type=int, + required=False, + help="Number of target I/O PEs (NTASKS_IO) (default: 1)", + ) parser.add_argument( "-rx", default=False, From 75f030b71237a2cc205a1b9ed2f3a2194671e188 Mon Sep 17 00:00:00 2001 From: alperaltuntas Date: Tue, 4 Mar 2025 13:31:15 -0700 Subject: [PATCH 2/4] changes in MOM_input and input_nml to enable parallel IO and auto merging of partitioned nc files --- param_templates/MOM_input.yaml | 9 +++++++++ param_templates/input_nml.yaml | 4 ++++ param_templates/json/MOM_input.json | 8 ++++++++ param_templates/json/input_nml.json | 6 ++++++ 4 files changed, 27 insertions(+) diff --git a/param_templates/MOM_input.yaml b/param_templates/MOM_input.yaml index 5ffdb482..4f4ba9a3 100644 --- a/param_templates/MOM_input.yaml +++ b/param_templates/MOM_input.yaml @@ -3650,6 +3650,15 @@ Global: datatype: list value: $OCN_GRID in ["tx2_3v2", "tx0.25v1"]: True + TARGET_IO_PES: + description: | + When AUTO_MASKTABLE is enabled, target number of IO PEs. If the given target + number of IO PEs is not achievable, the target number of IO PEs is set to the + nearest smaller number of PEs that is achievable. + datatype: integer + value: + $OCN_GRID == "tx2_3v2": = - ( - $NTASKS_OCN // 256) + $OCN_GRID == "tx0.25v1": = - ( - $NTASKS_OCN // 128) GEOM_FILE: description: | default = ocean_geometry.nc diff --git a/param_templates/input_nml.yaml b/param_templates/input_nml.yaml index 687e76b8..c39d27c1 100644 --- a/param_templates/input_nml.yaml +++ b/param_templates/input_nml.yaml @@ -47,6 +47,10 @@ diag_manager_nml: else: 30 max_axes: values: 90 + auto_merge_nc: + values: + $OCN_GRID in ["tx2_3v2", "tx0.25v1"]: .true. + else: .false. mpp_io_nml: cf_compliance: diff --git a/param_templates/json/MOM_input.json b/param_templates/json/MOM_input.json index 193396ef..ed0d62e5 100644 --- a/param_templates/json/MOM_input.json +++ b/param_templates/json/MOM_input.json @@ -2959,6 +2959,14 @@ "$OCN_GRID in [\"tx2_3v2\", \"tx0.25v1\"]": true } }, + "TARGET_IO_PES": { + "description": "When AUTO_MASKTABLE is enabled, target number of IO PEs. If the given target\nnumber of IO PEs is not achievable, the target number of IO PEs is set to the\nnearest smaller number of PEs that is achievable.\n", + "datatype": "integer", + "value": { + "$OCN_GRID == \"tx2_3v2\"": "= - ( - $NTASKS_OCN // 256)", + "$OCN_GRID == \"tx0.25v1\"": "= - ( - $NTASKS_OCN // 128)" + } + }, "GEOM_FILE": { "description": "default = ocean_geometry.nc\nThe file into which to write the ocean geometry.\n", "datatype": "string", diff --git a/param_templates/json/input_nml.json b/param_templates/json/input_nml.json index 3982a00c..85535b37 100644 --- a/param_templates/json/input_nml.json +++ b/param_templates/json/input_nml.json @@ -55,6 +55,12 @@ }, "max_axes": { "values": 90 + }, + "auto_merge_nc": { + "values": { + "$OCN_GRID in [\"tx2_3v2\", \"tx0.25v1\"]": ".true.", + "else": ".false." + } } }, "mpp_io_nml": { From aea34905377ccc167560e36a57899388a0334cca Mon Sep 17 00:00:00 2001 From: alperaltuntas Date: Tue, 4 Mar 2025 15:59:44 -0700 Subject: [PATCH 3/4] black formatting for lbe.py --- cime_config/tools/lbe.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/cime_config/tools/lbe.py b/cime_config/tools/lbe.py index 958fbc90..09fccb64 100755 --- a/cime_config/tools/lbe.py +++ b/cime_config/tools/lbe.py @@ -88,11 +88,11 @@ def gen_auto_mask_table( num_masked_blocks = 0 ds_topog = xr.open_dataset(topo_file_path) - if 'mask' in ds_topog: + if "mask" in ds_topog: ny, nx = ds_topog.mask.shape mask = np.zeros((ny + 2 * jbuf, nx + 2 * ibuf)) mask[jbuf : ny + jbuf, ibuf : nx + ibuf] = ds_topog.mask.data - elif 'wet' in ds_topog: + elif "wet" in ds_topog: ny, nx = ds_topog.wet.shape mask = np.zeros((ny + 2 * jbuf, nx + 2 * ibuf)) mask[jbuf : ny + jbuf, ibuf : nx + ibuf] = ds_topog.wet.data @@ -128,7 +128,7 @@ def gen_auto_mask_table( pfrac = 0.01 max_feasible_p = 0 - target_io_pes = args.tiopes + target_io_pes = args.tiopes found_feasible_layout = False # Iteratively check for all possible division counts starting from the upper bound of npes/glob_ocn_frac, @@ -142,7 +142,7 @@ def gen_auto_mask_table( if found_feasible_layout: break - if (max_feasible_p == 0): # first iteration + if max_feasible_p == 0: # first iteration p_up = int(np.ceil(npes / glob_ocn_frac)) else: p_up = max_feasible_p @@ -165,7 +165,13 @@ def gen_auto_mask_table( num_masked_blocks = len(mask_table) if p - num_masked_blocks <= npes: - print(f"ndivs: {p}, masked_blocks: {num_masked_blocks}", " idiv: ", idiv, "jdiv", jdiv) + print( + f"ndivs: {p}, masked_blocks: {num_masked_blocks}", + " idiv: ", + idiv, + "jdiv", + jdiv, + ) if max_feasible_p == 0: print("^^^^^^^^^^^^^^^ first feasible layout ^^^^^^^^^^^^^^^") @@ -177,10 +183,12 @@ def gen_auto_mask_table( if ar * r_extreme < 1.0 or r_extreme < ar: continue print(f"IO layout: {idiv_io} x {jdiv_io}") - print("Found the optimum layout for auto-masking. Terminating iteration.") + print( + "Found the optimum layout for auto-masking. Terminating iteration." + ) found_feasible_layout = True break - + if p <= max_feasible_p * (1 - pfrac): break @@ -192,9 +200,10 @@ def gen_auto_mask_table( # Call determine_land_blocks once again, this time to retrieve and write out the mask_table. mask_table = determine_land_blocks(mask, nx, ny, idiv, jdiv, ibuf, jbuf) + def determine_io_layout(idiv, jdiv, nio): """Determines the optimal I/O layout given the number of partitions in x and y direction and the number of I/O PEs.""" - min_ratio_diff = float('inf') + min_ratio_diff = float("inf") best_idiv_io, best_jdiv_io = 1, nio for f in range(1, nio + 1): @@ -270,7 +279,7 @@ def write_auto_mask_file( type=int, required=False, help="Number of target I/O PEs (NTASKS_IO) (default: 1)", - ) + ) parser.add_argument( "-rx", default=False, From 66749e36cb4d02e21b5895a46ac168a47489d015 Mon Sep 17 00:00:00 2001 From: alperaltuntas Date: Tue, 4 Mar 2025 16:07:47 -0700 Subject: [PATCH 4/4] Set black version to 24.1 --- .github/workflows/general-ci-tests.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/general-ci-tests.yml b/.github/workflows/general-ci-tests.yml index f58eca40..474ba587 100644 --- a/.github/workflows/general-ci-tests.yml +++ b/.github/workflows/general-ci-tests.yml @@ -133,11 +133,10 @@ jobs: - uses: actions/checkout@v4 # Run black check - - uses: psf/black@stable + - uses: psf/black@24.1.0 with: options: "--check --verbose" src: "./cime_config" -