Skip to content

Commit

Permalink
bugfix for sparse mesh save when rank 0 lacks proper info to create o… (
Browse files Browse the repository at this point in the history
#1062)

* bugfix for sparse mesh save when rank 0 lacks proper info to create output dir

* fix another bug
  • Loading branch information
cyrush committed Jan 7, 2023
1 parent c30a690 commit 003c657
Show file tree
Hide file tree
Showing 3 changed files with 324 additions and 0 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ Notable changes to Conduit are documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project aspires to adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased

### Fixed

#### Relay
- Fixed a directory creation bug in `relay::io::blueprint::{save_mesh|write_mesh}` that occurred with sparse topologies with no domains on rank 0.
- Fixed a bug in `relay::io::blueprint::{save_mesh|write_mesh}` with the `suffix=cycle` option that could cause the maximum `int` value to be erroneously used as the cycle number in the output directory.


## [0.8.5] - Released 2022-12-22

Expand Down
44 changes: 44 additions & 0 deletions src/libs/relay/conduit_relay_io_blueprint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,6 +1029,10 @@ void write_mesh(const Node &mesh,
opts_suffix = "none";
}
}
else if(opts_suffix == "cycle")
{
cycle = dom["state/cycle"].to_int();
}
else if(opts_suffix == "default")
{
cycle = dom["state/cycle"].to_int();
Expand All @@ -1045,6 +1049,46 @@ void write_mesh(const Node &mesh,
mpi_comm);

cycle = n_reduced.as_int();

// we also need to have all mpi tasks agree on the `opts_suffix`
// checking the first mpi task with domains should be sufficient.
// find first
n_local = local_num_domains;
n_reduced.reset();

relay::mpi::all_gather(n_local,
n_reduced,
mpi_comm);


index_t_accessor counts = n_reduced.value();
index_t idx = -1;
index_t i =0;
NodeConstIterator counts_itr = n_reduced.children();
while(counts_itr.has_next() && idx < 0)
{
const Node &curr = counts_itr.next();
index_t count = curr.to_index_t();
if(count > 0)
{
idx = i;
}
i++;
}

// now broadcast from idx
Node n_opts_suffix;
if(par_rank == idx)
{
n_opts_suffix = opts_suffix;
}

conduit::relay::mpi::broadcast_using_schema(n_opts_suffix,
idx,
mpi_comm);

opts_suffix = n_opts_suffix.as_string();

#endif

// -----------------------------------------------------------
Expand Down
272 changes: 272 additions & 0 deletions src/tests/blueprint/t_blueprint_mpi_mesh_relay.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,278 @@ TEST(blueprint_mpi_relay, test_write_error_hang)
info.print();
}

}

//-----------------------------------------------------------------------------
// note: sparse topo tests are from ascent usecases
// case 1: every MPI rank contributes three domains, and each domain uses
// coordset / topology / field names that are unique to its (rank, domain)
// pair. The mesh is saved with empty options using the hdf5 protocol and the
// test expects the cycle-0 root file to exist afterwards.
// The test is skipped when relay reports hdf5 as not enabled.
TEST(blueprint_mpi_relay, test_sparse_domains_case_1)
{
    Node io_protos;
    relay::io::about(io_protos["io"]);
    bool hdf5_enabled = io_protos["io/protocols/hdf5"].as_string() == "enabled";
    if(!hdf5_enabled)
    {
        CONDUIT_INFO("HDF5 disabled, skipping test_sparse_domains_case_1 test");
        return;
    }

    MPI_Comm comm = MPI_COMM_WORLD;
    int par_rank = mpi::rank(comm);

    Node data;
    ostringstream oss;

    // create mesh where each rank has three domains with different topos
    for(index_t d = 0; d < 3; d++)
    {
        Node &mesh = data.append();

        mesh["state/cycle"] = 0;

        // names embed both the rank and the domain index, so no two
        // domains share a coordset, topology, or field name
        oss.str("");
        oss << "my_coords_rank_" << par_rank << "_" << d;
        std::string c_name = oss.str();

        oss.str("");
        oss << "my_topo_rank_" << par_rank << "_" << d;
        std::string t_name = oss.str();

        oss.str("");
        oss << "my_field_rank_" << par_rank << "_" << d;
        std::string f_name = oss.str();

        // create the coordinate set: 3x3 uniform points
        Node &coords = mesh["coordsets"][c_name];
        coords["type"] = "uniform";
        coords["dims/i"] = 3;
        coords["dims/j"] = 3;
        coords["origin/x"] = -10.0;
        coords["origin/y"] = -10.0;
        coords["spacing/dx"] = 10.0;
        coords["spacing/dy"] = 10.0;

        // uniform topology that references the coordset by name
        Node &topo = mesh["topologies"][t_name];
        topo["type"] = "uniform";
        topo["coordset"] = c_name;

        // element-associated field with 4 values, all set to the domain index
        Node &field = mesh["fields"][f_name];
        field["association"] = "element";
        field["topology"] = t_name;
        field["values"].set(DataType::float64(4));

        float64 *ele_vals_ptr = field["values"].value();

        for(int i = 0; i < 4; i++)
        {
            ele_vals_ptr[i] = float64(d);
        }
    }

    Node verify_info;
    EXPECT_TRUE(conduit::blueprint::mesh::verify(data,verify_info));

    Node opts; // empty for now
    std::string tout_base = "tout_relay_mpi_sparse_case_1_hdf5";

    // clear any root file left by a previous run so the check below is
    // meaningful
    remove_path_if_exists(tout_base + ".cycle_000000.root");
    conduit::relay::mpi::io::blueprint::save_mesh(data,
                                                  tout_base,
                                                  "hdf5",
                                                  opts,
                                                  comm);
    EXPECT_TRUE(conduit::utils::is_file(tout_base + ".cycle_000000.root"));
}

//-----------------------------------------------------------------------------
// note: sparse topo tests are from ascent usecases
// case 2: rank 0 contributes no domains, while every other rank contributes
// three domains with coordset / topology / field names unique to its
// (rank, domain) pair. The mesh is saved with empty options using the hdf5
// protocol and the test expects the cycle-0 root file to exist afterwards.
// The test is skipped when relay reports hdf5 as not enabled.
TEST(blueprint_mpi_relay, test_sparse_domains_case_2)
{
    Node io_protos;
    relay::io::about(io_protos["io"]);
    bool hdf5_enabled = io_protos["io/protocols/hdf5"].as_string() == "enabled";
    if(!hdf5_enabled)
    {
        CONDUIT_INFO("HDF5 disabled, skipping test_sparse_domains_case_2 test");
        return;
    }

    MPI_Comm comm = MPI_COMM_WORLD;
    int par_rank = mpi::rank(comm);

    //
    // Create an example mesh.
    //

    Node data;
    ostringstream oss;

    // ranks > 0 each have 3 domains, rank zero has none
    if(par_rank > 0)
    {
        // three domains with different topos
        for(index_t d = 0; d < 3; d++)
        {
            Node &mesh = data.append();

            mesh["state/cycle"] = 0;

            // names embed both the rank and the domain index, so no two
            // domains share a coordset, topology, or field name
            oss.str("");
            oss << "my_coords_rank_" << par_rank << "_" << d;
            std::string c_name = oss.str();

            oss.str("");
            oss << "my_topo_rank_" << par_rank << "_" << d;
            std::string t_name = oss.str();

            oss.str("");
            oss << "my_field_rank_" << par_rank << "_" << d;
            std::string f_name = oss.str();

            // create the coordinate set
            Node &coords = mesh["coordsets"][c_name];
            coords["type"] = "uniform";
            coords["dims/i"] = 3;
            coords["dims/j"] = 3;
            // add origin and spacing to the coordset (optional)
            coords["origin/x"] = -10.0;
            coords["origin/y"] = -10.0;
            coords["spacing/dx"] = 10.0;
            coords["spacing/dy"] = 10.0;

            // add the topology
            // this case is simple b/c it's implicitly derived from the
            // coordinate set
            Node &topo = mesh["topologies"][t_name];
            topo["type"] = "uniform";
            // reference the coordinate set by name
            topo["coordset"] = c_name;

            // add a simple element-associated field
            Node &field = mesh["fields"][f_name];
            field["association"] = "element";
            // reference the topology this field is defined on by name
            field["topology"] = t_name;
            // set the field values, for this case we have 4 elements
            field["values"].set(DataType::float64(4));

            float64 *ele_vals_ptr = field["values"].value();

            for(int i = 0; i < 4; i++)
            {
                ele_vals_ptr[i] = float64(d);
            }
        }

        // only ranks that actually hold domains run verify
        Node verify_info;
        EXPECT_TRUE(conduit::blueprint::mesh::verify(data,verify_info));
    }

    Node opts; // empty for now
    std::string tout_base = "tout_relay_mpi_sparse_case_2_hdf5";

    // clear any root file left by a previous run so the check below is
    // meaningful
    remove_path_if_exists(tout_base + ".cycle_000000.root");
    conduit::relay::mpi::io::blueprint::save_mesh(data,
                                                  tout_base,
                                                  "hdf5",
                                                  opts,
                                                  comm);
    EXPECT_TRUE(conduit::utils::is_file(tout_base + ".cycle_000000.root"));
}

//-----------------------------------------------------------------------------
// note: sparse topo tests are from ascent usecases
// case 3: same sparse layout as case 2 (rank 0 contributes no domains, every
// other rank contributes three uniquely named domains), but the save is
// requested with the explicit option `suffix` = "cycle". The test expects
// the cycle-0 root file to exist afterwards.
// The test is skipped when relay reports hdf5 as not enabled.
TEST(blueprint_mpi_relay, test_sparse_domains_case_3)
{
    Node io_protos;
    relay::io::about(io_protos["io"]);
    bool hdf5_enabled = io_protos["io/protocols/hdf5"].as_string() == "enabled";
    if(!hdf5_enabled)
    {
        CONDUIT_INFO("HDF5 disabled, skipping test_sparse_domains_case_3 test");
        return;
    }

    MPI_Comm comm = MPI_COMM_WORLD;
    int par_rank = mpi::rank(comm);

    //
    // Create an example mesh.
    //

    Node data;
    ostringstream oss;

    // ranks > 0 each have 3 domains, rank zero has none
    if(par_rank > 0)
    {
        // three domains with different topos
        for(index_t d = 0; d < 3; d++)
        {
            Node &mesh = data.append();

            mesh["state/cycle"] = 0;

            // names embed both the rank and the domain index, so no two
            // domains share a coordset, topology, or field name
            oss.str("");
            oss << "my_coords_rank_" << par_rank << "_" << d;
            std::string c_name = oss.str();

            oss.str("");
            oss << "my_topo_rank_" << par_rank << "_" << d;
            std::string t_name = oss.str();

            oss.str("");
            oss << "my_field_rank_" << par_rank << "_" << d;
            std::string f_name = oss.str();

            // create the coordinate set
            Node &coords = mesh["coordsets"][c_name];
            coords["type"] = "uniform";
            coords["dims/i"] = 3;
            coords["dims/j"] = 3;
            // add origin and spacing to the coordset (optional)
            coords["origin/x"] = -10.0;
            coords["origin/y"] = -10.0;
            coords["spacing/dx"] = 10.0;
            coords["spacing/dy"] = 10.0;

            // add the topology
            // this case is simple b/c it's implicitly derived from the
            // coordinate set
            Node &topo = mesh["topologies"][t_name];
            topo["type"] = "uniform";
            // reference the coordinate set by name
            topo["coordset"] = c_name;

            // add a simple element-associated field
            Node &field = mesh["fields"][f_name];
            field["association"] = "element";
            // reference the topology this field is defined on by name
            field["topology"] = t_name;
            // set the field values, for this case we have 4 elements
            field["values"].set(DataType::float64(4));

            float64 *ele_vals_ptr = field["values"].value();

            for(int i = 0; i < 4; i++)
            {
                ele_vals_ptr[i] = float64(d);
            }
        }

        // only ranks that actually hold domains run verify
        Node verify_info;
        EXPECT_TRUE(conduit::blueprint::mesh::verify(data,verify_info));
    }

    // explicitly request the cycle suffix instead of relying on the default
    Node opts;
    opts["suffix"] = "cycle";
    std::string tout_base = "tout_relay_mpi_sparse_case_3_hdf5";

    // clear any root file left by a previous run so the check below is
    // meaningful
    remove_path_if_exists(tout_base + ".cycle_000000.root");
    conduit::relay::mpi::io::blueprint::save_mesh(data,
                                                  tout_base,
                                                  "hdf5",
                                                  opts,
                                                  comm);
    EXPECT_TRUE(conduit::utils::is_file(tout_base + ".cycle_000000.root"));
}


Expand Down

0 comments on commit 003c657

Please sign in to comment.