Merge pull request #21 from LLNL/feature/elliott/reduce-ranks
Feature/elliott/reduce ranks
nselliott committed May 8, 2019
2 parents b8d1ae4 + 5ddb3ae commit 5061932
Showing 7 changed files with 334 additions and 93 deletions.
10 changes: 8 additions & 2 deletions src/axom/sidre/core/Group.cpp
@@ -1427,7 +1427,10 @@ void Group::load(const std::string& path,
     SLIC_ERROR("Invalid protocol " << protocol << " for file load.");
   }
 
-  renameOrWarn(new_name);
+  if (!new_name.empty())
+  {
+    renameOrWarn(new_name);
+  }
 }
 
 /*
@@ -1587,7 +1590,10 @@ void Group::load(const hid_t& h5_id,
     SLIC_ERROR("Invalid protocol " << protocol << " for file load.");
   }
 
-  renameOrWarn(new_name);
+  if (!new_name.empty())
+  {
+    renameOrWarn(new_name);
+  }
 }
 
 /*
30 changes: 21 additions & 9 deletions src/axom/sidre/docs/sphinx/parallel_io_concepts.rst
@@ -46,17 +46,29 @@ management (such as using burst buffers if available).
 
 In typical usage, a run that calls ``read()`` on a certain set of files
 should be executed on the same number of MPI ranks as the run that created
-those files with a ``write()`` call. Still a ``read()`` call, if using
-the "sidre_hdf5" protocol, can work when called from a greater number of
-processors. If ``write()`` was executed on N ranks and ``read()`` is called
-while running on M ranks (M > N), then data will be read into ranks 0 to N-1,
-and all ranks higher than N-1 will receive no data.
+those files with a ``write()`` call. However, if using the "sidre_hdf5"
+protocol, there are some usage patterns that do not have this limitation.
+
+A ``read()`` call using "sidre_hdf5" will work when called from a greater
+number of processors. If ``write()`` was executed on N ranks and ``read()``
+is called while running on M ranks (M > N), then data will be read into ranks
+0 to N-1, and all ranks higher than N-1 will receive no data.
+
+If ``read()`` is called using "sidre_hdf5" to read data that was created on
+a larger number of processors, this will work only in the case that the data
+was written in a file-per-processor mode (M ranks to M files). In this case
+the data in the Group being filled with file input will look a bit different
+than in other usage patterns, since a Group on one rank will end up with data
+from multiple ranks. An integer scalar View named ``reduced_input_ranks``
+will be added to the Group with the value being the number of ranks that
+wrote the files. The data from each output rank will be read into subgroups
+located at ``rank_{%07d}/sidre_input`` in the input Group's data hierarchy.
 
 .. warning::
-   If ``read()`` is called in an attempt to read data that was created on a
-   larger number of processors than the current run, an error will occur.
-   Support for this type of usage is intended to be added in future
-   releases.
+   If ``read()`` is called to read data that was created on a larger
+   number of processors than the current run with files produced in M-to-N
+   mode (M > N), an error will occur. Support for this type of usage is
+   intended to be added in future releases.
 
 In the following example, an IOManager is created and used to write the contents
 of the Group "root" in parallel.
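The example referenced at the end of this hunk is not part of the diff shown here. As a separate, hedged illustration of the reduced-rank read pattern described above, the following minimal sketch writes a Sidre Group with the "sidre_hdf5" protocol and reads it back, reporting the writer rank count when the reduced-rank layout is present. The group names, file base name, file count, and the exact IOManager::write()/read() signatures are assumptions for illustration, not text from this commit.

// Minimal sketch under the assumptions noted above: write with sidre_hdf5,
// read back, and report the writer rank count if the reduced-rank layout
// is present.
#include "axom/sidre.hpp"  // assumed umbrella header for Sidre + SPIO
#include "mpi.h"
#include <iostream>

int main(int argc, char** argv)
{
  MPI_Init(&argc, &argv);
  int my_rank = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

  axom::sidre::DataStore ds;
  axom::sidre::Group* root = ds.getRoot()->createGroup("root");
  root->createViewScalar("rank", my_rank);

  // Write the group to a set of data files plus a root file.
  axom::sidre::IOManager writer(MPI_COMM_WORLD);
  const int num_files = 4;  // assumed file count for this sketch
  writer.write(root, num_files, "out_sidre_parallel", "sidre_hdf5");

  // Read the data back into a fresh group. A later run on more ranks can
  // do the same; ranks beyond the original writer count receive no data.
  axom::sidre::Group* input = ds.getRoot()->createGroup("input");
  axom::sidre::IOManager reader(MPI_COMM_WORLD);
  reader.read(input, "out_sidre_parallel.root");

  // When data written by more ranks than the current run is read in
  // file-per-processor mode, the documentation above says this scalar
  // View records how many ranks wrote the files.
  if (input->hasView("reduced_input_ranks"))
  {
    int writer_ranks = input->getView("reduced_input_ranks")->getScalar();
    std::cout << "Data was written by " << writer_ranks << " ranks\n";
  }

  MPI_Finalize();
  return 0;
}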
39 changes: 19 additions & 20 deletions src/axom/sidre/spio/IOBaton.cpp
@@ -38,47 +38,48 @@ IOBaton::IOBaton(MPI_Comm comm,
   MPI_Comm_rank(comm, &m_my_rank);
   m_num_files = num_files;
   m_num_groups = num_groups;
 
   int active_comm_size = m_comm_size;
   if (m_comm_size > m_num_groups)
   {
     active_comm_size = m_num_groups;
   }
-  m_num_larger_groups = active_comm_size % num_files;
+  m_num_larger_sets = active_comm_size % num_files;
   if (m_my_rank < active_comm_size)
   {
-    m_group_size = active_comm_size / m_num_files; // ?
+    m_set_size = active_comm_size / m_num_files;
   }
   else
   {
-    m_group_size = 1;
+    m_set_size = 1;
   }
-  m_first_regular_group_rank = (m_group_size + 1) * m_num_larger_groups;
-  if (m_my_rank < m_first_regular_group_rank)
+  m_first_regular_set_rank = (m_set_size + 1) * m_num_larger_sets;
+  if (m_my_rank < m_first_regular_set_rank)
   {
-    m_group_id = m_my_rank / (m_group_size + 1);
-    m_rank_within_group = m_my_rank % (m_group_size + 1);
-    if (m_rank_within_group < m_group_size)
+    m_set_id = m_my_rank / (m_set_size + 1);
+    m_rank_within_set = m_my_rank % (m_set_size + 1);
+    if (m_rank_within_set < m_set_size)
     {
       m_rank_after_me = m_my_rank + 1;
     }
   }
   else if (m_my_rank < active_comm_size)
   {
-    m_group_id = m_num_larger_groups +
-                 (m_my_rank - m_first_regular_group_rank) / m_group_size;
-    m_rank_within_group = (m_my_rank - m_first_regular_group_rank) %
-                          m_group_size;
-    if (m_rank_within_group < m_group_size - 1)
+    m_set_id = m_num_larger_sets +
+               (m_my_rank - m_first_regular_set_rank) / m_set_size;
+    m_rank_within_set = (m_my_rank - m_first_regular_set_rank) %
+                        m_set_size;
+    if (m_rank_within_set < m_set_size - 1)
     {
       m_rank_after_me = m_my_rank + 1;
     }
   }
   else
   {
-    m_group_id = m_my_rank;
-    m_rank_within_group = 0;
+    m_set_id = m_my_rank;
+    m_rank_within_set = 0;
   }
-  if (m_rank_within_group > 0)
+  if (m_rank_within_set > 0)
   {
     m_rank_before_me = m_my_rank - 1;
   }
@@ -107,12 +108,12 @@ int IOBaton::wait()
                        m_mpi_tag, m_mpi_comm, &mpi_stat);
     if (mpi_err == MPI_SUCCESS)
     {
-      return_val = m_group_id;
+      return_val = m_set_id;
     }
   }
   else
   {
-    return_val = m_group_id;
+    return_val = m_set_id;
   }
   return return_val;
 }
@@ -134,7 +135,5 @@ int IOBaton::pass()
 }
 
 
-
-
 } /* end namespace sidre */
 } /* end namespace axom */
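The constructor hunk above splits the active ranks into one set per output file, giving the first m_num_larger_sets sets one extra member. The arithmetic is easier to follow with concrete numbers; the standalone sketch below (written for this commentary, not code from the commit) reproduces the same mapping for an assumed 10 active ranks and 4 files.

// Standalone illustration of the rank-to-set mapping computed by the
// IOBaton constructor above. The values 10 and 4 are assumptions chosen
// to show both the "larger" and the regular sets.
// With active_comm_size = 10 and num_files = 4:
//   num_larger_sets        = 10 % 4 = 2   (two sets get one extra rank)
//   set_size               = 10 / 4 = 2
//   first_regular_set_rank = (2 + 1) * 2 = 6
// which yields the sets {0,1,2}, {3,4,5}, {6,7}, {8,9}.
#include <cstdio>

int main()
{
  const int active_comm_size = 10;
  const int num_files = 4;

  const int num_larger_sets = active_comm_size % num_files;
  const int set_size = active_comm_size / num_files;
  const int first_regular_set_rank = (set_size + 1) * num_larger_sets;

  for (int rank = 0; rank < active_comm_size; ++rank)
  {
    int set_id, rank_within_set;
    if (rank < first_regular_set_rank)
    {
      // Ranks that fall in the "larger" sets of set_size + 1 members.
      set_id = rank / (set_size + 1);
      rank_within_set = rank % (set_size + 1);
    }
    else
    {
      // Ranks that fall in the regular sets of set_size members.
      set_id = num_larger_sets + (rank - first_regular_set_rank) / set_size;
      rank_within_set = (rank - first_regular_set_rank) % set_size;
    }
    std::printf("rank %d -> set %d (position %d)\n",
                rank, set_id, rank_within_set);
  }
  return 0;
}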
36 changes: 19 additions & 17 deletions src/axom/sidre/spio/IOBaton.hpp
@@ -35,10 +35,10 @@ namespace sidre
  * \brief IOBaton ensures that during I/O operations, only one rank will
  * interact with a particular file at one time.
  *
- * Each rank is placed into a group of ranks, with the number of groups being
+ * Each rank is placed into a set of ranks, with the number of sets being
  * equal to the number of I/O files, and then the ranks use the wait and
  * pass methods to pass control of I/O operations from one rank to the next
- * within the group.
+ * within the set.
  */
 class IOBaton
 {
@@ -64,7 +64,7 @@ class IOBaton
   /*!
    * \brief Wait for previous rank to pass control to the local rank.
    *
-   * \return An integer id for the group of which this rank is a member.
+   * \return An integer id for the set of which this rank is a member.
    */
   int wait();
 
@@ -76,26 +76,26 @@
   int pass();
 
   /*!
-   * \brief Size of local rank's group.
+   * \brief Size of local rank's set.
    *
-   * \return Number of ranks in the group.
+   * \return Number of ranks in the set.
    */
-  int groupSize() const
+  int setSize() const
   {
     return m_my_rank <
-           m_first_regular_group_rank ? m_group_size + 1 : m_group_size;
+           m_first_regular_set_rank ? m_set_size + 1 : m_set_size;
   }
 
   /*!
-   * \brief Tells if the local rank is the first (lowest) in its group.
+   * \brief Tells if the local rank is the first (lowest) in its set.
    */
   bool isFirstInGroup() const
   {
-    return (m_rank_within_group == 0);
+    return (m_rank_within_set == 0);
   }
 
   /*!
-   * \brief Tells if the local rank is the last (highest) in its group.
+   * \brief Tells if the local rank is the last (highest) in its set.
    */
   bool isLastInGroup() const
   {
@@ -114,20 +114,22 @@
 
   DISABLE_COPY_AND_ASSIGNMENT( IOBaton );
 
+  void setupReducedRanks();
+
   static const int s_invalid_rank_id;
 
   MPI_Comm m_mpi_comm;
 
   int m_comm_size; // num procs in the MPI communicator
   int m_my_rank; // rank of this proc
   int m_num_files; // number of files
-  int m_num_groups; // number of groups (ranks)
-  int m_num_larger_groups; // some group have one extra
-  int m_group_size; // regular group size (m_comm_size / m_num_files) w/o
-                    // remainder
-  int m_group_id;
-  int m_first_regular_group_rank;
-  int m_rank_within_group;
+  int m_num_groups; // number of groups (input ranks)
+  int m_num_larger_sets; // some sets have one extra
+  int m_set_size; // regular set size (m_comm_size / m_num_files) w/o
+                  // remainder
+  int m_set_id;
+  int m_first_regular_set_rank;
+  int m_rank_within_set;
   int m_rank_before_me;
   int m_rank_after_me;
 
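The class comment and accessors above describe the wait/pass protocol, but no call site appears in this diff. The rough sketch below is an assumption for illustration (in Axom, the SPIO IOManager drives IOBaton internally): it shows how the baton would serialize access to a set's file. The three-argument constructor mirrors the IOBaton.cpp hunk above; the file-writing step is a placeholder.

// Hypothetical usage sketch of the baton protocol; not code from this
// commit. Constructor arguments and the work done while holding the
// baton are assumptions.
#include "axom/sidre/spio/IOBaton.hpp"
#include "mpi.h"
#include <cstdio>

int main(int argc, char** argv)
{
  MPI_Init(&argc, &argv);

  int my_rank = 0;
  int num_ranks = 1;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);

  const int num_files = 4;  // assumed number of shared output files

  // One set of ranks per file; num_ranks input ranks in this run.
  axom::sidre::IOBaton baton(MPI_COMM_WORLD, num_files, num_ranks);

  // Block until the previous rank in this set hands over the baton.
  // The return value identifies the set, and therefore the file, we own.
  int set_id = baton.wait();

  // Only one rank per set reaches this region at a time, so it is safe
  // to open and append to the set's file here (placeholder output).
  std::printf("rank %d has exclusive access to file %d\n", my_rank, set_id);

  // Hand the baton to the next rank in the set.
  baton.pass();

  MPI_Finalize();
  return 0;
}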
