diff --git a/src/libs/blueprint/conduit_blueprint_mesh_partition.cpp b/src/libs/blueprint/conduit_blueprint_mesh_partition.cpp index 5b79100e5..f03939cce 100644 --- a/src/libs/blueprint/conduit_blueprint_mesh_partition.cpp +++ b/src/libs/blueprint/conduit_blueprint_mesh_partition.cpp @@ -1142,12 +1142,20 @@ selection_ranges::print(std::ostream &os) const //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- -partitioner::chunk::chunk() : mesh(nullptr), owns(false) +partitioner::chunk::chunk() : mesh(nullptr), owns(false), destination_rank(-1), + destination_domain(-1) { } //--------------------------------------------------------------------------- -partitioner::chunk::chunk(const Node *m, bool own) : mesh(m), owns(own) +partitioner::chunk::chunk(const Node *m, bool own) : mesh(m), owns(own), + destination_rank(-1), destination_domain(-1) +{ +} + +//--------------------------------------------------------------------------- +partitioner::chunk::chunk(const Node *m, bool own, int dr, int dd) : + mesh(nullptr), owns(false), destination_rank(dr), destination_domain(dd) { } diff --git a/src/libs/blueprint/conduit_blueprint_mesh_partition.hpp b/src/libs/blueprint/conduit_blueprint_mesh_partition.hpp index 7695dace0..28a69b943 100644 --- a/src/libs/blueprint/conduit_blueprint_mesh_partition.hpp +++ b/src/libs/blueprint/conduit_blueprint_mesh_partition.hpp @@ -183,16 +183,20 @@ class partitioner @brief This struct is a Conduit/Blueprint mesh plus an ownership bool. The mesh pointer is always assumed to be external by default so we do not provide a destructor. If we want to delete it, - call free(), which will free the mesh if we own it. + call free(), which will free the mesh if we own it. The struct + does not have a destructor on purpose. */ struct chunk { chunk(); chunk(const Node *m, bool own); + chunk(const Node *m, bool own, int dr, int dd); void free(); const Node *mesh; bool owns; + int destination_rank; + int destination_domain; }; /** diff --git a/src/libs/blueprint/conduit_blueprint_mpi_mesh.cpp b/src/libs/blueprint/conduit_blueprint_mpi_mesh.cpp index f2db60101..9b72cf95f 100644 --- a/src/libs/blueprint/conduit_blueprint_mpi_mesh.cpp +++ b/src/libs/blueprint/conduit_blueprint_mpi_mesh.cpp @@ -19,6 +19,10 @@ #include using partitioner = conduit::blueprint::mesh::partitioner; +//tmp +using std::cout; +using std::endl; + //----------------------------------------------------------------------------- // -- begin conduit -- //----------------------------------------------------------------------------- @@ -209,6 +213,19 @@ class parallel_partitioner : public partitioner virtual void get_largest_selection(int &sel_rank, int &sel_index) const override; protected: + struct long_int + { + long value; + int rank; + }; + + struct chunk_info + { + uint64 num_elements; + int destination_rank; + int destination_domain; + }; + virtual void map_chunks(const std::vector &chunks, std::vector &dest_ranks, std::vector &dest_domain, @@ -222,7 +239,18 @@ class parallel_partitioner : public partitioner std::vector &chunks_to_assemble_domains) override; private: - MPI_Comm comm; + /** + @brief Creates an MPI structure datatype so we can Allgatherv 3 things + in 1 call. We initialize the chunk_info_dt member. + */ + void create_chunk_info_dt(); + /** + @brief Frees the chunk_info_dt data type. 
+ */ + void free_chunk_info_dt(); + + MPI_Comm comm; + MPI_Datatype chunk_info_dt; }; //--------------------------------------------------------------------------- @@ -231,11 +259,13 @@ parallel_partitioner::parallel_partitioner(MPI_Comm c) : partitioner() comm = c; MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank); + create_chunk_info_dt(); } //--------------------------------------------------------------------------- parallel_partitioner::~parallel_partitioner() { + free_chunk_info_dt(); } //--------------------------------------------------------------------------- @@ -254,49 +284,79 @@ parallel_partitioner::get_total_selections() const /** @note This method is called iteratively until we have the number of target selections that we want to make. We could do better by identifying - more selections to split in each pass. + more selections to split in each pass. We use MPI_LONG_INT because it + can be used with MPI_MAXLOC. */ void parallel_partitioner::get_largest_selection(int &sel_rank, int &sel_index) const { // Find largest selection locally. - long largest_selection_size = 0; - int largest_selection_index = 0; + long_int largest_selection; + largest_selection.value = 0; + largest_selection.rank = rank; + std::vector local_sizes(selections.size()); for(size_t i = 0; i < selections.size(); i++) { - long ssize = static_cast(selections[i]->length()); - if(ssize > largest_selection_size) + local_sizes[i] = static_cast(selections[i]->length()); + if(local_sizes[i] > static_cast(largest_selection.value)) { - largest_selection_size = ssize; - largest_selection_index = static_cast(i); + largest_selection.value = static_cast(local_sizes[i]); } } - // What's the largest selection across ranks? - long global_largest_selection_size = 0; - MPI_Allreduce(&largest_selection_size, - &global_largest_selection_size, 1, MPI_LONG, - MPI_MAX, comm); + // What's the largest selection across ranks? We use MPI_MAXLOC to + // get the max size and the rank where it occurs. + long_int global_largest_selection; + MPI_Allreduce(&largest_selection, &global_largest_selection, + 1, MPI_LONG_INT, MPI_MAXLOC, comm); - // See if this rank has the largest selection. - int rank_that_matches = -1, largest_rank_that_matches = -1; - int local_index = -1; - for(size_t i = 0; i < selections.size(); i++) + // MPI found us the rank that has the largest selection. + sel_rank = global_largest_selection.rank; + sel_index = -1; + + // If we're on that rank, determine the local selection index. 
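+    // Note: only the rank that owns the largest selection fills in sel_index
+    // below; every other rank returns sel_index == -1.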
+ if(sel_rank == rank) { - long ssize = static_cast(selections[i]->length()); - if(ssize == global_largest_selection_size) + uint64 ssize = static_cast(global_largest_selection.value); + for(size_t i = 0; i < selections.size(); i++) { - rank_that_matches = rank; - local_index = -1; + if(ssize == local_sizes[i]) + { + sel_index = static_cast(i); + break; + } } } - MPI_Allreduce(&rank_that_matches, - &largest_rank_that_matches, 1, MPI_INT, - MPI_MAX, comm); +} - sel_rank = largest_rank_that_matches; - if(sel_rank == rank) - sel_index = local_index; +//------------------------------------------------------------------------- +void +parallel_partitioner::create_chunk_info_dt() +{ + chunk_info obj; + int slen = 3; + int lengths[3] = {1,1,1}; + MPI_Datatype types[3]; + MPI_Aint offsets[3]; + + types[0] = MPI_UNSIGNED_LONG_LONG; + types[1] = MPI_INT; + types[2] = MPI_INT; + + size_t base = ((size_t)(&obj)); + offsets[0] = ((size_t)(&obj.num_elements)) - base; + offsets[1] = ((size_t)(&obj.destination_rank)) - base; + offsets[2] = ((size_t)(&obj.destination_domain)) - base; + + MPI_Type_create_struct(slen, lengths, offsets, types, &chunk_info_dt); + MPI_Type_commit(&chunk_info_dt); +} + +//------------------------------------------------------------------------- +void +parallel_partitioner::free_chunk_info_dt() +{ + MPI_Type_free(&chunk_info_dt); } //------------------------------------------------------------------------- @@ -314,6 +374,9 @@ Some chunks may be the result of a field-based selection that says explicitly where the cells will end up in a domain/rank. We can only have a domain going to a single rank though. +@note We pass out the global dest_rank, dest_domain, offsets in this method + since we consume it immediately in communicate_chunks where we need the + global information to do matching sends/recvs. */ void parallel_partitioner::map_chunks(const std::vector &chunks, @@ -321,9 +384,8 @@ parallel_partitioner::map_chunks(const std::vector &chunks, std::vector &dest_domain, std::vector &_offsets) { -#if 0 // Gather number of chunks on each rank. - auto nlocal_chunks = static_cast(local_chunk_sizes.size()); + auto nlocal_chunks = static_cast(chunks.size()); std::vector nglobal_chunks(size, 0); MPI_Allgather(&nlocal_chunks, 1, MPI_INT, &nglobal_chunks[0], 1, MPI_INT, @@ -333,46 +395,66 @@ parallel_partitioner::map_chunks(const std::vector &chunks, for(size_t i = 0; i < nglobal_chunks.size(); i++) ntotal_chunks += nglobal_chunks[i]; #if 1 +MPI_Barrier(comm); if(rank == 0) { +cout << "------------------------ map_chunks ------------------------" << endl; cout << "ntotal_chunks = " << ntotal_chunks << endl; } +MPI_Barrier(comm); #endif // Compute offsets. We use int because of MPI_Allgatherv std::vector offsets(size, 0); for(size_t i = 1; i < nglobal_chunks.size(); i++) - offsets = offsets[i-1] + nglobal_chunks[i-1]; + offsets[i] = offsets[i-1] + nglobal_chunks[i-1]; // What we have at this point is a list of chunk sizes for all chunks // across all ranks. Let's get a global list of chunk domains (where // they want to go). A chunk may already know where it wants to go. // If it doesn't then we can assign it to move around. A chunk is // free to move around if its destination domain is -1. 
- std::vector local_chunk_dest_domain(chunks.size()); - std::vector local_chunk_dest_rank(chunks.size(), rank); + std::vector local_chunk_info(chunks.size()); for(size_t i = 0; i < chunks.size(); i++) { - local_chunk_dest_domain[i] = chunks[i].destination_domain(); - local_chunk_dest_rank[i] = chunks[i].destination_rank(); + const conduit::Node &n_topos = chunks[i].mesh->operator[]("topologies"); + uint64 len = 0; + for(index_t j = 0; j < n_topos.number_of_children(); j++) + len += conduit::blueprint::mesh::topology::length(n_topos[j]); + local_chunk_info[i].num_elements = len; + local_chunk_info[i].destination_rank = chunks[i].destination_rank; + local_chunk_info[i].destination_domain = chunks[i].destination_domain; } - std::vector global_chunk_dest_domain(ntotal_chunks, 0); - MPI_Allgatherv(&local_chunk_dest_domain[0], - static_cast(local_chunk_dest_domain.size()), - MPI_INT, - &global_chunk_dest_domain[0], + std::vector global_chunk_info(ntotal_chunks); + MPI_Allgatherv(&local_chunk_info[0], + static_cast(local_chunk_info.size()), + chunk_info_dt, + &global_chunk_info[0], &nglobal_chunks[0], &offsets[0], - MPI_INT, + chunk_info_dt, comm); +#if 1 + if(rank == 0) + { + for(int i = 0; i < ntotal_chunks; i++) + { + cout << "global_chunk_info[" << i << "]={" + << "num_elements=" << global_chunk_info[i].num_elements + << ", dest_rank=" << global_chunk_info[i].destination_rank + << ", dest_domain=" << global_chunk_info[i].destination_domain + << "}" << endl; + } + } +#endif // Determine how many ranks are free to move to various domains. // Also determine the domain ids in use and how many chunks // comprise each of them. std::map domain_sizes; int free_to_move = 0; - for(size_t i = 0; i < ntotal_chunks; i++) + for(int i = 0; i < ntotal_chunks; i++) { - int domid = global_chunk_dest_domain[i]; + int domid = global_chunk_info[i].destination_domain; if(domid >= 0) { std::map::iterator it = domain_sizes.find(domid); @@ -385,6 +467,20 @@ cout << "ntotal_chunks = " << ntotal_chunks << endl; free_to_move++; } +#if 1 + if(rank == 0) + { + cout << "domain_sizes = {"; + for(std::map::const_iterator it = domain_sizes.begin(); + it != domain_sizes.end(); it++) + { + cout << it->first << ": " << it->second << ", "; + } + cout << "}" << endl; + cout << "free_to_move = " << free_to_move << endl; + } +#endif + if(free_to_move == 0) { // No chunks are free to move around. This means we the domains @@ -393,78 +489,50 @@ cout << "ntotal_chunks = " << ntotal_chunks << endl; // NOTE: This may mean that we do not get #target domains though. if(domain_sizes.size() != target) { - CONDUIT_WARNING("The unique number of domain ids " + CONDUIT_WARN("The unique number of domain ids " << domain_sizes.size() << " was not equal to the desired target number of domains: " << target << "."); } -#if 1 - // NOTE: It is easier in parallel to do the communications later on - // if we pass out the global information. - std::vector global_chunk_dest_rank(ntotal_chunks, 0); - MPI_Allgatherv(&local_chunk_dest_rank[0], - static_cast(local_chunk_dest_rank.size()), - MPI_INT, - &global_chunk_dest_rank[0], - &nglobal_chunks[0], - &offsets[0], - MPI_INT, - comm); - - // Pass out the global information. 
- dest_rank.swap(global_chunk_dest_rank); - dest_domain.swap(global_chunk_dest_domain); - _offsets.swap(offsets); -#else - // Pass out local information - for(size_t i = 0; i < nlocal_chunks; i++) + // Pass out global information + dest_rank.reserve(ntotal_chunks); + dest_domain.reserve(ntotal_chunks); + for(int i = 0; i < ntotal_chunks; i++) { - dest_rank.push_back(local_chunk_dest_rank[i]); - dest_domain.push_back(local_chunk_dest_domain[i]); + dest_rank.push_back(global_chunk_info[i].destination_rank); + dest_domain.push_back(global_chunk_info[i].destination_domain); } -#endif + _offsets.swap(offsets); } else if(free_to_move == ntotal_chunks) { // No chunks told us where they go so ALL are free to move. - - // Determine local chunk sizes (number of elements). - std::vector local_chunk_sizes; - for(size_t i =0 ; i < chunks.size(); i++) +#if 1 + if(rank == 0) { - const conduit::Node &n_topos = chunks[i].mesh->operator[]("topologies"); - uint64 len = 0; - for(index_t j = 0; j < n_topos.number_of_children(); j++) - len += conduit::blueprint::mesh::topology::length(n_topos[j]); - local_chunk_sizes.push_back(len); + cout << "** We decide where chunks go." << endl; } - // Get all chunk sizes across all ranks. - std::vector global_chunk_sizes(ntotal_chunks, 0); - MPI_Allgatherv(&local_chunk_sizes[0], - static_cast(local_chunk_sizes.size()), - MPI_UNSIGNED_LONG_LONG, - &global_chunk_sizes[0], - &nglobal_chunks[0], - &offsets[0], - MPI_UNSIGNED_LONG_LONG, - comm); - +#endif // We must make #target domains from the chunks we have. Since // no chunks told us a domain id they want to be inside, we can - // number domains 0..target - - std::vector target_cell_counts(target, 0); - std::vector next_target_cell_counts(target); - std::vector global_dest_rank(ntotal_domains, 0); - std::vector global_dest_domain(ntotal_domains, 0); - for(size_t i = 0; i < ntotal_chunks; i++) + // number domains 0..target. This scheme ignores the chunk's + // destination_rank. + + std::vector target_element_counts(target, 0); + std::vector next_target_element_counts(target); + std::vector global_dest_rank(ntotal_chunks, -1); + std::vector global_dest_domain(ntotal_chunks, 0); + for(int i = 0; i < ntotal_chunks; i++) { // Add the size of this chunk to all targets. - for(int r = 0; r < target; r++) - next_target_cell_counts[r] = target_cell_counts[r] + global_chunk_sizes[i]; + for(unsigned int r = 0; r < target; r++) + { + next_target_element_counts[r] = target_element_counts[r] + + global_chunk_info[i].num_elements; + } - // Find the index of the min value in next_target_cell_counts. + // Find the index of the min value in next_target_element_counts. // This way, we'll let that target domain have the chunk as // we are trying to balance the sizes of the output domains. // @@ -472,56 +540,81 @@ cout << "ntotal_chunks = " << ntotal_chunks << endl; // smallest bounding box so we keep things close together. // // NOTE: This method has the potential to move chunks far away. + // It is sprinkling chunks into targets 0,1,2,... and + // and then repeating when the number of elements is ascending. int idx = 0; - for(int r = 1; r < target; r++) + for(unsigned int r = 1; r < target; r++) { - if(next_target_cell_counts[r] < next_target_cell_counts[idx]) + if(next_target_element_counts[r] < next_target_element_counts[idx]) idx = r; } // Add the current chunk to the specified target domain. 
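+            // Worked example (hypothetical sizes): with target = 3 and global
+            // chunk element counts {10, 10, 10, 40}, the greedy min rule above
+            // assigns chunks 0, 1, 2 to domains 0, 1, 2 and chunk 3 back to
+            // domain 0, leaving target_element_counts = {50, 10, 10}.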
- target_cell_counts[idx] += global_chunk_sizes[i]; + target_element_counts[idx] += global_chunk_info[i].num_elements; global_dest_domain[i] = idx; +#if 1 + if(rank == 0) + { + cout << "Add chunk " << i << " to domain " << idx << " (nelem=" << target_element_counts[idx] << ")" << endl; + } +#endif } // We now have a global map indicating the final domain to which // each chunk will contribute. Spread target domains across size ranks. std::vector rank_domain_count(size, 0); + int divsize = std::min(size, static_cast(target)); for(int i = 0; i < target; i++) - rank_domain_count[i % size]++; + rank_domain_count[i % divsize]++; + // Figure out which source chunks join which ranks. int target_id = 0; for(int r = 0; r < size; r++) { +if(rank == 0) cout << "r=" << r << ", rank_domain_count[r]=" << rank_domain_count[r] << endl; if(rank_domain_count[r] == 0) break; // For each domain on this rank r. for(int j = 0; j < rank_domain_count[r]; j++) { - for(size_t i = 0; i < ntotal_domains; i++) + for(int i = 0; i < ntotal_chunks; i++) { if(global_dest_domain[i] == target_id) + { +if(rank == 0) +cout << "global domain " << target_id << " goes to " << r << endl; global_dest_rank[i] = r; + } } target_id++; } } - #if 1 - // Pass out the global information. - dest_rank.swap(global_chunk_dest_rank); - dest_domain.swap(global_chunk_dest_domain); - _offsets.swap(offsets); -#else - // Now that we know where all chunks go, copy out the information - // that this rank will need. - for(size_t i = 0; i < chunks.size(); i++) + if(rank == 0) { - size_t srcindex = offsets[rank] + i; - dest_ranks.push_back(global_dest_rank[srcindex]); - dest_domain.push_back(global_dest_domain[srcindex]); + cout << "target=" << target << endl; + cout << "target_element_counts = {"; + for(size_t i = 0; i < target_element_counts.size(); i++) + cout << target_element_counts[i] << ", "; + cout << "}" << endl; + cout << "global_dest_rank = {"; + for(size_t i = 0; i < global_dest_rank.size(); i++) + cout << global_dest_rank[i] << ", "; + cout << "}" << endl; + cout << "global_dest_domain = {"; + for(size_t i = 0; i < global_dest_domain.size(); i++) + cout << global_dest_domain[i] << ", "; + cout << "}" << endl; + cout << "rank_domain_count = {"; + for(size_t i = 0; i < rank_domain_count.size(); i++) + cout << rank_domain_count[i] << ", "; + cout << "}" << endl; } #endif + // Pass out the global information. + dest_rank.swap(global_dest_rank); + dest_domain.swap(global_dest_domain); + _offsets.swap(offsets); } else { @@ -531,27 +624,20 @@ cout << "ntotal_chunks = " << ntotal_chunks << endl; CONDUIT_ERROR("Invalid mixture of destination rank/domain specifications."); } #if 1 - MPI_Barrier(comm); - // Wait for previous rank to print. - int tmp = 0; - MPI_Status s; - if(rank > 0) - MPI_Recv(&tmp, 1, MPI_INT, rank-1, 9999, comm, &s); - - cout << rank << ": dest_ranks={"; - for(size_t i = 0; i < dest_ranks.size(); i++) - cout << dest_ranks[i] << ", "; - cout << "}" << endl; - cout << rank << "dest_domain={"; - for(size_t i = 0; i < dest_domain.size(); i++) - cout << dest_domain[i] << ", "; - cout << "}" << endl; - cout.flush(); - - // Pass baton to next rank - if(rank < size-1) - MPI_Send(&rank, 1, MPI_INT, rank+1, 9999, comm); -#endif + // We're passing out global info now so all ranks should be the same. 
+ + if(rank == 0) + { + std::cout << rank << ": dest_ranks={"; + for(size_t i = 0; i < dest_rank.size(); i++) + std::cout << dest_rank[i] << ", "; + std::cout << "}" << std::endl; + std::cout << rank << ": dest_domain={"; + for(size_t i = 0; i < dest_domain.size(); i++) + std::cout << dest_domain[i] << ", "; + std::cout << "}" << std::endl; + std::cout.flush(); + } #endif } @@ -570,6 +656,79 @@ parallel_partitioner::communicate_chunks(const std::vector & std::vector &chunks_to_assemble, std::vector &chunks_to_assemble_domains) { + const int PARTITION_TAG_BASE = 12000; + + // Use the offsets to determine the sender for each chunk. + std::vector src_rank(dest_rank.size(),size-1); + size_t idx = 0; + for(size_t r = 1; r < offsets.size(); r++) + { + int n = offsets[r] - offsets[r-1]; + for(int i = 0; i < n; i++) + src_rank[idx++] = r-1; + } + +#if 1 + MPI_Barrier(comm); + if(rank == 0) + { + cout << "offsets = {"; + for(size_t i = 0; i < offsets.size(); i++) + cout << offsets[i] << ", "; + cout << "}" << endl; + cout << "src_rank = {"; + for(size_t i = 0; i < src_rank.size(); i++) + cout << src_rank[i] << ", "; + cout << "}" << endl; + } + MPI_Barrier(comm); +#endif + + // Do sends for the chunks we own on this processor that must migrate. + for(size_t i = 0; i < chunks.size(); i++) + { + int gidx = offsets[rank] + i; + int tag = PARTITION_TAG_BASE + gidx; + int dest = dest_rank[gidx]; + // If we're not sending to self, send the chunk. + if(dest != rank) + { +cout << rank << ": send_using_schema(dest=" << dest << ", tag=" << tag << ")" << endl; + conduit::relay::mpi::send_using_schema(*chunks[i].mesh, dest, tag, comm); + } + } + + // Do recvs. + for(size_t i = 0; i < dest_rank.size(); i++) + { + if(dest_rank[i] == rank) + { + int gidx = i; + int tag = PARTITION_TAG_BASE + gidx; + int start = offsets[rank]; + int end = start + chunks.size(); + bool this_rank_already_owns_it = (gidx >= start && gidx < end); + if(this_rank_already_owns_it) + { + int local_i = i - offsets[rank]; + // Pass the chunk through since we already own it on this rank. + chunks_to_assemble.push_back(chunk(chunks[local_i].mesh, false)); + chunks_to_assemble_domains.push_back(dest_domain[i]); + } + else + { +cout << rank << ": recv_using_schema(src=" << src_rank[i] << ", tag=" << tag << ")" << endl; + + conduit::Node *n_recv = new conduit::Node; + conduit::relay::mpi::recv_using_schema(*n_recv, src_rank[i], tag, comm); + // Save the received chunk and indicate we own it for later. 
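+                // Passing owns == true means a later chunk::free() call will
+                // delete the heap-allocated n_recv node; chunks received from
+                // other ranks are the only ones this rank owns.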
+ chunks_to_assemble.push_back(chunk(n_recv, true)); + chunks_to_assemble_domains.push_back(dest_domain[i]); + } + } + } + + #if 0 const int PARTITION_TAG_BASE = 12345; diff --git a/src/tests/blueprint/CMakeLists.txt b/src/tests/blueprint/CMakeLists.txt index 08634cd2a..8f70df23d 100644 --- a/src/tests/blueprint/CMakeLists.txt +++ b/src/tests/blueprint/CMakeLists.txt @@ -55,15 +55,18 @@ else() message(STATUS "Fortran disabled: Skipping conduit blueprint fortran interface tests") endif() -set(BLUEPRINT_MPI_TESTS +set(BLUEPRINT_MPI_TESTS_RANKS_2 t_blueprint_mpi_smoke t_blueprint_mpi_mesh_verify) set(BLUEPRINT_RELAY_MPI_TESTS t_blueprint_mpi_mesh_relay) +set(BLUEPRINT_RELAY_MPI_TESTS_RANKS_4 + t_blueprint_mpi_mesh_partition) + if(MPI_FOUND) message(STATUS "MPI enabled: Adding conduit_blueprint_mpi and conduit_relay_mpi unit tests") - foreach(TEST ${BLUEPRINT_MPI_TESTS}) + foreach(TEST ${BLUEPRINT_MPI_TESTS_RANKS_2}) add_cpp_mpi_test(TEST ${TEST} NUM_MPI_TASKS 2 DEPENDS_ON conduit conduit_blueprint_mpi) endforeach() @@ -77,6 +80,15 @@ if(MPI_FOUND) conduit_relay_mpi conduit_relay_mpi_io) endforeach() + + foreach(TEST ${BLUEPRINT_RELAY_MPI_TESTS_RANKS_4}) + add_cpp_mpi_test(TEST ${TEST} NUM_MPI_TASKS 4 DEPENDS_ON conduit + conduit_blueprint + conduit_blueprint_mpi + conduit_relay + conduit_relay_mpi + conduit_relay_mpi_io) + endforeach() else() message(STATUS "MPI disabled: Skipping conduit_blueprint_mpi tests") endif() diff --git a/src/tests/blueprint/t_blueprint_mesh_partition.cpp b/src/tests/blueprint/t_blueprint_mesh_partition.cpp index a99093bb9..65876fb9e 100644 --- a/src/tests/blueprint/t_blueprint_mesh_partition.cpp +++ b/src/tests/blueprint/t_blueprint_mesh_partition.cpp @@ -28,20 +28,6 @@ using std::endl; // #define USE_ERROR_HANDLER -//----------------------------------------------------------------------------- -#ifdef GENERATE_BASELINES - #ifdef _WIN32 - #include - void create_path(const std::string &path) { _mkdir(path.c_str()); } - #else - #include - #include - void create_path(const std::string &path) { mkdir(path.c_str(), S_IRWXU); } - #endif -#else - void create_path(const std::string &) {} -#endif - //----------------------------------------------------------------------------- #ifdef _WIN32 const std::string sep("\\"); @@ -62,139 +48,8 @@ baseline_dir() } //----------------------------------------------------------------------------- -std::string -baseline_file(const std::string &basename) -{ - std::string path(baseline_dir()); - create_path(path); - path += (sep + std::string("t_blueprint_mesh_partition")); - create_path(path); - path += (sep + basename + ".yaml"); - return path; -} - -//----------------------------------------------------------------------------- -void -make_baseline(const std::string &filename, const conduit::Node &n) -{ - conduit::relay::io::save(n, filename, "yaml"); -} - -//----------------------------------------------------------------------------- -void -load_baseline(const std::string &filename, conduit::Node &n) -{ - conduit::relay::io::load(filename, "yaml", n); -} - -//----------------------------------------------------------------------------- -bool -compare_baseline(const std::string &filename, const conduit::Node &n) -{ - const double tolerance = 1.e-6; - conduit::Node baseline, info; - conduit::relay::io::load(filename, "yaml", baseline); - const char *line = "*************************************************************"; -#if 0 - cout << line << endl; - baseline.print(); - cout << line << endl; - n.print(); - cout << line << endl; -#endif 
- - // Node::diff returns true if the nodes are different. We want not different. - bool equal = !baseline.diff(n, info, tolerance, true); - - if(!equal) - { - cout << "Difference!" << endl; - cout << line << endl; - info.print(); - } - return equal; -} - -//----------------------------------------------------------------------------- -bool -check_if_hdf5_enabled() -{ - conduit::Node io_protos; - conduit::relay::io::about(io_protos["io"]); - return io_protos["io/protocols/hdf5"].as_string() == "enabled"; -} - -//----------------------------------------------------------------------------- -void -save_node(const std::string &filename, const conduit::Node &mesh) -{ - conduit::relay::io::blueprint::save_mesh(mesh, filename + ".yaml", "yaml"); -} - -//----------------------------------------------------------------------------- -void -save_visit(const std::string &filename, const conduit::Node &n) -{ - // NOTE: My VisIt only wants to read HDF5 root files for some reason. - bool hdf5_enabled = check_if_hdf5_enabled(); - - auto pos = filename.rfind("/"); - std::string fn(filename.substr(pos+1,filename.size()-pos-1)); - pos = fn.rfind("."); - std::string fn_noext(fn.substr(0, pos)); - - - // Save all the domains to individual files. - auto ndoms = conduit::blueprint::mesh::number_of_domains(n); - if(ndoms < 1) - return; - char dnum[20]; - if(ndoms == 1) - { - sprintf(dnum, "%05d", 0); - std::stringstream ss; - ss << fn_noext << "." << dnum; - - if(hdf5_enabled) - conduit::relay::io::save(n, ss.str() + ".hdf5", "hdf5"); - // VisIt won't read it: - conduit::relay::io::save(n, ss.str() + ".yaml", "yaml"); - } - else - { - for(size_t i = 0; i < ndoms; i++) - { - sprintf(dnum, "%05d", static_cast(i)); - std::stringstream ss; - ss << fn_noext << "." << dnum; - - if(hdf5_enabled) - conduit::relay::io::save(n[i], ss.str() + ".hdf5", "hdf5"); - // VisIt won't read it: - conduit::relay::io::save(n[i], ss.str() + ".yaml", "yaml"); - } - } - - // Add index stuff to it so we can plot it in VisIt. - conduit::Node root; - if(ndoms == 1) - conduit::blueprint::mesh::generate_index(n, "", ndoms, root["blueprint_index/mesh"]); - else - conduit::blueprint::mesh::generate_index(n[0], "", ndoms, root["blueprint_index/mesh"]); - root["protocol/name"] = "hdf5"; - root["protocol/version"] = CONDUIT_VERSION; - root["number_of_files"] = ndoms; - root["number_of_trees"] = ndoms; - root["file_pattern"] = (fn_noext + ".%05d.hdf5"); - root["tree_pattern"] = "/"; - - if(hdf5_enabled) - conduit::relay::io::save(root, fn_noext + "_hdf5.root", "hdf5"); - - root["file_pattern"] = (fn_noext + ".%05d.yaml"); - // VisIt won't read it: - conduit::relay::io::save(root, fn_noext + "_yaml.root", "yaml"); -} +// Include some helper function definitions +#include "t_blueprint_partition_helpers.hpp" //----------------------------------------------------------------------------- void diff --git a/src/tests/blueprint/t_blueprint_mpi_mesh_partition.cpp b/src/tests/blueprint/t_blueprint_mpi_mesh_partition.cpp new file mode 100644 index 000000000..25601051a --- /dev/null +++ b/src/tests/blueprint/t_blueprint_mpi_mesh_partition.cpp @@ -0,0 +1,164 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and other Conduit +// Project developers. See top-level LICENSE AND COPYRIGHT files for dates and +// other details. No copyright assignment is required to contribute to Conduit. 
+ +//----------------------------------------------------------------------------- +/// +/// file: t_blueprint_mesh_mpi_partition.cpp +/// +//----------------------------------------------------------------------------- + +#include "conduit.hpp" +#include "conduit_relay.hpp" +#include "conduit_blueprint.hpp" +#include "conduit_blueprint_mesh_utils.hpp" +#include "conduit_blueprint_mpi_mesh.hpp" +#include "conduit_relay.hpp" +#include "conduit_log.hpp" + +#include +#include +#include "gtest/gtest.h" + +#include + +using std::cout; +using std::endl; + +// Enable this macro to generate baselines. +#define GENERATE_BASELINES + +//----------------------------------------------------------------------------- +#ifdef _WIN32 +const std::string sep("\\"); +#else +const std::string sep("/"); +#endif + +//----------------------------------------------------------------------------- +std::string +baseline_dir() +{ + std::string path(__FILE__); + auto idx = path.rfind(sep); + if(idx != std::string::npos) + path = path.substr(0, idx); + path = path + sep + std::string("baselines"); + return path; +} + +//----------------------------------------------------------------------------- +// Include some helper function definitions +#include "t_blueprint_partition_helpers.hpp" + +//----------------------------------------------------------------------------- +void +make_offsets(const int ndomains[4], int offsets[4]) +{ + offsets[0] = 0; + for(int i = 1; i < 4; i++) + offsets[i] = offsets[i-1] + ndomains[i-1]; +} + +//----------------------------------------------------------------------------- +void +distribute_domains(int rank, const int ndomains[4], + const conduit::Node &src_domains, conduit::Node &domains) +{ + int offsets[4]; + make_offsets(ndomains, offsets); + + // Limit to just the domains for this rank. + domains.reset(); + for(size_t i = 0; i < ndomains[rank]; i++) + { + conduit::Node &dom = domains.append(); + dom.set_external(src_domains[offsets[rank] + i]); + } +} + +//----------------------------------------------------------------------------- +std::string +rank_str(int rank) +{ + char tmp[20]; + sprintf(tmp, "%02d", rank); + return std::string(tmp); +} + +//----------------------------------------------------------------------------- +TEST(blueprint_mesh_mpi_partition, all_ranks_have_data) +{ + const std::string base("all_ranks_have_data"); + + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + // Make some domains that we'll distribute in different ways. + conduit::Node spiral; + conduit::blueprint::mesh::examples::spiral(7, spiral); +#if 1 + if(rank == 0) + save_visit("spiral", spiral); +#endif + conduit::Node input, output, options; + int ndomains[] = {3,2,1,1}; + distribute_domains(rank, ndomains, spiral, input); + const char *opt1 = +"target: 3"; + options.reset(); options.parse(opt1, "yaml"); + conduit::blueprint::mpi::mesh::partition(input, options, output, MPI_COMM_WORLD); +// EXPECT_EQ(conduit::blueprint::mesh::number_of_domains(output), rank==0 ? 1 : 0); + if(conduit::blueprint::mesh::number_of_domains(output) > 0) + { + std::string b00 = baseline_file((base + "_00_") + rank_str(rank)); + save_visit(b00, output); +#ifdef GENERATE_BASELINES + make_baseline(b00, output); +#else + EXPECT_EQ(compare_baseline(b00, output), true); +#endif + } + + // To do target 2, we may have to store data from 2 ranks. 
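+    // Sketch only (not enabled): a follow-on target 2 request would reuse the
+    // same pattern as above, e.g.
+    //   const char *opt2 = "target: 2";
+    //   options.reset(); options.parse(opt2, "yaml");
+    //   conduit::blueprint::mpi::mesh::partition(input, options, output, MPI_COMM_WORLD);
+    // and would then need baselines for whichever ranks receive domains.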
+} + +#if 0 +//----------------------------------------------------------------------------- +TEST(blueprint_mesh_mpi_partition, all_ranks_have_data_selections) +{ +} + +//----------------------------------------------------------------------------- +TEST(blueprint_mesh_mpi_partition, some_ranks_have_data) +{ +} + +//----------------------------------------------------------------------------- +TEST(blueprint_mesh_mpi_partition, some_ranks_have_data_selectons) +{ +} +#endif + +//----------------------------------------------------------------------------- +int main(int argc, char* argv[]) +{ + int result = 0; + + ::testing::InitGoogleTest(&argc, argv); + MPI_Init(&argc, &argv); + + int size; + MPI_Comm_size(MPI_COMM_WORLD, &size); + if(size == 4) + result = RUN_ALL_TESTS(); + else + { + cout << "This program requires 4 ranks." << endl; + result = -1; + } + MPI_Finalize(); + + return result; +} diff --git a/src/tests/blueprint/t_blueprint_partition_helpers.hpp b/src/tests/blueprint/t_blueprint_partition_helpers.hpp new file mode 100644 index 000000000..70bb2a61f --- /dev/null +++ b/src/tests/blueprint/t_blueprint_partition_helpers.hpp @@ -0,0 +1,156 @@ +// Copyright (c) Lawrence Livermore National Security, LLC and other Conduit +// Project developers. See top-level LICENSE AND COPYRIGHT files for dates and +// other details. No copyright assignment is required to contribute to Conduit. + +//----------------------------------------------------------------------------- +/// +/// file: t_blueprint_partition_helpers.hpp +/// +//----------------------------------------------------------------------------- + +#ifndef T_BLUEPRINT_PARTITION_HELPERS_HPP +#define T_BLUEPRINT_PARTITION_HELPERS_HPP + +//----------------------------------------------------------------------------- +#ifdef GENERATE_BASELINES + #ifdef _WIN32 + #include + void create_path(const std::string &path) { _mkdir(path.c_str()); } + #else + #include + #include + void create_path(const std::string &path) { mkdir(path.c_str(), S_IRWXU); } + #endif +#else + void create_path(const std::string &) {} +#endif + +//----------------------------------------------------------------------------- +std::string +baseline_file(const std::string &basename) +{ + std::string path(baseline_dir()); + create_path(path); + path += (sep + std::string("t_blueprint_mesh_partition")); + create_path(path); + path += (sep + basename + ".yaml"); + return path; +} + +//----------------------------------------------------------------------------- +void +make_baseline(const std::string &filename, const conduit::Node &n) +{ + conduit::relay::io::save(n, filename, "yaml"); +} + +//----------------------------------------------------------------------------- +void +load_baseline(const std::string &filename, conduit::Node &n) +{ + conduit::relay::io::load(filename, "yaml", n); +} + +//----------------------------------------------------------------------------- +bool +compare_baseline(const std::string &filename, const conduit::Node &n) +{ + const double tolerance = 1.e-6; + conduit::Node baseline, info; + conduit::relay::io::load(filename, "yaml", baseline); + + // Node::diff returns true if the nodes are different. We want not different. + bool equal = !baseline.diff(n, info, tolerance, true); + + if(!equal) + { + const char *line = "*************************************************************"; + cout << "Difference!" 
<< endl; + cout << line << endl; + info.print(); + } + return equal; +} + +//----------------------------------------------------------------------------- +bool +check_if_hdf5_enabled() +{ + conduit::Node io_protos; + conduit::relay::io::about(io_protos["io"]); + return io_protos["io/protocols/hdf5"].as_string() == "enabled"; +} + +//----------------------------------------------------------------------------- +void +save_node(const std::string &filename, const conduit::Node &mesh) +{ + conduit::relay::io::blueprint::save_mesh(mesh, filename + ".yaml", "yaml"); +} + +//----------------------------------------------------------------------------- +void +save_visit(const std::string &filename, const conduit::Node &n) +{ + // NOTE: My VisIt only wants to read HDF5 root files for some reason. + bool hdf5_enabled = check_if_hdf5_enabled(); + + auto pos = filename.rfind("/"); + std::string fn(filename.substr(pos+1,filename.size()-pos-1)); + pos = fn.rfind("."); + std::string fn_noext(fn.substr(0, pos)); + + + // Save all the domains to individual files. + auto ndoms = conduit::blueprint::mesh::number_of_domains(n); + if(ndoms < 1) + return; + char dnum[20]; + if(ndoms == 1) + { + sprintf(dnum, "%05d", 0); + std::stringstream ss; + ss << fn_noext << "." << dnum; + + if(hdf5_enabled) + conduit::relay::io::save(n, ss.str() + ".hdf5", "hdf5"); + // VisIt won't read it: + conduit::relay::io::save(n, ss.str() + ".yaml", "yaml"); + } + else + { + for(size_t i = 0; i < ndoms; i++) + { + sprintf(dnum, "%05d", static_cast(i)); + std::stringstream ss; + ss << fn_noext << "." << dnum; + + if(hdf5_enabled) + conduit::relay::io::save(n[i], ss.str() + ".hdf5", "hdf5"); + // VisIt won't read it: + conduit::relay::io::save(n[i], ss.str() + ".yaml", "yaml"); + } + } + + // Add index stuff to it so we can plot it in VisIt. + conduit::Node root; + if(ndoms == 1) + conduit::blueprint::mesh::generate_index(n, "", ndoms, root["blueprint_index/mesh"]); + else + conduit::blueprint::mesh::generate_index(n[0], "", ndoms, root["blueprint_index/mesh"]); + root["protocol/name"] = "hdf5"; + root["protocol/version"] = CONDUIT_VERSION; + root["number_of_files"] = ndoms; + root["number_of_trees"] = ndoms; + root["file_pattern"] = (fn_noext + ".%05d.hdf5"); + root["tree_pattern"] = "/"; + + if(hdf5_enabled) + conduit::relay::io::save(root, fn_noext + "_hdf5.root", "hdf5"); + + root["file_pattern"] = (fn_noext + ".%05d.yaml"); + // VisIt won't read it: + conduit::relay::io::save(root, fn_noext + "_yaml.root", "yaml"); +} + +#endif
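For reference (not part of the change itself), below is a stand-alone sketch of the struct-datatype plus MPI_Allgatherv pattern that create_chunk_info_dt and map_chunks rely on. The chunk_info field names mirror the ones added above; the chunk counts and element sizes are made up for illustration, and MPI_Get_address is used here in place of the raw pointer arithmetic in the diff.

// Stand-alone illustration: build an MPI struct datatype for
// {uint64, int, int}, then Allgatherv one record per local chunk into a
// global list visible on every rank. Compile with mpicxx.
#include <mpi.h>
#include <cstdint>
#include <cstdio>
#include <vector>

struct chunk_info
{
    uint64_t num_elements;
    int      destination_rank;
    int      destination_domain;
};

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank = 0, size = 1;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // Describe chunk_info to MPI (same idea as create_chunk_info_dt).
    chunk_info   obj;
    int          lengths[3] = {1, 1, 1};
    MPI_Datatype types[3]   = {MPI_UNSIGNED_LONG_LONG, MPI_INT, MPI_INT};
    MPI_Aint     base, offsets[3];
    MPI_Get_address(&obj, &base);
    MPI_Get_address(&obj.num_elements, &offsets[0]);
    MPI_Get_address(&obj.destination_rank, &offsets[1]);
    MPI_Get_address(&obj.destination_domain, &offsets[2]);
    for(int i = 0; i < 3; i++)
        offsets[i] -= base;
    MPI_Datatype chunk_info_dt;
    MPI_Type_create_struct(3, lengths, offsets, types, &chunk_info_dt);
    MPI_Type_commit(&chunk_info_dt);

    // Pretend this rank owns (rank + 1) chunks that are free to move.
    std::vector<chunk_info> local(rank + 1);
    for(size_t i = 0; i < local.size(); i++)
        local[i] = chunk_info{100u * (uint64_t)(rank + 1), -1, -1};

    // Gather per-rank chunk counts, compute displacements, then gather
    // one chunk_info record per chunk into a global list on every rank.
    int nlocal = (int)local.size();
    std::vector<int> counts(size, 0), displs(size, 0);
    MPI_Allgather(&nlocal, 1, MPI_INT, &counts[0], 1, MPI_INT, MPI_COMM_WORLD);
    for(int i = 1; i < size; i++)
        displs[i] = displs[i - 1] + counts[i - 1];
    int ntotal = displs[size - 1] + counts[size - 1];

    std::vector<chunk_info> global(ntotal);
    MPI_Allgatherv(&local[0], nlocal, chunk_info_dt,
                   &global[0], &counts[0], &displs[0], chunk_info_dt,
                   MPI_COMM_WORLD);

    if(rank == 0)
    {
        for(int i = 0; i < ntotal; i++)
            printf("chunk %d: %llu elements, dest_rank=%d, dest_domain=%d\n",
                   i, (unsigned long long)global[i].num_elements,
                   global[i].destination_rank, global[i].destination_domain);
    }

    MPI_Type_free(&chunk_info_dt);
    MPI_Finalize();
    return 0;
}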