Skip to content

Commit

Permalink
do not merge_feature_shard when using metapath_split_opt (PaddlePaddle#198)

Browse files Browse the repository at this point in the history

Co-authored-by: root <root@yq01-inf-hic-k8s-a100-ab2-0009.yq01.baidu.com>
  • Loading branch information
huwei02 and root committed Jan 11, 2023
1 parent 0763145 commit 74839ce
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 12 deletions.
23 changes: 14 additions & 9 deletions paddle/fluid/distributed/ps/table/common_graph_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ DECLARE_bool(graph_load_in_parallel);
DECLARE_bool(graph_get_neighbor_id);
DECLARE_int32(gpugraph_storage_mode);
DECLARE_uint64(gpugraph_slot_feasign_max_num);
DECLARE_bool(graph_metapath_split_opt);

namespace paddle {
namespace distributed {
Expand Down Expand Up @@ -536,14 +537,18 @@ void GraphTable::release_graph_edge() {

void GraphTable::release_graph_node() {
build_graph_type_keys();
if (FLAGS_gpugraph_storage_mode != paddle::framework::GpuGraphStorageMode::
MEM_EMB_FEATURE_AND_GPU_GRAPH &&
FLAGS_gpugraph_storage_mode != paddle::framework::GpuGraphStorageMode::
SSD_EMB_AND_MEM_FEATURE_GPU_GRAPH) {
if (FLAGS_graph_metapath_split_opt) {
clear_feature_shard();
} else {
merge_feature_shard();
feature_shrink_to_fit();
if (FLAGS_gpugraph_storage_mode != paddle::framework::GpuGraphStorageMode::
MEM_EMB_FEATURE_AND_GPU_GRAPH &&
FLAGS_gpugraph_storage_mode != paddle::framework::GpuGraphStorageMode::
SSD_EMB_AND_MEM_FEATURE_GPU_GRAPH) {
clear_feature_shard();
} else {
merge_feature_shard();
feature_shrink_to_fit();
}
}
}
#endif
Expand Down Expand Up @@ -1712,7 +1717,7 @@ std::pair<uint64_t, uint64_t> GraphTable::parse_edge_file(

local_valid_count++;
}
VLOG(2) << local_count << " edges are loaded from filepath->" << path;
VLOG(2) << local_valid_count << "/" << local_count << " edges are loaded from filepath->" << path;
return {local_count, local_valid_count};
}

Expand Down Expand Up @@ -2186,7 +2191,7 @@ int GraphTable::parse_feature(int idx,
return 0;
}
} else {
VLOG(2) << "feature_name[" << name << "] is not in feat_id_map, ntype_id["
VLOG(4) << "feature_name[" << name << "] is not in feat_id_map, ntype_id["
<< idx << "] feat_id_map_size[" << feat_id_map.size() << "]";
}

Expand Down Expand Up @@ -2504,7 +2509,7 @@ int32_t GraphTable::Initialize(const GraphParameter &graph) {
auto graph_feature = graph.graph_feature();
auto node_types = graph.node_types();
auto edge_types = graph.edge_types();
VLOG(0) << "got " << edge_types.size() << "edge types in total";
VLOG(0) << "got " << edge_types.size() << " edge types in total";
feat_id_map.resize(node_types.size());
for (int k = 0; k < edge_types.size(); k++) {
VLOG(0) << "in initialize: get a edge_type " << edge_types[k];
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/data_feed.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2622,12 +2622,12 @@ int GraphDataGenerator::FillWalkBufMultiPath() {

if (!sage_mode_) {
uint64_t h_uniq_node_num = CopyUniqueNodes();
VLOG(0) << "sample_times:" << sample_times << ", d_walk_size:" << buf_size_
VLOG(1) << "sample_times:" << sample_times << ", d_walk_size:" << buf_size_
<< ", d_walk_offset:" << i << ", total_rows:" << total_row_
<< ", h_uniq_node_num:" << h_uniq_node_num
<< ", total_samples:" << total_samples;
} else {
VLOG(0) << "sample_times:" << sample_times << ", d_walk_size:" << buf_size_
VLOG(1) << "sample_times:" << sample_times << ", d_walk_size:" << buf_size_
<< ", d_walk_offset:" << i << ", total_rows:" << total_row_
<< ", total_samples:" << total_samples;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1505,6 +1505,9 @@ NeighborSampleResultV2 GpuPsGraphTable::graph_neighbor_sample_all_edge_type(
for (int idx = 0; idx < edge_type_len; idx++) {
int table_offset = get_table_offset(i, GraphTableType::EDGE_TABLE, idx);
int offset = i * graph_table_num_ + idx;
if (tables_[table_offset] == NULL) {
continue;
}
tables_[table_offset]->get(
reinterpret_cast<uint64_t*>(node.key_storage),
reinterpret_cast<uint64_t*>(node_info_base + idx * shard_len),
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -366,11 +366,13 @@ std::vector<std::string> GraphGpuWrapper::get_ntype_from_etype(
void GraphGpuWrapper::set_up_types(std::vector<std::string> &edge_types,
std::vector<std::string> &node_types) {
id_to_edge = edge_types;
edge_to_id.clear();
for (size_t table_id = 0; table_id < edge_types.size(); table_id++) {
int res = edge_to_id.size();
edge_to_id[edge_types[table_id]] = res;
}
id_to_feature = node_types;
node_to_id.clear();
for (size_t table_id = 0; table_id < node_types.size(); table_id++) {
int res = node_to_id.size();
node_to_id[node_types[table_id]] = res;
Expand Down Expand Up @@ -863,7 +865,6 @@ std::string &GraphGpuWrapper::get_edge_type_size() {
((GpuPsGraphTable *)graph_table)->cpu_graph_table_->edge_type_size;
std::string delim = ";";
edge_type_size_str_ = paddle::string::join_strings(edge_type_size, delim);
std::cout << "edge_type_size_str: " << edge_type_size_str_ << std::endl;
return edge_type_size_str_;
}

Expand Down

0 comments on commit 74839ce

Please sign in to comment.