diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index ba59eae392c66..35170b5198dc3 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -176,6 +176,61 @@ function(create_static_lib TARGET_NAME)
   endif()
 endfunction()
 
+# Merge the static libraries in LIBS into helper archives named
+# ${TARGET_NAME}_static_<N> (N = 1, 2, ...), each produced by
+# merge_static_libs from at most LIMIT libraries, then define
+# ${TARGET_NAME} as a cc_library depending on all helper archives.
+#   LIBS  - static library targets to merge (duplicates are dropped)
+#   DEPS  - optional libraries linked to every helper archive
+#   LIMIT - optional positive integer: maximum number of libraries per
+#           helper archive; if omitted, one archive holds all of them
+# Example: create_dummy_static_lib(phi LIBS ${PHI_DEPS} LIMIT 100)
+function(create_dummy_static_lib TARGET_NAME)
+  set(options "")
+  set(oneValueArgs LIMIT)
+  set(multiValueArgs LIBS DEPS)
+  cmake_parse_arguments(merge "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+  # Skip de-duplication when no library was passed.
+  if(merge_LIBS)
+    list(REMOVE_DUPLICATES merge_LIBS)
+  endif()
+  list(LENGTH merge_LIBS libs_len)
+
+  if(NOT DEFINED merge_LIMIT)
+    # No LIMIT given: put every library into a single helper archive.
+    set(merge_LIMIT ${libs_len})
+  elseif(NOT merge_LIMIT MATCHES "^[1-9][0-9]*$")
+    message(FATAL_ERROR "create_dummy_static_lib: LIMIT must be a positive integer, got '${merge_LIMIT}'")
+  endif()
+
+  set(index 1)
+  set(offset 1)
+  # Start from empty lists so same-named variables of the caller do not leak in.
+  set(chunk_libs)
+  set(static_libs)
+  foreach(lib ${merge_LIBS})
+    list(APPEND chunk_libs ${lib})
+    list(LENGTH chunk_libs chunk_len)
+    # Flush when the chunk holds LIMIT libraries or the input is exhausted.
+    if((chunk_len GREATER_EQUAL merge_LIMIT) OR (offset EQUAL libs_len))
+      message(STATUS "Merge and generate static library: ${TARGET_NAME}_static_${index}")
+      merge_static_libs(${TARGET_NAME}_static_${index} ${chunk_libs})
+      if(merge_DEPS)
+        target_link_libraries(${TARGET_NAME}_static_${index} ${merge_DEPS})
+      endif()
+      set(chunk_libs)
+      list(APPEND static_libs ${TARGET_NAME}_static_${index})
+      math(EXPR index "${index}+1")
+    endif()
+    math(EXPR offset "${offset}+1")
+  endforeach()
+  cc_library(${TARGET_NAME} DEPS ${static_libs})
+endfunction()
+
+# Merge the static libraries passed after TARGET_NAME into one static
+# library target named TARGET_NAME, using libtool on macOS, an "ar" MRI
+# script on Linux and lib.exe on Windows.
 function(merge_static_libs TARGET_NAME)
   set(libs ${ARGN})
   list(REMOVE_DUPLICATES libs)
@@ -193,92 +248,62 @@ function(merge_static_libs TARGET_NAME)
   # also help to track dependencies.
   set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
 
-  if(APPLE) # Use OSX's libtool to merge archives
-    # Make the generated dummy source file depended on all static input
-    # libs. If input lib changes,the source file is touched
-    # which causes the desired effect (relink).
-    add_custom_command(OUTPUT ${target_SRCS}
-      COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
-      DEPENDS ${libs})
-
-    # Generate dummy static lib
-    generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
-
-    target_link_libraries(${TARGET_NAME} ${libs_deps})
+  # Make the generated dummy source file depended on all static input
+  # libs. If input lib changes,the source file is touched
+  # which causes the desired effect (relink).
+  add_custom_command(OUTPUT ${target_SRCS}
+    COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
+    DEPENDS ${libs})
+
+  # Generate dummy static lib
+  generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
+  target_link_libraries(${TARGET_NAME} ${libs_deps})
 
+  # OSX: use 'libtool' to merge archives
+  if(APPLE)
     foreach(lib ${libs})
       # Get the file names of the libraries to be merged
       set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
     endforeach()
     add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
+      COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
       COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
       COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
       )
-  endif(APPLE)
-  if(LINUX) # general UNIX: use "ar" to extract objects and re-add to a common lib
-    set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)
-
-    foreach(lib ${libs})
-      set(objlistfile ${target_DIR}/${lib}.objlist) # list of objects in the input library
-      set(objdir ${target_DIR}/${lib}.objdir)
-
-      add_custom_command(OUTPUT ${objdir}
-        COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
-        DEPENDS ${lib})
+  endif()
 
-      add_custom_command(OUTPUT ${objlistfile}
-        COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
-        COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ${objlistfile}
-        DEPENDS ${lib} ${objdir}
-        WORKING_DIRECTORY ${objdir})
+  # LINUX: merge the input archives into one lib with an "ar" MRI script
+  if(LINUX)
+    set(mri_file ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.mri CACHE INTERNAL "phi_static.mri file")
+    # Archive paths are resolved at generate time through $<TARGET_FILE:...>;
+    # reading the LOCATION property of a build target is disallowed by CMP0026.
+    set(mri_content "create $<TARGET_FILE:${TARGET_NAME}>\n")
 
-      list(APPEND target_OBJS "${objlistfile}")
+    foreach(lib ${libs})
+      string(APPEND mri_content "addlib $<TARGET_FILE:${lib}>\n")
     endforeach()
-
-    # Make the generated dummy source file depended on all static input
-    # libs. If input lib changes,the source file is touched
-    # which causes the desired effect (relink).
-    add_custom_command(OUTPUT ${target_SRCS}
-      COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
-      DEPENDS ${libs} ${target_OBJS})
-
-    # Generate dummy staic lib
-    generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
-
-    target_link_libraries(${TARGET_NAME} ${libs_deps})
-
-    # Get the file name of the generated library
-    set(target_LIBNAME "$<TARGET_FILE:${TARGET_NAME}>")
+    string(APPEND mri_content "save\nend\n")
+    file(GENERATE OUTPUT ${mri_file} CONTENT "${mri_content}")
 
     add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
-      COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
-      COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
-      WORKING_DIRECTORY ${target_DIR})
-  endif(LINUX)
-  if(WIN32) # windows do not support gcc/nvcc combined compiling. Use msvc lib.exe to merge libs.
-    # Make the generated dummy source file depended on all static input
-    # libs. If input lib changes,the source file is touched
-    # which causes the desired effect (relink).
-    add_custom_command(OUTPUT ${target_SRCS}
-      COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
-      DEPENDS ${libs})
-    # Generate dummy staic lib
-    generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
-
-    target_link_libraries(${TARGET_NAME} ${libs_deps})
+      COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
+      COMMAND ${CMAKE_AR} -M < ${mri_file}
+      COMMAND ${CMAKE_RANLIB} "$<TARGET_FILE:${TARGET_NAME}>")
+  endif()
 
+  # Windows do not support gcc/nvcc combined compiling. Use msvc 'lib.exe' to merge libs.
+  if(WIN32)
     foreach(lib ${libs})
-      # Get the file names of the libraries to be merged
       set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
     endforeach()
-    # msvc will put libarary in directory of "/Release/xxxlib" by default
-    #       COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
+    # msvc compiler will put library in directory of "/Release/xxxlib" by default
     add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
+      COMMENT "Merge and generate static lib: ${TARGET_NAME}.lib"
       COMMAND cmake -E make_directory $<TARGET_FILE_DIR:${TARGET_NAME}>
       COMMAND lib /OUT:$<TARGET_FILE:${TARGET_NAME}> ${libfiles}
       )
-  endif(WIN32)
-endfunction(merge_static_libs)
+  endif()
+endfunction()
 
 function(check_coverage_opt TARGET_NAME SRCS)
   if(WITH_COVERAGE AND WITH_INCREMENTAL_COVERAGE)
@@ -1076,4 +1101,3 @@ function(math_library TARGET)
     cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
   endif()
 endfunction()
-
diff --git a/paddle/fluid/distributed/collective/CMakeLists.txt b/paddle/fluid/distributed/collective/CMakeLists.txt
index 6d736d5543ce4..f6b1bd47c1e46 100644
--- a/paddle/fluid/distributed/collective/CMakeLists.txt
+++ b/paddle/fluid/distributed/collective/CMakeLists.txt
@@ -1,20 +1,20 @@
-cc_library(processgroup SRCS ProcessGroup.cc DEPS phi phi_api eager_api)
-cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi phi_api string_helper)
+cc_library(processgroup SRCS ProcessGroup.cc DEPS phi_api eager_api)
+cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi_api string_helper)
 
 if (WITH_DISTRIBUTE)
-  cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi phi_api eager_api gloo_wrapper)
+  cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi_api eager_api gloo_wrapper)
 endif()
 
 if(WITH_NCCL)
-  cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
+  cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
   if (WITH_DISTRIBUTE AND WITH_PSCORE)
-    cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
+    cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
   endif()
 endif()
 
 if(WITH_ASCEND_CL)
-  cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
+  cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
   if (WITH_DISTRIBUTE AND WITH_PSCORE)
-    cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
+    cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
   endif()
 endif()
diff --git a/paddle/fluid/eager/accumulation/CMakeLists.txt b/paddle/fluid/eager/accumulation/CMakeLists.txt
index 43ca707f4f6fb..0531aa5aab373 100644
--- a/paddle/fluid/eager/accumulation/CMakeLists.txt
+++ b/paddle/fluid/eager/accumulation/CMakeLists.txt
@@ -1 +1 @@ -cc_library(accumulation_node SRCS accumulation_node.cc DEPS gradient_accumulator phi phi_api grad_node_info) +cc_library(accumulation_node SRCS accumulation_node.cc DEPS gradient_accumulator phi_api grad_node_info) diff --git a/paddle/fluid/eager/api/utils/CMakeLists.txt b/paddle/fluid/eager/api/utils/CMakeLists.txt index c34df3972c23e..a2a380ebad6c5 100644 --- a/paddle/fluid/eager/api/utils/CMakeLists.txt +++ b/paddle/fluid/eager/api/utils/CMakeLists.txt @@ -1,3 +1,3 @@ -cc_library(tensor_utils SRCS tensor_utils.cc DEPS phi phi_api autograd_meta grad_node_info accumulation_node) +cc_library(tensor_utils SRCS tensor_utils.cc DEPS phi_api autograd_meta grad_node_info accumulation_node) cc_library(hook_utils SRCS hook_utils.cc DEPS phi tensor_utils autograd_meta grad_node_info utils accumulation_node) cc_library(global_utils SRCS global_utils.cc DEPS place tracer) diff --git a/paddle/fluid/eager/pylayer/CMakeLists.txt b/paddle/fluid/eager/pylayer/CMakeLists.txt index 8c660fa9694ed..59030342eccad 100644 --- a/paddle/fluid/eager/pylayer/CMakeLists.txt +++ b/paddle/fluid/eager/pylayer/CMakeLists.txt @@ -1 +1 @@ -cc_library(py_layer_node SRCS py_layer_node.cc DEPS pybind phi phi_api grad_node_info) +cc_library(py_layer_node SRCS py_layer_node.cc DEPS pybind phi_api grad_node_info) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index b6a7aea4f9cd7..bb7f3f26463d4 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -206,11 +206,11 @@ ENDIF() IF(WITH_XPU) cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils - phi phi_utils kernel_factory infershape_utils op_utils) + phi_utils kernel_factory infershape_utils op_utils) ELSE() cc_library(operator SRCS operator.cc DEPS 
op_info device_context tensor scope glog trainer_desc_proto data_feed_proto shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils - phi phi_utils kernel_factory infershape_utils op_utils) + phi_utils kernel_factory infershape_utils op_utils) ENDIF() cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context) @@ -418,7 +418,7 @@ cc_library(save_load_util SRCS save_load_util.cc DEPS tensor scope layer) cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer) cc_library(generator SRCS generator.cc DEPS enforce place) -cc_library(infershape_utils SRCS infershape_utils.cc DEPS lod_tensor selected_rows_utils attribute place phi var_type_traits phi phi_api_utils op_info shape_inference) +cc_library(infershape_utils SRCS infershape_utils.cc DEPS lod_tensor selected_rows_utils attribute place var_type_traits phi phi_api_utils op_info shape_inference) cc_test(infershape_utils_test SRCS infershape_utils_test.cc DEPS infershape_utils infermeta_utils meta_tensor) # Get the current working branch diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index 107bbdf09a021..92af1901b71ab 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -1,9 +1,9 @@ cc_library(imperative_flag SRCS flags.cc DEPS gflags flags) cc_library(var_helper SRCS var_helper.cc DEPS tensor phi_api) IF(WITH_XPU) -cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi phi_utils var_helper) +cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi_utils var_helper) ELSE() 
-cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi phi_utils var_helper) +cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi_utils var_helper) ENDIF() cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry var_helper phi_api) add_subdirectory(jit) diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index bdf364aa9adcd..7a1f3e8326aeb 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -36,7 +36,7 @@ endif() # fluid_modules exclude API-interface of inference/api and inference/capi_exp get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(phi_modules GLOBAL PROPERTY PHI_MODULES) -set(utils_modules stringpiece pretty_log string_helper) +set(utils_modules stringpiece pretty_log string_helper benchmark) add_subdirectory(api) @@ -50,9 +50,9 @@ if(WITH_ONNXRUNTIME) set(STATIC_INFERENCE_API ${STATIC_INFERENCE_API} onnxruntime_predictor) endif() -#TODO(wilber, T8T9): Do we still need to support windows gpu static library? 
+# The Windows GPU static library would be over the limit, so create_static_lib is not used here and the cc_library target is a dummy if(WIN32 AND WITH_GPU) - cc_library(paddle_inference DEPS ${fluid_modules} ${phi_modules} ${STATIC_INFERENCE_API} ${utils_modules}) + cc_library(paddle_inference DEPS ${fluid_modules} phi ${STATIC_INFERENCE_API} ${utils_modules}) else() create_static_lib(paddle_inference ${fluid_modules} ${phi_modules} ${STATIC_INFERENCE_API} ${utils_modules}) endif() @@ -84,7 +84,7 @@ set(SHARED_INFERENCE_SRCS ${PADDLE_CUSTOM_OP_SRCS}) # shared inference library deps -set(SHARED_INFERENCE_DEPS ${fluid_modules} ${phi_modules} analysis_predictor) +set(SHARED_INFERENCE_DEPS ${fluid_modules} phi analysis_predictor ${utils_modules}) if (WITH_CRYPTO) set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} paddle_crypto) diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt index d1d146b2ce5f6..c713e3a66ac71 100644 --- a/paddle/fluid/inference/tensorrt/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt @@ -1,8 +1,8 @@ # Compiling with WITH_PYTHON=ON and WITH_TENSORRT=ON failed on windows. 
Temporarily add paddle_inference_api dependency to solve the problem if(WIN32) -nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost paddle_inference_api) + nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost paddle_inference_api) else() -nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost) + nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost) endif() nv_library(tensorrt_op_teller SRCS op_teller.cc DEPS framework_proto device_context boost) nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index d43e327393f25..0595ea4d8bddf 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -27,7 +27,7 @@ set(PHI_DEPS convert_utils dense_tensor phi_context kernel_factory kernel_contex get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) set(PHI_DEPS ${PHI_DEPS} ${phi_kernels}) -cc_library(phi DEPS ${PHI_DEPS}) +create_dummy_static_lib(phi LIBS ${PHI_DEPS} LIMIT 100) set(phi_extension_header_file ${CMAKE_CURRENT_SOURCE_DIR}/extension.h CACHE INTERNAL "phi/extension.h file") file(WRITE ${phi_extension_header_file} "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n") diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index a3a71ab692245..437c55c840f1a 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -36,7 +36,7 @@ set(MANUAL_BUILD_KERNELS ${AUTOTUNE_KERNELS} cross_entropy_kernel adam_kernel ad matrix_power_kernel matrix_power_grad_kernel maxout_kernel maxout_grad_kernel pool_kernel 
put_along_axis_kernel put_along_axis_grad_kernel segment_pool_kernel segment_pool_grad_kernel softmax_kernel softmax_grad_kernel take_along_axis_kernel take_along_axis_grad_kernel - triangular_solve_grad_kernel determinant_grad_kernel reduce_sum_kernel rnn_kernel rnn_grad_kernel warpctc_kernel warpctc_grad_kernel) + triangular_solve_grad_kernel determinant_grad_kernel reduce_sum_kernel reduce_mean_kernel rnn_kernel rnn_grad_kernel warpctc_kernel warpctc_grad_kernel) foreach(src ${AUTOTUNE_KERNELS}) kernel_library(${src} DEPS ${COMMON_KERNEL_DEPS} switch_autotune) endforeach() @@ -52,6 +52,7 @@ kernel_library(hierarchical_sigmoid_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matri kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) kernel_library(reduce_sum_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel) +kernel_library(reduce_mean_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel) kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)