From f56e306be4f3c2d1bdd4547f78fa9902157c66c5 Mon Sep 17 00:00:00 2001 From: mroethlin <53755555+mroethlin@users.noreply.github.com> Date: Mon, 18 Jan 2021 13:08:33 +0100 Subject: [PATCH] Introduce Persistent Temporary Fields and Mesh to the Cuda Backend (#1089) ## Technical Description Currently, temporary fields are allocated in the constructor of the generated class. Since the API functions to the FORTRAN and cpp drivers hold the stencil on the stack, this leads to memory (de-)allocation on each call. This is fine for debugging, but not for production runs. Thus, this PR keeps that behavior for the convenience wrappers starting from host memory, but introduces static `setup` and `free` functions which have to be called by the host when using the production interface which assumes device pointers. Additionally, since the APIs are touched either way, globals can now be communicated from FORTRAN to the CUDA backend. Furthermore this PR contains a small refactoring and removes the (now) superfluous template parameter from the generated stencil class ### Resolves / Enhances Addresses part of #1038 Fixes #1042 ### Testing Since this affects the CUDA-ico backend this is tested by `icondusk-e2e` --- dawn/src/dawn/AST/ASTExpr.h | 2 +- .../dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp | 334 ++++++++++-------- .../dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.h | 24 +- dawn/src/dawn/CodeGen/F90Util.h | 36 +- .../GenerateUnstructuredStencils.cpp | 5 +- 5 files changed, 230 insertions(+), 171 deletions(-) diff --git a/dawn/src/dawn/AST/ASTExpr.h b/dawn/src/dawn/AST/ASTExpr.h index 827f8850e..cbf3e702f 100644 --- a/dawn/src/dawn/AST/ASTExpr.h +++ b/dawn/src/dawn/AST/ASTExpr.h @@ -647,7 +647,7 @@ class ReductionOverNeighborExpr : public Expr { // due to current design limitations (getChildren() returning a view into memory), the operands // hold a copy of the (shared pointer to) the weights std::vector> operands_ = std::vector>(2); - bool chainIsValid() const; + bool chainIsValid() const; public: /// @name Constructor & Destructor diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp index 67b32ec3f..5d7d33182 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp @@ -18,6 +18,7 @@ #include "dawn/AST/ASTExpr.h" #include "dawn/AST/IterationSpace.h" #include "dawn/AST/LocationType.h" +#include "dawn/CodeGen/CXXUtil.h" #include "dawn/CodeGen/Cuda-ico/LocToStringUtils.h" #include "dawn/CodeGen/Cuda/CodeGeneratorHelper.h" #include "dawn/CodeGen/F90Util.h" @@ -38,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -140,23 +142,10 @@ void CudaIcoCodeGen::generateGpuMesh( for(auto space : spaces) { gpuMeshClass.addMember("int*", chainToTableString(space)); } - { - auto gpuMeshFromLibCtor = gpuMeshClass.addConstructor(); - gpuMeshFromLibCtor.addArg("const dawn::mesh_t& mesh"); - gpuMeshFromLibCtor.addStatement("NumVertices = mesh.nodes().size()"); - gpuMeshFromLibCtor.addStatement("NumCells = mesh.cells().size()"); - gpuMeshFromLibCtor.addStatement("NumEdges = mesh.edges().size()"); - for(auto space : spaces) { - gpuMeshFromLibCtor.addStatement("gpuErrchk(cudaMalloc((void**)&" + chainToTableString(space) + - ", sizeof(int) * " + chainToDenseSizeStringHostMesh(space) + - "* " + chainToSparseSizeString(space) + "))"); - gpuMeshFromLibCtor.addStatement( - "dawn::generateNbhTable(mesh, " + chainToVectorString(space) + ", " + - chainToDenseSizeStringHostMesh(space) + ", " + chainToSparseSizeString(space) + ", " + - chainToTableString(space) + ", /*include center*/" + std::to_string(space.IncludeCenter) + - ")"); - } + auto gpuMeshDefaultCtor = gpuMeshClass.addConstructor(); + gpuMeshDefaultCtor.startBody(); + gpuMeshDefaultCtor.commit(); } { auto gpuMeshFromGlobalCtor = gpuMeshClass.addConstructor(); @@ -186,6 +175,13 @@ void CudaIcoCodeGen::generateRunFun( const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); + runFun.addBlockStatement("if (!is_setup_)", [&]() { + std::string stencilName = stencilInstantiation->getName(); + runFun.addStatement("printf(\"" + stencilName + + "has not been set up! make sure setup() is called before run!\\n\")"); + runFun.addStatement("return"); + }); + // find block sizes to generate std::set stageLocType; for(const auto& ms : iterateIIROver(*(stencilInstantiation->getIIR()))) { @@ -338,7 +334,8 @@ void CudaIcoCodeGen::generateRunFun( } kernelCall << numElString << ", "; - // which loc size args (int CellIdx, int EdgeIdx, int CellIdx) need to be passed additionally? + // which loc size args (int CellIdx, int EdgeIdx, int CellIdx) need to be passed + // additionally? std::set locArgs; for(auto field : fields) { if(field.second.getFieldDimensions().isVertical()) { @@ -425,102 +422,26 @@ static void allocTempFields(MemberFunction& ctor, const iir::Stencil& stencil, P } } -void CudaIcoCodeGen::generateStencilClassCtr(MemberFunction& ctor, const iir::Stencil& stencil, - const sir::GlobalVariableMap& globalsMap, - CodeGenProperties& codeGenProperties) const { - - // arguments: mesh, kSize, fields - if(!globalsMap.empty()) { - ctor.addArg("globals globals"); - } - - const auto& APIFields = stencil.getMetadata().getAPIFields(); - - ctor.addArg("const dawn::mesh_t& mesh"); - ctor.addArg("int kSize"); - for(auto field : APIFields) { - auto dims = stencil.getMetadata().getFieldDimensions(field); - auto fieldName = stencil.getMetadata().getFieldNameFromAccessID(field); - if(dims.isVertical()) { - ctor.addArg("dawn::vertical_field_t& " + fieldName); - continue; - } - auto hdims = sir::dimension_cast( - dims.getHorizontalFieldDimension()); - if(hdims.isDense()) { - ctor.addArg(locToDenseTypeString(hdims.getDenseLocationType()) + "& " + fieldName); - } else { - ctor.addArg(locToSparseTypeString(hdims.getDenseLocationType()) + "& " + fieldName); - } - } - - // initializers for base class, mesh, kSize - std::string stencilName = - codeGenProperties.getStencilName(StencilContext::SC_Stencil, stencil.getStencilID()); - ctor.addInit("sbase(\"" + stencilName + "\")"); - ctor.addInit("mesh_(mesh)"); - ctor.addInit("kSize_(kSize)"); - if(!globalsMap.empty()) { - ctor.addInit("m_globals(globals)"); - } - - std::stringstream fieldsStr; - { - bool first = true; - for(auto fieldID : APIFields) { - if(!first) { - fieldsStr << ", "; - } - fieldsStr << stencil.getMetadata().getFieldNameFromAccessID(fieldID) + ".data()"; - first = false; - } - } - ctor.addStatement("copy_memory(" + fieldsStr.str() + ", true)"); - allocTempFields(ctor, stencil, codeGenOptions.UnstrPadding); -} - -void CudaIcoCodeGen::generateStencilClassDtr(MemberFunction& stencilClassDtor, - const iir::Stencil& stencil) { - bool requriesDtor = stencil.getMetadata() - .hasAccessesOfType(); - DAWN_ASSERT_MSG(requriesDtor, "only generate dtor for stencils with temporaries!"); +void CudaIcoCodeGen::generateStencilFree(MemberFunction& stencilFree, const iir::Stencil& stencil) { + stencilFree.startBody(); for(auto accessID : stencil.getMetadata() .getAccessesOfType()) { auto fname = stencil.getMetadata().getFieldNameFromAccessID(accessID); - stencilClassDtor.addStatement("gpuErrchk(cudaFree(" + fname + "_))"); + stencilFree.addStatement("gpuErrchk(cudaFree(" + fname + "_))"); } } -void CudaIcoCodeGen::generateStencilClassCtrMinimal(MemberFunction& ctor, - const iir::Stencil& stencil, - const sir::GlobalVariableMap& globalsMap, - CodeGenProperties& codeGenProperties) const { - - if(!globalsMap.empty()) { - ctor.addArg("globals globals"); - } - // arguments: mesh, kSize, fields - ctor.addArg("const dawn::GlobalGpuTriMesh *mesh"); - ctor.addArg("int kSize"); - - // initializers for base class, mesh, kSize - std::string stencilName = - codeGenProperties.getStencilName(StencilContext::SC_Stencil, stencil.getStencilID()); - ctor.addInit("sbase(\"" + stencilName + "\")"); - ctor.addInit("mesh_(mesh)"); - if(!globalsMap.empty()) { - ctor.addInit("m_globals(globals)"); - } - ctor.addInit("kSize_(kSize)"); - - allocTempFields(ctor, stencil, codeGenOptions.UnstrPadding); +void CudaIcoCodeGen::generateStencilSetup(MemberFunction& stencilSetup, + const iir::Stencil& stencil) { + stencilSetup.addStatement("mesh_ = GpuTriMesh(mesh)"); + stencilSetup.addStatement("kSize_ = kSize"); + stencilSetup.addStatement("is_setup_ = true"); + allocTempFields(stencilSetup, stencil, codeGenOptions.UnstrPadding); } void CudaIcoCodeGen::generateCopyMemoryFun(MemberFunction& copyFun, const iir::Stencil& stencil) const { - const auto& APIFields = stencil.getMetadata().getAPIFields(); const auto& stenFields = stencil.getOrderedFields(); auto usedAPIFields = makeRange(APIFields, [&stenFields](int f) { return stenFields.count(f); }); @@ -561,7 +482,6 @@ void CudaIcoCodeGen::generateCopyMemoryFun(MemberFunction& copyFun, void CudaIcoCodeGen::generateCopyPtrFun(MemberFunction& copyFun, const iir::Stencil& stencil) const { - const auto& APIFields = stencil.getMetadata().getAPIFields(); const auto& stenFields = stencil.getOrderedFields(); auto usedAPIFields = makeRange(APIFields, [&stenFields](int f) { return stenFields.count(f); }); @@ -693,7 +613,6 @@ void CudaIcoCodeGen::generateCopyBackFun(MemberFunction& copyBackFun, const iir: void CudaIcoCodeGen::generateStencilClasses( const std::shared_ptr& stencilInstantiation, Class& stencilWrapperClass, CodeGenProperties& codeGenProperties) { - const auto& stencils = stencilInstantiation->getStencils(); const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); @@ -711,11 +630,19 @@ void CudaIcoCodeGen::generateStencilClasses( // generate members (fields + kSize + gpuMesh) stencilClass.changeAccessibility("private"); + auto temporaries = stencil.getMetadata() + .getAccessesOfType(); for(auto field : support::orderMap(stencil.getFields())) { - stencilClass.addMember("::dawn::float_type*", field.second.Name + "_"); + if(temporaries.count(stencil.getMetadata().getAccessIDFromName(field.second.Name))) { + stencilClass.addMember("static ::dawn::float_type*", field.second.Name + "_"); + } else { + stencilClass.addMember("::dawn::float_type*", field.second.Name + "_"); + } } - stencilClass.addMember("int", "kSize_ = 0"); - stencilClass.addMember("GpuTriMesh", "mesh_"); + stencilClass.addMember("static int", "kSize_"); + stencilClass.addMember("static GpuTriMesh", "mesh_"); + stencilClass.addMember("static bool", "is_setup_"); stencilClass.changeAccessibility("public"); @@ -724,17 +651,15 @@ void CudaIcoCodeGen::generateStencilClasses( } // constructor from library - auto stencilClassConstructor = stencilClass.addConstructor(); - generateStencilClassCtr(stencilClassConstructor, stencil, globalsMap, codeGenProperties); - stencilClassConstructor.commit(); - - if(stencil.getMetadata() - .hasAccessesOfType()) { - auto stencilClassDestructor = stencilClass.addDestructor(false /*isVirtual*/); - generateStencilClassDtr(stencilClassDestructor, stencil); - stencilClassDestructor.commit(); - } + auto stencilClassFree = stencilClass.addMemberFunction("static void", "free"); + generateStencilFree(stencilClassFree, stencil); + stencilClassFree.commit(); + + auto stencilClassSetup = stencilClass.addMemberFunction("static void", "setup"); + stencilClassSetup.addArg("const dawn::GlobalGpuTriMesh *mesh"); + stencilClassSetup.addArg("int kSize"); + generateStencilSetup(stencilClassSetup, stencil); + stencilClassSetup.commit(); // grid helper fun // can not be placed in cuda utils sinze it needs LEVELS_PER_THREAD and BLOCK_SIZE, which @@ -746,10 +671,10 @@ void CudaIcoCodeGen::generateStencilClasses( gridFun.commit(); // minmal ctor - auto stencilClassMinimalConstructor = stencilClass.addConstructor(); - generateStencilClassCtrMinimal(stencilClassMinimalConstructor, stencil, globalsMap, - codeGenProperties); - stencilClassMinimalConstructor.commit(); + auto stencilClassDefaultConstructor = stencilClass.addConstructor(); + stencilClassDefaultConstructor.addInit("sbase(\"" + stencilName + "\")"); + stencilClassDefaultConstructor.startBody(); + stencilClassDefaultConstructor.commit(); // run method auto runFun = stencilClass.addMemberFunction("void", "run"); @@ -757,12 +682,8 @@ void CudaIcoCodeGen::generateStencilClasses( runFun.commit(); // copy back fun - auto copyBackFunInterface = stencilClass.addMemberFunction("void", "CopyResultToHost"); - generateCopyBackFun(copyBackFunInterface, stencil, true); - copyBackFunInterface.commit(); - auto copyBackFunRawPtr = stencilClass.addMemberFunction("void", "CopyResultToHost"); - generateCopyBackFun(copyBackFunRawPtr, stencil, false); + generateCopyBackFun(copyBackFunRawPtr, stencil, true); copyBackFunRawPtr.commit(); // copy to funs @@ -808,13 +729,34 @@ void CudaIcoCodeGen::generateAllAPIRunFunctions( "double", "run_" + wrapperName, apiRunFunStreams[0], /*indent level*/ 0, onlyDecl)); } + const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); + auto addExplodedGlobals = [](const sir::GlobalVariableMap& globalsMap, MemberFunction& fun) { + for(const auto& global : globalsMap) { + std::string Name = global.first; + std::string Type = sir::Value::typeToString(global.second.getType()); + fun.addArg(Type + " " + Name); + } + }; + + if(fromHost) { + for(auto& apiRunFun : apiRunFuns) { + apiRunFun->addArg("dawn::GlobalGpuTriMesh *mesh"); + apiRunFun->addArg("int k_size"); + } + if(!globalsMap.empty()) { + apiRunFuns[0]->addArg("globals globals"); + } + addExplodedGlobals(globalsMap, *apiRunFuns[1]); + } else { + addExplodedGlobals(globalsMap, *apiRunFuns[0]); + } for(auto& apiRunFun : apiRunFuns) { - apiRunFun->addArg("dawn::GlobalGpuTriMesh *mesh"); - apiRunFun->addArg("int k_size"); for(auto accessID : stencilInstantiation->getMetaData().getAPIFields()) { apiRunFun->addArg("::dawn::float_type *" + stencilInstantiation->getMetaData().getNameFromAccessID(accessID)); } + } + for(auto& apiRunFun : apiRunFuns) { apiRunFun->finishArgs(); } @@ -865,18 +807,34 @@ void CudaIcoCodeGen::generateAllAPIRunFunctions( const std::string stencilName = codeGenProperties.getStencilName(StencilContext::SC_Stencil, stencil.getStencilID()); + const std::string fullStencilName = + "dawn_generated::cuda_ico::" + wrapperName + "::" + stencilName; + + auto copyGlobals = [](const sir::GlobalVariableMap& globalsMap, MemberFunction& fun, + bool wrapped) { + for(const auto& global : globalsMap) { + std::string Name = global.first; + std::string Type = sir::Value::typeToString(global.second.getType()); + fun.addStatement("s.set_" + Name + "(" + (wrapped ? "globals." + Name : Name) + ")"); + } + }; for(auto& apiRunFun : apiRunFuns) { - apiRunFun->addStatement("dawn_generated::cuda_ico::" + wrapperName + - "::" + stencilName + " s(mesh, k_size)"); + apiRunFun->addStatement(fullStencilName + " s"); } if(fromHost) { + for(auto& apiRunFun : apiRunFuns) { + apiRunFun->addStatement(fullStencilName + "::setup(mesh, k_size)"); + } // depending if we are calling from c or from fortran, we need to transpose the data or // not apiRunFuns[0]->addStatement("s.copy_memory(" + fieldsStr.str() + ", true)"); apiRunFuns[1]->addStatement("s.copy_memory(" + fieldsStr.str() + ", false)"); + copyGlobals(globalsMap, *apiRunFuns[0], true); + copyGlobals(globalsMap, *apiRunFuns[1], false); } else { apiRunFuns[0]->addStatement("s.copy_pointers(" + fieldsStr.str() + ")"); + copyGlobals(globalsMap, *apiRunFuns[0], false); } for(auto& apiRunFun : apiRunFuns) { apiRunFun->addStatement("s.run()"); @@ -886,6 +844,9 @@ void CudaIcoCodeGen::generateAllAPIRunFunctions( if(fromHost) { apiRunFuns[0]->addStatement("s.CopyResultToHost(" + ioFieldStr.str() + ", true)"); apiRunFuns[1]->addStatement("s.CopyResultToHost(" + ioFieldStr.str() + ", false)"); + for(auto& apiRunFun : apiRunFuns) { + apiRunFun->addStatement(fullStencilName + "::free()"); + } } for(auto& apiRunFun : apiRunFuns) { apiRunFun->addStatement("return time"); @@ -898,20 +859,76 @@ void CudaIcoCodeGen::generateAllAPIRunFunctions( } } else { - for(auto& apiRunFun : apiRunFuns) { - apiRunFun->commit(); - } for(const auto& stream : apiRunFunStreams) { ssSW << stream.str() << ";\n"; } + for(auto& apiRunFun : apiRunFuns) { + apiRunFun->commit(); + } } } } +void CudaIcoCodeGen::generateMemMgmtFunctions( + std::stringstream& ssSW, const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties, bool onlyDecl) const { + const std::string wrapperName = stencilInstantiation->getName(); + std::string stencilName = codeGenProperties.getStencilName( + StencilContext::SC_Stencil, stencilInstantiation->getStencils()[0]->getStencilID()); + const std::string fullStencilName = + "dawn_generated::cuda_ico::" + wrapperName + "::" + stencilName; + + MemberFunction setupFun("void", "setup_" + wrapperName, ssSW, 0, onlyDecl); + setupFun.addArg("dawn::GlobalGpuTriMesh *mesh"); + setupFun.addArg("int k_size"); + setupFun.finishArgs(); + if(!onlyDecl) { + setupFun.addStatement(fullStencilName + "::setup(mesh, k_size)"); + } + if(onlyDecl) { + ssSW << ";"; + } + setupFun.commit(); + + MemberFunction freeFun("void", "free_" + wrapperName, ssSW, 0, onlyDecl); + freeFun.finishArgs(); + if(!onlyDecl) { + freeFun.startBody(); + freeFun.addStatement(fullStencilName + "::free()"); + } + if(onlyDecl) { + ssSW << ";"; + } + freeFun.commit(); +} + +void CudaIcoCodeGen::generateStaticMembersTrailer( + std::stringstream& ssSW, const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties) const { + auto& stencil = stencilInstantiation->getStencils()[0]; + const std::string wrapperName = stencilInstantiation->getName(); + std::string stencilName = + codeGenProperties.getStencilName(StencilContext::SC_Stencil, stencil->getStencilID()); + const std::string fullStencilName = + "dawn_generated::cuda_ico::" + wrapperName + "::" + stencilName; + + for(auto accessID : stencil->getMetadata() + .getAccessesOfType()) { + auto fname = stencil->getMetadata().getFieldNameFromAccessID(accessID); + ssSW << "::dawn::float_type *" << fullStencilName << "::" << fname << "_;\n"; + } + ssSW << "int " << fullStencilName << "::" + << "kSize_;\n"; + ssSW << "bool " << fullStencilName << "::" + << "is_setup_ = false;\n"; + ssSW << "dawn_generated::cuda_ico::" + wrapperName << "::GpuTriMesh " << fullStencilName << "::" + << "mesh_;\n"; +} + void CudaIcoCodeGen::generateAllCudaKernels( std::stringstream& ssSW, const std::shared_ptr& stencilInstantiation) { - ASTStencilBody stencilBodyCXXVisitor(stencilInstantiation->getMetaData(), codeGenOptions.UnstrPadding); const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); @@ -1067,7 +1084,6 @@ void CudaIcoCodeGen::generateAllCudaKernels( std::string CudaIcoCodeGen::generateStencilInstantiation( const std::shared_ptr& stencilInstantiation) { - using namespace codegen; std::stringstream ssSW; @@ -1092,7 +1108,7 @@ std::string CudaIcoCodeGen::generateStencilInstantiation( ss << "int " + chainToSparseSizeString(space) << " "; first = false; } - Class stencilWrapperClass(stencilInstantiation->getName(), ssSW, "typename LibTag"); + Class stencilWrapperClass(stencilInstantiation->getName(), ssSW); for(auto space : spaces) { std::string spaceStr = std::to_string(ICOChainSize(space)); if(space.IncludeCenter) { @@ -1130,7 +1146,9 @@ std::string CudaIcoCodeGen::generateStencilInstantiation( bool fromHost = true; generateAllAPIRunFunctions(ssSW, stencilInstantiation, codeGenProperties, fromHost); generateAllAPIRunFunctions(ssSW, stencilInstantiation, codeGenProperties, !fromHost); + generateMemMgmtFunctions(ssSW, stencilInstantiation, codeGenProperties); ssSW << "}\n"; + generateStaticMembersTrailer(ssSW, stencilInstantiation, codeGenProperties); return ssSW.str(); } @@ -1148,6 +1166,7 @@ void CudaIcoCodeGen::generateCHeaderSI( /*onlyDecl=*/true); generateAllAPIRunFunctions(ssSW, stencilInstantiation, codeGenProperties, !fromHost, /*onlyDecl=*/true); + generateMemMgmtFunctions(ssSW, stencilInstantiation, codeGenProperties, /*onlyDecl=*/true); ssSW << "}\n"; } @@ -1169,11 +1188,27 @@ static void generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, const std::shared_ptr& stencilInstantiation) { const auto& stencils = stencilInstantiation->getStencils(); + const auto& globalsMap = stencilInstantiation->getIIR()->getGlobalVariableMap(); + auto globalTypeToFortType = [](const sir::Global& global) { + switch(global.getType()) { + case sir::Value::Kind::Boolean: + return FortranInterfaceAPI::InterfaceType::BOOLEAN; + case sir::Value::Kind::Double: + return FortranInterfaceAPI::InterfaceType::DOUBLE; + case sir::Value::Kind::Float: + return FortranInterfaceAPI::InterfaceType::FLOAT; + case sir::Value::Kind::Integer: + return FortranInterfaceAPI::InterfaceType::INTEGER; + case sir::Value::Kind::String: + default: + throw std::runtime_error("string globals not supported in cuda ico backend"); + } + }; // The following assert is needed because we have only one (user-defined) name for a stencil // instantiation (stencilInstantiation->getName()). We could compute a per-stencil name ( - // codeGenProperties.getStencilName(StencilContext::SC_Stencil, stencil.getStencilID()) ) however - // the interface would not be very useful if the name is generated. + // codeGenProperties.getStencilName(StencilContext::SC_Stencil, stencil.getStencilID()) ) + // however the interface would not be very useful if the name is generated. DAWN_ASSERT_MSG(stencils.size() <= 1, "Unable to generate interface. More than one stencil in stencil instantiation."); @@ -1182,11 +1217,15 @@ generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, FortranInterfaceAPI::InterfaceType::DOUBLE), FortranInterfaceAPI("run_" + stencilInstantiation->getName() + "_from_fort_host", FortranInterfaceAPI::InterfaceType::DOUBLE)}; + + // only from host convenience wrapper takes mesh and k_size + apis[1].addArg("mesh", FortranInterfaceAPI::InterfaceType::OBJ); + apis[1].addArg("k_size", FortranInterfaceAPI::InterfaceType::INTEGER); for(auto&& api : apis) { - api.addArg("mesh", FortranInterfaceAPI::InterfaceType::OBJ); - api.addArg("k_size", FortranInterfaceAPI::InterfaceType::INTEGER); + for(const auto& global : globalsMap) { + api.addArg(global.first, globalTypeToFortType(global.second)); + } for(auto fieldID : stencilInstantiation->getMetaData().getAPIFields()) { - api.addArg( stencilInstantiation->getMetaData().getNameFromAccessID(fieldID), FortranInterfaceAPI::InterfaceType::DOUBLE /* Unfortunately we need to know at codegen @@ -1197,6 +1236,15 @@ generateF90InterfaceSI(FortranInterfaceModuleGen& fimGen, fimGen.addAPI(std::move(api)); } + + // memory management functions for production interface + FortranInterfaceAPI setup("setup_" + stencilInstantiation->getName()); + FortranInterfaceAPI free("free_" + stencilInstantiation->getName()); + setup.addArg("mesh", FortranInterfaceAPI::InterfaceType::OBJ); + setup.addArg("k_size", FortranInterfaceAPI::InterfaceType::INTEGER); + + fimGen.addAPI(std::move(setup)); + fimGen.addAPI(std::move(free)); } std::string CudaIcoCodeGen::generateF90Interface(std::string moduleName) const { @@ -1216,7 +1264,6 @@ std::string CudaIcoCodeGen::generateF90Interface(std::string moduleName) const { } std::unique_ptr CudaIcoCodeGen::generateCode() { - DAWN_LOG(INFO) << "Starting code generation for ..."; // Generate code for StencilInstantiations @@ -1276,7 +1323,6 @@ std::unique_ptr CudaIcoCodeGen::generateCode() { return std::make_unique(filename, std::move(ppDefines), std::move(stencils), std::move(globals)); } - } // namespace cudaico } // namespace codegen } // namespace dawn diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.h b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.h index ba34cefec..7fd27460f 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.h +++ b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.h @@ -78,6 +78,16 @@ class CudaIcoCodeGen : public CodeGen { CodeGenProperties& codeGenProperties, bool fromHost, bool onlyDecl = false) const; + void + generateMemMgmtFunctions(std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties, bool onlyDecl = false) const; + + void generateStaticMembersTrailer( + std::stringstream& ssSW, + const std::shared_ptr& stencilInstantiation, + CodeGenProperties& codeGenProperties) const; + void generateGpuMesh(const std::shared_ptr& stencilInstantiation, Class& stencilWrapperClass, CodeGenProperties& codeGenProperties); @@ -86,18 +96,8 @@ class CudaIcoCodeGen : public CodeGen { void generateGridFun(MemberFunction& runFun); - void generateStencilClassCtr(MemberFunction& stencilClassCtor, const iir::Stencil& stencil, - const sir::GlobalVariableMap& globalsMap, - CodeGenProperties& codeGenProperties) const; - - void generateStencilClassDtr(MemberFunction& stencilClassDtor, const iir::Stencil& stencil); - - void generateStencilClassCtrMinimal(MemberFunction& stencilClassCtor, const iir::Stencil& stencil, - const sir::GlobalVariableMap& globalsMap, - CodeGenProperties& codeGenProperties) const; - - void generateStencilClassRawPtrCtr(MemberFunction& stencilClassCtor, const iir::Stencil& stencil, - CodeGenProperties& codeGenProperties) const; + void generateStencilFree(MemberFunction& stencilClassDtor, const iir::Stencil& stencil); + void generateStencilSetup(MemberFunction& stencilClassDtor, const iir::Stencil& stencil); void generateCopyBackFun(MemberFunction& copyBackFun, const iir::Stencil& stencil, bool rawPtrs) const; diff --git a/dawn/src/dawn/CodeGen/F90Util.h b/dawn/src/dawn/CodeGen/F90Util.h index 4d1c48dd1..4e1104fb6 100644 --- a/dawn/src/dawn/CodeGen/F90Util.h +++ b/dawn/src/dawn/CodeGen/F90Util.h @@ -74,7 +74,7 @@ class IndentedStringStream { class FortranInterfaceAPI { public: - enum class InterfaceType { INTEGER, FLOAT, DOUBLE, CHAR, OBJ }; + enum class InterfaceType { INTEGER, FLOAT, DOUBLE, CHAR, BOOLEAN, OBJ }; FortranInterfaceAPI(std::string name, std::optional returnType = std::nullopt) : name_(name) { if(returnType) { @@ -96,6 +96,8 @@ class FortranInterfaceAPI { return "real(c_float)"; case InterfaceType::CHAR: return "character(kind=c_char)"; + case InterfaceType::BOOLEAN: + return "logical(c_bool)"; case InterfaceType::OBJ: return "type(c_ptr)"; } @@ -105,7 +107,10 @@ class FortranInterfaceAPI { void streamInterface(IndentedStringStream& ss) const { bool isFunction = returnType_ != ""; ss << (isFunction ? returnType_ + " function" : std::string("subroutine")) << " &" << endline; - ss << name_ << "( &"; + ss << name_ << "( "; + if(args_.size() > 0) { + ss << "&"; + } { std::string sep; for(const auto& arg : args_) { @@ -113,7 +118,9 @@ class FortranInterfaceAPI { sep = ", &"; } } - ss << " &" << endline; + if(args_.size() > 0) { + ss << " &" << endline; + } ss << ") bind(c)" << endline; ss.increaseIndent(); @@ -123,15 +130,20 @@ class FortranInterfaceAPI { if(std::get<2>(arg) == 0) { ss << "value"; } else { - ss << "dimension("; - { - std::string sep; - for(int c = 0; c < std::get<2>(arg); ++c) { - ss << sep << ":"; - sep = ","; - } - } - ss << ")"; + ss << "dimension(*)"; + // NOTE: this is what we would want. However, this leads to seg faults when being called + // from a FORTRAN host (but not with OpenACC ptrs for reasons currentyl not well + // understood). We can re-introduce rank safety on the wrap() / verify() level + // + // ss << "dimension("; + // { + // std::string sep; + // for(int c = 0; c < std::get<2>(arg); ++c) { + // ss << sep << ":"; + // sep = ","; + // } + // } + // ss << ")"; } ss << ", target :: " << std::get<0>(arg) << endline; }); diff --git a/dawn/test/integration-test/unstructured/GenerateUnstructuredStencils.cpp b/dawn/test/integration-test/unstructured/GenerateUnstructuredStencils.cpp index 8ef516330..dcb540f26 100644 --- a/dawn/test/integration-test/unstructured/GenerateUnstructuredStencils.cpp +++ b/dawn/test/integration-test/unstructured/GenerateUnstructuredStencils.cpp @@ -1161,8 +1161,9 @@ int main() { b.stmt(b.assignExpr(b.at(c_f), b.lit(1.))))), b.stage(LocType::Edges, b.doMethod(dawn::sir::Interval::Start, dawn::sir::Interval::End, b.stmt(b.assignExpr(b.at(e_f), b.lit(1.))))), - b.stage(LocType::Vertices, b.doMethod(dawn::sir::Interval::Start, dawn::sir::Interval::End, - b.stmt(b.assignExpr(b.at(v_f), b.lit(1.)))))))); + b.stage(LocType::Vertices, + b.doMethod(dawn::sir::Interval::Start, dawn::sir::Interval::End, + b.stmt(b.assignExpr(b.at(v_f), b.lit(1.)))))))); std::ofstream of("generated/generated_" + stencilName + ".hpp"); DAWN_ASSERT_MSG(of, "couldn't open output file!\n");