diff --git a/dawn/src/dawn/AST/ASTExpr.cpp b/dawn/src/dawn/AST/ASTExpr.cpp index 4183609f7..6db0bafc4 100644 --- a/dawn/src/dawn/AST/ASTExpr.cpp +++ b/dawn/src/dawn/AST/ASTExpr.cpp @@ -459,17 +459,16 @@ ReductionOverNeighborExpr::ReductionOverNeighborExpr(std::string const& op, std::shared_ptr const& rhs, std::shared_ptr const& init, std::vector chain, - bool includeCenter, SourceLocation loc) + bool includeCenter, std::vector offsets, + SourceLocation loc) : Expr(Kind::ReductionOverNeighborExpr, loc), op_(op), - iterSpace_(std::move(chain), includeCenter), operands_{rhs, init} {} + iterSpace_(std::move(chain), includeCenter), operands_{rhs, init}, offsets_(offsets) {} -ReductionOverNeighborExpr::ReductionOverNeighborExpr(std::string const& op, - std::shared_ptr const& rhs, - std::shared_ptr const& init, - std::vector> weights, - std::vector chain, - bool includeCenter, SourceLocation loc) - : ReductionOverNeighborExpr(op, rhs, init, chain, includeCenter, loc) { +ReductionOverNeighborExpr::ReductionOverNeighborExpr( + std::string const& op, std::shared_ptr const& rhs, std::shared_ptr const& init, + std::vector> weights, std::vector chain, + bool includeCenter, std::vector offsets, SourceLocation loc) + : ReductionOverNeighborExpr(op, rhs, init, chain, includeCenter, offsets, loc) { DAWN_ASSERT_MSG(weights.size() > 0, "empty weights vector passed!\n"); weights_ = weights; operands_.insert(operands_.end(), weights.begin(), weights.end()); @@ -477,7 +476,8 @@ ReductionOverNeighborExpr::ReductionOverNeighborExpr(std::string const& op, ReductionOverNeighborExpr::ReductionOverNeighborExpr(ReductionOverNeighborExpr const& expr) : Expr(Kind::ReductionOverNeighborExpr, expr.getSourceLocation()), op_(expr.getOp()), - weights_(expr.getWeights()), iterSpace_(expr.iterSpace_), operands_(expr.operands_) {} + weights_(expr.getWeights()), iterSpace_(expr.iterSpace_), operands_(expr.operands_), + offsets_(expr.offsets_) {} ReductionOverNeighborExpr& ReductionOverNeighborExpr::operator=(ReductionOverNeighborExpr const& expr) { @@ -486,6 +486,7 @@ ReductionOverNeighborExpr::operator=(ReductionOverNeighborExpr const& expr) { operands_ = expr.operands_; iterSpace_ = expr.iterSpace_; weights_ = expr.getWeights(); + offsets_ = expr.offsets_; return *this; } @@ -516,7 +517,7 @@ bool ReductionOverNeighborExpr::equals(const Expr* other, bool compareData) cons return otherPtr && otherPtr->getInit()->equals(getInit().get(), compareData) && otherPtr->getOp() == getOp() && otherPtr->getRhs()->equals(getRhs().get(), compareData) && - otherPtr->iterSpace_ == iterSpace_; + otherPtr->iterSpace_ == iterSpace_ && offsets_ == otherPtr->offsets_; } bool ReductionOverNeighborExpr::isArithmetic() const { diff --git a/dawn/src/dawn/AST/ASTExpr.h b/dawn/src/dawn/AST/ASTExpr.h index 97c7c5e0b..c1c67dd13 100644 --- a/dawn/src/dawn/AST/ASTExpr.h +++ b/dawn/src/dawn/AST/ASTExpr.h @@ -647,6 +647,7 @@ class ReductionOverNeighborExpr : public Expr { // hold a copy of the (shared pointer to) the weights std::vector> operands_ = std::vector>(2); bool chainIsValid() const; + std::vector offsets_ = {}; public: inline static const std::vector arithmeticOps{"+", "-", "*", "/", "%"}; @@ -654,12 +655,13 @@ class ReductionOverNeighborExpr : public Expr { /// @{ ReductionOverNeighborExpr(std::string const& op, std::shared_ptr const& rhs, std::shared_ptr const& init, std::vector chain, - bool includeCenter = false, SourceLocation loc = SourceLocation()); + bool includeCenter = false, std::vector offsets_ = {}, + SourceLocation loc = SourceLocation()); ReductionOverNeighborExpr(std::string const& op, std::shared_ptr const& rhs, std::shared_ptr const& init, std::vector> weights, std::vector chain, bool includeCenter = false, - SourceLocation loc = SourceLocation()); + std::vector offsets_ = {}, SourceLocation loc = SourceLocation()); ReductionOverNeighborExpr(ReductionOverNeighborExpr const& stmt); ReductionOverNeighborExpr& operator=(ReductionOverNeighborExpr const& stmt); /// @} @@ -672,6 +674,7 @@ class ReductionOverNeighborExpr : public Expr { std::vector getNbhChain() const { return iterSpace_; }; ast::LocationType getLhsLocation() const { return iterSpace_.Chain.front(); }; const std::optional>>& getWeights() const { return weights_; }; + const std::vector& getOffsets() const { return offsets_; }; bool getIncludeCenter() const { return iterSpace_.IncludeCenter; }; ast::UnstructuredIterationSpace getIterSpace() const { return iterSpace_; } diff --git a/dawn/src/dawn/AST/proto/AST/statements.proto b/dawn/src/dawn/AST/proto/AST/statements.proto index 491b384df..e7c3e9054 100644 --- a/dawn/src/dawn/AST/proto/AST/statements.proto +++ b/dawn/src/dawn/AST/proto/AST/statements.proto @@ -48,9 +48,7 @@ message CartesianDimension { // It could also have a sparse dimension. In such case the sparse part is // non-empty. // @ingroup ast_proto -message UnstructuredDimension { - UnstructuredIterationSpace iter_space = 1; -} +message UnstructuredDimension { UnstructuredIterationSpace iter_space = 1; } // @brief Dimensions spanned by a field // @@ -323,9 +321,7 @@ message LoopDescriptorGeneral { // dummy message for future use } -message LoopDescriptorChain { - UnstructuredIterationSpace iter_space = 1; -} +message LoopDescriptorChain { UnstructuredIterationSpace iter_space = 1; } message LoopDescriptor { oneof desc { @@ -335,7 +331,7 @@ message LoopDescriptor { } message LoopStmt { - Stmt statements = 1; // List of statements (must be a BlockStmt) + Stmt statements = 1; // List of statements (must be a BlockStmt) LoopDescriptor loop_descriptor = 2; // Loop bounds description SourceLocation loc = 3; // Source location StmtData data = 4; // Generic Stmt's data container @@ -366,8 +362,7 @@ message ReturnStmt { // // @ingroup ast_proto message VarDeclStmtData { - google.protobuf.Int32Value accessID = - 1; // ID of the variable declared in the statement + google.protobuf.Int32Value accessID = 1; // ID of the variable declared in the statement } // @brief Declaration of a variable @@ -543,7 +538,7 @@ message FunCallExpr { // @endcode // @ingroup ast_proto message StencilFunCallExpr { - string callee = 1; // Identifier of the stencil function (i.e callee) + string callee = 1; // Identifier of the stencil function (i.e callee) repeated Expr arguments = 2; // List of arguments SourceLocation loc = 3; // Source location int32 ID = 4; // ID of the Expr @@ -601,18 +596,16 @@ message StencilFunArgExpr { // // @ingroup ast_proto message AccessExprData { - google.protobuf.Int32Value accessID = - 1; // Access ID of variable/literal/field accessed + google.protobuf.Int32Value accessID = 1; // Access ID of variable/literal/field accessed } // @brief Access to a variable // // @ingroup ast_proto message VarAccessExpr { - string name = 1; // Name of the variable - Expr index = 2; // Is it an array access (i.e var[2])? - bool is_external = - 3; // Is this an access to a external variable (e.g a global)? + string name = 1; // Name of the variable + Expr index = 2; // Is it an array access (i.e var[2])? + bool is_external = 3; // Is this an access to a external variable (e.g a global)? SourceLocation loc = 4; // Source location AccessExprData data = 5; // Access data int32 ID = 6; // ID of the Expr @@ -709,15 +702,15 @@ message LiteralAccessExpr { // // @ingroup ast_proto message ReductionOverNeighborExpr { - string op = 1; // Reduction operation - Expr rhs = 2; // Operation to be applied for each neighbor - Expr init = 3; // Initial value of reduction - repeated Expr weights = - 4; // weights (required to be of equal type, e.g. just floats) + string op = 1; // Reduction operation + Expr rhs = 2; // Operation to be applied for each neighbor + Expr init = 3; // Initial value of reduction + repeated Expr weights = 4; // weights (required to be of equal type, e.g. just floats) UnstructuredIterationSpace iter_space = 5; // Neighbor chain definining the neighbors to reduce from and the // location type to reduce to (first element) - SourceLocation loc = 6; + SourceLocation loc = 6; + repeated int32 offsets = 7; } // @brief Abstract syntax tree of the AST @@ -760,4 +753,3 @@ message GlobalVariableValue { message GlobalVariableMap { map map = 1; // Mape of global variables (name to value) } - diff --git a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp index e60772d24..213ca6443 100644 --- a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp +++ b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp @@ -21,6 +21,8 @@ #include "dawn/IIR/StencilFunctionInstantiation.h" #include "dawn/SIR/SIR.h" #include "dawn/Support/Unreachable.h" +#include +#include static std::string nbhChainToVectorString(const std::vector& chain) { auto getLocationTypeString = [](dawn::ast::LocationType type) { @@ -228,6 +230,10 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + offsets_->pop_front(); + } return "deref(LibTag{}, " + sparseArgName_ + ")," + sparseIdx + ", " + kiterStr; } @@ -256,6 +262,10 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + offsets_->pop_front(); + } return "deref(LibTag{}, " + sparseArgName_ + ")," + sparseIdx; } @@ -408,10 +418,7 @@ void ASTStencilBody::visit(const std::shared_ptr reductionDepth_++; expr->getRhs()->accept(*this); reductionDepth_--; - if(reductionDepth_ == 0) { - parentIsReduction_ = false; - currentChain_.clear(); - } + // "pop" argName denseArgName_ = argName; if(!expr->isArithmetic()) { @@ -421,6 +428,9 @@ void ASTStencilBody::visit(const std::shared_ptr ss_ << ASTStencilBody::ReductionSparseIndexVarName(reductionDepth_) << "++;\n"; ss_ << "return lhs;\n"; ss_ << "}"; + if(!expr->getOffsets().empty()) { + offsets_ = std::deque(expr->getOffsets().begin(), expr->getOffsets().end()); + } if(hasWeights) { auto weights = expr->getWeights().value(); bool first = true; @@ -440,6 +450,12 @@ void ASTStencilBody::visit(const std::shared_ptr ss_ << ", /*include center*/ true"; } ss_ << ")"; + offsets_ = std::nullopt; + + if(reductionDepth_ == 0) { + parentIsReduction_ = false; + currentChain_.clear(); + } } void ASTStencilBody::setCurrentStencilFunction( diff --git a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.h b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.h index 430931156..924844570 100644 --- a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.h +++ b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.h @@ -62,6 +62,7 @@ class ASTStencilBody : public ASTCodeGenCXX { bool parentIsReduction_ = false; bool parentIsForLoop_ = false; std::vector currentChain_; + std::optional> offsets_; size_t reductionDepth_ = 0; diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp index 67cb2ab7e..d1e47791b 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp @@ -19,6 +19,7 @@ #include "dawn/IIR/ASTExpr.h" #include #include +#include #include #include #include @@ -133,7 +134,7 @@ void ASTStencilBody::visit(const std::shared_ptr& expr) { } std::string ASTStencilBody::makeIndexString(const std::shared_ptr& expr, - std::string kiterStr) const { + std::string kiterStr) { bool isVertical = metadata_.getFieldDimensions(iir::getAccessID(expr)).isVertical(); if(isVertical) { return kiterStr; @@ -164,7 +165,12 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + offsets_->pop_front(); + } + return nbhIter + " * kSize * " + denseSize + " + " + kiterStr + "*" + denseSize + " + " + pidxStr(); } @@ -182,8 +188,13 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + offsets_->pop_front(); + } std::string sparseSize = chainToSparseSizeString(unstrDims.getIterSpace()); - return nbhIterStr() + " * " + denseSize + " + " + pidxStr(); + return nbhIter + " * " + denseSize + " + " + pidxStr(); } DAWN_ASSERT_MSG(false, "Bad Field configuration found in code gen!"); @@ -311,6 +322,9 @@ void ASTStencilBody::evalNeighbourReduction( expr->getInit()->accept(*this); ss_ << ";\n"; auto weights = expr->getWeights(); + if(!expr->getOffsets().empty()) { + offsets_ = std::deque(expr->getOffsets().begin(), expr->getOffsets().end()); + } if(weights.has_value()) { ss_ << "::dawn::float_type " << weights_name << "[" << weights->size() << "] = {"; bool first = true; @@ -322,6 +336,7 @@ void ASTStencilBody::evalNeighbourReduction( first = false; } ss_ << "};\n"; + offsets_ = std::nullopt; } ss_ << "for (int " + nbhIterStr() + " = 0; " + nbhIterStr() + " < " diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h index 447fb3575..311432b67 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h +++ b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h @@ -62,6 +62,7 @@ class ASTStencilBody : public ASTCodeGenCXX { bool parentIsReduction_ = false; bool parentIsForLoop_ = false; + std::optional> offsets_; bool genAtlasCompatCode_ = false; std::map> reductionParser_; @@ -70,8 +71,7 @@ class ASTStencilBody : public ASTCodeGenCXX { /// Nesting level of argument lists of stencil function *calls* int nestingOfStencilFunArgLists_; - std::string makeIndexString(const std::shared_ptr& expr, - std::string kiter) const; + std::string makeIndexString(const std::shared_ptr& expr, std::string kiter); bool hasIrregularPentagons(const std::vector& chain) const; void evalNeighbourReduction(const std::shared_ptr& expr); void generateNeighbourRedLoop(std::stringstream& ss) const; diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp index 839ceed1e..de30b3866 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp @@ -24,12 +24,14 @@ #include "dawn/CodeGen/Cuda/CodeGeneratorHelper.h" #include "dawn/CodeGen/F90Util.h" #include "dawn/CodeGen/IcoChainSizes.h" +#include "dawn/IIR/ASTExpr.h" #include "dawn/IIR/Field.h" #include "dawn/IIR/Interval.h" #include "dawn/IIR/LoopOrder.h" #include "dawn/IIR/MultiStage.h" #include "dawn/IIR/Stage.h" #include "dawn/IIR/Stencil.h" +#include "dawn/IIR/StencilMetaInformation.h" #include "dawn/SIR/SIR.h" #include "dawn/Support/Assert.h" #include "dawn/Support/Exception.h" @@ -184,6 +186,11 @@ class CollectIterationSpaces : public ast::ASTVisitorForwardingNonConst { for(auto c : expr->getChildren()) { c->accept(*this); } + if(expr->getWeights().has_value()) { + for(auto w : expr->getWeights().value()) { + w->accept(*this); + } + } } void visit(const std::shared_ptr& stmt) override { @@ -194,12 +201,26 @@ class CollectIterationSpaces : public ast::ASTVisitorForwardingNonConst { } } + void visit(const std::shared_ptr& expr) override { + if(metadata_.getFieldDimensions(iir::getAccessID(expr)).isVertical()) { + return; + } + auto unstrDim = ast::dimension_cast( + metadata_.getFieldDimensions(iir::getAccessID(expr)).getHorizontalFieldDimension()); + if(unstrDim.isSparse()) { + spaces_.insert(unstrDim.getIterSpace()); + } + } + const std::unordered_set& getSpaces() const { return spaces_; } + CollectIterationSpaces(const iir::StencilMetaInformation& metadata) : metadata_(metadata) {} + private: std::unordered_set spaces_; + const iir::StencilMetaInformation& metadata_; }; void CudaIcoCodeGen::generateGpuMesh( @@ -216,7 +237,7 @@ void CudaIcoCodeGen::generateGpuMesh( gpuMeshClass.addMember("dawn::unstructured_domain", "DomainLower"); gpuMeshClass.addMember("dawn::unstructured_domain", "DomainUpper"); - CollectIterationSpaces spaceCollector; + CollectIterationSpaces spaceCollector(stencilInstantiation->getMetaData()); std::unordered_set spaces; for(const auto& doMethod : iterateIIROver(*(stencilInstantiation->getIIR()))) { doMethod->getAST().accept(spaceCollector); @@ -368,7 +389,7 @@ void CudaIcoCodeGen::generateRunFun( kernelCall << kName; // which nbh tables need to be passed / which templates need to be defined? - CollectIterationSpaces chainStringCollector; + CollectIterationSpaces chainStringCollector(stencilInstantiation->getMetaData()); for(const auto& doMethod : stage->getChildren()) { doMethod->getAST().accept(chainStringCollector); } @@ -761,7 +782,7 @@ void CudaIcoCodeGen::generateAllAPIRunFunctions( const auto& stencils = stencilInstantiation->getStencils(); DAWN_ASSERT_MSG(stencils.size() <= 1, "code generation only for at most one stencil!\n"); - CollectIterationSpaces chainCollector; + CollectIterationSpaces chainCollector(stencilInstantiation->getMetaData()); std::set> chains; for(const auto& doMethod : iterateIIROver(*(stencilInstantiation->getIIR()))) { doMethod->getAST().accept(chainCollector); @@ -1233,7 +1254,7 @@ void CudaIcoCodeGen::generateAllCudaKernels( //-------------------------------------- // which nbh tables / size templates need to be passed? - CollectIterationSpaces chainStringCollector; + CollectIterationSpaces chainStringCollector(stencilInstantiation->getMetaData()); for(const auto& doMethod : stage->getChildren()) { doMethod->getAST().accept(chainStringCollector); } @@ -1376,7 +1397,7 @@ std::string CudaIcoCodeGen::generateStencilInstantiation( generateAllCudaKernels(ssSW, stencilInstantiation); - CollectIterationSpaces spaceCollector; + CollectIterationSpaces spaceCollector(stencilInstantiation->getMetaData()); std::unordered_set spaces; for(const auto& doMethod : iterateIIROver(*(stencilInstantiation->getIIR()))) { doMethod->getAST().accept(spaceCollector); diff --git a/dawn/src/dawn/Serialization/ASTSerializer.cpp b/dawn/src/dawn/Serialization/ASTSerializer.cpp index e9d29f5b2..fea602dfd 100644 --- a/dawn/src/dawn/Serialization/ASTSerializer.cpp +++ b/dawn/src/dawn/Serialization/ASTSerializer.cpp @@ -741,6 +741,10 @@ void ProtoStmtBuilder::visit(const std::shared_ptr& e currentExprProto_.pop(); } } + + for(int offset : expr->getOffsets()) { + protoExpr->add_offsets(offset); + } } void setAST(proto::ast::AST* astProto, const AST* ast) { @@ -1034,11 +1038,16 @@ std::shared_ptr makeExpr(const proto::ast::Expr& expressionProto, chain.push_back(getLocationTypeFromProtoLocationType(exprProto.iter_space().chain(i))); } + std::vector offsets; + for(int i = 0; i < exprProto.offsets_size(); ++i) { + offsets.push_back(exprProto.offsets(i)); + } + if(weights.empty()) { auto expr = std::make_shared( exprProto.op(), makeExpr(exprProto.rhs(), dataType, maxID), makeExpr(exprProto.init(), dataType, maxID), chain, - exprProto.iter_space().include_center(), makeLocation(exprProto)); + exprProto.iter_space().include_center(), offsets, makeLocation(exprProto)); return expr; } else { std::vector> deserializedWeights; @@ -1048,7 +1057,7 @@ std::shared_ptr makeExpr(const proto::ast::Expr& expressionProto, auto expr = std::make_shared( exprProto.op(), makeExpr(exprProto.rhs(), dataType, maxID), makeExpr(exprProto.init(), dataType, maxID), deserializedWeights, chain, - exprProto.iter_space().include_center(), makeLocation(exprProto)); + exprProto.iter_space().include_center(), offsets, makeLocation(exprProto)); return expr; } } diff --git a/dawn/src/dawn/Serialization/SIRSerializer.cpp b/dawn/src/dawn/Serialization/SIRSerializer.cpp index 912b537c9..024630c1c 100644 --- a/dawn/src/dawn/Serialization/SIRSerializer.cpp +++ b/dawn/src/dawn/Serialization/SIRSerializer.cpp @@ -13,10 +13,10 @@ //===------------------------------------------------------------------------------------------===// #include "dawn/Serialization/SIRSerializer.h" +#include "dawn/AST/AST/statements.pb.h" #include "dawn/SIR/AST.h" #include "dawn/SIR/SIR.h" #include "dawn/SIR/SIR/SIR.pb.h" -#include "dawn/AST/AST/statements.pb.h" #include "dawn/Serialization/ASTSerializer.h" #include "dawn/Support/Exception.h" #include "dawn/Support/Format.h" @@ -276,8 +276,7 @@ static std::shared_ptr makeField(const dawn::proto::ast::Field& fiel return field; } -static BuiltinTypeID -makeBuiltinTypeID(const dawn::proto::ast::BuiltinType& builtinTypeProto) { +static BuiltinTypeID makeBuiltinTypeID(const dawn::proto::ast::BuiltinType& builtinTypeProto) { switch(builtinTypeProto.type_id()) { case dawn::proto::ast::BuiltinType_TypeID_Invalid: return BuiltinTypeID::Invalid; @@ -343,10 +342,9 @@ makeVerticalRegion(const dawn::proto::ast::VerticalRegion& verticalRegionProto) auto interval = makeInterval(verticalRegionProto.interval()); // VerticalRegion.LoopOrder - auto loopOrder = - verticalRegionProto.loop_order() == dawn::proto::ast::VerticalRegion::Backward - ? sir::VerticalRegion::LoopOrderKind::Backward - : sir::VerticalRegion::LoopOrderKind::Forward; + auto loopOrder = verticalRegionProto.loop_order() == dawn::proto::ast::VerticalRegion::Backward + ? sir::VerticalRegion::LoopOrderKind::Backward + : sir::VerticalRegion::LoopOrderKind::Forward; auto verticalRegion = std::make_shared(ast, interval, loopOrder, loc); @@ -530,14 +528,19 @@ static std::shared_ptr makeExpr(const dawn::proto::ast::Expr& express chain.push_back(getLocationTypeFromProtoLocationType(exprProto.iter_space().chain(i))); } + std::vector offsets; + for(int i = 0; i < exprProto.offsets_size(); ++i) { + offsets.push_back(exprProto.offsets(i)); + } + if(weights.size() > 0) { return std::make_shared( exprProto.op(), makeExpr(exprProto.rhs()), makeExpr(exprProto.init()), weights, chain, - exprProto.iter_space().include_center(), makeLocation(exprProto)); + exprProto.iter_space().include_center(), offsets, makeLocation(exprProto)); } else { return std::make_shared( exprProto.op(), makeExpr(exprProto.rhs()), makeExpr(exprProto.init()), chain, - exprProto.iter_space().include_center(), makeLocation(exprProto)); + exprProto.iter_space().include_center(), offsets, makeLocation(exprProto)); } } case dawn::proto::ast::Expr::EXPR_NOT_SET: @@ -726,8 +729,7 @@ static std::shared_ptr deserializeImpl(const std::string& str, SIRSerialize stencilFunction->Loc = makeLocation(stencilFunctionProto); // StencilFunction.Args - for(const dawn::proto::ast::StencilFunctionArg& sirArg : - stencilFunctionProto.arguments()) { + for(const dawn::proto::ast::StencilFunctionArg& sirArg : stencilFunctionProto.arguments()) { switch(sirArg.Arg_case()) { case dawn::proto::ast::StencilFunctionArg::kFieldValue: stencilFunction->Args.emplace_back(makeField(sirArg.field_value())); @@ -764,20 +766,24 @@ static std::shared_ptr deserializeImpl(const std::string& str, SIRSerialize switch(sirValue.Value_case()) { case proto::ast::GlobalVariableValue::kBooleanValue: - value = std::make_shared(static_cast(sirValue.boolean_value()), isConstExpr); + value = + std::make_shared(static_cast(sirValue.boolean_value()), isConstExpr); break; case proto::ast::GlobalVariableValue::kIntegerValue: - value = std::make_shared(static_cast(sirValue.integer_value()), isConstExpr); + value = + std::make_shared(static_cast(sirValue.integer_value()), isConstExpr); break; case proto::ast::GlobalVariableValue::kFloatValue: - value = std::make_shared(static_cast(sirValue.float_value()), isConstExpr); + value = + std::make_shared(static_cast(sirValue.float_value()), isConstExpr); break; case proto::ast::GlobalVariableValue::kDoubleValue: - value = std::make_shared(static_cast(sirValue.double_value()), isConstExpr); + value = std::make_shared(static_cast(sirValue.double_value()), + isConstExpr); break; case proto::ast::GlobalVariableValue::kStringValue: value = std::make_shared(static_cast(sirValue.string_value()), - isConstExpr); + isConstExpr); break; case proto::ast::GlobalVariableValue::VALUE_NOT_SET: default: diff --git a/dawn/src/dawn/Unittest/IIRBuilder.h b/dawn/src/dawn/Unittest/IIRBuilder.h index 729a25ca3..bbc443f80 100644 --- a/dawn/src/dawn/Unittest/IIRBuilder.h +++ b/dawn/src/dawn/Unittest/IIRBuilder.h @@ -327,8 +327,8 @@ class IIRBuilder { std::unique_ptr stencil(MultiStages&&... multistages) { DAWN_ASSERT(si_); auto ret = std::make_unique(si_->getMetaData(), ast::Attr{}, si_->nextUID()); - int x[] = {(ret->insertChild(std::forward(multistages)), 0)...}; - (void)x; + // int x[] = {(ret->insertChild(std::forward(multistages)), 0)...}; + (ret->insertChild(std::forward(multistages)), ...); return ret; } diff --git a/dawn/src/dawn/Validator/UnstructuredDimensionChecker.cpp b/dawn/src/dawn/Validator/UnstructuredDimensionChecker.cpp index a97d5114e..d5cf773a0 100644 --- a/dawn/src/dawn/Validator/UnstructuredDimensionChecker.cpp +++ b/dawn/src/dawn/Validator/UnstructuredDimensionChecker.cpp @@ -493,7 +493,7 @@ void UnstructuredDimensionChecker::UnstructuredDimensionCheckerImpl::visit( } // check weighs for consistency w.r.t dimensions - if(reductionExpr->getWeights().has_value()) { + if(reductionExpr->getWeights().has_value() && reductionExpr->getOffsets().empty()) { // check weights one by one UnstructuredDimensionChecker::UnstructuredDimensionCheckerImpl weightChecker( nameToDimensions_, idToNameMap_, idToLocalVariableData_, config_); diff --git a/dawn/src/dawn/Validator/WeightChecker.cpp b/dawn/src/dawn/Validator/WeightChecker.cpp index 9e4a0f46f..3bfd800c1 100644 --- a/dawn/src/dawn/Validator/WeightChecker.cpp +++ b/dawn/src/dawn/Validator/WeightChecker.cpp @@ -42,7 +42,8 @@ void WeightChecker::WeightCheckerImpl::visit( DAWN_ASSERT(nameToDimensions_.count(fieldName)); weightsValid_ = ast::dimension_cast( nameToDimensions_.at(fieldName).getHorizontalFieldDimension()) - .isDense(); + .isDense() || + parentAllowsSparse_; } } void WeightChecker::WeightCheckerImpl::visit( @@ -74,6 +75,7 @@ void WeightChecker::WeightCheckerImpl::visit( void WeightChecker::WeightCheckerImpl::visit( const std::shared_ptr& expr) { + parentAllowsSparse_ = !expr->getOffsets().empty(); if(expr->getWeights().has_value()) { parentIsWeight_ = true; for(const auto& weight : *expr->getWeights()) { @@ -84,6 +86,7 @@ void WeightChecker::WeightCheckerImpl::visit( } parentIsWeight_ = false; } + parentAllowsSparse_ = false; expr->getRhs()->accept(*this); } diff --git a/dawn/src/dawn/Validator/WeightChecker.h b/dawn/src/dawn/Validator/WeightChecker.h index bf8a7ec67..b0079c1df 100644 --- a/dawn/src/dawn/Validator/WeightChecker.h +++ b/dawn/src/dawn/Validator/WeightChecker.h @@ -42,6 +42,7 @@ class WeightChecker { private: bool weightsValid_ = true; bool parentIsWeight_ = false; + bool parentAllowsSparse_ = false; const std::unordered_map nameToDimensions_; const std::unordered_map idToNameMap_; std::stack> diff --git a/dawn/src/dawn4py/serialization/utils.py b/dawn/src/dawn4py/serialization/utils.py index f4bb35ca1..deddab6ad 100644 --- a/dawn/src/dawn4py/serialization/utils.py +++ b/dawn/src/dawn4py/serialization/utils.py @@ -820,7 +820,8 @@ def make_reduction_over_neighbor_expr( init: ExprType, chain: List[LocationTypeValue], weights: List[ExprType] = None, - include_center: bool = False + include_center: bool = False, + offsets: List[int] = None ) -> ReductionOverNeighborExpr: """ Create a ReductionOverNeighborExpr @@ -839,6 +840,8 @@ def make_reduction_over_neighbor_expr( iterSpace.chain.extend(chain) if weights is not None and len(weights) != 0: expr.weights.extend([make_expr(weight) for weight in weights]) + if offsets is not None and len(offsets) != 0: + expr.offsets.extend([offset for offset in offsets]) iterSpace.include_center = include_center expr.iter_space.CopyFrom(iterSpace) return expr diff --git a/dawn/test/integration-test/dawn4py-tests/CMakeLists.txt b/dawn/test/integration-test/dawn4py-tests/CMakeLists.txt index 480e301a8..ce8993a42 100644 --- a/dawn/test/integration-test/dawn4py-tests/CMakeLists.txt +++ b/dawn/test/integration-test/dawn4py-tests/CMakeLists.txt @@ -56,3 +56,5 @@ add_python_example(NAME vertical_indirection) add_python_example(NAME global_var) add_python_example(NAME global_var_unstructured) add_python_example(NAME ICON_gradient) +add_python_example(NAME offset_reduction_cpp VERIFY) +add_python_example(NAME offset_reduction_cuda VERIFY) diff --git a/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cpp_ref.cpp b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cpp_ref.cpp new file mode 100644 index 000000000..d9e65e6d2 --- /dev/null +++ b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cpp_ref.cpp @@ -0,0 +1,114 @@ +#define DAWN_GENERATED 1 +#undef DAWN_BACKEND_T +#define DAWN_BACKEND_T CXXNAIVEICO +#define GRIDTOOLS_DAWN_NO_INCLUDE +#include +#include +#include + +namespace dawn_generated { +namespace cxxnaiveico { +template +class offset_reduction_cpp { +private: + struct stencil_34 { + ::dawn::mesh_t const& m_mesh; + int m_k_size; + ::dawn::edge_field_t& m_out_vn_e; + ::dawn::sparse_edge_field_t& m_raw_diam_coeff; + ::dawn::edge_field_t& m_prism_thick_e; + ::dawn::sparse_edge_field_t& m_e2c_aux; + ::dawn::sparse_edge_field_t& m_e2c_aux_h; + ::dawn::unstructured_domain m_unstructured_domain; + + public: + stencil_34(::dawn::mesh_t const& mesh, int k_size, + ::dawn::edge_field_t& out_vn_e, + ::dawn::sparse_edge_field_t& raw_diam_coeff, + ::dawn::edge_field_t& prism_thick_e, + ::dawn::sparse_edge_field_t& e2c_aux, + ::dawn::sparse_edge_field_t& e2c_aux_h) + : m_mesh(mesh), m_k_size(k_size), m_out_vn_e(out_vn_e), m_raw_diam_coeff(raw_diam_coeff), + m_prism_thick_e(prism_thick_e), m_e2c_aux(e2c_aux), m_e2c_aux_h(e2c_aux_h) {} + + ~stencil_34() {} + + void sync_storages() {} + static constexpr ::dawn::driver::unstructured_extent out_vn_e_extent = {false, 0, 0}; + static constexpr ::dawn::driver::unstructured_extent raw_diam_coeff_extent = {true, 0, 0}; + static constexpr ::dawn::driver::unstructured_extent prism_thick_e_extent = {true, 0, 0}; + static constexpr ::dawn::driver::unstructured_extent e2c_aux_extent = {true, 0, 0}; + static constexpr ::dawn::driver::unstructured_extent e2c_aux_h_extent = {true, 0, 0}; + + void run() { + using ::dawn::deref; + { + for(int k = 0 + 0; k <= (m_k_size == 0 ? 0 : (m_k_size)) + 0 - 1 + 0; ++k) { + for(auto const& loc : getEdges(LibTag{}, m_mesh)) { + m_out_vn_e(deref(LibTag{}, loc), (k + 0)) = reduce( + LibTag{}, m_mesh, loc, (::dawn::float_type).0, + std::vector<::dawn::LocationType>{::dawn::LocationType::Edges, + ::dawn::LocationType::Cells, + ::dawn::LocationType::Edges}, + [&, sparse_dimension_idx0 = int(0)](auto& lhs, auto red_loc1, + auto const& weight) mutable { + lhs += weight * + (m_raw_diam_coeff(deref(LibTag{}, loc), sparse_dimension_idx0, (k + 0)) * + m_prism_thick_e(deref(LibTag{}, red_loc1), (k + 0))); + sparse_dimension_idx0++; + return lhs; + }, + std::vector<::dawn::float_type>({m_e2c_aux(deref(LibTag{}, loc), 0, (k + 0)), + m_e2c_aux(deref(LibTag{}, loc), 0, (k + 0)), + m_e2c_aux(deref(LibTag{}, loc), 1, (k + 0)), + m_e2c_aux(deref(LibTag{}, loc), 1, (k + 0))})); + m_out_vn_e(deref(LibTag{}, loc), (k + 0)) = reduce( + LibTag{}, m_mesh, loc, (::dawn::float_type).0, + std::vector<::dawn::LocationType>{::dawn::LocationType::Edges, + ::dawn::LocationType::Cells, + ::dawn::LocationType::Edges}, + [&, sparse_dimension_idx0 = int(0)](auto& lhs, auto red_loc1, + auto const& weight) mutable { + lhs += weight * + (m_raw_diam_coeff(deref(LibTag{}, loc), sparse_dimension_idx0, (k + 0)) * + m_prism_thick_e(deref(LibTag{}, red_loc1), (k + 0))); + sparse_dimension_idx0++; + return lhs; + }, + std::vector<::dawn::float_type>( + {m_e2c_aux_h(deref(LibTag{}, loc), 0), m_e2c_aux_h(deref(LibTag{}, loc), 0), + m_e2c_aux_h(deref(LibTag{}, loc), 1), m_e2c_aux_h(deref(LibTag{}, loc), 1)})); + } + } + } + sync_storages(); + } + }; + static constexpr const char* s_name = "offset_reduction_cpp"; + stencil_34 m_stencil_34; + +public: + offset_reduction_cpp(const offset_reduction_cpp&) = delete; + + // Members + + void set_splitter_index(::dawn::LocationType loc, ::dawn::UnstructuredSubdomain subdomain, + int offset, int index) { + m_stencil_34.m_unstructured_domain.set_splitter_index({loc, subdomain, offset}, index); + } + + offset_reduction_cpp(const ::dawn::mesh_t& mesh, int k_size, + ::dawn::edge_field_t& out_vn_e, + ::dawn::sparse_edge_field_t& raw_diam_coeff, + ::dawn::edge_field_t& prism_thick_e, + ::dawn::sparse_edge_field_t& e2c_aux, + ::dawn::sparse_edge_field_t& e2c_aux_h) + : m_stencil_34(mesh, k_size, out_vn_e, raw_diam_coeff, prism_thick_e, e2c_aux, e2c_aux_h) {} + + void run() { + m_stencil_34.run(); + ; + } +}; +} // namespace cxxnaiveico +} // namespace dawn_generated diff --git a/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cuda_ref.cpp b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cuda_ref.cpp new file mode 100644 index 000000000..c4336d124 --- /dev/null +++ b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cuda_ref.cpp @@ -0,0 +1,286 @@ +#include "driver-includes/unstructured_interface.hpp" +#include "driver-includes/unstructured_domain.hpp" +#include "driver-includes/defs.hpp" +#include "driver-includes/cuda_utils.hpp" +#include "driver-includes/cuda_verify.hpp" +#include "driver-includes/to_vtk.h" +#define GRIDTOOLS_DAWN_NO_INCLUDE +#include "driver-includes/math.hpp" +#include +#define BLOCK_SIZE 128 +#define LEVELS_PER_THREAD 1 +using namespace gridtools::dawn; + +namespace dawn_generated { +namespace cuda_ico { +template +__global__ void offset_reduction_cuda_stencil34_ms47_s52_kernel( + int EdgeStride, int kSize, int hSize, const int* ecTable, const int* eceTable, + ::dawn::float_type* __restrict__ out_vn_e, + const ::dawn::float_type* __restrict__ raw_diam_coeff, + const ::dawn::float_type* __restrict__ prism_thick_e, + const ::dawn::float_type* __restrict__ e2c_aux, + const ::dawn::float_type* __restrict__ e2c_aux_h) { + unsigned int pidx = blockIdx.x * blockDim.x + threadIdx.x; + int klo = 0; + int khi = kSize + 0; + if(pidx >= hSize) { + return; + } + for(int kIter = klo; kIter < khi; kIter++) { + if(kIter >= kSize + 0) { + return; + } + ::dawn::float_type lhs_36 = (::dawn::float_type).0; + ::dawn::float_type weights_36[4] = { + e2c_aux[0 * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx], + e2c_aux[0 * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx], + e2c_aux[1 * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx], + e2c_aux[1 * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx]}; + for(int nbhIter0 = 0; nbhIter0 < E_C_E_SIZE; nbhIter0++) { + int nbhIdx0 = eceTable[pidx * E_C_E_SIZE + nbhIter0]; + lhs_36 += weights_36[nbhIter0] * + (raw_diam_coeff[nbhIter0 * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx] * + prism_thick_e[(kIter + 0) * EdgeStride + nbhIdx0]); + } + out_vn_e[(kIter + 0) * EdgeStride + pidx] = lhs_36; + ::dawn::float_type lhs_40 = (::dawn::float_type).0; + ::dawn::float_type weights_40[4] = { + e2c_aux_h[0 * EdgeStride + pidx], e2c_aux_h[0 * EdgeStride + pidx], + e2c_aux_h[1 * EdgeStride + pidx], e2c_aux_h[1 * EdgeStride + pidx]}; + for(int nbhIter0 = 0; nbhIter0 < E_C_E_SIZE; nbhIter0++) { + int nbhIdx0 = eceTable[pidx * E_C_E_SIZE + nbhIter0]; + lhs_40 += weights_40[nbhIter0] * + (raw_diam_coeff[nbhIter0 * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx] * + prism_thick_e[(kIter + 0) * EdgeStride + nbhIdx0]); + } + out_vn_e[(kIter + 0) * EdgeStride + pidx] = lhs_40; + } +} + +class offset_reduction_cuda { +public: + static const int E_C_E_SIZE = 4; + static const int E_C_SIZE = 2; + + struct GpuTriMesh { + int NumVertices; + int NumEdges; + int NumCells; + int VertexStride; + int EdgeStride; + int CellStride; + dawn::unstructured_domain DomainLower; + dawn::unstructured_domain DomainUpper; + int* eceTable; + int* ecTable; + + GpuTriMesh() {} + + GpuTriMesh(const dawn::GlobalGpuTriMesh* mesh) { + NumVertices = mesh->NumVertices; + NumCells = mesh->NumCells; + NumEdges = mesh->NumEdges; + VertexStride = mesh->VertexStride; + CellStride = mesh->CellStride; + EdgeStride = mesh->EdgeStride; + DomainLower = mesh->DomainLower; + DomainUpper = mesh->DomainUpper; + eceTable = mesh->NeighborTables.at(std::tuple, bool>{ + {dawn::LocationType::Edges, dawn::LocationType::Cells, dawn::LocationType::Edges}, 0}); + ecTable = mesh->NeighborTables.at(std::tuple, bool>{ + {dawn::LocationType::Edges, dawn::LocationType::Cells}, 0}); + } + }; + + struct stencil_34 { + private: + ::dawn::float_type* out_vn_e_; + ::dawn::float_type* raw_diam_coeff_; + ::dawn::float_type* prism_thick_e_; + ::dawn::float_type* e2c_aux_; + ::dawn::float_type* e2c_aux_h_; + static int kSize_; + static GpuTriMesh mesh_; + static bool is_setup_; + static cudaStream_t stream_; + + public: + static const GpuTriMesh& getMesh() { return mesh_; } + + static int getKSize() { return kSize_; } + + static void free() {} + + static void setup(const dawn::GlobalGpuTriMesh* mesh, int kSize, cudaStream_t stream) { + mesh_ = GpuTriMesh(mesh); + kSize_ = kSize; + is_setup_ = true; + stream_ = stream; + } + + dim3 grid(int kSize, int elSize, bool kparallel) { + if(kparallel) { + int dK = (kSize + LEVELS_PER_THREAD - 1) / LEVELS_PER_THREAD; + return dim3((elSize + BLOCK_SIZE - 1) / BLOCK_SIZE, dK, 1); + } else { + return dim3((elSize + BLOCK_SIZE - 1) / BLOCK_SIZE, 1, 1); + } + } + + stencil_34() {} + + void run() { + if(!is_setup_) { + printf( + "offset_reduction_cuda has not been set up! make sure setup() is called before run!\n"); + return; + } + dim3 dB(BLOCK_SIZE, 1, 1); + int hsize52 = mesh_.NumEdges; + if(hsize52 == 0) { + return; + } + dim3 dG52 = grid(kSize_ + 0 - 0, hsize52, false); + offset_reduction_cuda_stencil34_ms47_s52_kernel + <<>>(mesh_.EdgeStride, kSize_, hsize52, mesh_.ecTable, + mesh_.eceTable, out_vn_e_, raw_diam_coeff_, prism_thick_e_, + e2c_aux_, e2c_aux_h_); +#ifndef NDEBUG + + gpuErrchk(cudaPeekAtLastError()); + gpuErrchk(cudaDeviceSynchronize()); +#endif + } + + void CopyResultToHost(::dawn::float_type* out_vn_e, bool do_reshape) { + if(do_reshape) { + ::dawn::float_type* host_buf = new ::dawn::float_type[(mesh_.EdgeStride) * kSize_]; + gpuErrchk(cudaMemcpy((::dawn::float_type*)host_buf, out_vn_e_, + (mesh_.EdgeStride) * kSize_ * sizeof(::dawn::float_type), + cudaMemcpyDeviceToHost)); + dawn::reshape_back(host_buf, out_vn_e, kSize_, mesh_.EdgeStride); + delete[] host_buf; + } else { + gpuErrchk(cudaMemcpy(out_vn_e, out_vn_e_, + (mesh_.EdgeStride) * kSize_ * sizeof(::dawn::float_type), + cudaMemcpyDeviceToHost)); + } + } + + void copy_memory(::dawn::float_type* out_vn_e, ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, ::dawn::float_type* e2c_aux, + ::dawn::float_type* e2c_aux_h, bool do_reshape) { + dawn::initField(out_vn_e, &out_vn_e_, mesh_.EdgeStride, kSize_, do_reshape); + dawn::initSparseField(raw_diam_coeff, &raw_diam_coeff_, mesh_.EdgeStride, E_C_E_SIZE, kSize_, + do_reshape); + dawn::initField(prism_thick_e, &prism_thick_e_, mesh_.EdgeStride, kSize_, do_reshape); + dawn::initSparseField(e2c_aux, &e2c_aux_, mesh_.EdgeStride, E_C_SIZE, kSize_, do_reshape); + dawn::initSparseField(e2c_aux_h, &e2c_aux_h_, mesh_.EdgeStride, E_C_SIZE, 1, do_reshape); + } + + void copy_pointers(::dawn::float_type* out_vn_e, ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, ::dawn::float_type* e2c_aux, + ::dawn::float_type* e2c_aux_h) { + out_vn_e_ = out_vn_e; + raw_diam_coeff_ = raw_diam_coeff; + prism_thick_e_ = prism_thick_e; + e2c_aux_ = e2c_aux; + e2c_aux_h_ = e2c_aux_h; + } + }; +}; +} // namespace cuda_ico +} // namespace dawn_generated +extern "C" { +void run_offset_reduction_cuda_from_c_host(dawn::GlobalGpuTriMesh* mesh, int k_size, + ::dawn::float_type* out_vn_e, + ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, + ::dawn::float_type* e2c_aux, + ::dawn::float_type* e2c_aux_h) { + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34 s; + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::setup(mesh, k_size, 0); + s.copy_memory(out_vn_e, raw_diam_coeff, prism_thick_e, e2c_aux, e2c_aux_h, true); + s.run(); + s.CopyResultToHost(out_vn_e, true); + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::free(); + return; +} +void run_offset_reduction_cuda_from_fort_host(dawn::GlobalGpuTriMesh* mesh, int k_size, + ::dawn::float_type* out_vn_e, + ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, + ::dawn::float_type* e2c_aux, + ::dawn::float_type* e2c_aux_h) { + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34 s; + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::setup(mesh, k_size, 0); + s.copy_memory(out_vn_e, raw_diam_coeff, prism_thick_e, e2c_aux, e2c_aux_h, false); + s.run(); + s.CopyResultToHost(out_vn_e, false); + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::free(); + return; +} +void run_offset_reduction_cuda(::dawn::float_type* out_vn_e, ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, ::dawn::float_type* e2c_aux, + ::dawn::float_type* e2c_aux_h) { + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34 s; + s.copy_pointers(out_vn_e, raw_diam_coeff, prism_thick_e, e2c_aux, e2c_aux_h); + s.run(); + return; +} +bool verify_offset_reduction_cuda(const ::dawn::float_type* out_vn_e_dsl, + const ::dawn::float_type* out_vn_e, const double out_vn_e_rel_tol, + const double out_vn_e_abs_tol, const int iteration) { + using namespace std::chrono; + const auto& mesh = dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::getMesh(); + int kSize = dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::getKSize(); + high_resolution_clock::time_point t_start = high_resolution_clock::now(); + bool isValid; + isValid = ::dawn::verify_field((mesh.EdgeStride) * kSize, out_vn_e_dsl, out_vn_e, "out_vn_e", + out_vn_e_rel_tol, out_vn_e_abs_tol); + if(!isValid) { +#ifdef __SERIALIZE_ON_ERROR + serialize_dense_edges(0, (mesh.NumEdges - 1), kSize, (mesh.EdgeStride), out_vn_e, + "offset_reduction_cuda", "out_vn_e", iteration); + serialize_dense_edges(0, (mesh.NumEdges - 1), kSize, (mesh.EdgeStride), out_vn_e_dsl, + "offset_reduction_cuda", "out_vn_e_dsl", iteration); + std::cout << "[DSL] serializing out_vn_e as error is high.\n" << std::flush; +#endif + } +#ifdef __SERIALIZE_ON_ERROR + + serialize_flush_iter("offset_reduction_cuda", iteration); +#endif + high_resolution_clock::time_point t_end = high_resolution_clock::now(); + duration timing = duration_cast>(t_end - t_start); + std::cout << "[DSL] Verification took " << timing.count() << " seconds.\n" << std::flush; + return isValid; +} +void run_and_verify_offset_reduction_cuda( + ::dawn::float_type* out_vn_e, ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, ::dawn::float_type* e2c_aux, ::dawn::float_type* e2c_aux_h, + ::dawn::float_type* out_vn_e_before, const double out_vn_e_rel_tol, + const double out_vn_e_abs_tol) { + static int iteration = 0; + std::cout << "[DSL] Running stencil offset_reduction_cuda (" << iteration << ") ...\n" + << std::flush; + run_offset_reduction_cuda(out_vn_e_before, raw_diam_coeff, prism_thick_e, e2c_aux, e2c_aux_h); + std::cout << "[DSL] offset_reduction_cuda run time: " << time << "s\n" << std::flush; + std::cout << "[DSL] Verifying stencil offset_reduction_cuda...\n" << std::flush; + verify_offset_reduction_cuda(out_vn_e_before, out_vn_e, out_vn_e_rel_tol, out_vn_e_abs_tol, + iteration); + iteration++; +} +void setup_offset_reduction_cuda(dawn::GlobalGpuTriMesh* mesh, int k_size, cudaStream_t stream) { + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::setup(mesh, k_size, stream); +} +void free_offset_reduction_cuda() { + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::free(); +} +} +int dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::kSize_; +cudaStream_t dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::stream_; +bool dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::is_setup_ = false; +dawn_generated::cuda_ico::offset_reduction_cuda::GpuTriMesh + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::mesh_; diff --git a/dawn/test/integration-test/dawn4py-tests/offset_reduction_cpp.py b/dawn/test/integration-test/dawn4py-tests/offset_reduction_cpp.py new file mode 100644 index 000000000..206ed11e1 --- /dev/null +++ b/dawn/test/integration-test/dawn4py-tests/offset_reduction_cpp.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python + +# ===-----------------------------------------------------------------------------*- Python -*-===## +# _ +# | | +# __| | __ ___ ___ ___ +# / _` |/ _` \ \ /\ / / '_ | +# | (_| | (_| |\ V V /| | | | +# \__,_|\__,_| \_/\_/ |_| |_| - Compiler Toolchain +# +# +# This file is distributed under the MIT License (MIT). +# See LICENSE.txt for details. +# +# ===------------------------------------------------------------------------------------------===## + +"""Copy stencil HIR generator + +This program creates the HIR corresponding to an unstructured stencil using the SIR serialization Python API. +The code is meant as an example for high-level DSLs that could generate HIR from their own +internal IR. +""" + +import argparse +import os + +import dawn4py +from dawn4py.serialization import SIR, AST +from dawn4py.serialization import utils as sir_utils +from google.protobuf.json_format import MessageToJson, Parse + +OUTPUT_NAME = "offset_reduction_cpp" +OUTPUT_FILE = f"{OUTPUT_NAME}.cpp" +OUTPUT_PATH = f"{OUTPUT_NAME}.cpp" + + +def main(args: argparse.Namespace): + interval = sir_utils.make_interval( + AST.Interval.Start, AST.Interval.End, 0, 0) + + # create the out = in[i+1] statement + body_ast = sir_utils.make_ast( + [ + sir_utils.make_assignment_stmt( + sir_utils.make_unstructured_field_access_expr("out_vn_e"), + sir_utils.make_reduction_over_neighbor_expr( + "+", + sir_utils.make_binary_operator( + sir_utils.make_unstructured_field_access_expr( + "raw_diam_coeff", horizontal_offset=sir_utils.make_unstructured_offset(True)), + "*", + sir_utils.make_unstructured_field_access_expr( + "prism_thick_e", horizontal_offset=sir_utils.make_unstructured_offset(True)), + ), + sir_utils.make_literal_access_expr( + ".0", AST.BuiltinType.Float), + chain=[AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell"), AST.LocationType.Value("Edge")], + weights=[sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True))], + offsets=[0, 0, 1, 1] + ), + "=", + ), + sir_utils.make_assignment_stmt( + sir_utils.make_unstructured_field_access_expr("out_vn_e"), + sir_utils.make_reduction_over_neighbor_expr( + "+", + sir_utils.make_binary_operator( + sir_utils.make_unstructured_field_access_expr( + "raw_diam_coeff", horizontal_offset=sir_utils.make_unstructured_offset(True)), + "*", + sir_utils.make_unstructured_field_access_expr( + "prism_thick_e", horizontal_offset=sir_utils.make_unstructured_offset(True)), + ), + sir_utils.make_literal_access_expr( + ".0", AST.BuiltinType.Float), + chain=[AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell"), AST.LocationType.Value("Edge")], + weights=[sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True))], + offsets=[0, 0, 1, 1] + ), + "=", + ) + ] + ) + + vertical_region_stmt = sir_utils.make_vertical_region_decl_stmt( + body_ast, interval, AST.VerticalRegion.Forward + ) + + sir = sir_utils.make_sir( + OUTPUT_FILE, + AST.GridType.Value("Unstructured"), + [ + sir_utils.make_stencil( + OUTPUT_NAME, + sir_utils.make_ast([vertical_region_stmt]), + [ + sir_utils.make_field( + "out_vn_e", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "raw_diam_coeff", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge"), + AST.LocationType.Value("Cell"), + AST.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "prism_thick_e", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "e2c_aux", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell")], 1 + ), + ), + sir_utils.make_field( + "e2c_aux_h", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell")], 0 + ), + ), + ], + ), + ], + ) + + # compile + code = dawn4py.compile(sir, backend=dawn4py.CodeGenBackend.CXXNaiveIco) + + # write to file + print(f"Writing generated code to '{OUTPUT_PATH}'") + with open(OUTPUT_PATH, "w") as f: + f.write(code) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Generate a simple unstructured copy stencil using Dawn compiler" + ) + parser.add_argument( + "-v", + "--verbose", + dest="verbose", + action="store_true", + default=False, + help="Print the generated SIR", + ) + main(parser.parse_args()) diff --git a/dawn/test/integration-test/dawn4py-tests/offset_reduction_cuda.py b/dawn/test/integration-test/dawn4py-tests/offset_reduction_cuda.py new file mode 100644 index 000000000..2fb3f4a23 --- /dev/null +++ b/dawn/test/integration-test/dawn4py-tests/offset_reduction_cuda.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python + +# ===-----------------------------------------------------------------------------*- Python -*-===## +# _ +# | | +# __| | __ ___ ___ ___ +# / _` |/ _` \ \ /\ / / '_ | +# | (_| | (_| |\ V V /| | | | +# \__,_|\__,_| \_/\_/ |_| |_| - Compiler Toolchain +# +# +# This file is distributed under the MIT License (MIT). +# See LICENSE.txt for details. +# +# ===------------------------------------------------------------------------------------------===## + +"""Copy stencil HIR generator + +This program creates the HIR corresponding to an unstructured stencil using the SIR serialization Python API. +The code is meant as an example for high-level DSLs that could generate HIR from their own +internal IR. +""" + +import argparse +import os + +import dawn4py +from dawn4py.serialization import SIR, AST +from dawn4py.serialization import utils as sir_utils +from google.protobuf.json_format import MessageToJson, Parse + +OUTPUT_NAME = "offset_reduction_cuda" +OUTPUT_FILE = f"{OUTPUT_NAME}.cpp" +OUTPUT_PATH = f"{OUTPUT_NAME}.cpp" + + +def main(args: argparse.Namespace): + interval = sir_utils.make_interval( + AST.Interval.Start, AST.Interval.End, 0, 0) + + # create the out = in[i+1] statement + body_ast = sir_utils.make_ast( + [ + sir_utils.make_assignment_stmt( + sir_utils.make_unstructured_field_access_expr("out_vn_e"), + sir_utils.make_reduction_over_neighbor_expr( + "+", + sir_utils.make_binary_operator( + sir_utils.make_unstructured_field_access_expr( + "raw_diam_coeff", horizontal_offset=sir_utils.make_unstructured_offset(True)), + "*", + sir_utils.make_unstructured_field_access_expr( + "prism_thick_e", horizontal_offset=sir_utils.make_unstructured_offset(True)), + ), + sir_utils.make_literal_access_expr( + ".0", AST.BuiltinType.Float), + chain=[AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell"), AST.LocationType.Value("Edge")], + weights=[sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True))], + offsets=[0, 0, 1, 1] + ), + "=", + ), + sir_utils.make_assignment_stmt( + sir_utils.make_unstructured_field_access_expr("out_vn_e"), + sir_utils.make_reduction_over_neighbor_expr( + "+", + sir_utils.make_binary_operator( + sir_utils.make_unstructured_field_access_expr( + "raw_diam_coeff", horizontal_offset=sir_utils.make_unstructured_offset(True)), + "*", + sir_utils.make_unstructured_field_access_expr( + "prism_thick_e", horizontal_offset=sir_utils.make_unstructured_offset(True)), + ), + sir_utils.make_literal_access_expr( + ".0", AST.BuiltinType.Float), + chain=[AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell"), AST.LocationType.Value("Edge")], + weights=[sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True))], + offsets=[0, 0, 1, 1] + ), + "=", + ) + ] + ) + + vertical_region_stmt = sir_utils.make_vertical_region_decl_stmt( + body_ast, interval, AST.VerticalRegion.Forward + ) + + sir = sir_utils.make_sir( + OUTPUT_FILE, + AST.GridType.Value("Unstructured"), + [ + sir_utils.make_stencil( + OUTPUT_NAME, + sir_utils.make_ast([vertical_region_stmt]), + [ + sir_utils.make_field( + "out_vn_e", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "raw_diam_coeff", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge"), + AST.LocationType.Value("Cell"), + AST.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "prism_thick_e", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "e2c_aux", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell")], 1 + ), + ), + sir_utils.make_field( + "e2c_aux_h", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell")], 0 + ), + ), + ], + ), + ], + ) + + # compile + code = dawn4py.compile(sir, backend=dawn4py.CodeGenBackend.CUDAIco) + + # write to file + print(f"Writing generated code to '{OUTPUT_PATH}'") + with open(OUTPUT_PATH, "w") as f: + f.write(code) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Generate a simple unstructured copy stencil using Dawn compiler" + ) + parser.add_argument( + "-v", + "--verbose", + dest="verbose", + action="store_true", + default=False, + help="Print the generated SIR", + ) + main(parser.parse_args())