From 1d5e8b17bc88e0aa5d519ef0fb4525d7f2b61ff4 Mon Sep 17 00:00:00 2001 From: mroeth Date: Thu, 12 Dec 2019 14:45:40 +0100 Subject: [PATCH 01/13] fixed VarDeclStmt visitor --- dawn/src/dawn/IIR/DoMethod.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dawn/src/dawn/IIR/DoMethod.cpp b/dawn/src/dawn/IIR/DoMethod.cpp index c6f3b35b6..56097584c 100644 --- a/dawn/src/dawn/IIR/DoMethod.cpp +++ b/dawn/src/dawn/IIR/DoMethod.cpp @@ -51,6 +51,8 @@ class ReplaceNamesVisitor : public iir::ASTVisitorForwarding, public NonCopyable DAWN_ASSERT_MSG(accessmap.size() == 1, "can only be one write access"); std::string realName = metadata_.getNameFromAccessID(accessmap.begin()->first); stmt->getName() = realName; + for(const auto& expr : stmt->getInitList()) + expr->accept(*this); } void visit(const std::shared_ptr& expr) override { auto data = expr->getData(); From 3a504123df1bbc243613ac27b10a399eef147735 Mon Sep 17 00:00:00 2001 From: mroeth Date: Thu, 16 Jan 2020 17:01:18 +0100 Subject: [PATCH 02/13] updating git hook ignore list for new codegen tests --- scripts/ignore_list.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/ignore_list.sh b/scripts/ignore_list.sh index 77114160c..5fff170f4 100644 --- a/scripts/ignore_list.sh +++ b/scripts/ignore_list.sh @@ -5,5 +5,6 @@ ignore_regex_list=( "^(\./)?dawn/examples/python/data/" "^(\./)?dawn/test/unit-test/test_dawn4py/data" "^(\./)?gtclang/test/utils/googletest/" + "^(\./)?dawn/test/unit-test/dawn/CodeGen/Naive/reference" "/bundle/" ) From 2662ecb03436b2efe466f0aa8dc528269befcbd4 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Thu, 23 Jan 2020 11:13:21 +0100 Subject: [PATCH 03/13] Use experimental::fs if fs is not supported (#640) ## Technical Description Use std::experimental::filesystem if std::filesystem is not supported, i.e. gcc < 8. --- dawn/src/dawn/Compiler/DawnCompiler.cpp | 10 ++++------ dawn/src/dawn/Support/FileSystem.h | 26 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 dawn/src/dawn/Support/FileSystem.h diff --git a/dawn/src/dawn/Compiler/DawnCompiler.cpp b/dawn/src/dawn/Compiler/DawnCompiler.cpp index fb0f1e3ab..74ead87d9 100644 --- a/dawn/src/dawn/Compiler/DawnCompiler.cpp +++ b/dawn/src/dawn/Compiler/DawnCompiler.cpp @@ -48,6 +48,7 @@ #include "dawn/Serialization/IIRSerializer.h" #include "dawn/Support/Array.h" #include "dawn/Support/EditDistance.h" +#include "dawn/Support/FileSystem.h" #include "dawn/Support/Logging.h" #include "dawn/Support/StringSwitch.h" #include "dawn/Support/StringUtil.h" @@ -55,8 +56,6 @@ #include "dawn/Validator/GridTypeChecker.h" #include "dawn/Validator/LocationTypeChecker.h" -#include - namespace dawn { namespace { @@ -236,9 +235,8 @@ std::unique_ptr DawnCompiler::runOptimizer(std::shared_ptrgetName() << "`"; if(options_->SerializeIIR) { - const std::filesystem::path p(options_->OutputFile.empty() - ? instantiation->getMetaData().getFileName() - : options_->OutputFile); + const fs::path p(options_->OutputFile.empty() ? instantiation->getMetaData().getFileName() + : options_->OutputFile); IIRSerializer::serialize(static_cast(p.stem()) + "." + std::to_string(i) + ".iir", instantiation, serializationKind); @@ -332,4 +330,4 @@ DiagnosticsEngine& DawnCompiler::getDiagnostics() { return *diagnostics_.get(); const Options& DawnCompiler::getOptions() const { return *options_.get(); } Options& DawnCompiler::getOptions() { return *options_.get(); } -} // namespace dawn \ No newline at end of file +} // namespace dawn diff --git a/dawn/src/dawn/Support/FileSystem.h b/dawn/src/dawn/Support/FileSystem.h new file mode 100644 index 000000000..dca3a30b1 --- /dev/null +++ b/dawn/src/dawn/Support/FileSystem.h @@ -0,0 +1,26 @@ +//===--------------------------------------------------------------------------------*- C++ -*-===// +// _ +// | | +// __| | __ ___ ___ ___ +// / _` |/ _` \ \ /\ / / '_ | +// | (_| | (_| |\ V V /| | | | +// \__,_|\__,_| \_/\_/ |_| |_| - Compiler Toolchain +// +// +// This file is distributed under the MIT License (MIT). +// See LICENSE.txt for details. +// +//===------------------------------------------------------------------------------------------===// + +#ifndef DAWN_SUPPORT_FILESYSTEM_H +#define DAWN_SUPPORT_FILESYSTEM_H + +#if __has_include() +#include +namespace fs = std::filesystem; +#elif __has_include() +#include +namespace fs = std::experimental::filesystem; +#endif + +#endif From bca72323acbd1f26786e7ded3fb1dae2d6932fbf Mon Sep 17 00:00:00 2001 From: Matthias Roethlin Date: Wed, 27 Jan 2021 16:32:13 +0100 Subject: [PATCH 04/13] wip --- dawn/src/dawn/AST/ASTExpr.cpp | 18 +-- dawn/src/dawn/AST/ASTExpr.h | 7 +- dawn/src/dawn/SIR/proto/SIR/statements.proto | 37 ++--- dawn/src/dawn/Serialization/ASTSerializer.cpp | 13 +- dawn/src/dawn/Serialization/SIRSerializer.cpp | 23 ++- dawn/src/dawn/Validator/WeightChecker.cpp | 5 +- dawn/src/dawn/Validator/WeightChecker.h | 1 + dawn/src/dawn4py/serialization/utils.py | 5 +- .../dawn4py-tests/offset_reduction.py | 142 ++++++++++++++++++ 9 files changed, 207 insertions(+), 44 deletions(-) create mode 100644 dawn/test/integration-test/dawn4py-tests/offset_reduction.py diff --git a/dawn/src/dawn/AST/ASTExpr.cpp b/dawn/src/dawn/AST/ASTExpr.cpp index 885a16fbe..370d3bcb7 100644 --- a/dawn/src/dawn/AST/ASTExpr.cpp +++ b/dawn/src/dawn/AST/ASTExpr.cpp @@ -459,17 +459,16 @@ ReductionOverNeighborExpr::ReductionOverNeighborExpr(std::string const& op, std::shared_ptr const& rhs, std::shared_ptr const& init, std::vector chain, - bool includeCenter, SourceLocation loc) + bool includeCenter, std::vector offsets, + SourceLocation loc) : Expr(Kind::ReductionOverNeighborExpr, loc), op_(op), - iterSpace_(std::move(chain), includeCenter), operands_{rhs, init} {} + iterSpace_(std::move(chain), includeCenter), operands_{rhs, init}, offsets_(offsets) {} -ReductionOverNeighborExpr::ReductionOverNeighborExpr(std::string const& op, - std::shared_ptr const& rhs, - std::shared_ptr const& init, - std::vector> weights, - std::vector chain, - bool includeCenter, SourceLocation loc) - : ReductionOverNeighborExpr(op, rhs, init, chain, includeCenter, loc) { +ReductionOverNeighborExpr::ReductionOverNeighborExpr( + std::string const& op, std::shared_ptr const& rhs, std::shared_ptr const& init, + std::vector> weights, std::vector chain, + bool includeCenter, std::vector offsets, SourceLocation loc) + : ReductionOverNeighborExpr(op, rhs, init, chain, includeCenter, offsets, loc) { DAWN_ASSERT_MSG(weights.size() > 0, "empty weights vector passed!\n"); weights_ = weights; operands_.insert(operands_.end(), weights.begin(), weights.end()); @@ -486,6 +485,7 @@ ReductionOverNeighborExpr::operator=(ReductionOverNeighborExpr const& expr) { operands_ = expr.operands_; iterSpace_ = expr.iterSpace_; weights_ = expr.getWeights(); + offsets_ = expr.offsets_; return *this; } diff --git a/dawn/src/dawn/AST/ASTExpr.h b/dawn/src/dawn/AST/ASTExpr.h index 557196753..11987b8bb 100644 --- a/dawn/src/dawn/AST/ASTExpr.h +++ b/dawn/src/dawn/AST/ASTExpr.h @@ -647,18 +647,20 @@ class ReductionOverNeighborExpr : public Expr { // hold a copy of the (shared pointer to) the weights std::vector> operands_ = std::vector>(2); bool chainIsValid() const; + std::vector offsets_; public: /// @name Constructor & Destructor /// @{ ReductionOverNeighborExpr(std::string const& op, std::shared_ptr const& rhs, std::shared_ptr const& init, std::vector chain, - bool includeCenter = false, SourceLocation loc = SourceLocation()); + bool includeCenter = false, std::vector offsets_ = {}, + SourceLocation loc = SourceLocation()); ReductionOverNeighborExpr(std::string const& op, std::shared_ptr const& rhs, std::shared_ptr const& init, std::vector> weights, std::vector chain, bool includeCenter = false, - SourceLocation loc = SourceLocation()); + std::vector offsets_ = {}, SourceLocation loc = SourceLocation()); ReductionOverNeighborExpr(ReductionOverNeighborExpr const& stmt); ReductionOverNeighborExpr& operator=(ReductionOverNeighborExpr const& stmt); /// @} @@ -671,6 +673,7 @@ class ReductionOverNeighborExpr : public Expr { std::vector getNbhChain() const { return iterSpace_; }; ast::LocationType getLhsLocation() const { return iterSpace_.Chain.front(); }; const std::optional>>& getWeights() const { return weights_; }; + const std::vector getOffsets() const { return offsets_; }; bool getIncludeCenter() const { return iterSpace_.IncludeCenter; }; ast::UnstructuredIterationSpace getIterSpace() const { return iterSpace_; } diff --git a/dawn/src/dawn/SIR/proto/SIR/statements.proto b/dawn/src/dawn/SIR/proto/SIR/statements.proto index 0a460098d..7e68cfdb3 100644 --- a/dawn/src/dawn/SIR/proto/SIR/statements.proto +++ b/dawn/src/dawn/SIR/proto/SIR/statements.proto @@ -48,9 +48,7 @@ message CartesianDimension { // It could also have a sparse dimension. In such case the sparse part is // non-empty. // @ingroup sir_proto -message UnstructuredDimension { - UnstructuredIterationSpace iter_space = 1; -} +message UnstructuredDimension { UnstructuredIterationSpace iter_space = 1; } // @brief Dimensions spanned by a field // @@ -323,9 +321,7 @@ message LoopDescriptorGeneral { // dummy message for future use } -message LoopDescriptorChain { - UnstructuredIterationSpace iter_space = 1; -} +message LoopDescriptorChain { UnstructuredIterationSpace iter_space = 1; } message LoopDescriptor { oneof desc { @@ -335,7 +331,7 @@ message LoopDescriptor { } message LoopStmt { - Stmt statements = 1; // List of statements (must be a BlockStmt) + Stmt statements = 1; // List of statements (must be a BlockStmt) LoopDescriptor loop_descriptor = 2; // Loop bounds description SourceLocation loc = 3; // Source location StmtData data = 4; // Generic Stmt's data container @@ -366,8 +362,7 @@ message ReturnStmt { // // @ingroup sir_proto message VarDeclStmtData { - google.protobuf.Int32Value accessID = - 1; // ID of the variable declared in the statement + google.protobuf.Int32Value accessID = 1; // ID of the variable declared in the statement } // @brief Declaration of a variable @@ -543,7 +538,7 @@ message FunCallExpr { // @endcode // @ingroup sir_proto message StencilFunCallExpr { - string callee = 1; // Identifier of the stencil function (i.e callee) + string callee = 1; // Identifier of the stencil function (i.e callee) repeated Expr arguments = 2; // List of arguments SourceLocation loc = 3; // Source location int32 ID = 4; // ID of the Expr @@ -601,18 +596,16 @@ message StencilFunArgExpr { // // @ingroup sir_proto message AccessExprData { - google.protobuf.Int32Value accessID = - 1; // Access ID of variable/literal/field accessed + google.protobuf.Int32Value accessID = 1; // Access ID of variable/literal/field accessed } // @brief Access to a variable // // @ingroup sir_proto message VarAccessExpr { - string name = 1; // Name of the variable - Expr index = 2; // Is it an array access (i.e var[2])? - bool is_external = - 3; // Is this an access to a external variable (e.g a global)? + string name = 1; // Name of the variable + Expr index = 2; // Is it an array access (i.e var[2])? + bool is_external = 3; // Is this an access to a external variable (e.g a global)? SourceLocation loc = 4; // Source location AccessExprData data = 5; // Access data int32 ID = 6; // ID of the Expr @@ -709,15 +702,15 @@ message LiteralAccessExpr { // // @ingroup sir_proto message ReductionOverNeighborExpr { - string op = 1; // Reduction operation - Expr rhs = 2; // Operation to be applied for each neighbor - Expr init = 3; // Initial value of reduction - repeated Expr weights = - 4; // weights (required to be of equal type, e.g. just floats) + string op = 1; // Reduction operation + Expr rhs = 2; // Operation to be applied for each neighbor + Expr init = 3; // Initial value of reduction + repeated Expr weights = 4; // weights (required to be of equal type, e.g. just floats) UnstructuredIterationSpace iter_space = 5; // Neighbor chain definining the neighbors to reduce from and the // location type to reduce to (first element) - SourceLocation loc = 6; + SourceLocation loc = 6; + repeated int32 offsets = 7; } // @brief Abstract syntax tree of the SIR diff --git a/dawn/src/dawn/Serialization/ASTSerializer.cpp b/dawn/src/dawn/Serialization/ASTSerializer.cpp index 5c84b5077..40dc1f901 100644 --- a/dawn/src/dawn/Serialization/ASTSerializer.cpp +++ b/dawn/src/dawn/Serialization/ASTSerializer.cpp @@ -741,6 +741,10 @@ void ProtoStmtBuilder::visit(const std::shared_ptr& e currentExprProto_.pop(); } } + + for(int offset : expr->getOffsets()) { + protoExpr->add_offsets(offset); + } } void setAST(proto::statements::AST* astProto, const AST* ast) { @@ -1034,11 +1038,16 @@ std::shared_ptr makeExpr(const proto::statements::Expr& expressionProto, chain.push_back(getLocationTypeFromProtoLocationType(exprProto.iter_space().chain(i))); } + std::vector offsets; + for(int i = 0; i < exprProto.offsets_size(); ++i) { + offsets.push_back(exprProto.offsets(i)); + } + if(weights.empty()) { auto expr = std::make_shared( exprProto.op(), makeExpr(exprProto.rhs(), dataType, maxID), makeExpr(exprProto.init(), dataType, maxID), chain, - exprProto.iter_space().include_center(), makeLocation(exprProto)); + exprProto.iter_space().include_center(), offsets, makeLocation(exprProto)); return expr; } else { std::vector> deserializedWeights; @@ -1048,7 +1057,7 @@ std::shared_ptr makeExpr(const proto::statements::Expr& expressionProto, auto expr = std::make_shared( exprProto.op(), makeExpr(exprProto.rhs(), dataType, maxID), makeExpr(exprProto.init(), dataType, maxID), deserializedWeights, chain, - exprProto.iter_space().include_center(), makeLocation(exprProto)); + exprProto.iter_space().include_center(), offsets, makeLocation(exprProto)); return expr; } } diff --git a/dawn/src/dawn/Serialization/SIRSerializer.cpp b/dawn/src/dawn/Serialization/SIRSerializer.cpp index a96599df1..b7186cc71 100644 --- a/dawn/src/dawn/Serialization/SIRSerializer.cpp +++ b/dawn/src/dawn/Serialization/SIRSerializer.cpp @@ -530,14 +530,19 @@ static std::shared_ptr makeExpr(const dawn::proto::statements::Expr& chain.push_back(getLocationTypeFromProtoLocationType(exprProto.iter_space().chain(i))); } + std::vector offsets; + for(int i = 0; i < exprProto.offsets_size(); ++i) { + offsets.push_back(exprProto.offsets(i)); + } + if(weights.size() > 0) { return std::make_shared( exprProto.op(), makeExpr(exprProto.rhs()), makeExpr(exprProto.init()), weights, chain, - exprProto.iter_space().include_center(), makeLocation(exprProto)); + exprProto.iter_space().include_center(), offsets, makeLocation(exprProto)); } else { return std::make_shared( exprProto.op(), makeExpr(exprProto.rhs()), makeExpr(exprProto.init()), chain, - exprProto.iter_space().include_center(), makeLocation(exprProto)); + exprProto.iter_space().include_center(), offsets, makeLocation(exprProto)); } } case dawn::proto::statements::Expr::EXPR_NOT_SET: @@ -764,20 +769,24 @@ static std::shared_ptr deserializeImpl(const std::string& str, SIRSerialize switch(sirValue.Value_case()) { case sir::proto::GlobalVariableValue::kBooleanValue: - value = std::make_shared(static_cast(sirValue.boolean_value()), isConstExpr); + value = + std::make_shared(static_cast(sirValue.boolean_value()), isConstExpr); break; case sir::proto::GlobalVariableValue::kIntegerValue: - value = std::make_shared(static_cast(sirValue.integer_value()), isConstExpr); + value = + std::make_shared(static_cast(sirValue.integer_value()), isConstExpr); break; case sir::proto::GlobalVariableValue::kFloatValue: - value = std::make_shared(static_cast(sirValue.float_value()), isConstExpr); + value = + std::make_shared(static_cast(sirValue.float_value()), isConstExpr); break; case sir::proto::GlobalVariableValue::kDoubleValue: - value = std::make_shared(static_cast(sirValue.double_value()), isConstExpr); + value = std::make_shared(static_cast(sirValue.double_value()), + isConstExpr); break; case sir::proto::GlobalVariableValue::kStringValue: value = std::make_shared(static_cast(sirValue.string_value()), - isConstExpr); + isConstExpr); break; case sir::proto::GlobalVariableValue::VALUE_NOT_SET: default: diff --git a/dawn/src/dawn/Validator/WeightChecker.cpp b/dawn/src/dawn/Validator/WeightChecker.cpp index f2ccf2d8a..6e9479c46 100644 --- a/dawn/src/dawn/Validator/WeightChecker.cpp +++ b/dawn/src/dawn/Validator/WeightChecker.cpp @@ -42,7 +42,8 @@ void WeightChecker::WeightCheckerImpl::visit( DAWN_ASSERT(nameToDimensions_.count(fieldName)); weightsValid_ = sir::dimension_cast( nameToDimensions_.at(fieldName).getHorizontalFieldDimension()) - .isDense(); + .isDense() || + parentAllowsSparse_; } } void WeightChecker::WeightCheckerImpl::visit( @@ -74,6 +75,7 @@ void WeightChecker::WeightCheckerImpl::visit( void WeightChecker::WeightCheckerImpl::visit( const std::shared_ptr& expr) { + parentAllowsSparse_ = !expr->getOffsets().empty(); if(expr->getWeights().has_value()) { parentIsWeight_ = true; for(const auto& weight : *expr->getWeights()) { @@ -84,6 +86,7 @@ void WeightChecker::WeightCheckerImpl::visit( } parentIsWeight_ = false; } + parentAllowsSparse_ = false; expr->getRhs()->accept(*this); } diff --git a/dawn/src/dawn/Validator/WeightChecker.h b/dawn/src/dawn/Validator/WeightChecker.h index 02da2106e..1338a1dc8 100644 --- a/dawn/src/dawn/Validator/WeightChecker.h +++ b/dawn/src/dawn/Validator/WeightChecker.h @@ -42,6 +42,7 @@ class WeightChecker { private: bool weightsValid_ = true; bool parentIsWeight_ = false; + bool parentAllowsSparse_ = false; const std::unordered_map nameToDimensions_; const std::unordered_map idToNameMap_; std::stack> diff --git a/dawn/src/dawn4py/serialization/utils.py b/dawn/src/dawn4py/serialization/utils.py index a0bf36972..e5cf2862c 100644 --- a/dawn/src/dawn4py/serialization/utils.py +++ b/dawn/src/dawn4py/serialization/utils.py @@ -820,7 +820,8 @@ def make_reduction_over_neighbor_expr( init: ExprType, chain: List[LocationTypeValue], weights: List[ExprType] = None, - include_center: bool = False + include_center: bool = False, + offsets: List[int] = None ) -> ReductionOverNeighborExpr: """ Create a ReductionOverNeighborExpr @@ -839,6 +840,8 @@ def make_reduction_over_neighbor_expr( iterSpace.chain.extend(chain) if weights is not None and len(weights) != 0: expr.weights.extend([make_expr(weight) for weight in weights]) + if offsets is not None and len(offsets) != 0: + expr.offsets.extend([offset for offset in offsets]) iterSpace.include_center = include_center expr.iter_space.CopyFrom(iterSpace) return expr diff --git a/dawn/test/integration-test/dawn4py-tests/offset_reduction.py b/dawn/test/integration-test/dawn4py-tests/offset_reduction.py new file mode 100644 index 000000000..0a258f03c --- /dev/null +++ b/dawn/test/integration-test/dawn4py-tests/offset_reduction.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python + +# ===-----------------------------------------------------------------------------*- Python -*-===## +# _ +# | | +# __| | __ ___ ___ ___ +# / _` |/ _` \ \ /\ / / '_ | +# | (_| | (_| |\ V V /| | | | +# \__,_|\__,_| \_/\_/ |_| |_| - Compiler Toolchain +# +# +# This file is distributed under the MIT License (MIT). +# See LICENSE.txt for details. +# +# ===------------------------------------------------------------------------------------------===## + +"""Copy stencil HIR generator + +This program creates the HIR corresponding to an unstructured stencil using the SIR serialization Python API. +The code is meant as an example for high-level DSLs that could generate HIR from their own +internal IR. +""" + +import argparse +import os + +import dawn4py +from dawn4py.serialization import SIR +from dawn4py.serialization import utils as sir_utils +from google.protobuf.json_format import MessageToJson, Parse + +OUTPUT_NAME = "unstructured_stencil" +OUTPUT_FILE = f"{OUTPUT_NAME}.cpp" +OUTPUT_PATH = f"{OUTPUT_NAME}.cpp" + + +def main(args: argparse.Namespace): + interval = sir_utils.make_interval( + SIR.Interval.Start, SIR.Interval.End, 0, 0) + + # create the out = in[i+1] statement + body_ast = sir_utils.make_ast( + [ + sir_utils.make_assignment_stmt( + sir_utils.make_unstructured_field_access_expr("out_vn_e"), + sir_utils.make_reduction_over_neighbor_expr( + "+", + sir_utils.make_binary_operator( + sir_utils.make_unstructured_field_access_expr( + "raw_diam_coeff", horizontal_offset=sir_utils.make_unstructured_offset(True)), + "*", + sir_utils.make_unstructured_field_access_expr( + "prism_thick_e", horizontal_offset=sir_utils.make_unstructured_offset(True)), + ), + sir_utils.make_literal_access_expr( + ".0", SIR.BuiltinType.Float), + chain=[SIR.LocationType.Value("Edge"), SIR.LocationType.Value("Cell"), SIR.LocationType.Value("Edge")], + weights=[sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True))], + offsets=[0, 0, 1, 1] + ), + "=", + ) + ] + ) + + vertical_region_stmt = sir_utils.make_vertical_region_decl_stmt( + body_ast, interval, SIR.VerticalRegion.Forward + ) + + sir = sir_utils.make_sir( + OUTPUT_FILE, + SIR.GridType.Value("Unstructured"), + [ + sir_utils.make_stencil( + OUTPUT_NAME, + sir_utils.make_ast([vertical_region_stmt]), + [ + sir_utils.make_field( + "out_vn_e", + sir_utils.make_field_dimensions_unstructured( + [SIR.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "raw_diam_coeff", + sir_utils.make_field_dimensions_unstructured( + [SIR.LocationType.Value("Edge"), + SIR.LocationType.Value("Cell"), + SIR.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "prism_thick_e", + sir_utils.make_field_dimensions_unstructured( + [SIR.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "e2c_aux", + sir_utils.make_field_dimensions_unstructured( + [SIR.LocationType.Value("Edge"), SIR.LocationType.Value("Cell")], 1 + ), + ), + ], + ), + ], + ) + + # print the SIR + f = open("unstructured_stencil.sir", "w") + f.write(MessageToJson(sir)) + f.close() + + # compile + code = dawn4py.compile(sir, backend=dawn4py.CodeGenBackend.CXXNaiveIco) + + # write to file + print(f"Writing generated code to '{OUTPUT_PATH}'") + with open(OUTPUT_PATH, "w") as f: + f.write(code) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Generate a simple unstructured copy stencil using Dawn compiler" + ) + parser.add_argument( + "-v", + "--verbose", + dest="verbose", + action="store_true", + default=False, + help="Print the generated SIR", + ) + main(parser.parse_args()) From 26e63473f7b180167228c3d2c6e626fc59ebb12c Mon Sep 17 00:00:00 2001 From: Matthias Roethlin Date: Thu, 28 Jan 2021 13:05:54 +0100 Subject: [PATCH 05/13] rough but complete implementation --- dawn/src/dawn/AST/ASTExpr.cpp | 3 +- .../CodeGen/CXXNaive-ico/ASTStencilBody.cpp | 32 ++++++++++++++++--- .../CodeGen/CXXNaive-ico/ASTStencilBody.h | 1 + .../dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp | 22 ++++++++++--- .../dawn/CodeGen/Cuda-ico/ASTStencilBody.h | 1 + .../dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp | 28 +++++++++++++--- .../UnstructuredDimensionChecker.cpp | 2 +- dawn/src/dawn4py/serialization/utils.py | 2 +- .../dawn4py-tests/offset_reduction.py | 4 +-- 9 files changed, 77 insertions(+), 18 deletions(-) diff --git a/dawn/src/dawn/AST/ASTExpr.cpp b/dawn/src/dawn/AST/ASTExpr.cpp index 370d3bcb7..f55657b8f 100644 --- a/dawn/src/dawn/AST/ASTExpr.cpp +++ b/dawn/src/dawn/AST/ASTExpr.cpp @@ -476,7 +476,8 @@ ReductionOverNeighborExpr::ReductionOverNeighborExpr( ReductionOverNeighborExpr::ReductionOverNeighborExpr(ReductionOverNeighborExpr const& expr) : Expr(Kind::ReductionOverNeighborExpr, expr.getSourceLocation()), op_(expr.getOp()), - weights_(expr.getWeights()), iterSpace_(expr.iterSpace_), operands_(expr.operands_) {} + weights_(expr.getWeights()), iterSpace_(expr.iterSpace_), operands_(expr.operands_), + offsets_(expr.offsets_) {} ReductionOverNeighborExpr& ReductionOverNeighborExpr::operator=(ReductionOverNeighborExpr const& expr) { diff --git a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp index da7c028ed..045c28f4e 100644 --- a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp +++ b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp @@ -21,6 +21,8 @@ #include "dawn/IIR/StencilFunctionInstantiation.h" #include "dawn/SIR/SIR.h" #include "dawn/Support/Unreachable.h" +#include +#include static std::string nbhChainToVectorString(const std::vector& chain) { auto getLocationTypeString = [](dawn::ast::LocationType type) { @@ -219,6 +221,10 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + offsets_->pop_front(); + } return "deref(LibTag{}, " + resArgName + "), " + kiterStr; } @@ -228,6 +234,10 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + offsets_->pop_front(); + } return "deref(LibTag{}, " + sparseArgName_ + ")," + sparseIdx + ", " + kiterStr; } @@ -247,6 +257,10 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + offsets_->pop_front(); + } return "deref(LibTag{}, " + resArgName + ")"; } @@ -256,6 +270,10 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + offsets_->pop_front(); + } return "deref(LibTag{}, " + sparseArgName_ + ")," + sparseIdx; } @@ -401,16 +419,16 @@ void ASTStencilBody::visit(const std::shared_ptr reductionDepth_++; expr->getRhs()->accept(*this); reductionDepth_--; - if(reductionDepth_ == 0) { - parentIsReduction_ = false; - currentChain_.clear(); - } + // "pop" argName denseArgName_ = argName; ss_ << ";\n"; ss_ << ASTStencilBody::ReductionSparseIndexVarName(reductionDepth_) << "++;\n"; ss_ << "return lhs;\n"; ss_ << "}"; + if(!expr->getOffsets().empty()) { + offsets_ = std::deque(std::begin(expr->getOffsets()), std::end(expr->getOffsets())); + } if(hasWeights) { auto weights = expr->getWeights().value(); bool first = true; @@ -430,6 +448,12 @@ void ASTStencilBody::visit(const std::shared_ptr ss_ << ", /*include center*/ true"; } ss_ << ")"; + offsets_ = std::nullopt; + + if(reductionDepth_ == 0) { + parentIsReduction_ = false; + currentChain_.clear(); + } } void ASTStencilBody::setCurrentStencilFunction( diff --git a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.h b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.h index 17fbf45dd..165ba3eb6 100644 --- a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.h +++ b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.h @@ -62,6 +62,7 @@ class ASTStencilBody : public ASTCodeGenCXX { bool parentIsReduction_ = false; bool parentIsForLoop_ = false; std::vector currentChain_; + std::optional> offsets_; size_t reductionDepth_ = 0; diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp index f3c5f5825..dcea77296 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp @@ -17,6 +17,7 @@ #include "dawn/IIR/AST.h" #include "dawn/IIR/ASTExpr.h" #include +#include #include #include @@ -115,11 +116,20 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + nbhIter = std::to_string(offsets_->front()); + offsets_->pop_front(); + } + if(isFullField && isDense) { if((parentIsReduction_ || parentIsForLoop_) && ast::offset_cast(expr->getOffset().horizontalOffset()) .hasOffset()) { - return kiterStr + "*" + denseSize + "+ nbhIdx"; + return kiterStr + "*" + denseSize + " + " + nbhIdx; } else { return kiterStr + "*" + denseSize + "+ pidx"; } @@ -129,7 +139,7 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptr(expr->getOffset().horizontalOffset()) .hasOffset()) { - return "nbhIdx"; + return nbhIdx; } else { return "pidx"; } @@ -147,7 +157,7 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptr expr->getInit()->accept(*this); ss_ << ";\n"; auto weights = expr->getWeights(); + if(!expr->getOffsets().empty()) { + offsets_ = std::deque(std::begin(expr->getOffsets()), std::end(expr->getOffsets())); + } if(weights.has_value()) { ss_ << "::dawn::float_type " << weights_name << "[" << weights->size() << "] = {"; bool first = true; @@ -232,6 +245,7 @@ void ASTStencilBody::visit(const std::shared_ptr } ss_ << "};\n"; } + offsets_ = std::nullopt; ss_ << "for (int nbhIter = 0; nbhIter < " << chainToSparseSizeString(expr->getIterSpace()) << "; nbhIter++)"; diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h index 396c8c961..475784f0b 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h +++ b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h @@ -62,6 +62,7 @@ class ASTStencilBody : public ASTCodeGenCXX { bool parentIsReduction_ = false; bool parentIsForLoop_ = false; + std::optional> offsets_; bool firstPass_ = true; diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp index b6db67b02..5757fab51 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp @@ -24,11 +24,13 @@ #include "dawn/CodeGen/Cuda/CodeGeneratorHelper.h" #include "dawn/CodeGen/F90Util.h" #include "dawn/CodeGen/IcoChainSizes.h" +#include "dawn/IIR/ASTExpr.h" #include "dawn/IIR/Field.h" #include "dawn/IIR/Interval.h" #include "dawn/IIR/MultiStage.h" #include "dawn/IIR/Stage.h" #include "dawn/IIR/Stencil.h" +#include "dawn/IIR/StencilMetaInformation.h" #include "dawn/Support/Assert.h" #include "dawn/Support/Exception.h" #include "dawn/Support/FileSystem.h" @@ -104,6 +106,11 @@ class CollectIterationSpaces : public ast::ASTVisitorForwarding { for(auto c : expr->getChildren()) { c->accept(*this); } + if(expr->getWeights().has_value()) { + for(auto w : expr->getWeights().value()) { + w->accept(*this); + } + } } void visit(const std::shared_ptr& stmt) override { @@ -114,12 +121,23 @@ class CollectIterationSpaces : public ast::ASTVisitorForwarding { } } + void visit(const std::shared_ptr& expr) override { + auto unstrDim = sir::dimension_cast( + metadata_.getFieldDimensions(iir::getAccessID(expr)).getHorizontalFieldDimension()); + if(unstrDim.isSparse()) { + spaces_.insert(unstrDim.getIterSpace()); + } + } + const std::unordered_set& getSpaces() const { return spaces_; } + CollectIterationSpaces(const iir::StencilMetaInformation& metadata) : metadata_(metadata) {} + private: std::unordered_set spaces_; + const iir::StencilMetaInformation& metadata_; }; void CudaIcoCodeGen::generateGpuMesh( @@ -132,7 +150,7 @@ void CudaIcoCodeGen::generateGpuMesh( gpuMeshClass.addMember("int", "NumCells"); gpuMeshClass.addMember("dawn::unstructured_domain", "Domain"); - CollectIterationSpaces spaceCollector; + CollectIterationSpaces spaceCollector(stencilInstantiation->getMetaData()); std::unordered_set spaces; for(const auto& doMethod : iterateIIROver(*(stencilInstantiation->getIIR()))) { doMethod->getAST().accept(spaceCollector); @@ -306,7 +324,7 @@ void CudaIcoCodeGen::generateRunFun( kernelCall << kName; // which nbh tables need to be passed / which templates need to be defined? - CollectIterationSpaces chainStringCollector; + CollectIterationSpaces chainStringCollector(stencilInstantiation->getMetaData()); for(const auto& doMethod : stage->getChildren()) { doMethod->getAST().accept(chainStringCollector); } @@ -703,7 +721,7 @@ void CudaIcoCodeGen::generateAllAPIRunFunctions( const auto& stencils = stencilInstantiation->getStencils(); DAWN_ASSERT_MSG(stencils.size() <= 1, "code generation only for at most one stencil!\n"); - CollectIterationSpaces chainCollector; + CollectIterationSpaces chainCollector(stencilInstantiation->getMetaData()); std::set> chains; for(const auto& doMethod : iterateIIROver(*(stencilInstantiation->getIIR()))) { doMethod->getAST().accept(chainCollector); @@ -940,7 +958,7 @@ void CudaIcoCodeGen::generateAllCudaKernels( //-------------------------------------- // which nbh tables / size templates need to be passed? - CollectIterationSpaces chainStringCollector; + CollectIterationSpaces chainStringCollector(stencilInstantiation->getMetaData()); for(const auto& doMethod : stage->getChildren()) { doMethod->getAST().accept(chainStringCollector); } @@ -1089,7 +1107,7 @@ std::string CudaIcoCodeGen::generateStencilInstantiation( generateAllCudaKernels(ssSW, stencilInstantiation); - CollectIterationSpaces spaceCollector; + CollectIterationSpaces spaceCollector(stencilInstantiation->getMetaData()); std::unordered_set spaces; for(const auto& doMethod : iterateIIROver(*(stencilInstantiation->getIIR()))) { doMethod->getAST().accept(spaceCollector); diff --git a/dawn/src/dawn/Validator/UnstructuredDimensionChecker.cpp b/dawn/src/dawn/Validator/UnstructuredDimensionChecker.cpp index a2745a64f..bf91286eb 100644 --- a/dawn/src/dawn/Validator/UnstructuredDimensionChecker.cpp +++ b/dawn/src/dawn/Validator/UnstructuredDimensionChecker.cpp @@ -482,7 +482,7 @@ void UnstructuredDimensionChecker::UnstructuredDimensionCheckerImpl::visit( } // check weighs for consistency w.r.t dimensions - if(reductionExpr->getWeights().has_value()) { + if(reductionExpr->getWeights().has_value() && reductionExpr->getOffsets().empty()) { // check weights one by one UnstructuredDimensionChecker::UnstructuredDimensionCheckerImpl weightChecker( nameToDimensions_, idToNameMap_, idToLocalVariableData_, config_); diff --git a/dawn/src/dawn4py/serialization/utils.py b/dawn/src/dawn4py/serialization/utils.py index e5cf2862c..7638f5ec8 100644 --- a/dawn/src/dawn4py/serialization/utils.py +++ b/dawn/src/dawn4py/serialization/utils.py @@ -841,7 +841,7 @@ def make_reduction_over_neighbor_expr( if weights is not None and len(weights) != 0: expr.weights.extend([make_expr(weight) for weight in weights]) if offsets is not None and len(offsets) != 0: - expr.offsets.extend([offset for offset in offsets]) + expr.offsets.extend([offset for offset in offsets]) iterSpace.include_center = include_center expr.iter_space.CopyFrom(iterSpace) return expr diff --git a/dawn/test/integration-test/dawn4py-tests/offset_reduction.py b/dawn/test/integration-test/dawn4py-tests/offset_reduction.py index 0a258f03c..a6885ff42 100644 --- a/dawn/test/integration-test/dawn4py-tests/offset_reduction.py +++ b/dawn/test/integration-test/dawn4py-tests/offset_reduction.py @@ -29,7 +29,7 @@ from dawn4py.serialization import utils as sir_utils from google.protobuf.json_format import MessageToJson, Parse -OUTPUT_NAME = "unstructured_stencil" +OUTPUT_NAME = "offset_reduction" OUTPUT_FILE = f"{OUTPUT_NAME}.cpp" OUTPUT_PATH = f"{OUTPUT_NAME}.cpp" @@ -114,7 +114,7 @@ def main(args: argparse.Namespace): ) # print the SIR - f = open("unstructured_stencil.sir", "w") + f = open("offset_reduction.sir", "w") f.write(MessageToJson(sir)) f.close() From 311c440ead9352e304284a23526d7eca5936e6c7 Mon Sep 17 00:00:00 2001 From: Matthias Roethlin Date: Wed, 10 Feb 2021 10:59:08 +0100 Subject: [PATCH 06/13] small improvements, fix stupid bug when constructing offset dequeue --- dawn/src/dawn/AST/ASTExpr.cpp | 2 +- dawn/src/dawn/AST/ASTExpr.h | 4 ++-- dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp | 2 +- dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp | 3 ++- dawn/test/integration-test/dawn4py-tests/offset_reduction.py | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dawn/src/dawn/AST/ASTExpr.cpp b/dawn/src/dawn/AST/ASTExpr.cpp index f55657b8f..caf13b1bc 100644 --- a/dawn/src/dawn/AST/ASTExpr.cpp +++ b/dawn/src/dawn/AST/ASTExpr.cpp @@ -517,7 +517,7 @@ bool ReductionOverNeighborExpr::equals(const Expr* other, bool compareData) cons return otherPtr && otherPtr->getInit()->equals(getInit().get(), compareData) && otherPtr->getOp() == getOp() && otherPtr->getRhs()->equals(getRhs().get(), compareData) && - otherPtr->iterSpace_ == iterSpace_; + otherPtr->iterSpace_ == iterSpace_ && offsets_ == otherPtr->offsets_; } } // namespace ast diff --git a/dawn/src/dawn/AST/ASTExpr.h b/dawn/src/dawn/AST/ASTExpr.h index 11987b8bb..bc61218a6 100644 --- a/dawn/src/dawn/AST/ASTExpr.h +++ b/dawn/src/dawn/AST/ASTExpr.h @@ -647,7 +647,7 @@ class ReductionOverNeighborExpr : public Expr { // hold a copy of the (shared pointer to) the weights std::vector> operands_ = std::vector>(2); bool chainIsValid() const; - std::vector offsets_; + std::vector offsets_ = {}; public: /// @name Constructor & Destructor @@ -673,7 +673,7 @@ class ReductionOverNeighborExpr : public Expr { std::vector getNbhChain() const { return iterSpace_; }; ast::LocationType getLhsLocation() const { return iterSpace_.Chain.front(); }; const std::optional>>& getWeights() const { return weights_; }; - const std::vector getOffsets() const { return offsets_; }; + const std::vector& getOffsets() const { return offsets_; }; bool getIncludeCenter() const { return iterSpace_.IncludeCenter; }; ast::UnstructuredIterationSpace getIterSpace() const { return iterSpace_; } diff --git a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp index 045c28f4e..05461912b 100644 --- a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp +++ b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp @@ -427,7 +427,7 @@ void ASTStencilBody::visit(const std::shared_ptr ss_ << "return lhs;\n"; ss_ << "}"; if(!expr->getOffsets().empty()) { - offsets_ = std::deque(std::begin(expr->getOffsets()), std::end(expr->getOffsets())); + offsets_ = std::deque(expr->getOffsets().begin(), expr->getOffsets().end()); } if(hasWeights) { auto weights = expr->getWeights().value(); diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp index dcea77296..323db2301 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp @@ -230,8 +230,9 @@ void ASTStencilBody::visit(const std::shared_ptr expr->getInit()->accept(*this); ss_ << ";\n"; auto weights = expr->getWeights(); + if(!expr->getOffsets().empty()) { - offsets_ = std::deque(std::begin(expr->getOffsets()), std::end(expr->getOffsets())); + offsets_ = std::deque(expr->getOffsets().begin(), expr->getOffsets().end()); } if(weights.has_value()) { ss_ << "::dawn::float_type " << weights_name << "[" << weights->size() << "] = {"; diff --git a/dawn/test/integration-test/dawn4py-tests/offset_reduction.py b/dawn/test/integration-test/dawn4py-tests/offset_reduction.py index a6885ff42..a59c74648 100644 --- a/dawn/test/integration-test/dawn4py-tests/offset_reduction.py +++ b/dawn/test/integration-test/dawn4py-tests/offset_reduction.py @@ -119,7 +119,7 @@ def main(args: argparse.Namespace): f.close() # compile - code = dawn4py.compile(sir, backend=dawn4py.CodeGenBackend.CXXNaiveIco) + code = dawn4py.compile(sir, backend=dawn4py.CodeGenBackend.CUDAIco) # write to file print(f"Writing generated code to '{OUTPUT_PATH}'") From 695687fcd94d265b4401dc6cc6847bfac864759f Mon Sep 17 00:00:00 2001 From: mroethlin Date: Mon, 22 Feb 2021 12:18:55 +0100 Subject: [PATCH 07/13] handling the case where lower AND upper level is end --- dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp index f9b5c4ff2..a6d46a59a 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp @@ -282,7 +282,10 @@ void CudaIcoCodeGen::generateRunFun( DAWN_ASSERT_MSG(intervalsConsistent(*stage), "intervals in a stage must have same Levels for now!\n"); auto interval = stage->getChild(0)->getInterval(); - if(interval.levelIsEnd(iir::Interval::Bound::upper)) { + printf("%d %d %d %d\n", interval.lowerLevel(), interval.lowerOffset(), interval.upperLevel(), interval.upperOffset()); + if(interval.levelIsEnd(iir::Interval::Bound::upper) && interval.levelIsEnd(iir::Interval::Bound::lower)) { + k_size << (interval.upperOffset() - interval.lowerOffset()); + } else if(interval.levelIsEnd(iir::Interval::Bound::upper)) { k_size << "kSize_ + " << interval.upperOffset() << " - " << (interval.lowerOffset() + interval.lowerLevel()); } else { From 0b910bb9539944e5e5369adce2043f38eacb78b6 Mon Sep 17 00:00:00 2001 From: mroethlin Date: Fri, 26 Feb 2021 11:57:56 +0100 Subject: [PATCH 08/13] sparse full field indexing bug fixed --- dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp | 6 ++---- dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp index a5e445c3e..f1abf03a2 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp @@ -126,10 +126,8 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrgetChild(0)->getInterval(); - printf("%d %d %d %d\n", interval.lowerLevel(), interval.lowerOffset(), interval.upperLevel(), interval.upperOffset()); + auto interval = stage->getChild(0)->getInterval(); if(interval.levelIsEnd(iir::Interval::Bound::upper) && interval.levelIsEnd(iir::Interval::Bound::lower)) { k_size << (interval.upperOffset() - interval.lowerOffset()); } else if(interval.levelIsEnd(iir::Interval::Bound::upper)) { From 52e407b81cb3a5db83c6b65d94fce20df9d8dfea Mon Sep 17 00:00:00 2001 From: mroethlin Date: Fri, 5 Mar 2021 11:15:29 +0100 Subject: [PATCH 09/13] only check errors in debug build --- dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp index aee42b0c3..a54987a6f 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp @@ -422,8 +422,10 @@ void CudaIcoCodeGen::generateRunFun( } kernelCall << ")"; runFun.addStatement(kernelCall.str()); + runFun.addPreprocessorDirective("ifndef NDEBUG\n"); runFun.addStatement("gpuErrchk(cudaPeekAtLastError())"); runFun.addStatement("gpuErrchk(cudaDeviceSynchronize())"); + runFun.addPreprocessorDirective("endif\n"); } } From d1263a1f2b084b12f01e883239b47426bff783f3 Mon Sep 17 00:00:00 2001 From: Matthias Roethlin Date: Tue, 16 Mar 2021 16:44:35 +0100 Subject: [PATCH 10/13] fix bug in cudaico-codegen with offset reductions --- dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp index fa5f74b89..810d4fd99 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp @@ -137,7 +137,7 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptr Date: Tue, 16 Mar 2021 17:08:31 +0100 Subject: [PATCH 11/13] fixed a bug in the space collector for offsetReductions --- dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp index ddd568cc9..33bf76b63 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/CudaIcoCodeGen.cpp @@ -200,6 +200,9 @@ class CollectIterationSpaces : public ast::ASTVisitorForwardingNonConst { } void visit(const std::shared_ptr& expr) override { + if(metadata_.getFieldDimensions(iir::getAccessID(expr)).isVertical()) { + return; + } auto unstrDim = ast::dimension_cast( metadata_.getFieldDimensions(iir::getAccessID(expr)).getHorizontalFieldDimension()); if(unstrDim.isSparse()) { @@ -299,8 +302,9 @@ void CudaIcoCodeGen::generateRunFun( std::stringstream k_size; DAWN_ASSERT_MSG(intervalsConsistent(*stage), "intervals in a stage must have same Levels for now!\n"); - auto interval = stage->getChild(0)->getInterval(); - if(interval.levelIsEnd(iir::Interval::Bound::upper) && interval.levelIsEnd(iir::Interval::Bound::lower)) { + auto interval = stage->getChild(0)->getInterval(); + if(interval.levelIsEnd(iir::Interval::Bound::upper) && + interval.levelIsEnd(iir::Interval::Bound::lower)) { k_size << (interval.upperOffset() - interval.lowerOffset()); } else if(interval.levelIsEnd(iir::Interval::Bound::upper)) { k_size << "kSize_ + " << interval.upperOffset() << " - " From 9a3a80faaaff9eafc49fc5dfcd42ebe1cbb6b392 Mon Sep 17 00:00:00 2001 From: Matthias Roethlin Date: Wed, 17 Mar 2021 11:26:24 +0100 Subject: [PATCH 12/13] fix code generation for indices, again. add integration test --- .../CodeGen/CXXNaive-ico/ASTStencilBody.cpp | 10 +- .../dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp | 24 +- .../dawn4py-tests/CMakeLists.txt | 2 + .../data/offset_reduction_cpp_ref.cpp | 114 +++++++ .../data/offset_reduction_cuda_ref.cpp | 286 ++++++++++++++++++ .../dawn4py-tests/offset_reduction_cpp.py | 169 +++++++++++ ..._reduction.py => offset_reduction_cuda.py} | 65 ++-- 7 files changed, 630 insertions(+), 40 deletions(-) create mode 100644 dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cpp_ref.cpp create mode 100644 dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cuda_ref.cpp create mode 100644 dawn/test/integration-test/dawn4py-tests/offset_reduction_cpp.py rename dawn/test/integration-test/dawn4py-tests/{offset_reduction.py => offset_reduction_cuda.py} (61%) diff --git a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp index 487cc8427..213ca6443 100644 --- a/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp +++ b/dawn/src/dawn/CodeGen/CXXNaive-ico/ASTStencilBody.cpp @@ -221,10 +221,6 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); - offsets_->pop_front(); - } return "deref(LibTag{}, " + resArgName + "), " + kiterStr; } @@ -257,10 +253,6 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); - offsets_->pop_front(); - } return "deref(LibTag{}, " + resArgName + ")"; } @@ -271,7 +263,7 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + sparseIdx = std::to_string(offsets_->front()); offsets_->pop_front(); } return "deref(LibTag{}, " + sparseArgName_ + ")," + sparseIdx; diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp index 810d4fd99..87e44c2d5 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp +++ b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.cpp @@ -115,20 +115,11 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); - nbhIter = std::to_string(offsets_->front()); - offsets_->pop_front(); - } - if(isFullField && isDense) { if((parentIsReduction_ || parentIsForLoop_) && ast::offset_cast(expr->getOffset().horizontalOffset()) .hasOffset()) { - return kiterStr + "*" + denseSize + " + " + nbhIdx; + return kiterStr + "*" + denseSize + "+ nbhIdx"; } else { return kiterStr + "*" + denseSize + "+ pidx"; } @@ -137,6 +128,11 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + offsets_->pop_front(); + } return nbhIter + " * kSize * " + denseSize + " + " + kiterStr + "*" + denseSize + " + pidx"; } @@ -144,7 +140,7 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptr(expr->getOffset().horizontalOffset()) .hasOffset()) { - return nbhIdx; + return "nbhIdx"; } else { return "pidx"; } @@ -153,7 +149,11 @@ std::string ASTStencilBody::makeIndexString(const std::shared_ptrfront()); + offsets_->pop_front(); + } return nbhIter + " * " + denseSize + " + pidx"; } diff --git a/dawn/test/integration-test/dawn4py-tests/CMakeLists.txt b/dawn/test/integration-test/dawn4py-tests/CMakeLists.txt index 480e301a8..ce8993a42 100644 --- a/dawn/test/integration-test/dawn4py-tests/CMakeLists.txt +++ b/dawn/test/integration-test/dawn4py-tests/CMakeLists.txt @@ -56,3 +56,5 @@ add_python_example(NAME vertical_indirection) add_python_example(NAME global_var) add_python_example(NAME global_var_unstructured) add_python_example(NAME ICON_gradient) +add_python_example(NAME offset_reduction_cpp VERIFY) +add_python_example(NAME offset_reduction_cuda VERIFY) diff --git a/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cpp_ref.cpp b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cpp_ref.cpp new file mode 100644 index 000000000..db9de35f1 --- /dev/null +++ b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cpp_ref.cpp @@ -0,0 +1,114 @@ +#define DAWN_GENERATED 1 +#undef DAWN_BACKEND_T +#define DAWN_BACKEND_T CXXNAIVEICO +#define GRIDTOOLS_DAWN_NO_INCLUDE +#include +#include +#include + +namespace dawn_generated { +namespace cxxnaiveico { +template +class offset_reduction_cpp { +private: + struct stencil_34 { + ::dawn::mesh_t const& m_mesh; + int m_k_size; + ::dawn::edge_field_t& m_out_vn_e; + ::dawn::sparse_edge_field_t& m_raw_diam_coeff; + ::dawn::edge_field_t& m_prism_thick_e; + ::dawn::sparse_edge_field_t& m_e2c_aux; + ::dawn::sparse_edge_field_t& m_e2c_aux_h; + ::dawn::unstructured_domain m_unstructured_domain; + + public: + stencil_34(::dawn::mesh_t const& mesh, int k_size, + ::dawn::edge_field_t& out_vn_e, + ::dawn::sparse_edge_field_t& raw_diam_coeff, + ::dawn::edge_field_t& prism_thick_e, + ::dawn::sparse_edge_field_t& e2c_aux, + ::dawn::sparse_edge_field_t& e2c_aux_h) + : m_mesh(mesh), m_k_size(k_size), m_out_vn_e(out_vn_e), m_raw_diam_coeff(raw_diam_coeff), + m_prism_thick_e(prism_thick_e), m_e2c_aux(e2c_aux), m_e2c_aux_h(e2c_aux_h) {} + + ~stencil_34() {} + + void sync_storages() {} + static constexpr ::dawn::driver::unstructured_extent out_vn_e_extent = {false, 0, 0}; + static constexpr ::dawn::driver::unstructured_extent raw_diam_coeff_extent = {true, 0, 0}; + static constexpr ::dawn::driver::unstructured_extent prism_thick_e_extent = {true, 0, 0}; + static constexpr ::dawn::driver::unstructured_extent e2c_aux_extent = {true, 0, 0}; + static constexpr ::dawn::driver::unstructured_extent e2c_aux_h_extent = {true, 0, 0}; + + void run() { + using ::dawn::deref; + { + for(int k = 0 + 0; k <= (m_k_size == 0 ? 0 : (m_k_size - 1)) + 0 + 0; ++k) { + for(auto const& loc : getEdges(LibTag{}, m_mesh)) { + m_out_vn_e(deref(LibTag{}, loc), (k + 0)) = reduce( + LibTag{}, m_mesh, loc, (::dawn::float_type).0, + std::vector<::dawn::LocationType>{::dawn::LocationType::Edges, + ::dawn::LocationType::Cells, + ::dawn::LocationType::Edges}, + [&, sparse_dimension_idx0 = int(0)](auto& lhs, auto red_loc1, + auto const& weight) mutable { + lhs += weight * + (m_raw_diam_coeff(deref(LibTag{}, loc), sparse_dimension_idx0, (k + 0)) * + m_prism_thick_e(deref(LibTag{}, red_loc1), (k + 0))); + sparse_dimension_idx0++; + return lhs; + }, + std::vector<::dawn::float_type>({m_e2c_aux(deref(LibTag{}, loc), 0, (k + 0)), + m_e2c_aux(deref(LibTag{}, loc), 0, (k + 0)), + m_e2c_aux(deref(LibTag{}, loc), 1, (k + 0)), + m_e2c_aux(deref(LibTag{}, loc), 1, (k + 0))})); + m_out_vn_e(deref(LibTag{}, loc), (k + 0)) = reduce( + LibTag{}, m_mesh, loc, (::dawn::float_type).0, + std::vector<::dawn::LocationType>{::dawn::LocationType::Edges, + ::dawn::LocationType::Cells, + ::dawn::LocationType::Edges}, + [&, sparse_dimension_idx0 = int(0)](auto& lhs, auto red_loc1, + auto const& weight) mutable { + lhs += weight * + (m_raw_diam_coeff(deref(LibTag{}, loc), sparse_dimension_idx0, (k + 0)) * + m_prism_thick_e(deref(LibTag{}, red_loc1), (k + 0))); + sparse_dimension_idx0++; + return lhs; + }, + std::vector<::dawn::float_type>( + {m_e2c_aux_h(deref(LibTag{}, loc), 0), m_e2c_aux_h(deref(LibTag{}, loc), 0), + m_e2c_aux_h(deref(LibTag{}, loc), 1), m_e2c_aux_h(deref(LibTag{}, loc), 1)})); + } + } + } + sync_storages(); + } + }; + static constexpr const char* s_name = "offset_reduction_cpp"; + stencil_34 m_stencil_34; + +public: + offset_reduction_cpp(const offset_reduction_cpp&) = delete; + + // Members + + void set_splitter_index(::dawn::LocationType loc, ::dawn::UnstructuredSubdomain subdomain, + int offset, int index) { + m_stencil_34.m_unstructured_domain.set_splitter_index({loc, subdomain, offset}, index); + } + + offset_reduction_cpp(const ::dawn::mesh_t& mesh, int k_size, + ::dawn::edge_field_t& out_vn_e, + ::dawn::sparse_edge_field_t& raw_diam_coeff, + ::dawn::edge_field_t& prism_thick_e, + ::dawn::sparse_edge_field_t& e2c_aux, + ::dawn::sparse_edge_field_t& e2c_aux_h) + : m_stencil_34(mesh, k_size, out_vn_e, raw_diam_coeff, prism_thick_e, e2c_aux, e2c_aux_h) {} + + void run() { + m_stencil_34.run(); + ; + } +}; +} // namespace cxxnaiveico +} // namespace dawn_generated diff --git a/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cuda_ref.cpp b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cuda_ref.cpp new file mode 100644 index 000000000..bb1aa64ed --- /dev/null +++ b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cuda_ref.cpp @@ -0,0 +1,286 @@ +#include "driver-includes/unstructured_interface.hpp" +#include "driver-includes/unstructured_domain.hpp" +#include "driver-includes/defs.hpp" +#include "driver-includes/cuda_utils.hpp" +#include "driver-includes/cuda_verify.hpp" +#include "driver-includes/to_vtk.h" +#define GRIDTOOLS_DAWN_NO_INCLUDE +#include "driver-includes/math.hpp" +#include "driver-includes/timer_cuda.hpp" +#include +#define BLOCK_SIZE 16 +#define LEVELS_PER_THREAD 1 +using namespace gridtools::dawn; + +namespace dawn_generated { +namespace cuda_ico { +template +__global__ void offset_reduction_cuda_stencil34_ms47_s52_kernel( + int EdgeStride, int kSize, int hSize, const int* ecTable, const int* eceTable, + ::dawn::float_type* __restrict__ out_vn_e, + const ::dawn::float_type* __restrict__ raw_diam_coeff, + const ::dawn::float_type* __restrict__ prism_thick_e, + const ::dawn::float_type* __restrict__ e2c_aux, + const ::dawn::float_type* __restrict__ e2c_aux_h) { + unsigned int pidx = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int kidx = blockIdx.y * blockDim.y + threadIdx.y; + int klo = kidx * LEVELS_PER_THREAD + 0; + int khi = (kidx + 1) * LEVELS_PER_THREAD + 0; + if(pidx >= hSize) { + return; + } + for(int kIter = klo; kIter < khi; kIter++) { + if(kIter >= kSize + 0) { + return; + } + ::dawn::float_type lhs_36 = (::dawn::float_type).0; + ::dawn::float_type weights_36[4] = { + e2c_aux[0 * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx], + e2c_aux[0 * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx], + e2c_aux[1 * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx], + e2c_aux[1 * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx]}; + for(int nbhIter = 0; nbhIter < E_C_E_SIZE; nbhIter++) { + int nbhIdx = eceTable[pidx * E_C_E_SIZE + nbhIter]; + if(nbhIdx == DEVICE_MISSING_VALUE) { + continue; + } + lhs_36 += weights_36[nbhIter] * + (raw_diam_coeff[nbhIter * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx] * + prism_thick_e[(kIter + 0) * EdgeStride + nbhIdx]); + } + out_vn_e[(kIter + 0) * EdgeStride + pidx] = lhs_36; + ::dawn::float_type lhs_40 = (::dawn::float_type).0; + ::dawn::float_type weights_40[4] = { + e2c_aux_h[0 * EdgeStride + pidx], e2c_aux_h[0 * EdgeStride + pidx], + e2c_aux_h[1 * EdgeStride + pidx], e2c_aux_h[1 * EdgeStride + pidx]}; + for(int nbhIter = 0; nbhIter < E_C_E_SIZE; nbhIter++) { + int nbhIdx = eceTable[pidx * E_C_E_SIZE + nbhIter]; + if(nbhIdx == DEVICE_MISSING_VALUE) { + continue; + } + lhs_40 += weights_40[nbhIter] * + (raw_diam_coeff[nbhIter * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx] * + prism_thick_e[(kIter + 0) * EdgeStride + nbhIdx]); + } + out_vn_e[(kIter + 0) * EdgeStride + pidx] = lhs_40; + } +} + +class offset_reduction_cuda { +public: + static const int E_C_E_SIZE = 4; + static const int E_C_SIZE = 2; + + struct sbase : public timer_cuda { + + sbase(std::string name) : timer_cuda(name) {} + + double get_time() { return total_time(); } + }; + + struct GpuTriMesh { + int NumVertices; + int NumEdges; + int NumCells; + dawn::unstructured_domain Domain; + int* eceTable; + int* ecTable; + + GpuTriMesh() {} + + GpuTriMesh(const dawn::GlobalGpuTriMesh* mesh) { + NumVertices = mesh->NumVertices; + NumCells = mesh->NumCells; + NumEdges = mesh->NumEdges; + Domain = mesh->Domain; + eceTable = mesh->NeighborTables.at(std::tuple, bool>{ + {dawn::LocationType::Edges, dawn::LocationType::Cells, dawn::LocationType::Edges}, 0}); + ecTable = mesh->NeighborTables.at(std::tuple, bool>{ + {dawn::LocationType::Edges, dawn::LocationType::Cells}, 0}); + } + }; + + struct stencil_34 : public sbase { + private: + ::dawn::float_type* out_vn_e_; + ::dawn::float_type* raw_diam_coeff_; + ::dawn::float_type* prism_thick_e_; + ::dawn::float_type* e2c_aux_; + ::dawn::float_type* e2c_aux_h_; + static int kSize_; + static GpuTriMesh mesh_; + static bool is_setup_; + + public: + static const GpuTriMesh& getMesh() { return mesh_; } + + static int getKSize() { return kSize_; } + + static void free() {} + + static void setup(const dawn::GlobalGpuTriMesh* mesh, int kSize) { + mesh_ = GpuTriMesh(mesh); + kSize_ = kSize; + is_setup_ = true; + } + + dim3 grid(int kSize, int elSize) { + int dK = (kSize + LEVELS_PER_THREAD - 1) / LEVELS_PER_THREAD; + return dim3((elSize + BLOCK_SIZE - 1) / BLOCK_SIZE, (dK + BLOCK_SIZE - 1) / BLOCK_SIZE, 1); + } + + stencil_34() : sbase("stencil_34") {} + + void run() { + if(!is_setup_) { + printf( + "offset_reduction_cuda has not been set up! make sure setup() is called before run!\n"); + return; + } + dim3 dB(BLOCK_SIZE, BLOCK_SIZE, 1); + sbase::start(); + int hsize52 = mesh_.NumEdges; + dim3 dG52 = grid(kSize_ + 0 - 0, hsize52); + offset_reduction_cuda_stencil34_ms47_s52_kernel + <<>>(mesh_.NumEdges + 0, kSize_, hsize52, mesh_.ecTable, mesh_.eceTable, + out_vn_e_, raw_diam_coeff_, prism_thick_e_, e2c_aux_, e2c_aux_h_); +#ifndef NDEBUG + + gpuErrchk(cudaPeekAtLastError()); + gpuErrchk(cudaDeviceSynchronize()); +#endif + + sbase::pause(); + } + + void CopyResultToHost(::dawn::float_type* out_vn_e, bool do_reshape) { + if(do_reshape) { + ::dawn::float_type* host_buf = new ::dawn::float_type[(mesh_.NumEdges + 0) * kSize_]; + gpuErrchk(cudaMemcpy((::dawn::float_type*)host_buf, out_vn_e_, + (mesh_.NumEdges + 0) * kSize_ * sizeof(::dawn::float_type), + cudaMemcpyDeviceToHost)); + dawn::reshape_back(host_buf, out_vn_e, kSize_, mesh_.NumEdges + 0); + delete[] host_buf; + } else { + gpuErrchk(cudaMemcpy(out_vn_e, out_vn_e_, + (mesh_.NumEdges + 0) * kSize_ * sizeof(::dawn::float_type), + cudaMemcpyDeviceToHost)); + } + } + + void copy_memory(::dawn::float_type* out_vn_e, ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, ::dawn::float_type* e2c_aux, + ::dawn::float_type* e2c_aux_h, bool do_reshape) { + dawn::initField(out_vn_e, &out_vn_e_, mesh_.NumEdges + 0, kSize_, do_reshape); + dawn::initSparseField(raw_diam_coeff, &raw_diam_coeff_, mesh_.NumEdges + 0, E_C_E_SIZE, + kSize_, do_reshape); + dawn::initField(prism_thick_e, &prism_thick_e_, mesh_.NumEdges + 0, kSize_, do_reshape); + dawn::initSparseField(e2c_aux, &e2c_aux_, mesh_.NumEdges + 0, E_C_SIZE, kSize_, do_reshape); + dawn::initSparseField(e2c_aux_h, &e2c_aux_h_, mesh_.NumEdges + 0, E_C_SIZE, 1, do_reshape); + } + + void copy_pointers(::dawn::float_type* out_vn_e, ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, ::dawn::float_type* e2c_aux, + ::dawn::float_type* e2c_aux_h) { + out_vn_e_ = out_vn_e; + raw_diam_coeff_ = raw_diam_coeff; + prism_thick_e_ = prism_thick_e; + e2c_aux_ = e2c_aux; + e2c_aux_h_ = e2c_aux_h; + } + }; +}; +} // namespace cuda_ico +} // namespace dawn_generated +extern "C" { +double run_offset_reduction_cuda_from_c_host(dawn::GlobalGpuTriMesh* mesh, int k_size, + ::dawn::float_type* out_vn_e, + ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, + ::dawn::float_type* e2c_aux, + ::dawn::float_type* e2c_aux_h) { + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34 s; + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::setup(mesh, k_size); + s.copy_memory(out_vn_e, raw_diam_coeff, prism_thick_e, e2c_aux, e2c_aux_h, true); + s.run(); + double time = s.get_time(); + s.reset(); + s.CopyResultToHost(out_vn_e, true); + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::free(); + return time; +} +double run_offset_reduction_cuda_from_fort_host(dawn::GlobalGpuTriMesh* mesh, int k_size, + ::dawn::float_type* out_vn_e, + ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, + ::dawn::float_type* e2c_aux, + ::dawn::float_type* e2c_aux_h) { + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34 s; + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::setup(mesh, k_size); + s.copy_memory(out_vn_e, raw_diam_coeff, prism_thick_e, e2c_aux, e2c_aux_h, false); + s.run(); + double time = s.get_time(); + s.reset(); + s.CopyResultToHost(out_vn_e, false); + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::free(); + return time; +} +double run_offset_reduction_cuda(::dawn::float_type* out_vn_e, ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, ::dawn::float_type* e2c_aux, + ::dawn::float_type* e2c_aux_h) { + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34 s; + s.copy_pointers(out_vn_e, raw_diam_coeff, prism_thick_e, e2c_aux, e2c_aux_h); + s.run(); + double time = s.get_time(); + s.reset(); + return time; +} +bool verify_offset_reduction_cuda(const ::dawn::float_type* out_vn_e_dsl, + const ::dawn::float_type* out_vn_e, const int iteration, + const double rel_err_threshold) { + using namespace std::chrono; + const auto& mesh = dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::getMesh(); + int kSize = dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::getKSize(); + high_resolution_clock::time_point t_start = high_resolution_clock::now(); + bool isValid = true; + double relErr; + relErr = ::dawn::verify_field((mesh.NumEdges + 0) * kSize, out_vn_e_dsl, out_vn_e, "out_vn_e"); + if(relErr > rel_err_threshold) { + isValid = false; +#ifdef __SERIALIZE_ON_ERROR + serialize_dense_edges(0, (mesh.NumEdges - 1), kSize, (mesh.NumEdges + 0), out_vn_e, + "offset_reduction_cuda", "out_vn_e", iteration); + serialize_dense_edges(0, (mesh.NumEdges - 1), kSize, (mesh.NumEdges + 0), out_vn_e_dsl, + "offset_reduction_cuda", "out_vn_e_dsl", iteration); + std::cout << "[DSL] serializing out_vn_e as error is high.\n" << std::flush; +#endif + } + high_resolution_clock::time_point t_end = high_resolution_clock::now(); + duration timing = duration_cast>(t_end - t_start); + std::cout << "[DSL] Verification took " << timing.count() << " seconds.\n" << std::flush; + return isValid; +} +void run_and_verify_offset_reduction_cuda( + ::dawn::float_type* out_vn_e, ::dawn::float_type* raw_diam_coeff, + ::dawn::float_type* prism_thick_e, ::dawn::float_type* e2c_aux, ::dawn::float_type* e2c_aux_h, + ::dawn::float_type* out_vn_e_before, const double rel_err_threshold) { + static int iteration = 0; + std::cout << "[DSL] Running stencil offset_reduction_cuda...\n" << std::flush; + double time = + run_offset_reduction_cuda(out_vn_e_before, raw_diam_coeff, prism_thick_e, e2c_aux, e2c_aux_h); + std::cout << "[DSL] offset_reduction_cuda run time: " << time << "s\n" << std::flush; + std::cout << "[DSL] Verifying stencil offset_reduction_cuda...\n" << std::flush; + verify_offset_reduction_cuda(out_vn_e_before, out_vn_e, iteration, rel_err_threshold); + iteration++; +} +void setup_offset_reduction_cuda(dawn::GlobalGpuTriMesh* mesh, int k_size) { + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::setup(mesh, k_size); +} +void free_offset_reduction_cuda() { + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::free(); +} +} +int dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::kSize_; +bool dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::is_setup_ = false; +dawn_generated::cuda_ico::offset_reduction_cuda::GpuTriMesh + dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::mesh_; diff --git a/dawn/test/integration-test/dawn4py-tests/offset_reduction_cpp.py b/dawn/test/integration-test/dawn4py-tests/offset_reduction_cpp.py new file mode 100644 index 000000000..206ed11e1 --- /dev/null +++ b/dawn/test/integration-test/dawn4py-tests/offset_reduction_cpp.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python + +# ===-----------------------------------------------------------------------------*- Python -*-===## +# _ +# | | +# __| | __ ___ ___ ___ +# / _` |/ _` \ \ /\ / / '_ | +# | (_| | (_| |\ V V /| | | | +# \__,_|\__,_| \_/\_/ |_| |_| - Compiler Toolchain +# +# +# This file is distributed under the MIT License (MIT). +# See LICENSE.txt for details. +# +# ===------------------------------------------------------------------------------------------===## + +"""Copy stencil HIR generator + +This program creates the HIR corresponding to an unstructured stencil using the SIR serialization Python API. +The code is meant as an example for high-level DSLs that could generate HIR from their own +internal IR. +""" + +import argparse +import os + +import dawn4py +from dawn4py.serialization import SIR, AST +from dawn4py.serialization import utils as sir_utils +from google.protobuf.json_format import MessageToJson, Parse + +OUTPUT_NAME = "offset_reduction_cpp" +OUTPUT_FILE = f"{OUTPUT_NAME}.cpp" +OUTPUT_PATH = f"{OUTPUT_NAME}.cpp" + + +def main(args: argparse.Namespace): + interval = sir_utils.make_interval( + AST.Interval.Start, AST.Interval.End, 0, 0) + + # create the out = in[i+1] statement + body_ast = sir_utils.make_ast( + [ + sir_utils.make_assignment_stmt( + sir_utils.make_unstructured_field_access_expr("out_vn_e"), + sir_utils.make_reduction_over_neighbor_expr( + "+", + sir_utils.make_binary_operator( + sir_utils.make_unstructured_field_access_expr( + "raw_diam_coeff", horizontal_offset=sir_utils.make_unstructured_offset(True)), + "*", + sir_utils.make_unstructured_field_access_expr( + "prism_thick_e", horizontal_offset=sir_utils.make_unstructured_offset(True)), + ), + sir_utils.make_literal_access_expr( + ".0", AST.BuiltinType.Float), + chain=[AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell"), AST.LocationType.Value("Edge")], + weights=[sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True))], + offsets=[0, 0, 1, 1] + ), + "=", + ), + sir_utils.make_assignment_stmt( + sir_utils.make_unstructured_field_access_expr("out_vn_e"), + sir_utils.make_reduction_over_neighbor_expr( + "+", + sir_utils.make_binary_operator( + sir_utils.make_unstructured_field_access_expr( + "raw_diam_coeff", horizontal_offset=sir_utils.make_unstructured_offset(True)), + "*", + sir_utils.make_unstructured_field_access_expr( + "prism_thick_e", horizontal_offset=sir_utils.make_unstructured_offset(True)), + ), + sir_utils.make_literal_access_expr( + ".0", AST.BuiltinType.Float), + chain=[AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell"), AST.LocationType.Value("Edge")], + weights=[sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True))], + offsets=[0, 0, 1, 1] + ), + "=", + ) + ] + ) + + vertical_region_stmt = sir_utils.make_vertical_region_decl_stmt( + body_ast, interval, AST.VerticalRegion.Forward + ) + + sir = sir_utils.make_sir( + OUTPUT_FILE, + AST.GridType.Value("Unstructured"), + [ + sir_utils.make_stencil( + OUTPUT_NAME, + sir_utils.make_ast([vertical_region_stmt]), + [ + sir_utils.make_field( + "out_vn_e", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "raw_diam_coeff", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge"), + AST.LocationType.Value("Cell"), + AST.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "prism_thick_e", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge")], 1 + ), + ), + sir_utils.make_field( + "e2c_aux", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell")], 1 + ), + ), + sir_utils.make_field( + "e2c_aux_h", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell")], 0 + ), + ), + ], + ), + ], + ) + + # compile + code = dawn4py.compile(sir, backend=dawn4py.CodeGenBackend.CXXNaiveIco) + + # write to file + print(f"Writing generated code to '{OUTPUT_PATH}'") + with open(OUTPUT_PATH, "w") as f: + f.write(code) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Generate a simple unstructured copy stencil using Dawn compiler" + ) + parser.add_argument( + "-v", + "--verbose", + dest="verbose", + action="store_true", + default=False, + help="Print the generated SIR", + ) + main(parser.parse_args()) diff --git a/dawn/test/integration-test/dawn4py-tests/offset_reduction.py b/dawn/test/integration-test/dawn4py-tests/offset_reduction_cuda.py similarity index 61% rename from dawn/test/integration-test/dawn4py-tests/offset_reduction.py rename to dawn/test/integration-test/dawn4py-tests/offset_reduction_cuda.py index a59c74648..2fb3f4a23 100644 --- a/dawn/test/integration-test/dawn4py-tests/offset_reduction.py +++ b/dawn/test/integration-test/dawn4py-tests/offset_reduction_cuda.py @@ -25,18 +25,18 @@ import os import dawn4py -from dawn4py.serialization import SIR +from dawn4py.serialization import SIR, AST from dawn4py.serialization import utils as sir_utils from google.protobuf.json_format import MessageToJson, Parse -OUTPUT_NAME = "offset_reduction" +OUTPUT_NAME = "offset_reduction_cuda" OUTPUT_FILE = f"{OUTPUT_NAME}.cpp" OUTPUT_PATH = f"{OUTPUT_NAME}.cpp" def main(args: argparse.Namespace): interval = sir_utils.make_interval( - SIR.Interval.Start, SIR.Interval.End, 0, 0) + AST.Interval.Start, AST.Interval.End, 0, 0) # create the out = in[i+1] statement body_ast = sir_utils.make_ast( @@ -53,8 +53,8 @@ def main(args: argparse.Namespace): "prism_thick_e", horizontal_offset=sir_utils.make_unstructured_offset(True)), ), sir_utils.make_literal_access_expr( - ".0", SIR.BuiltinType.Float), - chain=[SIR.LocationType.Value("Edge"), SIR.LocationType.Value("Cell"), SIR.LocationType.Value("Edge")], + ".0", AST.BuiltinType.Float), + chain=[AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell"), AST.LocationType.Value("Edge")], weights=[sir_utils.make_unstructured_field_access_expr( "e2c_aux", horizontal_offset=sir_utils.make_unstructured_offset(True)), sir_utils.make_unstructured_field_access_expr( @@ -66,17 +66,43 @@ def main(args: argparse.Namespace): offsets=[0, 0, 1, 1] ), "=", + ), + sir_utils.make_assignment_stmt( + sir_utils.make_unstructured_field_access_expr("out_vn_e"), + sir_utils.make_reduction_over_neighbor_expr( + "+", + sir_utils.make_binary_operator( + sir_utils.make_unstructured_field_access_expr( + "raw_diam_coeff", horizontal_offset=sir_utils.make_unstructured_offset(True)), + "*", + sir_utils.make_unstructured_field_access_expr( + "prism_thick_e", horizontal_offset=sir_utils.make_unstructured_offset(True)), + ), + sir_utils.make_literal_access_expr( + ".0", AST.BuiltinType.Float), + chain=[AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell"), AST.LocationType.Value("Edge")], + weights=[sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True)), + sir_utils.make_unstructured_field_access_expr( + "e2c_aux_h", horizontal_offset=sir_utils.make_unstructured_offset(True))], + offsets=[0, 0, 1, 1] + ), + "=", ) ] ) vertical_region_stmt = sir_utils.make_vertical_region_decl_stmt( - body_ast, interval, SIR.VerticalRegion.Forward + body_ast, interval, AST.VerticalRegion.Forward ) sir = sir_utils.make_sir( OUTPUT_FILE, - SIR.GridType.Value("Unstructured"), + AST.GridType.Value("Unstructured"), [ sir_utils.make_stencil( OUTPUT_NAME, @@ -85,38 +111,39 @@ def main(args: argparse.Namespace): sir_utils.make_field( "out_vn_e", sir_utils.make_field_dimensions_unstructured( - [SIR.LocationType.Value("Edge")], 1 + [AST.LocationType.Value("Edge")], 1 ), ), sir_utils.make_field( "raw_diam_coeff", sir_utils.make_field_dimensions_unstructured( - [SIR.LocationType.Value("Edge"), - SIR.LocationType.Value("Cell"), - SIR.LocationType.Value("Edge")], 1 + [AST.LocationType.Value("Edge"), + AST.LocationType.Value("Cell"), + AST.LocationType.Value("Edge")], 1 ), ), sir_utils.make_field( "prism_thick_e", sir_utils.make_field_dimensions_unstructured( - [SIR.LocationType.Value("Edge")], 1 + [AST.LocationType.Value("Edge")], 1 ), ), sir_utils.make_field( "e2c_aux", sir_utils.make_field_dimensions_unstructured( - [SIR.LocationType.Value("Edge"), SIR.LocationType.Value("Cell")], 1 + [AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell")], 1 + ), + ), + sir_utils.make_field( + "e2c_aux_h", + sir_utils.make_field_dimensions_unstructured( + [AST.LocationType.Value("Edge"), AST.LocationType.Value("Cell")], 0 ), ), ], ), ], - ) - - # print the SIR - f = open("offset_reduction.sir", "w") - f.write(MessageToJson(sir)) - f.close() + ) # compile code = dawn4py.compile(sir, backend=dawn4py.CodeGenBackend.CUDAIco) From 8b67de2da990161a5f4bf1216878395f687077e6 Mon Sep 17 00:00:00 2001 From: Matthias Roethlin Date: Wed, 19 May 2021 17:05:25 +0200 Subject: [PATCH 13/13] update ref --- .../dawn/CodeGen/Cuda-ico/ASTStencilBody.h | 3 - .../data/offset_reduction_cpp_ref.cpp | 2 +- .../data/offset_reduction_cuda_ref.cpp | 88 +++++++++++-------- 3 files changed, 50 insertions(+), 43 deletions(-) diff --git a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h index 5d0f0e9de..9ec7df2f3 100644 --- a/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h +++ b/dawn/src/dawn/CodeGen/Cuda-ico/ASTStencilBody.h @@ -84,11 +84,8 @@ class ASTStencilBody : public ASTCodeGenCXX { bool parentIsReduction_ = false; int parentReductionID_ = -1; bool parentIsForLoop_ = false; -<<<<<<< HEAD std::optional> offsets_; -======= ->>>>>>> master bool firstPass_ = true; int currentBlock_ = -1; diff --git a/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cpp_ref.cpp b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cpp_ref.cpp index db9de35f1..d9e65e6d2 100644 --- a/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cpp_ref.cpp +++ b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cpp_ref.cpp @@ -43,7 +43,7 @@ class offset_reduction_cpp { void run() { using ::dawn::deref; { - for(int k = 0 + 0; k <= (m_k_size == 0 ? 0 : (m_k_size - 1)) + 0 + 0; ++k) { + for(int k = 0 + 0; k <= (m_k_size == 0 ? 0 : (m_k_size)) + 0 - 1 + 0; ++k) { for(auto const& loc : getEdges(LibTag{}, m_mesh)) { m_out_vn_e(deref(LibTag{}, loc), (k + 0)) = reduce( LibTag{}, m_mesh, loc, (::dawn::float_type).0, diff --git a/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cuda_ref.cpp b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cuda_ref.cpp index bb1aa64ed..9cd6af76e 100644 --- a/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cuda_ref.cpp +++ b/dawn/test/integration-test/dawn4py-tests/data/offset_reduction_cuda_ref.cpp @@ -8,7 +8,7 @@ #include "driver-includes/math.hpp" #include "driver-includes/timer_cuda.hpp" #include -#define BLOCK_SIZE 16 +#define BLOCK_SIZE 128 #define LEVELS_PER_THREAD 1 using namespace gridtools::dawn; @@ -23,9 +23,8 @@ __global__ void offset_reduction_cuda_stencil34_ms47_s52_kernel( const ::dawn::float_type* __restrict__ e2c_aux, const ::dawn::float_type* __restrict__ e2c_aux_h) { unsigned int pidx = blockIdx.x * blockDim.x + threadIdx.x; - unsigned int kidx = blockIdx.y * blockDim.y + threadIdx.y; - int klo = kidx * LEVELS_PER_THREAD + 0; - int khi = (kidx + 1) * LEVELS_PER_THREAD + 0; + int klo = 0; + int khi = kSize + 0; if(pidx >= hSize) { return; } @@ -41,9 +40,6 @@ __global__ void offset_reduction_cuda_stencil34_ms47_s52_kernel( e2c_aux[1 * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx]}; for(int nbhIter = 0; nbhIter < E_C_E_SIZE; nbhIter++) { int nbhIdx = eceTable[pidx * E_C_E_SIZE + nbhIter]; - if(nbhIdx == DEVICE_MISSING_VALUE) { - continue; - } lhs_36 += weights_36[nbhIter] * (raw_diam_coeff[nbhIter * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx] * prism_thick_e[(kIter + 0) * EdgeStride + nbhIdx]); @@ -55,9 +51,6 @@ __global__ void offset_reduction_cuda_stencil34_ms47_s52_kernel( e2c_aux_h[1 * EdgeStride + pidx], e2c_aux_h[1 * EdgeStride + pidx]}; for(int nbhIter = 0; nbhIter < E_C_E_SIZE; nbhIter++) { int nbhIdx = eceTable[pidx * E_C_E_SIZE + nbhIter]; - if(nbhIdx == DEVICE_MISSING_VALUE) { - continue; - } lhs_40 += weights_40[nbhIter] * (raw_diam_coeff[nbhIter * kSize * EdgeStride + (kIter + 0) * EdgeStride + pidx] * prism_thick_e[(kIter + 0) * EdgeStride + nbhIdx]); @@ -82,7 +75,11 @@ class offset_reduction_cuda { int NumVertices; int NumEdges; int NumCells; - dawn::unstructured_domain Domain; + int VertexStride; + int EdgeStride; + int CellStride; + dawn::unstructured_domain DomainLower; + dawn::unstructured_domain DomainUpper; int* eceTable; int* ecTable; @@ -92,7 +89,11 @@ class offset_reduction_cuda { NumVertices = mesh->NumVertices; NumCells = mesh->NumCells; NumEdges = mesh->NumEdges; - Domain = mesh->Domain; + VertexStride = mesh->VertexStride; + CellStride = mesh->CellStride; + EdgeStride = mesh->EdgeStride; + DomainLower = mesh->DomainLower; + DomainUpper = mesh->DomainUpper; eceTable = mesh->NeighborTables.at(std::tuple, bool>{ {dawn::LocationType::Edges, dawn::LocationType::Cells, dawn::LocationType::Edges}, 0}); ecTable = mesh->NeighborTables.at(std::tuple, bool>{ @@ -124,9 +125,13 @@ class offset_reduction_cuda { is_setup_ = true; } - dim3 grid(int kSize, int elSize) { - int dK = (kSize + LEVELS_PER_THREAD - 1) / LEVELS_PER_THREAD; - return dim3((elSize + BLOCK_SIZE - 1) / BLOCK_SIZE, (dK + BLOCK_SIZE - 1) / BLOCK_SIZE, 1); + dim3 grid(int kSize, int elSize, bool kparallel) { + if(kparallel) { + int dK = (kSize + LEVELS_PER_THREAD - 1) / LEVELS_PER_THREAD; + return dim3((elSize + BLOCK_SIZE - 1) / BLOCK_SIZE, dK, 1); + } else { + return dim3((elSize + BLOCK_SIZE - 1) / BLOCK_SIZE, 1, 1); + } } stencil_34() : sbase("stencil_34") {} @@ -137,12 +142,12 @@ class offset_reduction_cuda { "offset_reduction_cuda has not been set up! make sure setup() is called before run!\n"); return; } - dim3 dB(BLOCK_SIZE, BLOCK_SIZE, 1); + dim3 dB(BLOCK_SIZE, 1, 1); sbase::start(); int hsize52 = mesh_.NumEdges; - dim3 dG52 = grid(kSize_ + 0 - 0, hsize52); + dim3 dG52 = grid(kSize_ + 0 - 0, hsize52, false); offset_reduction_cuda_stencil34_ms47_s52_kernel - <<>>(mesh_.NumEdges + 0, kSize_, hsize52, mesh_.ecTable, mesh_.eceTable, + <<>>(mesh_.EdgeStride, kSize_, hsize52, mesh_.ecTable, mesh_.eceTable, out_vn_e_, raw_diam_coeff_, prism_thick_e_, e2c_aux_, e2c_aux_h_); #ifndef NDEBUG @@ -155,15 +160,15 @@ class offset_reduction_cuda { void CopyResultToHost(::dawn::float_type* out_vn_e, bool do_reshape) { if(do_reshape) { - ::dawn::float_type* host_buf = new ::dawn::float_type[(mesh_.NumEdges + 0) * kSize_]; + ::dawn::float_type* host_buf = new ::dawn::float_type[(mesh_.EdgeStride) * kSize_]; gpuErrchk(cudaMemcpy((::dawn::float_type*)host_buf, out_vn_e_, - (mesh_.NumEdges + 0) * kSize_ * sizeof(::dawn::float_type), + (mesh_.EdgeStride) * kSize_ * sizeof(::dawn::float_type), cudaMemcpyDeviceToHost)); - dawn::reshape_back(host_buf, out_vn_e, kSize_, mesh_.NumEdges + 0); + dawn::reshape_back(host_buf, out_vn_e, kSize_, mesh_.EdgeStride); delete[] host_buf; } else { gpuErrchk(cudaMemcpy(out_vn_e, out_vn_e_, - (mesh_.NumEdges + 0) * kSize_ * sizeof(::dawn::float_type), + (mesh_.EdgeStride) * kSize_ * sizeof(::dawn::float_type), cudaMemcpyDeviceToHost)); } } @@ -171,12 +176,12 @@ class offset_reduction_cuda { void copy_memory(::dawn::float_type* out_vn_e, ::dawn::float_type* raw_diam_coeff, ::dawn::float_type* prism_thick_e, ::dawn::float_type* e2c_aux, ::dawn::float_type* e2c_aux_h, bool do_reshape) { - dawn::initField(out_vn_e, &out_vn_e_, mesh_.NumEdges + 0, kSize_, do_reshape); - dawn::initSparseField(raw_diam_coeff, &raw_diam_coeff_, mesh_.NumEdges + 0, E_C_E_SIZE, - kSize_, do_reshape); - dawn::initField(prism_thick_e, &prism_thick_e_, mesh_.NumEdges + 0, kSize_, do_reshape); - dawn::initSparseField(e2c_aux, &e2c_aux_, mesh_.NumEdges + 0, E_C_SIZE, kSize_, do_reshape); - dawn::initSparseField(e2c_aux_h, &e2c_aux_h_, mesh_.NumEdges + 0, E_C_SIZE, 1, do_reshape); + dawn::initField(out_vn_e, &out_vn_e_, mesh_.EdgeStride, kSize_, do_reshape); + dawn::initSparseField(raw_diam_coeff, &raw_diam_coeff_, mesh_.EdgeStride, E_C_E_SIZE, kSize_, + do_reshape); + dawn::initField(prism_thick_e, &prism_thick_e_, mesh_.EdgeStride, kSize_, do_reshape); + dawn::initSparseField(e2c_aux, &e2c_aux_, mesh_.EdgeStride, E_C_SIZE, kSize_, do_reshape); + dawn::initSparseField(e2c_aux_h, &e2c_aux_h_, mesh_.EdgeStride, E_C_SIZE, 1, do_reshape); } void copy_pointers(::dawn::float_type* out_vn_e, ::dawn::float_type* raw_diam_coeff, @@ -236,25 +241,28 @@ double run_offset_reduction_cuda(::dawn::float_type* out_vn_e, ::dawn::float_typ return time; } bool verify_offset_reduction_cuda(const ::dawn::float_type* out_vn_e_dsl, - const ::dawn::float_type* out_vn_e, const int iteration, - const double rel_err_threshold) { + const ::dawn::float_type* out_vn_e, const double out_vn_e_rel_tol, + const double out_vn_e_abs_tol, const int iteration) { using namespace std::chrono; const auto& mesh = dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::getMesh(); int kSize = dawn_generated::cuda_ico::offset_reduction_cuda::stencil_34::getKSize(); high_resolution_clock::time_point t_start = high_resolution_clock::now(); - bool isValid = true; - double relErr; - relErr = ::dawn::verify_field((mesh.NumEdges + 0) * kSize, out_vn_e_dsl, out_vn_e, "out_vn_e"); - if(relErr > rel_err_threshold) { - isValid = false; + bool isValid; + isValid = ::dawn::verify_field((mesh.EdgeStride) * kSize, out_vn_e_dsl, out_vn_e, "out_vn_e", + out_vn_e_rel_tol, out_vn_e_abs_tol); + if(!isValid) { #ifdef __SERIALIZE_ON_ERROR - serialize_dense_edges(0, (mesh.NumEdges - 1), kSize, (mesh.NumEdges + 0), out_vn_e, + serialize_dense_edges(0, (mesh.NumEdges - 1), kSize, (mesh.EdgeStride), out_vn_e, "offset_reduction_cuda", "out_vn_e", iteration); - serialize_dense_edges(0, (mesh.NumEdges - 1), kSize, (mesh.NumEdges + 0), out_vn_e_dsl, + serialize_dense_edges(0, (mesh.NumEdges - 1), kSize, (mesh.EdgeStride), out_vn_e_dsl, "offset_reduction_cuda", "out_vn_e_dsl", iteration); std::cout << "[DSL] serializing out_vn_e as error is high.\n" << std::flush; #endif } +#ifdef __SERIALIZE_ON_ERROR + + serialize_flush_iter("offset_reduction_cuda", iteration); +#endif high_resolution_clock::time_point t_end = high_resolution_clock::now(); duration timing = duration_cast>(t_end - t_start); std::cout << "[DSL] Verification took " << timing.count() << " seconds.\n" << std::flush; @@ -263,14 +271,16 @@ bool verify_offset_reduction_cuda(const ::dawn::float_type* out_vn_e_dsl, void run_and_verify_offset_reduction_cuda( ::dawn::float_type* out_vn_e, ::dawn::float_type* raw_diam_coeff, ::dawn::float_type* prism_thick_e, ::dawn::float_type* e2c_aux, ::dawn::float_type* e2c_aux_h, - ::dawn::float_type* out_vn_e_before, const double rel_err_threshold) { + ::dawn::float_type* out_vn_e_before, const double out_vn_e_rel_tol, + const double out_vn_e_abs_tol) { static int iteration = 0; std::cout << "[DSL] Running stencil offset_reduction_cuda...\n" << std::flush; double time = run_offset_reduction_cuda(out_vn_e_before, raw_diam_coeff, prism_thick_e, e2c_aux, e2c_aux_h); std::cout << "[DSL] offset_reduction_cuda run time: " << time << "s\n" << std::flush; std::cout << "[DSL] Verifying stencil offset_reduction_cuda...\n" << std::flush; - verify_offset_reduction_cuda(out_vn_e_before, out_vn_e, iteration, rel_err_threshold); + verify_offset_reduction_cuda(out_vn_e_before, out_vn_e, out_vn_e_rel_tol, out_vn_e_abs_tol, + iteration); iteration++; } void setup_offset_reduction_cuda(dawn::GlobalGpuTriMesh* mesh, int k_size) {