From a78fdbd4386229d40ccbd12f7f488fd56cf203ee Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 09:29:01 -0700 Subject: [PATCH 01/66] add stencil DPC++ skeletons --- Cxx11/Makefile | 2 +- Cxx11/stencil-dpcpp.cc | 229 ++++++++++++++++++++++++++++++++ Cxx11/stencil-multigpu-dpcpp.cc | 229 ++++++++++++++++++++++++++++++++ 3 files changed, 459 insertions(+), 1 deletion(-) create mode 100644 Cxx11/stencil-dpcpp.cc create mode 100644 Cxx11/stencil-multigpu-dpcpp.cc diff --git a/Cxx11/Makefile b/Cxx11/Makefile index f0624da81..6c0e08e3e 100644 --- a/Cxx11/Makefile +++ b/Cxx11/Makefile @@ -97,7 +97,7 @@ sycl-usm: nstream-sycl-usm nstream-sycl-explicit-usm stencil-sycl-usm transpose- sycl-explicit: nstream-sycl-explicit transpose-sycl-explicit -dpcpp: nstream-dpcpp nstream-multigpu-dpcpp transpose-dpcpp +dpcpp: nstream-dpcpp nstream-multigpu-dpcpp stencil-dpcpp stencil-multigpu-dpcpp transpose-dpcpp tbb: p2p-innerloop-tbb p2p-tbb stencil-tbb transpose-tbb nstream-tbb \ p2p-hyperplane-tbb p2p-tasks-tbb diff --git a/Cxx11/stencil-dpcpp.cc b/Cxx11/stencil-dpcpp.cc new file mode 100644 index 000000000..0dbb5764d --- /dev/null +++ b/Cxx11/stencil-dpcpp.cc @@ -0,0 +1,229 @@ + +/// +/// Copyright (c) 2013, Intel Corporation +/// +/// Redistribution and use in source and binary forms, with or without +/// modification, are permitted provided that the following conditions +/// are met: +/// +/// * Redistributions of source code must retain the above copyright +/// notice, this list of conditions and the following disclaimer. +/// * Redistributions in binary form must reproduce the above +/// copyright notice, this list of conditions and the following +/// disclaimer in the documentation and/or other materials provided +/// with the distribution. +/// * Neither the name of Intel Corporation nor the names of its +/// contributors may be used to endorse or promote products +/// derived from this software without specific prior written +/// permission. +/// +/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +/// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +/// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +/// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +/// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +/// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +/// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +/// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +/// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +/// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +/// POSSIBILITY OF SUCH DAMAGE. + +////////////////////////////////////////////////////////////////////// +/// +/// NAME: Stencil +/// +/// PURPOSE: This program tests the efficiency with which a space-invariant, +/// linear, symmetric filter (stencil) can be applied to a square +/// grid or image. +/// +/// USAGE: The program takes as input the linear +/// dimension of the grid, and the number of iterations on the grid +/// +/// +/// +/// The output consists of diagnostics to make sure the +/// algorithm worked, and of timing statistics. +/// +/// FUNCTIONS CALLED: +/// +/// Other than standard C functions, the following functions are used in +/// this program: +/// wtime() +/// +/// HISTORY: - Written by Rob Van der Wijngaart, February 2009. +/// - RvdW: Removed unrolling pragmas for clarity; +/// added constant to array "in" at end of each iteration to force +/// refreshing of neighbor data in parallel versions; August 2013 +/// C++11-ification by Jeff Hammond, May 2017. +/// +////////////////////////////////////////////////////////////////////// + +#include "prk_util.h" +#include "stencil_seq.hpp" + +void nothing(const int n, const int t, prk::vector & in, prk::vector & out) +{ + std::cout << "You are trying to use a stencil that does not exist.\n"; + std::cout << "Please generate the new stencil using the code generator\n"; + std::cout << "and add it to the case-switch in the driver." << std::endl; + // n will never be zero - this is to silence compiler warnings. + if (n==0 || t==0) std::cout << in.size() << out.size() << std::endl; + std::abort(); +} + +int main(int argc, char* argv[]) +{ + std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; + std::cout << "C++11 Stencil execution on 2D grid" << std::endl; + + ////////////////////////////////////////////////////////////////////// + // Process and test input parameters + ////////////////////////////////////////////////////////////////////// + + int iterations, n, radius, tile_size; + bool star = true; + try { + if (argc < 3) { + throw "Usage: <# iterations> [ ]"; + } + + // number of times to run the algorithm + iterations = std::atoi(argv[1]); + if (iterations < 1) { + throw "ERROR: iterations must be >= 1"; + } + + // linear grid dimension + n = std::atoi(argv[2]); + if (n < 1) { + throw "ERROR: grid dimension must be positive"; + } else if (n > prk::get_max_matrix_size()) { + throw "ERROR: grid dimension too large - overflow risk"; + } + + // default tile size for tiling of local transpose + tile_size = 32; + if (argc > 3) { + tile_size = std::atoi(argv[3]); + if (tile_size <= 0) tile_size = n; + if (tile_size > n) tile_size = n; + } + + // stencil pattern + if (argc > 4) { + auto stencil = std::string(argv[4]); + auto grid = std::string("grid"); + star = (stencil == grid) ? false : true; + } + + // stencil radius + radius = 2; + if (argc > 5) { + radius = std::atoi(argv[5]); + } + + if ( (radius < 1) || (2*radius+1 > n) ) { + throw "ERROR: Stencil radius negative or too large"; + } + } + catch (const char * e) { + std::cout << e << std::endl; + return 1; + } + + std::cout << "Number of iterations = " << iterations << std::endl; + std::cout << "Grid size = " << n << std::endl; + std::cout << "Tile size = " << tile_size << std::endl; + std::cout << "Type of stencil = " << (star ? "star" : "grid") << std::endl; + std::cout << "Radius of stencil = " << radius << std::endl; + + auto stencil = nothing; + if (star) { + switch (radius) { + case 1: stencil = star1; break; + case 2: stencil = star2; break; + case 3: stencil = star3; break; + case 4: stencil = star4; break; + case 5: stencil = star5; break; + } + } else { + switch (radius) { + case 1: stencil = grid1; break; + case 2: stencil = grid2; break; + case 3: stencil = grid3; break; + case 4: stencil = grid4; break; + case 5: stencil = grid5; break; + } + } + + ////////////////////////////////////////////////////////////////////// + // Allocate space and perform the computation + ////////////////////////////////////////////////////////////////////// + + auto stencil_time = 0.0; + + prk::vector in(n*n); + prk::vector out(n*n); + + { + for (int it=0; it(i+j); + out[i*n+j] = 0.0; + } + } + } + } + + for (int iter = 0; iter<=iterations; iter++) { + + if (iter==1) stencil_time = prk::wtime(); + // Apply the stencil operator + stencil(n, tile_size, in, out); + // Add constant to solution to force refresh of neighbor data, if any + std::transform(in.begin(), in.end(), in.begin(), [](double c) { return c+=1.0; }); + } + stencil_time = prk::wtime() - stencil_time; + } + + ////////////////////////////////////////////////////////////////////// + // Analyze and output results. + ////////////////////////////////////////////////////////////////////// + + // interior of grid with respect to stencil + size_t active_points = static_cast(n-2*radius)*static_cast(n-2*radius); + double norm = 0.0; + for (int i=radius; i epsilon) { + std::cout << "ERROR: L1 norm = " << norm + << " Reference L1 norm = " << reference_norm << std::endl; + return 1; + } else { + std::cout << "Solution validates" << std::endl; +#ifdef VERBOSE + std::cout << "L1 norm = " << norm + << " Reference L1 norm = " << reference_norm << std::endl; +#endif + const int stencil_size = star ? 4*radius+1 : (2*radius+1)*(2*radius+1); + size_t flops = (2L*(size_t)stencil_size+1L) * active_points; + auto avgtime = stencil_time/iterations; + std::cout << "Rate (MFlops/s): " << 1.0e-6 * static_cast(flops)/avgtime + << " Avg time (s): " << avgtime << std::endl; + } + + return 0; +} diff --git a/Cxx11/stencil-multigpu-dpcpp.cc b/Cxx11/stencil-multigpu-dpcpp.cc new file mode 100644 index 000000000..0dbb5764d --- /dev/null +++ b/Cxx11/stencil-multigpu-dpcpp.cc @@ -0,0 +1,229 @@ + +/// +/// Copyright (c) 2013, Intel Corporation +/// +/// Redistribution and use in source and binary forms, with or without +/// modification, are permitted provided that the following conditions +/// are met: +/// +/// * Redistributions of source code must retain the above copyright +/// notice, this list of conditions and the following disclaimer. +/// * Redistributions in binary form must reproduce the above +/// copyright notice, this list of conditions and the following +/// disclaimer in the documentation and/or other materials provided +/// with the distribution. +/// * Neither the name of Intel Corporation nor the names of its +/// contributors may be used to endorse or promote products +/// derived from this software without specific prior written +/// permission. +/// +/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +/// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +/// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +/// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +/// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +/// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +/// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +/// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +/// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +/// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +/// POSSIBILITY OF SUCH DAMAGE. + +////////////////////////////////////////////////////////////////////// +/// +/// NAME: Stencil +/// +/// PURPOSE: This program tests the efficiency with which a space-invariant, +/// linear, symmetric filter (stencil) can be applied to a square +/// grid or image. +/// +/// USAGE: The program takes as input the linear +/// dimension of the grid, and the number of iterations on the grid +/// +/// +/// +/// The output consists of diagnostics to make sure the +/// algorithm worked, and of timing statistics. +/// +/// FUNCTIONS CALLED: +/// +/// Other than standard C functions, the following functions are used in +/// this program: +/// wtime() +/// +/// HISTORY: - Written by Rob Van der Wijngaart, February 2009. +/// - RvdW: Removed unrolling pragmas for clarity; +/// added constant to array "in" at end of each iteration to force +/// refreshing of neighbor data in parallel versions; August 2013 +/// C++11-ification by Jeff Hammond, May 2017. +/// +////////////////////////////////////////////////////////////////////// + +#include "prk_util.h" +#include "stencil_seq.hpp" + +void nothing(const int n, const int t, prk::vector & in, prk::vector & out) +{ + std::cout << "You are trying to use a stencil that does not exist.\n"; + std::cout << "Please generate the new stencil using the code generator\n"; + std::cout << "and add it to the case-switch in the driver." << std::endl; + // n will never be zero - this is to silence compiler warnings. + if (n==0 || t==0) std::cout << in.size() << out.size() << std::endl; + std::abort(); +} + +int main(int argc, char* argv[]) +{ + std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; + std::cout << "C++11 Stencil execution on 2D grid" << std::endl; + + ////////////////////////////////////////////////////////////////////// + // Process and test input parameters + ////////////////////////////////////////////////////////////////////// + + int iterations, n, radius, tile_size; + bool star = true; + try { + if (argc < 3) { + throw "Usage: <# iterations> [ ]"; + } + + // number of times to run the algorithm + iterations = std::atoi(argv[1]); + if (iterations < 1) { + throw "ERROR: iterations must be >= 1"; + } + + // linear grid dimension + n = std::atoi(argv[2]); + if (n < 1) { + throw "ERROR: grid dimension must be positive"; + } else if (n > prk::get_max_matrix_size()) { + throw "ERROR: grid dimension too large - overflow risk"; + } + + // default tile size for tiling of local transpose + tile_size = 32; + if (argc > 3) { + tile_size = std::atoi(argv[3]); + if (tile_size <= 0) tile_size = n; + if (tile_size > n) tile_size = n; + } + + // stencil pattern + if (argc > 4) { + auto stencil = std::string(argv[4]); + auto grid = std::string("grid"); + star = (stencil == grid) ? false : true; + } + + // stencil radius + radius = 2; + if (argc > 5) { + radius = std::atoi(argv[5]); + } + + if ( (radius < 1) || (2*radius+1 > n) ) { + throw "ERROR: Stencil radius negative or too large"; + } + } + catch (const char * e) { + std::cout << e << std::endl; + return 1; + } + + std::cout << "Number of iterations = " << iterations << std::endl; + std::cout << "Grid size = " << n << std::endl; + std::cout << "Tile size = " << tile_size << std::endl; + std::cout << "Type of stencil = " << (star ? "star" : "grid") << std::endl; + std::cout << "Radius of stencil = " << radius << std::endl; + + auto stencil = nothing; + if (star) { + switch (radius) { + case 1: stencil = star1; break; + case 2: stencil = star2; break; + case 3: stencil = star3; break; + case 4: stencil = star4; break; + case 5: stencil = star5; break; + } + } else { + switch (radius) { + case 1: stencil = grid1; break; + case 2: stencil = grid2; break; + case 3: stencil = grid3; break; + case 4: stencil = grid4; break; + case 5: stencil = grid5; break; + } + } + + ////////////////////////////////////////////////////////////////////// + // Allocate space and perform the computation + ////////////////////////////////////////////////////////////////////// + + auto stencil_time = 0.0; + + prk::vector in(n*n); + prk::vector out(n*n); + + { + for (int it=0; it(i+j); + out[i*n+j] = 0.0; + } + } + } + } + + for (int iter = 0; iter<=iterations; iter++) { + + if (iter==1) stencil_time = prk::wtime(); + // Apply the stencil operator + stencil(n, tile_size, in, out); + // Add constant to solution to force refresh of neighbor data, if any + std::transform(in.begin(), in.end(), in.begin(), [](double c) { return c+=1.0; }); + } + stencil_time = prk::wtime() - stencil_time; + } + + ////////////////////////////////////////////////////////////////////// + // Analyze and output results. + ////////////////////////////////////////////////////////////////////// + + // interior of grid with respect to stencil + size_t active_points = static_cast(n-2*radius)*static_cast(n-2*radius); + double norm = 0.0; + for (int i=radius; i epsilon) { + std::cout << "ERROR: L1 norm = " << norm + << " Reference L1 norm = " << reference_norm << std::endl; + return 1; + } else { + std::cout << "Solution validates" << std::endl; +#ifdef VERBOSE + std::cout << "L1 norm = " << norm + << " Reference L1 norm = " << reference_norm << std::endl; +#endif + const int stencil_size = star ? 4*radius+1 : (2*radius+1)*(2*radius+1); + size_t flops = (2L*(size_t)stencil_size+1L) * active_points; + auto avgtime = stencil_time/iterations; + std::cout << "Rate (MFlops/s): " << 1.0e-6 * static_cast(flops)/avgtime + << " Avg time (s): " << avgtime << std::endl; + } + + return 0; +} From 7af1bebf702a5d3b03c028c517353ac6db029fe4 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 11:52:12 -0700 Subject: [PATCH 02/66] cleanup comments --- Cxx11/stencil-sycl.cc | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Cxx11/stencil-sycl.cc b/Cxx11/stencil-sycl.cc index a375e1f05..9795cc638 100644 --- a/Cxx11/stencil-sycl.cc +++ b/Cxx11/stencil-sycl.cc @@ -46,12 +46,6 @@ /// The output consists of diagnostics to make sure the /// algorithm worked, and of timing statistics. /// -/// FUNCTIONS CALLED: -/// -/// Other than standard C functions, the following functions are used in -/// this program: -/// wtime() -/// /// HISTORY: - Written by Rob Van der Wijngaart, February 2009. /// - RvdW: Removed unrolling pragmas for clarity; /// added constant to array "in" at end of each iteration to force @@ -298,6 +292,7 @@ int main(int argc, char * argv[]) std::cout << "Number of iterations = " << iterations << std::endl; std::cout << "Grid size = " << n << std::endl; + std::cout << "Tile size = " << tile_size << std::endl; std::cout << "Type of stencil = " << (star ? "star" : "grid") << std::endl; std::cout << "Radius of stencil = " << radius << std::endl; From 684ac413e047f55335ced69c0f2376b4ae794cb8 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 11:53:56 -0700 Subject: [PATCH 03/66] some fixes - GPU broken --- Cxx11/stencil-sycl-usm.cc | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Cxx11/stencil-sycl-usm.cc b/Cxx11/stencil-sycl-usm.cc index 7224991f0..ba56f9ffe 100644 --- a/Cxx11/stencil-sycl-usm.cc +++ b/Cxx11/stencil-sycl-usm.cc @@ -105,25 +105,21 @@ void run(sycl::queue & q, int iterations, size_t n, size_t tile_size, bool star, // Allocate space and perform the computation ////////////////////////////////////////////////////////////////////// - auto ctx = q.get_context(); - double stencil_time(0); T * out; try { - auto dev = q.get_device(); - - T * in = static_cast(syclx::malloc_shared(n * n * sizeof(T), dev, ctx)); - out = static_cast(syclx::malloc_shared(n * n * sizeof(T), dev, ctx)); + T * in = static_cast(syclx::malloc_shared(n * n * sizeof(T), q)); + out = static_cast(syclx::malloc_shared(n * n * sizeof(T), q)); q.submit([&](sycl::handler& h) { - h.parallel_for>(sycl::range<2> {n, n}, [=] (sycl::id<2> it) { const auto i = it[0]; const auto j = it[1]; - in[i*n+j] = static_cast(i+j); + in[i*n+j] = static_cast(i+j); + out[i*n+j] = static_cast(0); }); }); q.wait(); @@ -133,9 +129,9 @@ void run(sycl::queue & q, int iterations, size_t n, size_t tile_size, bool star, if (iter==1) stencil_time = prk::wtime(); stencil(q, n, in, out); + q.wait(); q.submit([&](sycl::handler& h) { - // Add constant to solution to force refresh of neighbor data, if any h.parallel_for>(sycl::range<2> {n, n}, sycl::id<2> {0, 0}, [=] (sycl::id<2> it) { const auto i = it[0]; const auto j = it[1]; @@ -146,7 +142,7 @@ void run(sycl::queue & q, int iterations, size_t n, size_t tile_size, bool star, } stencil_time = prk::wtime() - stencil_time; - syclx::free(in, ctx); + syclx::free(in, q); } catch (sycl::exception & e) { std::cout << e.what() << std::endl; @@ -178,7 +174,7 @@ void run(sycl::queue & q, int iterations, size_t n, size_t tile_size, bool star, } norm /= active_points; - syclx::free(out, ctx); + syclx::free(out, q); // verify correctness const double epsilon = 1.0e-8; @@ -186,6 +182,7 @@ void run(sycl::queue & q, int iterations, size_t n, size_t tile_size, bool star, if (prk::abs(norm-reference_norm) > epsilon) { std::cout << "ERROR: L1 norm = " << norm << " Reference L1 norm = " << reference_norm << std::endl; + std::cout << "===================================" << std::endl; } else { std::cout << "Solution validates" << std::endl; #ifdef VERBOSE @@ -265,6 +262,7 @@ int main(int argc, char * argv[]) std::cout << "Number of iterations = " << iterations << std::endl; std::cout << "Grid size = " << n << std::endl; + std::cout << "Tile size = " << tile_size << std::endl; std::cout << "Type of stencil = " << (star ? "star" : "grid") << std::endl; std::cout << "Radius of stencil = " << radius << std::endl; From d3717be7b60cdcb9e96cb452fedd449eb9c3bcec Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 11:54:22 -0700 Subject: [PATCH 04/66] cleanup --- Cxx11/nstream-dpcpp.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Cxx11/nstream-dpcpp.cc b/Cxx11/nstream-dpcpp.cc index addcbf6f1..e72a14cd5 100644 --- a/Cxx11/nstream-dpcpp.cc +++ b/Cxx11/nstream-dpcpp.cc @@ -122,9 +122,10 @@ int main(int argc, char * argv[]) double * d_A = syclx::malloc_device(length, q); double * d_B = syclx::malloc_device(length, q); double * d_C = syclx::malloc_device(length, q); - q.memcpy(d_A, &(h_A[0]), bytes).wait(); - q.memcpy(d_B, &(h_B[0]), bytes).wait(); - q.memcpy(d_C, &(h_C[0]), bytes).wait(); + q.memcpy(d_A, &(h_A[0]), bytes); + q.memcpy(d_B, &(h_B[0]), bytes); + q.memcpy(d_C, &(h_C[0]), bytes); + q.wait(); double scalar(3); { @@ -133,9 +134,7 @@ int main(int argc, char * argv[]) if (iter==1) nstream_time = prk::wtime(); q.submit([&](sycl::handler& h) { - - h.parallel_for( sycl::range<1>{length}, [=] (sycl::id<1> it) { - const size_t i = it[0]; + h.parallel_for( sycl::range<1>{length}, [=] (sycl::id<1> i) { d_A[i] += d_B[i] + scalar * d_C[i]; }); }); From add9e23d2ea0cff9cd7c3098c7c66e987e1729dd Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 11:54:44 -0700 Subject: [PATCH 05/66] workaround Level Zero SPIR detection --- Cxx11/prk_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index 6d2b417f8..2335c1c07 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -59,8 +59,8 @@ namespace prk { #if ! ( defined(TRISYCL) || defined(__HIPSYCL__) ) auto device = q.get_device(); auto platform = device.get_platform(); - std::cout << "SYCL Device: " << device.get_info() << std::endl; std::cout << "SYCL Platform: " << platform.get_info() << std::endl; + std::cout << "SYCL Device: " << device.get_info() << std::endl; #endif } From 691938cce9318bac0371dbc70dfb0beb02e4a5a6 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 11:55:03 -0700 Subject: [PATCH 06/66] some fixes - GPU broken --- Cxx11/stencil-dpcpp.cc | 121 +++++++++++++++++++++++++---------------- 1 file changed, 74 insertions(+), 47 deletions(-) diff --git a/Cxx11/stencil-dpcpp.cc b/Cxx11/stencil-dpcpp.cc index 0dbb5764d..421823e9e 100644 --- a/Cxx11/stencil-dpcpp.cc +++ b/Cxx11/stencil-dpcpp.cc @@ -1,6 +1,6 @@ /// -/// Copyright (c) 2013, Intel Corporation +/// Copyright (c) 2020, Intel Corporation /// /// Redistribution and use in source and binary forms, with or without /// modification, are permitted provided that the following conditions @@ -46,12 +46,6 @@ /// The output consists of diagnostics to make sure the /// algorithm worked, and of timing statistics. /// -/// FUNCTIONS CALLED: -/// -/// Other than standard C functions, the following functions are used in -/// this program: -/// wtime() -/// /// HISTORY: - Written by Rob Van der Wijngaart, February 2009. /// - RvdW: Removed unrolling pragmas for clarity; /// added constant to array "in" at end of each iteration to force @@ -60,30 +54,32 @@ /// ////////////////////////////////////////////////////////////////////// +#include "prk_sycl.h" #include "prk_util.h" -#include "stencil_seq.hpp" +#include "stencil_sycl.hpp" -void nothing(const int n, const int t, prk::vector & in, prk::vector & out) +template +void nothing(sycl::queue & q, const size_t n, const T * in, T * out) { std::cout << "You are trying to use a stencil that does not exist.\n"; std::cout << "Please generate the new stencil using the code generator\n"; std::cout << "and add it to the case-switch in the driver." << std::endl; - // n will never be zero - this is to silence compiler warnings. - if (n==0 || t==0) std::cout << in.size() << out.size() << std::endl; - std::abort(); + prk::Abort(); } int main(int argc, char* argv[]) { std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; - std::cout << "C++11 Stencil execution on 2D grid" << std::endl; + std::cout << "C++11/DPC++ Stencil execution on 2D grid" << std::endl; ////////////////////////////////////////////////////////////////////// // Process and test input parameters ////////////////////////////////////////////////////////////////////// - int iterations, n, radius, tile_size; + int iterations; + size_t n, tile_size; bool star = true; + size_t radius = 2; try { if (argc < 3) { throw "Usage: <# iterations> [ ]"; @@ -139,16 +135,18 @@ int main(int argc, char* argv[]) std::cout << "Type of stencil = " << (star ? "star" : "grid") << std::endl; std::cout << "Radius of stencil = " << radius << std::endl; - auto stencil = nothing; + auto stencil = nothing; if (star) { switch (radius) { - case 1: stencil = star1; break; - case 2: stencil = star2; break; - case 3: stencil = star3; break; - case 4: stencil = star4; break; - case 5: stencil = star5; break; + case 1: stencil = star1; break; + case 2: stencil = star2; break; + case 3: stencil = star3; break; + case 4: stencil = star4; break; + case 5: stencil = star5; break; } - } else { + } +#if 0 + else { switch (radius) { case 1: stencil = grid1; break; case 2: stencil = grid2; break; @@ -157,39 +155,63 @@ int main(int argc, char* argv[]) case 5: stencil = grid5; break; } } +#endif + + sycl::queue q(sycl::default_selector{}); + prk::SYCL::print_device_platform(q); ////////////////////////////////////////////////////////////////////// // Allocate space and perform the computation ////////////////////////////////////////////////////////////////////// - auto stencil_time = 0.0; - - prk::vector in(n*n); - prk::vector out(n*n); - - { - for (int it=0; it(i+j); - out[i*n+j] = 0.0; - } - } - } - } + double stencil_time(0); - for (int iter = 0; iter<=iterations; iter++) { + prk::vector h_in(n*n, 0); + prk::vector h_out(n*n, 0); - if (iter==1) stencil_time = prk::wtime(); - // Apply the stencil operator - stencil(n, tile_size, in, out); - // Add constant to solution to force refresh of neighbor data, if any - std::transform(in.begin(), in.end(), in.begin(), [](double c) { return c+=1.0; }); - } - stencil_time = prk::wtime() - stencil_time; + const size_t bytes = n * n * sizeof(double); + + double * d_in = syclx::malloc_device(n*n, q); + double * d_out = syclx::malloc_device(n*n, q); + q.wait(); + + q.submit([&](sycl::handler& h) { + h.parallel_for(sycl::range<2> {n,n}, [=] (sycl::item<2> it) { + const auto i = it[0]; + const auto j = it[1]; + d_in[i*n+j] = static_cast(i+j); + d_out[i*n+j] = static_cast(0); + }); + }); + q.wait(); + + for (int iter = 0; iter<=iterations; iter++) { + + if (iter==1) stencil_time = prk::wtime(); + + // Apply the stencil operator + stencil(q, n, d_in, d_out); + q.wait(); + + // Add constant to solution to force refresh of neighbor data, if any + q.submit([&](sycl::handler& h) { + h.parallel_for(sycl::range<2> {n,n}, [=] (sycl::item<2> it) { + const auto i = it[0]; + const auto j = it[1]; + d_in[i*n+j] += static_cast(1); + }); + }); + q.wait(); } + stencil_time = prk::wtime() - stencil_time; + + q.memcpy(&(h_in[0]), d_in, bytes); + q.memcpy(&(h_out[0]), d_out, bytes); + q.wait(); + + syclx::free(d_in, q); + syclx::free(d_out,q); + q.wait(); ////////////////////////////////////////////////////////////////////// // Analyze and output results. @@ -200,7 +222,7 @@ int main(int argc, char* argv[]) double norm = 0.0; for (int i=radius; i epsilon) { std::cout << "ERROR: L1 norm = " << norm << " Reference L1 norm = " << reference_norm << std::endl; + for (int i=0; i Date: Wed, 24 Jun 2020 08:00:56 -0700 Subject: [PATCH 07/66] undo minor mistake --- Cxx11/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cxx11/Makefile b/Cxx11/Makefile index 6c0e08e3e..e24b39163 100644 --- a/Cxx11/Makefile +++ b/Cxx11/Makefile @@ -128,7 +128,7 @@ cblas: transpose-cblas dgemm-cblas onemkl: nstream-onemkl dgemm-onemkl dgemm-multigpu-onemkl -oneapi: onemkl dpcpp sycl sycl-usm sycl-explicit sycl-explicit-usm +oneapi: onemkl dpcpp sycl sycl-usm sycl-explicit occa: transpose-occa nstream-occa From 583795ddd67370124f7f2cc46b528628d66737da Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 24 Jun 2020 08:04:59 -0700 Subject: [PATCH 08/66] add transpose multi-GPU DPC++ skeleton --- Cxx11/Makefile | 2 +- Cxx11/transpose-multigpu-dpcpp.cc | 181 ++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 Cxx11/transpose-multigpu-dpcpp.cc diff --git a/Cxx11/Makefile b/Cxx11/Makefile index e24b39163..a20cdfbb4 100644 --- a/Cxx11/Makefile +++ b/Cxx11/Makefile @@ -97,7 +97,7 @@ sycl-usm: nstream-sycl-usm nstream-sycl-explicit-usm stencil-sycl-usm transpose- sycl-explicit: nstream-sycl-explicit transpose-sycl-explicit -dpcpp: nstream-dpcpp nstream-multigpu-dpcpp stencil-dpcpp stencil-multigpu-dpcpp transpose-dpcpp +dpcpp: nstream-dpcpp nstream-multigpu-dpcpp stencil-dpcpp stencil-multigpu-dpcpp transpose-dpcpp transpose-multigpu-dpcpp tbb: p2p-innerloop-tbb p2p-tbb stencil-tbb transpose-tbb nstream-tbb \ p2p-hyperplane-tbb p2p-tasks-tbb diff --git a/Cxx11/transpose-multigpu-dpcpp.cc b/Cxx11/transpose-multigpu-dpcpp.cc new file mode 100644 index 000000000..2dec847f9 --- /dev/null +++ b/Cxx11/transpose-multigpu-dpcpp.cc @@ -0,0 +1,181 @@ +/// +/// Copyright (c) 2020, Intel Corporation +/// +/// Redistribution and use in source and binary forms, with or without +/// modification, are permitted provided that the following conditions +/// are met: +/// +/// * Redistributions of source code must retain the above copyright +/// notice, this list of conditions and the following disclaimer. +/// * Redistributions in binary form must reproduce the above +/// copyright notice, this list of conditions and the following +/// disclaimer in the documentation and/or other materials provided +/// with the distribution. +/// * Neither the name of Intel Corporation nor the names of its +/// contributors may be used to endorse or promote products +/// derived from this software without specific prior written +/// permission. +/// +/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +/// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +/// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +/// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +/// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +/// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +/// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +/// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +/// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +/// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +/// POSSIBILITY OF SUCH DAMAGE. + +////////////////////////////////////////////////////////////////////// +/// +/// NAME: transpose +/// +/// PURPOSE: This program measures the time for the transpose of a +/// column-major stored matrix into a row-major stored matrix. +/// +/// USAGE: Program input is the matrix order and the number of times to +/// repeat the operation: +/// +/// transpose <# iterations> +/// +/// The output consists of diagnostics to make sure the +/// transpose worked and timing statistics. +/// +/// HISTORY: Written by Rob Van der Wijngaart, February 2009. +/// Converted to C++11 by Jeff Hammond, February 2016 and May 2017. +/// +////////////////////////////////////////////////////////////////////// + +#include "prk_util.h" +#include "prk_sycl.h" + +int main(int argc, char * argv[]) +{ + std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; + std::cout << "C++11/DPCT Matrix transpose: B = A^T" << std::endl; + + ////////////////////////////////////////////////////////////////////// + /// Read and test input parameters + ////////////////////////////////////////////////////////////////////// + + int iterations; + size_t order; + try { + if (argc < 3) { + throw "Usage: <# iterations> "; + } + + iterations = std::atoi(argv[1]); + if (iterations < 1) { + throw "ERROR: iterations must be >= 1"; + } + + order = std::atoi(argv[2]); + if (order <= 0) { + throw "ERROR: Matrix Order must be greater than 0"; + } else if (order > prk::get_max_matrix_size()) { + throw "ERROR: matrix dimension too large - overflow risk"; + } + } + catch (const char * e) { + std::cout << e << std::endl; + return 1; + } + + std::cout << "Number of iterations = " << iterations << std::endl; + std::cout << "Matrix order = " << order << std::endl; + + sycl::queue q(sycl::default_selector{}); + prk::SYCL::print_device_platform(q); + + ////////////////////////////////////////////////////////////////////// + // Allocate space for the input and transpose matrix + ////////////////////////////////////////////////////////////////////// + + const size_t nelems = (size_t)order * (size_t)order; + const size_t bytes = nelems * sizeof(double); + double * h_a = syclx::malloc_host( nelems, q); + double * h_b = syclx::malloc_host( nelems, q); + + // fill A with the sequence 0 to order^2-1 + for (int j=0; j(order*j+i); + h_b[j*order+i] = static_cast(0); + } + } + + // copy input from host to device + double * A = syclx::malloc_device( nelems, q); + double * B = syclx::malloc_device( nelems, q); + q.memcpy(A, &(h_a[0]), bytes).wait(); + q.memcpy(B, &(h_b[0]), bytes).wait(); + + auto trans_time = 0.0; + + for (int iter = 0; iter<=iterations; iter++) { + + if (iter==1) trans_time = prk::wtime(); + + q.submit([&](sycl::handler& h) { + + h.parallel_for( sycl::range<2>{order,order}, [=] (sycl::id<2> it) { +#if USE_2D_INDEXING + sycl::id<2> ij{it[0],it[1]}; + sycl::id<2> ji{it[1],it[0]}; + B[ij] += A[ji]; + A[ji] += (T)1; +#else + B[it[0] * order + it[1]] += A[it[1] * order + it[0]]; + A[it[1] * order + it[0]] += 1.0; +#endif + }); + }); + q.wait(); + } + trans_time = prk::wtime() - trans_time; + + // copy output back to host + q.memcpy(&(h_b[0]), B, bytes).wait(); + + syclx::free(B, q); + syclx::free(A, q); + + ////////////////////////////////////////////////////////////////////// + /// Analyze and output results + ////////////////////////////////////////////////////////////////////// + + const double addit = (iterations+1.) * (iterations/2.); + double abserr(0); + for (int j=0; j(ij)*(1.+iterations)+addit; + abserr += prk::abs(h_b[ji] - reference); + } + } + + syclx::free(h_b, q); + syclx::free(h_a, q); + + const auto epsilon = 1.0e-8; + if (abserr < epsilon) { + std::cout << "Solution validates" << std::endl; + auto avgtime = trans_time/iterations; + auto bytes = (size_t)order * (size_t)order * sizeof(double); + std::cout << "Rate (MB/s): " << 1.0e-6 * (2L*bytes)/avgtime + << " Avg time (s): " << avgtime << std::endl; + } else { + std::cout << "ERROR: Aggregate squared error " << abserr + << " exceeds threshold " << epsilon << std::endl; + return 1; + } + + return 0; +} + + From 053cb6d621c4e85b666c71f3fe6b3b978652a745 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 29 Jun 2020 15:43:12 -0700 Subject: [PATCH 09/66] WIP --- Cxx11/transpose-multigpu-dpcpp.cc | 72 ++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/Cxx11/transpose-multigpu-dpcpp.cc b/Cxx11/transpose-multigpu-dpcpp.cc index 2dec847f9..a26244496 100644 --- a/Cxx11/transpose-multigpu-dpcpp.cc +++ b/Cxx11/transpose-multigpu-dpcpp.cc @@ -63,9 +63,10 @@ int main(int argc, char * argv[]) int iterations; size_t order; + int use_ngpu = 1; try { if (argc < 3) { - throw "Usage: <# iterations> "; + throw "Usage: <# iterations> []"; } iterations = std::atoi(argv[1]); @@ -79,6 +80,15 @@ int main(int argc, char * argv[]) } else if (order > prk::get_max_matrix_size()) { throw "ERROR: matrix dimension too large - overflow risk"; } + + if (argc > 3) { + use_ngpu = std::atoi(argv[3]); + } + + if (order % use_ngpu) { + std::cerr << "order = " << order << ", device count = " << use_ngpu << std::endl; + throw "ERROR: matrix order should be divisible by device count!"; + } } catch (const char * e) { std::cout << e << std::endl; @@ -87,34 +97,66 @@ int main(int argc, char * argv[]) std::cout << "Number of iterations = " << iterations << std::endl; std::cout << "Matrix order = " << order << std::endl; + std::cout << "Number of GPUs to use = " << use_ngpu << std::endl; + + std::vector qs; + + auto platforms = sycl::platform::get_platforms(); + for (auto & p : platforms) { + auto pname = p.get_info(); + std::cout << "*Platform: " << pname << std::endl; + if ( pname.find("Level-Zero") != std::string::npos) { + std::cout << "*Level Zero GPU skipped" << std::endl; + break; + } + if ( pname.find("Intel") == std::string::npos) { + std::cout << "*non-Intel skipped" << std::endl; + break; + } + auto devices = p.get_devices(); + for (auto & d : devices ) { + std::cout << "**Device: " << d.get_info() << std::endl; + if ( d.is_gpu() || d.is_cpu() ) { + std::cout << "**Device is CPU or GPU - adding to vector of queues" << std::endl; + qs.push_back(sycl::queue(d)); + } + } + } + + int haz_ngpu = qs.size(); + std::cout << "Number of CPUs and GPUs found = " << haz_ngpu << std::endl; - sycl::queue q(sycl::default_selector{}); - prk::SYCL::print_device_platform(q); + if (use_ngpu > haz_ngpu) { + std::cout << "You cannot use more GPUs (" << use_ngpu << ") than you have (" << haz_ngpu << ")" << std::endl; + } + + int ngpus = use_ngpu; ////////////////////////////////////////////////////////////////////// // Allocate space for the input and transpose matrix ////////////////////////////////////////////////////////////////////// - const size_t nelems = (size_t)order * (size_t)order; - const size_t bytes = nelems * sizeof(double); - double * h_a = syclx::malloc_host( nelems, q); - double * h_b = syclx::malloc_host( nelems, q); + double trans_time(0); + + auto h_a = prk::vector(order * order); + auto h_b = prk::vector(order * order); // fill A with the sequence 0 to order^2-1 - for (int j=0; j(order*j+i); h_b[j*order+i] = static_cast(0); } } - // copy input from host to device - double * A = syclx::malloc_device( nelems, q); - double * B = syclx::malloc_device( nelems, q); - q.memcpy(A, &(h_a[0]), bytes).wait(); - q.memcpy(B, &(h_b[0]), bytes).wait(); + const size_t bytes = order * order * sizeof(double); - auto trans_time = 0.0; + // copy input from host to device + double * A = syclx::malloc_device(order * order, q); + double * B = syclx::malloc_device(order * order, q); + q.memcpy(A, &(h_a[0]), bytes); + q.memcpy(B, &(h_b[0]), bytes); + q.wait(); for (int iter = 0; iter<=iterations; iter++) { From 17ff29248fe028ce50e391aeb841084f79be131d Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 20 Jul 2020 14:31:26 -0700 Subject: [PATCH 10/66] add ignores --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index d4e9d72bc..3c78c8721 100644 --- a/.gitignore +++ b/.gitignore @@ -166,6 +166,8 @@ Cxx11/dgemm-mpi Cxx11/dgemm-sycl Cxx11/dgemm-blas-sycl Cxx11/dgemm-mkl-sycl +Cxx11/dgemm-multigpu-onemkl +Cxx11/dgemm-onemkl Cxx11/dgemm-kokkos Cxx11/dgemm-kernels-kokkos Cxx11/dgemm-raja @@ -217,6 +219,8 @@ Cxx11/nstream-sycl-usm Cxx11/nstream-sycl-explicit Cxx11/nstream-sycl-explicit-usm Cxx11/nstream-dpcpp +Cxx11/nstream-multigpu-dpcpp +Cxx11/nstream-onemkl Cxx11/nstream-celerity Cxx11/nstream-hpx Cxx11/nstream-upcxx From 9a95639d98d9e1a2dddd850bdfc92cbc7346122f Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 20 Jul 2020 14:54:59 -0700 Subject: [PATCH 11/66] fix message --- Cxx11/transpose-multigpu-dpcpp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cxx11/transpose-multigpu-dpcpp.cc b/Cxx11/transpose-multigpu-dpcpp.cc index a26244496..215ae23d1 100644 --- a/Cxx11/transpose-multigpu-dpcpp.cc +++ b/Cxx11/transpose-multigpu-dpcpp.cc @@ -127,7 +127,7 @@ int main(int argc, char * argv[]) std::cout << "Number of CPUs and GPUs found = " << haz_ngpu << std::endl; if (use_ngpu > haz_ngpu) { - std::cout << "You cannot use more GPUs (" << use_ngpu << ") than you have (" << haz_ngpu << ")" << std::endl; + std::cout << "You cannot use more CPUs and GPUs (" << use_ngpu << ") than you have (" << haz_ngpu << ")" << std::endl; } int ngpus = use_ngpu; From 3113c031bde6eca7a78384e5bd1316c5a47cca83 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 12:43:44 -0700 Subject: [PATCH 12/66] add fill, cleanup dead code --- Cxx11/prk_util.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Cxx11/prk_util.h b/Cxx11/prk_util.h index 176c1186b..98ea627ef 100644 --- a/Cxx11/prk_util.h +++ b/Cxx11/prk_util.h @@ -203,20 +203,17 @@ namespace prk { public: vector(size_t n) { - //this->data_ = new T[n]; this->data_ = prk::malloc(n); this->size_ = n; } vector(size_t n, T v) { - //this->data_ = new T[n]; this->data_ = prk::malloc(n); for (size_t i=0; idata_[i] = v; this->size_ = n; } ~vector() { - //delete[] this->data_; prk::free(this->data_); } @@ -258,6 +255,10 @@ namespace prk { return &(this->data_[this->size_]); } + void fill(T v) { + for (size_t i=0; isize_; ++i) this->data_[i] = v; + } + #if 0 T & begin() { return this->data_[0]; From 3d33f2a820218776601a2e099fd7d2cd312f3a92 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 12:44:12 -0700 Subject: [PATCH 13/66] start decomposition --- Cxx11/transpose-multigpu-dpcpp.cc | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Cxx11/transpose-multigpu-dpcpp.cc b/Cxx11/transpose-multigpu-dpcpp.cc index 215ae23d1..adf6c49b3 100644 --- a/Cxx11/transpose-multigpu-dpcpp.cc +++ b/Cxx11/transpose-multigpu-dpcpp.cc @@ -95,10 +95,6 @@ int main(int argc, char * argv[]) return 1; } - std::cout << "Number of iterations = " << iterations << std::endl; - std::cout << "Matrix order = " << order << std::endl; - std::cout << "Number of GPUs to use = " << use_ngpu << std::endl; - std::vector qs; auto platforms = sycl::platform::get_platforms(); @@ -132,6 +128,17 @@ int main(int argc, char * argv[]) int ngpus = use_ngpu; + if (order % ngpus != 0) { + std::cout << "ERROR: matrix order " << order << " should be divisible by # procs" << ngpus << std::endl; + return 2; + } + size_t block_order = order / ngpus; + + std::cout << "Number of GPUs to use = " << use_ngpu << std::endl; + std::cout << "Number of iterations = " << iterations << std::endl; + std::cout << "Matrix order = " << order << std::endl; + std::cout << "Block order = " << block_order << std::endl; + ////////////////////////////////////////////////////////////////////// // Allocate space for the input and transpose matrix ////////////////////////////////////////////////////////////////////// From 74e286bae457bd1b7eb7e5ef68e1f0eab1d0e21d Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 12:44:52 -0700 Subject: [PATCH 14/66] device queues class and related methods --- Cxx11/prk_sycl.h | 150 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index 2335c1c07..599d424c7 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -4,12 +4,17 @@ #include #include +//#include // std::distance +#include + #include "CL/sycl.hpp" #ifdef __COMPUTECPP__ #include "SYCL/experimental/usm.h" #endif +#include "prk_util.h" // prk::vector + namespace sycl = cl::sycl; #ifdef __COMPUTECPP__ @@ -83,6 +88,151 @@ namespace prk { #endif } + class queues { + + private: + std::vector list; + + public: + queues(bool use_cpu = true, bool use_gpu = true) + { + auto platforms = sycl::platform::get_platforms(); + for (auto & p : platforms) { + auto pname = p.get_info(); + std::cout << "*Platform: " << pname << std::endl; + if ( pname.find("Level-Zero") != std::string::npos) { + std::cout << "*Level Zero GPU skipped" << std::endl; + break; + } + if ( pname.find("Intel") == std::string::npos) { + std::cout << "*non-Intel skipped" << std::endl; + break; + } + auto devices = p.get_devices(); + for (auto & d : devices ) { + std::cout << "**Device: " << d.get_info() << std::endl; + if ( d.is_cpu() && use_cpu ) { + std::cout << "**Device is CPU - adding to vector of queues" << std::endl; + list.push_back(sycl::queue(d)); + } + if ( d.is_gpu() && use_gpu ) { + std::cout << "**Device is GPU - adding to vector of queues" << std::endl; + list.push_back(sycl::queue(d)); + } + } + } + } + + int size(void) + { + return list.size(); + } + + void wait(int i) + { + list[i].wait(); + } + + void waitall(void) + { + for (auto & i : list) { + i.wait(); + } + } + + template + void allocate(std::vector & device_pointers, + size_t num_elements) + { + std::cout << "allocate" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + device_pointers[i] = syclx::malloc_device(num_elements, v); + std::cout << i << ": " << device_pointers[i] << ", " << num_elements << std::endl; + } + } + + template + void free(std::vector & device_pointers) + { + std::cout << "free" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + syclx::free(device_pointers[i], v); + } + } + + template + void gather(T * host_pointer, + const std::vector & device_pointers, + size_t num_elements) + { + std::cout << "gather" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = &host_pointer[i * bytes]; + auto source = device_pointers[i]; + v.memcpy(target, source, bytes); + } + } + + template + void gather(prk::vector & host_pointer, + const std::vector & device_pointers, + size_t num_elements) + { + std::cout << "gather" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = &host_pointer[i * bytes]; + auto source = device_pointers[i]; + v.memcpy(target, source, bytes); + } + } + + template + void scatter(std::vector & device_pointers, + const T * host_pointer, + size_t num_elements) + { + std::cout << "scatter" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = device_pointers[i]; + auto source = &host_pointer[i * bytes]; + v.memcpy(target, source, bytes); + } + } + + template + void scatter(std::vector & device_pointers, + prk::vector & host_pointer, + size_t num_elements) + { + std::cout << "scatter" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = device_pointers[i]; + auto source = &host_pointer[i * bytes]; + std::cout << i << ": " << target << ", " << source << std::endl; + v.memcpy(target, source, bytes); + } + } + + + + }; + } // namespace SYCL } // namespace prk From da01de4ac8ef2f699ea62c8ee2352dfde4ddde2c Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 12:45:07 -0700 Subject: [PATCH 15/66] use device queues stuff - currently broken --- Cxx11/nstream-multigpu-dpcpp.cc | 148 +++++++++++--------------------- 1 file changed, 50 insertions(+), 98 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 5a717683e..99f5d68a3 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -69,12 +69,14 @@ int main(int argc, char * argv[]) std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; std::cout << "C++11/DPC++ STREAM triad: A = B + scalar * C" << std::endl; + auto qs = prk::SYCL::queues(); + ////////////////////////////////////////////////////////////////////// /// Read and test input parameters ////////////////////////////////////////////////////////////////////// int iterations; - size_t length; + size_t length, local_length; int use_ngpu = 1; try { if (argc < 3) { @@ -94,48 +96,34 @@ int main(int argc, char * argv[]) if (argc > 3) { use_ngpu = std::atoi(argv[3]); } + if ( use_ngpu > qs.size() ) { + std::string error = "You cannot use more devices (" + + std::to_string(use_ngpu) + + ") than you have (" + + std::to_string(qs.size()) + ")"; + throw error; + } + + if (length % use_ngpu != 0) { + std::string error = "ERROR: vector length (" + + std::to_string(length) + + ") should be divisible by # procs (" + + std::to_string(use_ngpu) + ")"; + throw error; + } + local_length = length / use_ngpu; } catch (const char * e) { std::cout << e << std::endl; return 1; } + std::cout << "Number of devices = " << use_ngpu << std::endl; std::cout << "Number of iterations = " << iterations << std::endl; std::cout << "Vector length = " << length << std::endl; - std::cout << "Number of GPUs to use = " << use_ngpu << std::endl; - - std::vector qs; - - auto platforms = sycl::platform::get_platforms(); - for (auto & p : platforms) { - auto pname = p.get_info(); - std::cout << "*Platform: " << pname << std::endl; - if ( pname.find("Level-Zero") != std::string::npos) { - std::cout << "*Level Zero GPU skipped" << std::endl; - break; - } - if ( pname.find("Intel") == std::string::npos) { - std::cout << "*non-Intel skipped" << std::endl; - break; - } - auto devices = p.get_devices(); - for (auto & d : devices ) { - std::cout << "**Device: " << d.get_info() << std::endl; - if ( d.is_gpu() || d.is_cpu() ) { - std::cout << "**Device is CPU or GPU - adding to vector of queues" << std::endl; - qs.push_back(sycl::queue(d)); - } - } - } + std::cout << "Vector length (local) = " << local_length << std::endl; - int haz_ngpu = qs.size(); - std::cout << "Number of CPUs and GPUs found = " << haz_ngpu << std::endl; - - if (use_ngpu > haz_ngpu) { - std::cout << "You cannot use more GPUs (" << use_ngpu << ") than you have (" << haz_ngpu << ")" << std::endl; - } - - int ngpus = use_ngpu; + int np = use_ngpu; ////////////////////////////////////////////////////////////////////// // Allocate space and perform the computation @@ -143,55 +131,26 @@ int main(int argc, char * argv[]) double nstream_time(0); - const size_t bytes = length * sizeof(double); + auto h_A = prk::vector(length, 0); + auto h_B = prk::vector(length, 2); + auto h_C = prk::vector(length, 2); - auto h_A = prk::vector(length); - auto h_B = prk::vector(length); - auto h_C = prk::vector(length); + auto d_A = std::vector (np, nullptr); + auto d_B = std::vector (np, nullptr); + auto d_C = std::vector (np, nullptr); - for (size_t i=0; i(d_A, local_length); + qs.allocate(d_B, local_length); + qs.allocate(d_C, local_length); + qs.waitall(); - std::vector ls(ngpus,0); - { - const size_t elements_per_gpu = prk::divceil(length, ngpus); - for (int g=0; g length) { - ls[ngpus-1] = length - (ngpus-1) * elements_per_gpu; - } - } - - auto d_A = std::vector (ngpus, nullptr); - auto d_B = std::vector (ngpus, nullptr); - auto d_C = std::vector (ngpus, nullptr); - - for (int g=0; g(local_length, q); - d_B[g] = syclx::malloc_device(local_length, q); - d_C[g] = syclx::malloc_device(local_length, q); - q.wait(); - - const size_t start = (g>0) ? ls[g-1] : 0; - const size_t size = ls[g] * sizeof(double); - q.memcpy(d_A[g], &(h_A[start]), size); - q.memcpy(d_B[g], &(h_B[start]), size); - q.memcpy(d_C[g], &(h_C[start]), size); - q.wait(); - } + qs.scatter(d_A, h_A, local_length); + qs.scatter(d_B, h_B, local_length); + qs.scatter(d_C, h_C, local_length); + qs.waitall(); - for (size_t i=0; i{size}, [=] (sycl::id<1> i) { @@ -214,27 +174,19 @@ int main(int argc, char * argv[]) }); }); } - for (auto & q : qs) { - q.wait(); - } + qs.waitall(); +#endif } nstream_time = prk::wtime() - nstream_time; } - for (int g=0; g(h_A, d_A, local_length); + qs.waitall(); - const size_t start = (g>0) ? ls[g-1] : 0; - const size_t size = ls[g] * sizeof(double); - - q.memcpy(&(h_A[start]), d_A[g], size); - q.wait(); - - syclx::free(d_C[g], q); - syclx::free(d_B[g], q); - syclx::free(d_A[g], q); - q.wait(); - } + qs.free(d_A); + qs.free(d_B); + qs.free(d_C); + qs.waitall(); ////////////////////////////////////////////////////////////////////// /// Analyze and output results From cc311875b89a5205fe78055690947ed5be29dea2 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 14:13:26 -0700 Subject: [PATCH 16/66] trying to fix --- Cxx11/nstream-multigpu-dpcpp.cc | 119 ++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 30 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 99f5d68a3..d06cee517 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -69,7 +69,32 @@ int main(int argc, char * argv[]) std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; std::cout << "C++11/DPC++ STREAM triad: A = B + scalar * C" << std::endl; - auto qs = prk::SYCL::queues(); + std::vector qs; + + auto platforms = sycl::platform::get_platforms(); + for (auto & p : platforms) { + auto pname = p.get_info(); + std::cout << "*Platform: " << pname << std::endl; + if ( pname.find("Level-Zero") != std::string::npos) { + std::cout << "*Level Zero GPU skipped" << std::endl; + break; + } + if ( pname.find("Intel") == std::string::npos) { + std::cout << "*non-Intel skipped" << std::endl; + break; + } + auto devices = p.get_devices(); + for (auto & d : devices ) { + std::cout << "**Device: " << d.get_info() << std::endl; + if ( d.is_gpu() || d.is_cpu() ) { + std::cout << "**Device is CPU or GPU - adding to vector of queues" << std::endl; + qs.push_back(sycl::queue(d)); + } + } + } + + int haz_ngpu = qs.size(); + std::cout << "Number of CPUs and GPUs found = " << haz_ngpu << std::endl; ////////////////////////////////////////////////////////////////////// /// Read and test input parameters @@ -123,7 +148,7 @@ int main(int argc, char * argv[]) std::cout << "Vector length = " << length << std::endl; std::cout << "Vector length (local) = " << local_length << std::endl; - int np = use_ngpu; + int ngpus = use_ngpu; ////////////////////////////////////////////////////////////////////// // Allocate space and perform the computation @@ -131,26 +156,53 @@ int main(int argc, char * argv[]) double nstream_time(0); - auto h_A = prk::vector(length, 0); - auto h_B = prk::vector(length, 2); - auto h_C = prk::vector(length, 2); + auto h_A = prk::vector(length); + auto h_B = prk::vector(length); + auto h_C = prk::vector(length); + + for (size_t i=0; i ls(ngpus,0); + { + const size_t elements_per_gpu = prk::divceil(length, ngpus); + for (int g=0; g length) { + ls[ngpus-1] = length - (ngpus-1) * elements_per_gpu; + } + } + + auto d_A = std::vector (ngpus, nullptr); + auto d_B = std::vector (ngpus, nullptr); + auto d_C = std::vector (ngpus, nullptr); - auto d_A = std::vector (np, nullptr); - auto d_B = std::vector (np, nullptr); - auto d_C = std::vector (np, nullptr); + for (int g=0; g(d_A, local_length); - qs.allocate(d_B, local_length); - qs.allocate(d_C, local_length); - qs.waitall(); + const auto local_length = ls[g]; + const auto local_bytes = local_length * sizeof(double); - qs.scatter(d_A, h_A, local_length); - qs.scatter(d_B, h_B, local_length); - qs.scatter(d_C, h_C, local_length); - qs.waitall(); + d_A[g] = syclx::malloc_device(local_length, q); + d_B[g] = syclx::malloc_device(local_length, q); + d_C[g] = syclx::malloc_device(local_length, q); + q.wait(); - // overwrite host buffer with garbage to detect bugs - h_A.fill(-77777777); + const size_t start = (g>0) ? ls[g-1] : 0; + const size_t size = ls[g] * sizeof(double); + q.memcpy(d_A[g], &(h_A[start]), size); + q.memcpy(d_B[g], &(h_B[start]), size); + q.memcpy(d_C[g], &(h_C[start]), size); + q.wait(); + } + + for (size_t i=0; i{size}, [=] (sycl::id<1> i) { @@ -174,19 +225,27 @@ int main(int argc, char * argv[]) }); }); } - qs.waitall(); -#endif + for (auto & q : qs) { + q.wait(); + } } nstream_time = prk::wtime() - nstream_time; } - qs.gather(h_A, d_A, local_length); - qs.waitall(); + for (int g=0; g0) ? ls[g-1] : 0; + const size_t size = ls[g] * sizeof(double); - qs.free(d_A); - qs.free(d_B); - qs.free(d_C); - qs.waitall(); + q.memcpy(&(h_A[start]), d_A[g], size); + q.wait(); + + syclx::free(d_C[g], q); + syclx::free(d_B[g], q); + syclx::free(d_A[g], q); + q.wait(); + } ////////////////////////////////////////////////////////////////////// /// Analyze and output results From 18931c4d029aa4eb84bc578b2c8ee2957cb6da45 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 14:13:48 -0700 Subject: [PATCH 17/66] trying to fix --- Cxx11/nstream-multigpu-dpcpp.cc | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index d06cee517..391b417f5 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -166,16 +166,7 @@ int main(int argc, char * argv[]) h_C[i] = 2; } - std::vector ls(ngpus,0); - { - const size_t elements_per_gpu = prk::divceil(length, ngpus); - for (int g=0; g length) { - ls[ngpus-1] = length - (ngpus-1) * elements_per_gpu; - } - } + std::vector ls(ngpus,local_length); auto d_A = std::vector (ngpus, nullptr); auto d_B = std::vector (ngpus, nullptr); From 5351212238e8456dde8b17366bfeafd78274fd11 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 14:15:44 -0700 Subject: [PATCH 18/66] trying to fix --- Cxx11/nstream-multigpu-dpcpp.cc | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 391b417f5..b2a981b30 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -156,15 +156,9 @@ int main(int argc, char * argv[]) double nstream_time(0); - auto h_A = prk::vector(length); - auto h_B = prk::vector(length); - auto h_C = prk::vector(length); - - for (size_t i=0; i(length, 0); + auto h_B = prk::vector(length, 2); + auto h_C = prk::vector(length, 2); std::vector ls(ngpus,local_length); @@ -225,13 +219,14 @@ int main(int argc, char * argv[]) for (int g=0; g0) ? ls[g-1] : 0; const size_t size = ls[g] * sizeof(double); - q.memcpy(&(h_A[start]), d_A[g], size); q.wait(); + } + for (int g=0; g Date: Tue, 21 Jul 2020 15:36:08 -0700 Subject: [PATCH 19/66] working with inlined methods --- Cxx11/nstream-multigpu-dpcpp.cc | 114 ++++++++++++++++++++++++++------ 1 file changed, 94 insertions(+), 20 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index b2a981b30..716615044 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -148,7 +148,7 @@ int main(int argc, char * argv[]) std::cout << "Vector length = " << length << std::endl; std::cout << "Vector length (local) = " << local_length << std::endl; - int ngpus = use_ngpu; + int np = use_ngpu; ////////////////////////////////////////////////////////////////////// // Allocate space and perform the computation @@ -160,49 +160,96 @@ int main(int argc, char * argv[]) auto h_B = prk::vector(length, 2); auto h_C = prk::vector(length, 2); - std::vector ls(ngpus,local_length); + auto d_A = std::vector (np, nullptr); + auto d_B = std::vector (np, nullptr); + auto d_C = std::vector (np, nullptr); - auto d_A = std::vector (ngpus, nullptr); - auto d_B = std::vector (ngpus, nullptr); - auto d_C = std::vector (ngpus, nullptr); - - for (int g=0; g(local_length, q); d_B[g] = syclx::malloc_device(local_length, q); d_C[g] = syclx::malloc_device(local_length, q); q.wait(); + } +#else + auto list(qs); + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + d_A[i] = syclx::malloc_device(local_length, v); + d_B[i] = syclx::malloc_device(local_length, v); + d_C[i] = syclx::malloc_device(local_length, v); + } + for (auto & i : list) { + i.wait(); + } +#endif - const size_t start = (g>0) ? ls[g-1] : 0; - const size_t size = ls[g] * sizeof(double); +#if 0 + for (int g=0; g{size}, [=] (sycl::id<1> i) { @@ -217,19 +264,46 @@ int main(int argc, char * argv[]) nstream_time = prk::wtime() - nstream_time; } - for (int g=0; g ls(np,local_length); + for (int g=0; g0) ? ls[g-1] : 0; const size_t size = ls[g] * sizeof(double); q.memcpy(&(h_A[start]), d_A[g], size); + } +#else + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = local_length * sizeof(double); + auto start = local_length * i; + auto target = &h_A[i * local_length]; + auto source = d_A[i]; + v.memcpy(target, source, bytes); + } +#endif + for (auto & q : qs) { q.wait(); } - for (int g=0; g Date: Tue, 21 Jul 2020 15:36:17 -0700 Subject: [PATCH 20/66] working with inlined methods --- Cxx11/nstream-multigpu-dpcpp.cc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 716615044..3f9bc9489 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -264,15 +264,6 @@ int main(int argc, char * argv[]) nstream_time = prk::wtime() - nstream_time; } -#if 0 - std::vector ls(np,local_length); - for (int g=0; g0) ? ls[g-1] : 0; - const size_t size = ls[g] * sizeof(double); - q.memcpy(&(h_A[start]), d_A[g], size); - } -#else for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); @@ -282,7 +273,6 @@ int main(int argc, char * argv[]) auto source = d_A[i]; v.memcpy(target, source, bytes); } -#endif for (auto & q : qs) { q.wait(); } From f2c874e54d89500ca728b96d4d259b1d458957bb Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 15:39:15 -0700 Subject: [PATCH 21/66] cleaned up --- Cxx11/nstream-multigpu-dpcpp.cc | 43 +-------------------------------- 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 3f9bc9489..876c07d6b 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -164,16 +164,6 @@ int main(int argc, char * argv[]) auto d_B = std::vector (np, nullptr); auto d_C = std::vector (np, nullptr); -#if 0 - for (int g=0; g(local_length, q); - d_B[g] = syclx::malloc_device(local_length, q); - d_C[g] = syclx::malloc_device(local_length, q); - q.wait(); - } -#else auto list(qs); for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); @@ -185,28 +175,7 @@ int main(int argc, char * argv[]) for (auto & i : list) { i.wait(); } -#endif - -#if 0 - for (int g=0; g Date: Tue, 21 Jul 2020 15:39:18 -0700 Subject: [PATCH 22/66] cleaned up --- Cxx11/prk_sycl.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index 599d424c7..94f8de9d7 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -164,6 +164,7 @@ namespace prk { } } +#if 0 template void gather(T * host_pointer, const std::vector & device_pointers, @@ -179,6 +180,7 @@ namespace prk { v.memcpy(target, source, bytes); } } +#endif template void gather(prk::vector & host_pointer, @@ -196,6 +198,7 @@ namespace prk { } } +#if 0 template void scatter(std::vector & device_pointers, const T * host_pointer, @@ -211,6 +214,7 @@ namespace prk { v.memcpy(target, source, bytes); } } +#endif template void scatter(std::vector & device_pointers, From a0ac723faafc19bfcb202fe955698ce050306457 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 16:49:49 -0700 Subject: [PATCH 23/66] fixed bugs --- Cxx11/prk_sycl.h | 50 ++++++------------------------------------------ 1 file changed, 6 insertions(+), 44 deletions(-) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index 94f8de9d7..ec61f0679 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -140,23 +140,24 @@ namespace prk { } } + sycl::queue queue(int i) { + return this->list[i]; + } + template void allocate(std::vector & device_pointers, size_t num_elements) { - std::cout << "allocate" << std::endl; for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); device_pointers[i] = syclx::malloc_device(num_elements, v); - std::cout << i << ": " << device_pointers[i] << ", " << num_elements << std::endl; } } template void free(std::vector & device_pointers) { - std::cout << "free" << std::endl; for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); @@ -164,71 +165,32 @@ namespace prk { } } -#if 0 - template - void gather(T * host_pointer, - const std::vector & device_pointers, - size_t num_elements) - { - std::cout << "gather" << std::endl; - for (const auto & l : list | boost::adaptors::indexed(0) ) { - auto i = l.index(); - auto v = l.value(); - auto bytes = num_elements * sizeof(T); - auto target = &host_pointer[i * bytes]; - auto source = device_pointers[i]; - v.memcpy(target, source, bytes); - } - } -#endif - template void gather(prk::vector & host_pointer, const std::vector & device_pointers, size_t num_elements) { - std::cout << "gather" << std::endl; for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); auto bytes = num_elements * sizeof(T); - auto target = &host_pointer[i * bytes]; + auto target = &host_pointer[i * num_elements]; auto source = device_pointers[i]; v.memcpy(target, source, bytes); } } -#if 0 - template - void scatter(std::vector & device_pointers, - const T * host_pointer, - size_t num_elements) - { - std::cout << "scatter" << std::endl; - for (const auto & l : list | boost::adaptors::indexed(0) ) { - auto i = l.index(); - auto v = l.value(); - auto bytes = num_elements * sizeof(T); - auto target = device_pointers[i]; - auto source = &host_pointer[i * bytes]; - v.memcpy(target, source, bytes); - } - } -#endif - template void scatter(std::vector & device_pointers, prk::vector & host_pointer, size_t num_elements) { - std::cout << "scatter" << std::endl; for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); auto bytes = num_elements * sizeof(T); auto target = device_pointers[i]; - auto source = &host_pointer[i * bytes]; - std::cout << i << ": " << target << ", " << source << std::endl; + auto source = &host_pointer[i * num_elements]; v.memcpy(target, source, bytes); } } From fe2d82926601310458d18bc921377391f32eec0e Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 16:49:58 -0700 Subject: [PATCH 24/66] working but to be replaced --- Cxx11/nstream-multigpu-dpcpp.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 876c07d6b..2ab474853 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -180,7 +180,6 @@ int main(int argc, char * argv[]) auto i = l.index(); auto v = l.value(); auto bytes = local_length * sizeof(double); - auto start = local_length * i; { auto target = d_A[i]; auto source = &h_A[i * local_length]; @@ -236,7 +235,6 @@ int main(int argc, char * argv[]) auto i = l.index(); auto v = l.value(); auto bytes = local_length * sizeof(double); - auto start = local_length * i; auto target = &h_A[i * local_length]; auto source = d_A[i]; v.memcpy(target, source, bytes); From 6239694e60c1b369ba39f1bb6b3b5e5b9a26e433 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 16:50:50 -0700 Subject: [PATCH 25/66] working --- Cxx11/nstream-multigpu-dpcpp.cc | 104 ++++++-------------------------- 1 file changed, 19 insertions(+), 85 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 2ab474853..3de7a5d01 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -69,32 +69,7 @@ int main(int argc, char * argv[]) std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; std::cout << "C++11/DPC++ STREAM triad: A = B + scalar * C" << std::endl; - std::vector qs; - - auto platforms = sycl::platform::get_platforms(); - for (auto & p : platforms) { - auto pname = p.get_info(); - std::cout << "*Platform: " << pname << std::endl; - if ( pname.find("Level-Zero") != std::string::npos) { - std::cout << "*Level Zero GPU skipped" << std::endl; - break; - } - if ( pname.find("Intel") == std::string::npos) { - std::cout << "*non-Intel skipped" << std::endl; - break; - } - auto devices = p.get_devices(); - for (auto & d : devices ) { - std::cout << "**Device: " << d.get_info() << std::endl; - if ( d.is_gpu() || d.is_cpu() ) { - std::cout << "**Device is CPU or GPU - adding to vector of queues" << std::endl; - qs.push_back(sycl::queue(d)); - } - } - } - - int haz_ngpu = qs.size(); - std::cout << "Number of CPUs and GPUs found = " << haz_ngpu << std::endl; + auto qs = prk::SYCL::queues(); ////////////////////////////////////////////////////////////////////// /// Read and test input parameters @@ -164,41 +139,15 @@ int main(int argc, char * argv[]) auto d_B = std::vector (np, nullptr); auto d_C = std::vector (np, nullptr); - auto list(qs); - for (const auto & l : list | boost::adaptors::indexed(0) ) { - auto i = l.index(); - auto v = l.value(); - d_A[i] = syclx::malloc_device(local_length, v); - d_B[i] = syclx::malloc_device(local_length, v); - d_C[i] = syclx::malloc_device(local_length, v); - } - for (auto & i : list) { - i.wait(); - } + qs.allocate(d_A, local_length); + qs.allocate(d_B, local_length); + qs.allocate(d_C, local_length); + qs.waitall(); - for (const auto & l : list | boost::adaptors::indexed(0) ) { - auto i = l.index(); - auto v = l.value(); - auto bytes = local_length * sizeof(double); - { - auto target = d_A[i]; - auto source = &h_A[i * local_length]; - v.memcpy(target, source, bytes); - } - { - auto target = d_B[i]; - auto source = &h_B[i * local_length]; - v.memcpy(target, source, bytes); - } - { - auto target = d_C[i]; - auto source = &h_C[i * local_length]; - v.memcpy(target, source, bytes); - } - } - for (auto & i : list) { - i.wait(); - } + qs.scatter(d_A, h_A, local_length); + qs.scatter(d_B, h_B, local_length); + qs.scatter(d_C, h_C, local_length); + qs.waitall(); // overwrite host buffer with garbage to detect bugs h_A.fill(-77777777); @@ -209,8 +158,9 @@ int main(int argc, char * argv[]) if (iter==1) nstream_time = prk::wtime(); +#if 1 for (int g=0; g(h_A, d_A, local_length); + qs.waitall(); - for (const auto & l : list | boost::adaptors::indexed(0) ) { - auto i = l.index(); - auto v = l.value(); - syclx::free(d_A[i], v); - syclx::free(d_B[i], v); - syclx::free(d_C[i], v); - } - for (auto & q : qs) { - q.wait(); - } + qs.free(d_A); + qs.free(d_B); + qs.free(d_C); + qs.waitall(); ////////////////////////////////////////////////////////////////////// /// Analyze and output results From d299f957506126f9b49b894f55f1c6b6781b18fc Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 22 Jul 2020 08:46:23 -0700 Subject: [PATCH 26/66] fix input helper comment --- Cxx11/nstream-dpcpp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cxx11/nstream-dpcpp.cc b/Cxx11/nstream-dpcpp.cc index e72a14cd5..268ab4b23 100644 --- a/Cxx11/nstream-dpcpp.cc +++ b/Cxx11/nstream-dpcpp.cc @@ -77,7 +77,7 @@ int main(int argc, char * argv[]) size_t length; try { if (argc < 3) { - throw "Usage: <# iterations> []"; + throw "Usage: <# iterations> "; } iterations = std::atoi(argv[1]); From e7bb773aef4f5e72e3d29d9955a70bc31158599d Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 22 Jul 2020 09:02:37 -0700 Subject: [PATCH 27/66] add broadcast and reduce (unused and untested) --- Cxx11/prk_sycl.h | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index ec61f0679..6e4356ab6 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -165,8 +165,44 @@ namespace prk { } } - template - void gather(prk::vector & host_pointer, + // UNUSED and UNTESTED + template + void broadcast(std::vector & device_pointers, + const B & host_pointer, + size_t num_elements) + { + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = device_pointers[i]; + auto source = &host_pointer[0]; + v.memcpy(target, source, bytes); + } + } + + // UNUSED and UNTESTED + template + void reduce(B & host_pointer, + const std::vector & device_pointers, + size_t num_elements) + { + auto temp = std::vector(num_elements, 0); + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = device_pointers[i]; + auto source = &host_pointer[0]; + v.memcpy(temp, source, bytes); + for (size_t e=0; e + void gather(B & host_pointer, const std::vector & device_pointers, size_t num_elements) { @@ -180,9 +216,9 @@ namespace prk { } } - template + template void scatter(std::vector & device_pointers, - prk::vector & host_pointer, + const B & host_pointer, size_t num_elements) { for (const auto & l : list | boost::adaptors::indexed(0) ) { @@ -196,7 +232,6 @@ namespace prk { } - }; } // namespace SYCL From bc0ce206ca26b0416693f1d714e934c43ba54f92 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 09:29:01 -0700 Subject: [PATCH 28/66] add stencil DPC++ skeletons --- Cxx11/Makefile | 2 +- Cxx11/stencil-dpcpp.cc | 229 ++++++++++++++++++++++++++++++++ Cxx11/stencil-multigpu-dpcpp.cc | 229 ++++++++++++++++++++++++++++++++ 3 files changed, 459 insertions(+), 1 deletion(-) create mode 100644 Cxx11/stencil-dpcpp.cc create mode 100644 Cxx11/stencil-multigpu-dpcpp.cc diff --git a/Cxx11/Makefile b/Cxx11/Makefile index f0624da81..6c0e08e3e 100644 --- a/Cxx11/Makefile +++ b/Cxx11/Makefile @@ -97,7 +97,7 @@ sycl-usm: nstream-sycl-usm nstream-sycl-explicit-usm stencil-sycl-usm transpose- sycl-explicit: nstream-sycl-explicit transpose-sycl-explicit -dpcpp: nstream-dpcpp nstream-multigpu-dpcpp transpose-dpcpp +dpcpp: nstream-dpcpp nstream-multigpu-dpcpp stencil-dpcpp stencil-multigpu-dpcpp transpose-dpcpp tbb: p2p-innerloop-tbb p2p-tbb stencil-tbb transpose-tbb nstream-tbb \ p2p-hyperplane-tbb p2p-tasks-tbb diff --git a/Cxx11/stencil-dpcpp.cc b/Cxx11/stencil-dpcpp.cc new file mode 100644 index 000000000..0dbb5764d --- /dev/null +++ b/Cxx11/stencil-dpcpp.cc @@ -0,0 +1,229 @@ + +/// +/// Copyright (c) 2013, Intel Corporation +/// +/// Redistribution and use in source and binary forms, with or without +/// modification, are permitted provided that the following conditions +/// are met: +/// +/// * Redistributions of source code must retain the above copyright +/// notice, this list of conditions and the following disclaimer. +/// * Redistributions in binary form must reproduce the above +/// copyright notice, this list of conditions and the following +/// disclaimer in the documentation and/or other materials provided +/// with the distribution. +/// * Neither the name of Intel Corporation nor the names of its +/// contributors may be used to endorse or promote products +/// derived from this software without specific prior written +/// permission. +/// +/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +/// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +/// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +/// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +/// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +/// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +/// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +/// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +/// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +/// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +/// POSSIBILITY OF SUCH DAMAGE. + +////////////////////////////////////////////////////////////////////// +/// +/// NAME: Stencil +/// +/// PURPOSE: This program tests the efficiency with which a space-invariant, +/// linear, symmetric filter (stencil) can be applied to a square +/// grid or image. +/// +/// USAGE: The program takes as input the linear +/// dimension of the grid, and the number of iterations on the grid +/// +/// +/// +/// The output consists of diagnostics to make sure the +/// algorithm worked, and of timing statistics. +/// +/// FUNCTIONS CALLED: +/// +/// Other than standard C functions, the following functions are used in +/// this program: +/// wtime() +/// +/// HISTORY: - Written by Rob Van der Wijngaart, February 2009. +/// - RvdW: Removed unrolling pragmas for clarity; +/// added constant to array "in" at end of each iteration to force +/// refreshing of neighbor data in parallel versions; August 2013 +/// C++11-ification by Jeff Hammond, May 2017. +/// +////////////////////////////////////////////////////////////////////// + +#include "prk_util.h" +#include "stencil_seq.hpp" + +void nothing(const int n, const int t, prk::vector & in, prk::vector & out) +{ + std::cout << "You are trying to use a stencil that does not exist.\n"; + std::cout << "Please generate the new stencil using the code generator\n"; + std::cout << "and add it to the case-switch in the driver." << std::endl; + // n will never be zero - this is to silence compiler warnings. + if (n==0 || t==0) std::cout << in.size() << out.size() << std::endl; + std::abort(); +} + +int main(int argc, char* argv[]) +{ + std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; + std::cout << "C++11 Stencil execution on 2D grid" << std::endl; + + ////////////////////////////////////////////////////////////////////// + // Process and test input parameters + ////////////////////////////////////////////////////////////////////// + + int iterations, n, radius, tile_size; + bool star = true; + try { + if (argc < 3) { + throw "Usage: <# iterations> [ ]"; + } + + // number of times to run the algorithm + iterations = std::atoi(argv[1]); + if (iterations < 1) { + throw "ERROR: iterations must be >= 1"; + } + + // linear grid dimension + n = std::atoi(argv[2]); + if (n < 1) { + throw "ERROR: grid dimension must be positive"; + } else if (n > prk::get_max_matrix_size()) { + throw "ERROR: grid dimension too large - overflow risk"; + } + + // default tile size for tiling of local transpose + tile_size = 32; + if (argc > 3) { + tile_size = std::atoi(argv[3]); + if (tile_size <= 0) tile_size = n; + if (tile_size > n) tile_size = n; + } + + // stencil pattern + if (argc > 4) { + auto stencil = std::string(argv[4]); + auto grid = std::string("grid"); + star = (stencil == grid) ? false : true; + } + + // stencil radius + radius = 2; + if (argc > 5) { + radius = std::atoi(argv[5]); + } + + if ( (radius < 1) || (2*radius+1 > n) ) { + throw "ERROR: Stencil radius negative or too large"; + } + } + catch (const char * e) { + std::cout << e << std::endl; + return 1; + } + + std::cout << "Number of iterations = " << iterations << std::endl; + std::cout << "Grid size = " << n << std::endl; + std::cout << "Tile size = " << tile_size << std::endl; + std::cout << "Type of stencil = " << (star ? "star" : "grid") << std::endl; + std::cout << "Radius of stencil = " << radius << std::endl; + + auto stencil = nothing; + if (star) { + switch (radius) { + case 1: stencil = star1; break; + case 2: stencil = star2; break; + case 3: stencil = star3; break; + case 4: stencil = star4; break; + case 5: stencil = star5; break; + } + } else { + switch (radius) { + case 1: stencil = grid1; break; + case 2: stencil = grid2; break; + case 3: stencil = grid3; break; + case 4: stencil = grid4; break; + case 5: stencil = grid5; break; + } + } + + ////////////////////////////////////////////////////////////////////// + // Allocate space and perform the computation + ////////////////////////////////////////////////////////////////////// + + auto stencil_time = 0.0; + + prk::vector in(n*n); + prk::vector out(n*n); + + { + for (int it=0; it(i+j); + out[i*n+j] = 0.0; + } + } + } + } + + for (int iter = 0; iter<=iterations; iter++) { + + if (iter==1) stencil_time = prk::wtime(); + // Apply the stencil operator + stencil(n, tile_size, in, out); + // Add constant to solution to force refresh of neighbor data, if any + std::transform(in.begin(), in.end(), in.begin(), [](double c) { return c+=1.0; }); + } + stencil_time = prk::wtime() - stencil_time; + } + + ////////////////////////////////////////////////////////////////////// + // Analyze and output results. + ////////////////////////////////////////////////////////////////////// + + // interior of grid with respect to stencil + size_t active_points = static_cast(n-2*radius)*static_cast(n-2*radius); + double norm = 0.0; + for (int i=radius; i epsilon) { + std::cout << "ERROR: L1 norm = " << norm + << " Reference L1 norm = " << reference_norm << std::endl; + return 1; + } else { + std::cout << "Solution validates" << std::endl; +#ifdef VERBOSE + std::cout << "L1 norm = " << norm + << " Reference L1 norm = " << reference_norm << std::endl; +#endif + const int stencil_size = star ? 4*radius+1 : (2*radius+1)*(2*radius+1); + size_t flops = (2L*(size_t)stencil_size+1L) * active_points; + auto avgtime = stencil_time/iterations; + std::cout << "Rate (MFlops/s): " << 1.0e-6 * static_cast(flops)/avgtime + << " Avg time (s): " << avgtime << std::endl; + } + + return 0; +} diff --git a/Cxx11/stencil-multigpu-dpcpp.cc b/Cxx11/stencil-multigpu-dpcpp.cc new file mode 100644 index 000000000..0dbb5764d --- /dev/null +++ b/Cxx11/stencil-multigpu-dpcpp.cc @@ -0,0 +1,229 @@ + +/// +/// Copyright (c) 2013, Intel Corporation +/// +/// Redistribution and use in source and binary forms, with or without +/// modification, are permitted provided that the following conditions +/// are met: +/// +/// * Redistributions of source code must retain the above copyright +/// notice, this list of conditions and the following disclaimer. +/// * Redistributions in binary form must reproduce the above +/// copyright notice, this list of conditions and the following +/// disclaimer in the documentation and/or other materials provided +/// with the distribution. +/// * Neither the name of Intel Corporation nor the names of its +/// contributors may be used to endorse or promote products +/// derived from this software without specific prior written +/// permission. +/// +/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +/// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +/// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +/// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +/// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +/// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +/// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +/// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +/// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +/// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +/// POSSIBILITY OF SUCH DAMAGE. + +////////////////////////////////////////////////////////////////////// +/// +/// NAME: Stencil +/// +/// PURPOSE: This program tests the efficiency with which a space-invariant, +/// linear, symmetric filter (stencil) can be applied to a square +/// grid or image. +/// +/// USAGE: The program takes as input the linear +/// dimension of the grid, and the number of iterations on the grid +/// +/// +/// +/// The output consists of diagnostics to make sure the +/// algorithm worked, and of timing statistics. +/// +/// FUNCTIONS CALLED: +/// +/// Other than standard C functions, the following functions are used in +/// this program: +/// wtime() +/// +/// HISTORY: - Written by Rob Van der Wijngaart, February 2009. +/// - RvdW: Removed unrolling pragmas for clarity; +/// added constant to array "in" at end of each iteration to force +/// refreshing of neighbor data in parallel versions; August 2013 +/// C++11-ification by Jeff Hammond, May 2017. +/// +////////////////////////////////////////////////////////////////////// + +#include "prk_util.h" +#include "stencil_seq.hpp" + +void nothing(const int n, const int t, prk::vector & in, prk::vector & out) +{ + std::cout << "You are trying to use a stencil that does not exist.\n"; + std::cout << "Please generate the new stencil using the code generator\n"; + std::cout << "and add it to the case-switch in the driver." << std::endl; + // n will never be zero - this is to silence compiler warnings. + if (n==0 || t==0) std::cout << in.size() << out.size() << std::endl; + std::abort(); +} + +int main(int argc, char* argv[]) +{ + std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; + std::cout << "C++11 Stencil execution on 2D grid" << std::endl; + + ////////////////////////////////////////////////////////////////////// + // Process and test input parameters + ////////////////////////////////////////////////////////////////////// + + int iterations, n, radius, tile_size; + bool star = true; + try { + if (argc < 3) { + throw "Usage: <# iterations> [ ]"; + } + + // number of times to run the algorithm + iterations = std::atoi(argv[1]); + if (iterations < 1) { + throw "ERROR: iterations must be >= 1"; + } + + // linear grid dimension + n = std::atoi(argv[2]); + if (n < 1) { + throw "ERROR: grid dimension must be positive"; + } else if (n > prk::get_max_matrix_size()) { + throw "ERROR: grid dimension too large - overflow risk"; + } + + // default tile size for tiling of local transpose + tile_size = 32; + if (argc > 3) { + tile_size = std::atoi(argv[3]); + if (tile_size <= 0) tile_size = n; + if (tile_size > n) tile_size = n; + } + + // stencil pattern + if (argc > 4) { + auto stencil = std::string(argv[4]); + auto grid = std::string("grid"); + star = (stencil == grid) ? false : true; + } + + // stencil radius + radius = 2; + if (argc > 5) { + radius = std::atoi(argv[5]); + } + + if ( (radius < 1) || (2*radius+1 > n) ) { + throw "ERROR: Stencil radius negative or too large"; + } + } + catch (const char * e) { + std::cout << e << std::endl; + return 1; + } + + std::cout << "Number of iterations = " << iterations << std::endl; + std::cout << "Grid size = " << n << std::endl; + std::cout << "Tile size = " << tile_size << std::endl; + std::cout << "Type of stencil = " << (star ? "star" : "grid") << std::endl; + std::cout << "Radius of stencil = " << radius << std::endl; + + auto stencil = nothing; + if (star) { + switch (radius) { + case 1: stencil = star1; break; + case 2: stencil = star2; break; + case 3: stencil = star3; break; + case 4: stencil = star4; break; + case 5: stencil = star5; break; + } + } else { + switch (radius) { + case 1: stencil = grid1; break; + case 2: stencil = grid2; break; + case 3: stencil = grid3; break; + case 4: stencil = grid4; break; + case 5: stencil = grid5; break; + } + } + + ////////////////////////////////////////////////////////////////////// + // Allocate space and perform the computation + ////////////////////////////////////////////////////////////////////// + + auto stencil_time = 0.0; + + prk::vector in(n*n); + prk::vector out(n*n); + + { + for (int it=0; it(i+j); + out[i*n+j] = 0.0; + } + } + } + } + + for (int iter = 0; iter<=iterations; iter++) { + + if (iter==1) stencil_time = prk::wtime(); + // Apply the stencil operator + stencil(n, tile_size, in, out); + // Add constant to solution to force refresh of neighbor data, if any + std::transform(in.begin(), in.end(), in.begin(), [](double c) { return c+=1.0; }); + } + stencil_time = prk::wtime() - stencil_time; + } + + ////////////////////////////////////////////////////////////////////// + // Analyze and output results. + ////////////////////////////////////////////////////////////////////// + + // interior of grid with respect to stencil + size_t active_points = static_cast(n-2*radius)*static_cast(n-2*radius); + double norm = 0.0; + for (int i=radius; i epsilon) { + std::cout << "ERROR: L1 norm = " << norm + << " Reference L1 norm = " << reference_norm << std::endl; + return 1; + } else { + std::cout << "Solution validates" << std::endl; +#ifdef VERBOSE + std::cout << "L1 norm = " << norm + << " Reference L1 norm = " << reference_norm << std::endl; +#endif + const int stencil_size = star ? 4*radius+1 : (2*radius+1)*(2*radius+1); + size_t flops = (2L*(size_t)stencil_size+1L) * active_points; + auto avgtime = stencil_time/iterations; + std::cout << "Rate (MFlops/s): " << 1.0e-6 * static_cast(flops)/avgtime + << " Avg time (s): " << avgtime << std::endl; + } + + return 0; +} From 4de1f8323331913a5889a7639aa83379f3f8d7be Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 11:52:12 -0700 Subject: [PATCH 29/66] cleanup comments --- Cxx11/stencil-sycl.cc | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Cxx11/stencil-sycl.cc b/Cxx11/stencil-sycl.cc index a375e1f05..9795cc638 100644 --- a/Cxx11/stencil-sycl.cc +++ b/Cxx11/stencil-sycl.cc @@ -46,12 +46,6 @@ /// The output consists of diagnostics to make sure the /// algorithm worked, and of timing statistics. /// -/// FUNCTIONS CALLED: -/// -/// Other than standard C functions, the following functions are used in -/// this program: -/// wtime() -/// /// HISTORY: - Written by Rob Van der Wijngaart, February 2009. /// - RvdW: Removed unrolling pragmas for clarity; /// added constant to array "in" at end of each iteration to force @@ -298,6 +292,7 @@ int main(int argc, char * argv[]) std::cout << "Number of iterations = " << iterations << std::endl; std::cout << "Grid size = " << n << std::endl; + std::cout << "Tile size = " << tile_size << std::endl; std::cout << "Type of stencil = " << (star ? "star" : "grid") << std::endl; std::cout << "Radius of stencil = " << radius << std::endl; From af3de74714fc0debb2167a5a6138401f2f695ee3 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 11:53:56 -0700 Subject: [PATCH 30/66] some fixes - GPU broken --- Cxx11/stencil-sycl-usm.cc | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Cxx11/stencil-sycl-usm.cc b/Cxx11/stencil-sycl-usm.cc index 7224991f0..ba56f9ffe 100644 --- a/Cxx11/stencil-sycl-usm.cc +++ b/Cxx11/stencil-sycl-usm.cc @@ -105,25 +105,21 @@ void run(sycl::queue & q, int iterations, size_t n, size_t tile_size, bool star, // Allocate space and perform the computation ////////////////////////////////////////////////////////////////////// - auto ctx = q.get_context(); - double stencil_time(0); T * out; try { - auto dev = q.get_device(); - - T * in = static_cast(syclx::malloc_shared(n * n * sizeof(T), dev, ctx)); - out = static_cast(syclx::malloc_shared(n * n * sizeof(T), dev, ctx)); + T * in = static_cast(syclx::malloc_shared(n * n * sizeof(T), q)); + out = static_cast(syclx::malloc_shared(n * n * sizeof(T), q)); q.submit([&](sycl::handler& h) { - h.parallel_for>(sycl::range<2> {n, n}, [=] (sycl::id<2> it) { const auto i = it[0]; const auto j = it[1]; - in[i*n+j] = static_cast(i+j); + in[i*n+j] = static_cast(i+j); + out[i*n+j] = static_cast(0); }); }); q.wait(); @@ -133,9 +129,9 @@ void run(sycl::queue & q, int iterations, size_t n, size_t tile_size, bool star, if (iter==1) stencil_time = prk::wtime(); stencil(q, n, in, out); + q.wait(); q.submit([&](sycl::handler& h) { - // Add constant to solution to force refresh of neighbor data, if any h.parallel_for>(sycl::range<2> {n, n}, sycl::id<2> {0, 0}, [=] (sycl::id<2> it) { const auto i = it[0]; const auto j = it[1]; @@ -146,7 +142,7 @@ void run(sycl::queue & q, int iterations, size_t n, size_t tile_size, bool star, } stencil_time = prk::wtime() - stencil_time; - syclx::free(in, ctx); + syclx::free(in, q); } catch (sycl::exception & e) { std::cout << e.what() << std::endl; @@ -178,7 +174,7 @@ void run(sycl::queue & q, int iterations, size_t n, size_t tile_size, bool star, } norm /= active_points; - syclx::free(out, ctx); + syclx::free(out, q); // verify correctness const double epsilon = 1.0e-8; @@ -186,6 +182,7 @@ void run(sycl::queue & q, int iterations, size_t n, size_t tile_size, bool star, if (prk::abs(norm-reference_norm) > epsilon) { std::cout << "ERROR: L1 norm = " << norm << " Reference L1 norm = " << reference_norm << std::endl; + std::cout << "===================================" << std::endl; } else { std::cout << "Solution validates" << std::endl; #ifdef VERBOSE @@ -265,6 +262,7 @@ int main(int argc, char * argv[]) std::cout << "Number of iterations = " << iterations << std::endl; std::cout << "Grid size = " << n << std::endl; + std::cout << "Tile size = " << tile_size << std::endl; std::cout << "Type of stencil = " << (star ? "star" : "grid") << std::endl; std::cout << "Radius of stencil = " << radius << std::endl; From 12abe88e3d8f6b4076a9e53e0810e7bfb32b6e77 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 11:54:22 -0700 Subject: [PATCH 31/66] cleanup --- Cxx11/nstream-dpcpp.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Cxx11/nstream-dpcpp.cc b/Cxx11/nstream-dpcpp.cc index addcbf6f1..e72a14cd5 100644 --- a/Cxx11/nstream-dpcpp.cc +++ b/Cxx11/nstream-dpcpp.cc @@ -122,9 +122,10 @@ int main(int argc, char * argv[]) double * d_A = syclx::malloc_device(length, q); double * d_B = syclx::malloc_device(length, q); double * d_C = syclx::malloc_device(length, q); - q.memcpy(d_A, &(h_A[0]), bytes).wait(); - q.memcpy(d_B, &(h_B[0]), bytes).wait(); - q.memcpy(d_C, &(h_C[0]), bytes).wait(); + q.memcpy(d_A, &(h_A[0]), bytes); + q.memcpy(d_B, &(h_B[0]), bytes); + q.memcpy(d_C, &(h_C[0]), bytes); + q.wait(); double scalar(3); { @@ -133,9 +134,7 @@ int main(int argc, char * argv[]) if (iter==1) nstream_time = prk::wtime(); q.submit([&](sycl::handler& h) { - - h.parallel_for( sycl::range<1>{length}, [=] (sycl::id<1> it) { - const size_t i = it[0]; + h.parallel_for( sycl::range<1>{length}, [=] (sycl::id<1> i) { d_A[i] += d_B[i] + scalar * d_C[i]; }); }); From ce6a713fe13ee890e37074f712f3d5179bc8a2f5 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 11:54:44 -0700 Subject: [PATCH 32/66] workaround Level Zero SPIR detection --- Cxx11/prk_sycl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index 6d2b417f8..2335c1c07 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -59,8 +59,8 @@ namespace prk { #if ! ( defined(TRISYCL) || defined(__HIPSYCL__) ) auto device = q.get_device(); auto platform = device.get_platform(); - std::cout << "SYCL Device: " << device.get_info() << std::endl; std::cout << "SYCL Platform: " << platform.get_info() << std::endl; + std::cout << "SYCL Device: " << device.get_info() << std::endl; #endif } From 546625177aad6a29fc351a192fdf46bf8576bdad Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 23 Jun 2020 11:55:03 -0700 Subject: [PATCH 33/66] some fixes - GPU broken --- Cxx11/stencil-dpcpp.cc | 121 +++++++++++++++++++++++++---------------- 1 file changed, 74 insertions(+), 47 deletions(-) diff --git a/Cxx11/stencil-dpcpp.cc b/Cxx11/stencil-dpcpp.cc index 0dbb5764d..421823e9e 100644 --- a/Cxx11/stencil-dpcpp.cc +++ b/Cxx11/stencil-dpcpp.cc @@ -1,6 +1,6 @@ /// -/// Copyright (c) 2013, Intel Corporation +/// Copyright (c) 2020, Intel Corporation /// /// Redistribution and use in source and binary forms, with or without /// modification, are permitted provided that the following conditions @@ -46,12 +46,6 @@ /// The output consists of diagnostics to make sure the /// algorithm worked, and of timing statistics. /// -/// FUNCTIONS CALLED: -/// -/// Other than standard C functions, the following functions are used in -/// this program: -/// wtime() -/// /// HISTORY: - Written by Rob Van der Wijngaart, February 2009. /// - RvdW: Removed unrolling pragmas for clarity; /// added constant to array "in" at end of each iteration to force @@ -60,30 +54,32 @@ /// ////////////////////////////////////////////////////////////////////// +#include "prk_sycl.h" #include "prk_util.h" -#include "stencil_seq.hpp" +#include "stencil_sycl.hpp" -void nothing(const int n, const int t, prk::vector & in, prk::vector & out) +template +void nothing(sycl::queue & q, const size_t n, const T * in, T * out) { std::cout << "You are trying to use a stencil that does not exist.\n"; std::cout << "Please generate the new stencil using the code generator\n"; std::cout << "and add it to the case-switch in the driver." << std::endl; - // n will never be zero - this is to silence compiler warnings. - if (n==0 || t==0) std::cout << in.size() << out.size() << std::endl; - std::abort(); + prk::Abort(); } int main(int argc, char* argv[]) { std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; - std::cout << "C++11 Stencil execution on 2D grid" << std::endl; + std::cout << "C++11/DPC++ Stencil execution on 2D grid" << std::endl; ////////////////////////////////////////////////////////////////////// // Process and test input parameters ////////////////////////////////////////////////////////////////////// - int iterations, n, radius, tile_size; + int iterations; + size_t n, tile_size; bool star = true; + size_t radius = 2; try { if (argc < 3) { throw "Usage: <# iterations> [ ]"; @@ -139,16 +135,18 @@ int main(int argc, char* argv[]) std::cout << "Type of stencil = " << (star ? "star" : "grid") << std::endl; std::cout << "Radius of stencil = " << radius << std::endl; - auto stencil = nothing; + auto stencil = nothing; if (star) { switch (radius) { - case 1: stencil = star1; break; - case 2: stencil = star2; break; - case 3: stencil = star3; break; - case 4: stencil = star4; break; - case 5: stencil = star5; break; + case 1: stencil = star1; break; + case 2: stencil = star2; break; + case 3: stencil = star3; break; + case 4: stencil = star4; break; + case 5: stencil = star5; break; } - } else { + } +#if 0 + else { switch (radius) { case 1: stencil = grid1; break; case 2: stencil = grid2; break; @@ -157,39 +155,63 @@ int main(int argc, char* argv[]) case 5: stencil = grid5; break; } } +#endif + + sycl::queue q(sycl::default_selector{}); + prk::SYCL::print_device_platform(q); ////////////////////////////////////////////////////////////////////// // Allocate space and perform the computation ////////////////////////////////////////////////////////////////////// - auto stencil_time = 0.0; - - prk::vector in(n*n); - prk::vector out(n*n); - - { - for (int it=0; it(i+j); - out[i*n+j] = 0.0; - } - } - } - } + double stencil_time(0); - for (int iter = 0; iter<=iterations; iter++) { + prk::vector h_in(n*n, 0); + prk::vector h_out(n*n, 0); - if (iter==1) stencil_time = prk::wtime(); - // Apply the stencil operator - stencil(n, tile_size, in, out); - // Add constant to solution to force refresh of neighbor data, if any - std::transform(in.begin(), in.end(), in.begin(), [](double c) { return c+=1.0; }); - } - stencil_time = prk::wtime() - stencil_time; + const size_t bytes = n * n * sizeof(double); + + double * d_in = syclx::malloc_device(n*n, q); + double * d_out = syclx::malloc_device(n*n, q); + q.wait(); + + q.submit([&](sycl::handler& h) { + h.parallel_for(sycl::range<2> {n,n}, [=] (sycl::item<2> it) { + const auto i = it[0]; + const auto j = it[1]; + d_in[i*n+j] = static_cast(i+j); + d_out[i*n+j] = static_cast(0); + }); + }); + q.wait(); + + for (int iter = 0; iter<=iterations; iter++) { + + if (iter==1) stencil_time = prk::wtime(); + + // Apply the stencil operator + stencil(q, n, d_in, d_out); + q.wait(); + + // Add constant to solution to force refresh of neighbor data, if any + q.submit([&](sycl::handler& h) { + h.parallel_for(sycl::range<2> {n,n}, [=] (sycl::item<2> it) { + const auto i = it[0]; + const auto j = it[1]; + d_in[i*n+j] += static_cast(1); + }); + }); + q.wait(); } + stencil_time = prk::wtime() - stencil_time; + + q.memcpy(&(h_in[0]), d_in, bytes); + q.memcpy(&(h_out[0]), d_out, bytes); + q.wait(); + + syclx::free(d_in, q); + syclx::free(d_out,q); + q.wait(); ////////////////////////////////////////////////////////////////////// // Analyze and output results. @@ -200,7 +222,7 @@ int main(int argc, char* argv[]) double norm = 0.0; for (int i=radius; i epsilon) { std::cout << "ERROR: L1 norm = " << norm << " Reference L1 norm = " << reference_norm << std::endl; + for (int i=0; i Date: Wed, 24 Jun 2020 08:00:56 -0700 Subject: [PATCH 34/66] undo minor mistake --- Cxx11/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cxx11/Makefile b/Cxx11/Makefile index 6c0e08e3e..e24b39163 100644 --- a/Cxx11/Makefile +++ b/Cxx11/Makefile @@ -128,7 +128,7 @@ cblas: transpose-cblas dgemm-cblas onemkl: nstream-onemkl dgemm-onemkl dgemm-multigpu-onemkl -oneapi: onemkl dpcpp sycl sycl-usm sycl-explicit sycl-explicit-usm +oneapi: onemkl dpcpp sycl sycl-usm sycl-explicit occa: transpose-occa nstream-occa From 0d358f980640161463fcf98e29cf2a97b51cf762 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 24 Jun 2020 08:04:59 -0700 Subject: [PATCH 35/66] add transpose multi-GPU DPC++ skeleton --- Cxx11/Makefile | 2 +- Cxx11/transpose-multigpu-dpcpp.cc | 181 ++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 Cxx11/transpose-multigpu-dpcpp.cc diff --git a/Cxx11/Makefile b/Cxx11/Makefile index e24b39163..a20cdfbb4 100644 --- a/Cxx11/Makefile +++ b/Cxx11/Makefile @@ -97,7 +97,7 @@ sycl-usm: nstream-sycl-usm nstream-sycl-explicit-usm stencil-sycl-usm transpose- sycl-explicit: nstream-sycl-explicit transpose-sycl-explicit -dpcpp: nstream-dpcpp nstream-multigpu-dpcpp stencil-dpcpp stencil-multigpu-dpcpp transpose-dpcpp +dpcpp: nstream-dpcpp nstream-multigpu-dpcpp stencil-dpcpp stencil-multigpu-dpcpp transpose-dpcpp transpose-multigpu-dpcpp tbb: p2p-innerloop-tbb p2p-tbb stencil-tbb transpose-tbb nstream-tbb \ p2p-hyperplane-tbb p2p-tasks-tbb diff --git a/Cxx11/transpose-multigpu-dpcpp.cc b/Cxx11/transpose-multigpu-dpcpp.cc new file mode 100644 index 000000000..2dec847f9 --- /dev/null +++ b/Cxx11/transpose-multigpu-dpcpp.cc @@ -0,0 +1,181 @@ +/// +/// Copyright (c) 2020, Intel Corporation +/// +/// Redistribution and use in source and binary forms, with or without +/// modification, are permitted provided that the following conditions +/// are met: +/// +/// * Redistributions of source code must retain the above copyright +/// notice, this list of conditions and the following disclaimer. +/// * Redistributions in binary form must reproduce the above +/// copyright notice, this list of conditions and the following +/// disclaimer in the documentation and/or other materials provided +/// with the distribution. +/// * Neither the name of Intel Corporation nor the names of its +/// contributors may be used to endorse or promote products +/// derived from this software without specific prior written +/// permission. +/// +/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +/// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +/// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +/// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +/// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +/// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +/// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +/// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +/// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +/// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +/// POSSIBILITY OF SUCH DAMAGE. + +////////////////////////////////////////////////////////////////////// +/// +/// NAME: transpose +/// +/// PURPOSE: This program measures the time for the transpose of a +/// column-major stored matrix into a row-major stored matrix. +/// +/// USAGE: Program input is the matrix order and the number of times to +/// repeat the operation: +/// +/// transpose <# iterations> +/// +/// The output consists of diagnostics to make sure the +/// transpose worked and timing statistics. +/// +/// HISTORY: Written by Rob Van der Wijngaart, February 2009. +/// Converted to C++11 by Jeff Hammond, February 2016 and May 2017. +/// +////////////////////////////////////////////////////////////////////// + +#include "prk_util.h" +#include "prk_sycl.h" + +int main(int argc, char * argv[]) +{ + std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; + std::cout << "C++11/DPCT Matrix transpose: B = A^T" << std::endl; + + ////////////////////////////////////////////////////////////////////// + /// Read and test input parameters + ////////////////////////////////////////////////////////////////////// + + int iterations; + size_t order; + try { + if (argc < 3) { + throw "Usage: <# iterations> "; + } + + iterations = std::atoi(argv[1]); + if (iterations < 1) { + throw "ERROR: iterations must be >= 1"; + } + + order = std::atoi(argv[2]); + if (order <= 0) { + throw "ERROR: Matrix Order must be greater than 0"; + } else if (order > prk::get_max_matrix_size()) { + throw "ERROR: matrix dimension too large - overflow risk"; + } + } + catch (const char * e) { + std::cout << e << std::endl; + return 1; + } + + std::cout << "Number of iterations = " << iterations << std::endl; + std::cout << "Matrix order = " << order << std::endl; + + sycl::queue q(sycl::default_selector{}); + prk::SYCL::print_device_platform(q); + + ////////////////////////////////////////////////////////////////////// + // Allocate space for the input and transpose matrix + ////////////////////////////////////////////////////////////////////// + + const size_t nelems = (size_t)order * (size_t)order; + const size_t bytes = nelems * sizeof(double); + double * h_a = syclx::malloc_host( nelems, q); + double * h_b = syclx::malloc_host( nelems, q); + + // fill A with the sequence 0 to order^2-1 + for (int j=0; j(order*j+i); + h_b[j*order+i] = static_cast(0); + } + } + + // copy input from host to device + double * A = syclx::malloc_device( nelems, q); + double * B = syclx::malloc_device( nelems, q); + q.memcpy(A, &(h_a[0]), bytes).wait(); + q.memcpy(B, &(h_b[0]), bytes).wait(); + + auto trans_time = 0.0; + + for (int iter = 0; iter<=iterations; iter++) { + + if (iter==1) trans_time = prk::wtime(); + + q.submit([&](sycl::handler& h) { + + h.parallel_for( sycl::range<2>{order,order}, [=] (sycl::id<2> it) { +#if USE_2D_INDEXING + sycl::id<2> ij{it[0],it[1]}; + sycl::id<2> ji{it[1],it[0]}; + B[ij] += A[ji]; + A[ji] += (T)1; +#else + B[it[0] * order + it[1]] += A[it[1] * order + it[0]]; + A[it[1] * order + it[0]] += 1.0; +#endif + }); + }); + q.wait(); + } + trans_time = prk::wtime() - trans_time; + + // copy output back to host + q.memcpy(&(h_b[0]), B, bytes).wait(); + + syclx::free(B, q); + syclx::free(A, q); + + ////////////////////////////////////////////////////////////////////// + /// Analyze and output results + ////////////////////////////////////////////////////////////////////// + + const double addit = (iterations+1.) * (iterations/2.); + double abserr(0); + for (int j=0; j(ij)*(1.+iterations)+addit; + abserr += prk::abs(h_b[ji] - reference); + } + } + + syclx::free(h_b, q); + syclx::free(h_a, q); + + const auto epsilon = 1.0e-8; + if (abserr < epsilon) { + std::cout << "Solution validates" << std::endl; + auto avgtime = trans_time/iterations; + auto bytes = (size_t)order * (size_t)order * sizeof(double); + std::cout << "Rate (MB/s): " << 1.0e-6 * (2L*bytes)/avgtime + << " Avg time (s): " << avgtime << std::endl; + } else { + std::cout << "ERROR: Aggregate squared error " << abserr + << " exceeds threshold " << epsilon << std::endl; + return 1; + } + + return 0; +} + + From 83892be1d7d2bc761bdce0ed53c5e9d05d520afa Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 29 Jun 2020 15:43:12 -0700 Subject: [PATCH 36/66] WIP --- Cxx11/transpose-multigpu-dpcpp.cc | 72 ++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/Cxx11/transpose-multigpu-dpcpp.cc b/Cxx11/transpose-multigpu-dpcpp.cc index 2dec847f9..a26244496 100644 --- a/Cxx11/transpose-multigpu-dpcpp.cc +++ b/Cxx11/transpose-multigpu-dpcpp.cc @@ -63,9 +63,10 @@ int main(int argc, char * argv[]) int iterations; size_t order; + int use_ngpu = 1; try { if (argc < 3) { - throw "Usage: <# iterations> "; + throw "Usage: <# iterations> []"; } iterations = std::atoi(argv[1]); @@ -79,6 +80,15 @@ int main(int argc, char * argv[]) } else if (order > prk::get_max_matrix_size()) { throw "ERROR: matrix dimension too large - overflow risk"; } + + if (argc > 3) { + use_ngpu = std::atoi(argv[3]); + } + + if (order % use_ngpu) { + std::cerr << "order = " << order << ", device count = " << use_ngpu << std::endl; + throw "ERROR: matrix order should be divisible by device count!"; + } } catch (const char * e) { std::cout << e << std::endl; @@ -87,34 +97,66 @@ int main(int argc, char * argv[]) std::cout << "Number of iterations = " << iterations << std::endl; std::cout << "Matrix order = " << order << std::endl; + std::cout << "Number of GPUs to use = " << use_ngpu << std::endl; + + std::vector qs; + + auto platforms = sycl::platform::get_platforms(); + for (auto & p : platforms) { + auto pname = p.get_info(); + std::cout << "*Platform: " << pname << std::endl; + if ( pname.find("Level-Zero") != std::string::npos) { + std::cout << "*Level Zero GPU skipped" << std::endl; + break; + } + if ( pname.find("Intel") == std::string::npos) { + std::cout << "*non-Intel skipped" << std::endl; + break; + } + auto devices = p.get_devices(); + for (auto & d : devices ) { + std::cout << "**Device: " << d.get_info() << std::endl; + if ( d.is_gpu() || d.is_cpu() ) { + std::cout << "**Device is CPU or GPU - adding to vector of queues" << std::endl; + qs.push_back(sycl::queue(d)); + } + } + } + + int haz_ngpu = qs.size(); + std::cout << "Number of CPUs and GPUs found = " << haz_ngpu << std::endl; - sycl::queue q(sycl::default_selector{}); - prk::SYCL::print_device_platform(q); + if (use_ngpu > haz_ngpu) { + std::cout << "You cannot use more GPUs (" << use_ngpu << ") than you have (" << haz_ngpu << ")" << std::endl; + } + + int ngpus = use_ngpu; ////////////////////////////////////////////////////////////////////// // Allocate space for the input and transpose matrix ////////////////////////////////////////////////////////////////////// - const size_t nelems = (size_t)order * (size_t)order; - const size_t bytes = nelems * sizeof(double); - double * h_a = syclx::malloc_host( nelems, q); - double * h_b = syclx::malloc_host( nelems, q); + double trans_time(0); + + auto h_a = prk::vector(order * order); + auto h_b = prk::vector(order * order); // fill A with the sequence 0 to order^2-1 - for (int j=0; j(order*j+i); h_b[j*order+i] = static_cast(0); } } - // copy input from host to device - double * A = syclx::malloc_device( nelems, q); - double * B = syclx::malloc_device( nelems, q); - q.memcpy(A, &(h_a[0]), bytes).wait(); - q.memcpy(B, &(h_b[0]), bytes).wait(); + const size_t bytes = order * order * sizeof(double); - auto trans_time = 0.0; + // copy input from host to device + double * A = syclx::malloc_device(order * order, q); + double * B = syclx::malloc_device(order * order, q); + q.memcpy(A, &(h_a[0]), bytes); + q.memcpy(B, &(h_b[0]), bytes); + q.wait(); for (int iter = 0; iter<=iterations; iter++) { From 9a8006a5fc0e7ebd3882b4dd461d333611f922e0 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 20 Jul 2020 14:31:26 -0700 Subject: [PATCH 37/66] add ignores --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index d4e9d72bc..3c78c8721 100644 --- a/.gitignore +++ b/.gitignore @@ -166,6 +166,8 @@ Cxx11/dgemm-mpi Cxx11/dgemm-sycl Cxx11/dgemm-blas-sycl Cxx11/dgemm-mkl-sycl +Cxx11/dgemm-multigpu-onemkl +Cxx11/dgemm-onemkl Cxx11/dgemm-kokkos Cxx11/dgemm-kernels-kokkos Cxx11/dgemm-raja @@ -217,6 +219,8 @@ Cxx11/nstream-sycl-usm Cxx11/nstream-sycl-explicit Cxx11/nstream-sycl-explicit-usm Cxx11/nstream-dpcpp +Cxx11/nstream-multigpu-dpcpp +Cxx11/nstream-onemkl Cxx11/nstream-celerity Cxx11/nstream-hpx Cxx11/nstream-upcxx From 83faa815c528a5a23b3a3a7a3aaf7689d055b0eb Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 20 Jul 2020 14:54:59 -0700 Subject: [PATCH 38/66] fix message --- Cxx11/transpose-multigpu-dpcpp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cxx11/transpose-multigpu-dpcpp.cc b/Cxx11/transpose-multigpu-dpcpp.cc index a26244496..215ae23d1 100644 --- a/Cxx11/transpose-multigpu-dpcpp.cc +++ b/Cxx11/transpose-multigpu-dpcpp.cc @@ -127,7 +127,7 @@ int main(int argc, char * argv[]) std::cout << "Number of CPUs and GPUs found = " << haz_ngpu << std::endl; if (use_ngpu > haz_ngpu) { - std::cout << "You cannot use more GPUs (" << use_ngpu << ") than you have (" << haz_ngpu << ")" << std::endl; + std::cout << "You cannot use more CPUs and GPUs (" << use_ngpu << ") than you have (" << haz_ngpu << ")" << std::endl; } int ngpus = use_ngpu; From 4e818361b966a32117c7e49cc5e23363209ff6ce Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 12:43:44 -0700 Subject: [PATCH 39/66] add fill, cleanup dead code --- Cxx11/prk_util.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Cxx11/prk_util.h b/Cxx11/prk_util.h index 176c1186b..98ea627ef 100644 --- a/Cxx11/prk_util.h +++ b/Cxx11/prk_util.h @@ -203,20 +203,17 @@ namespace prk { public: vector(size_t n) { - //this->data_ = new T[n]; this->data_ = prk::malloc(n); this->size_ = n; } vector(size_t n, T v) { - //this->data_ = new T[n]; this->data_ = prk::malloc(n); for (size_t i=0; idata_[i] = v; this->size_ = n; } ~vector() { - //delete[] this->data_; prk::free(this->data_); } @@ -258,6 +255,10 @@ namespace prk { return &(this->data_[this->size_]); } + void fill(T v) { + for (size_t i=0; isize_; ++i) this->data_[i] = v; + } + #if 0 T & begin() { return this->data_[0]; From 767db07dde49e44b9212025d1b4397473564d3da Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 12:44:12 -0700 Subject: [PATCH 40/66] start decomposition --- Cxx11/transpose-multigpu-dpcpp.cc | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Cxx11/transpose-multigpu-dpcpp.cc b/Cxx11/transpose-multigpu-dpcpp.cc index 215ae23d1..adf6c49b3 100644 --- a/Cxx11/transpose-multigpu-dpcpp.cc +++ b/Cxx11/transpose-multigpu-dpcpp.cc @@ -95,10 +95,6 @@ int main(int argc, char * argv[]) return 1; } - std::cout << "Number of iterations = " << iterations << std::endl; - std::cout << "Matrix order = " << order << std::endl; - std::cout << "Number of GPUs to use = " << use_ngpu << std::endl; - std::vector qs; auto platforms = sycl::platform::get_platforms(); @@ -132,6 +128,17 @@ int main(int argc, char * argv[]) int ngpus = use_ngpu; + if (order % ngpus != 0) { + std::cout << "ERROR: matrix order " << order << " should be divisible by # procs" << ngpus << std::endl; + return 2; + } + size_t block_order = order / ngpus; + + std::cout << "Number of GPUs to use = " << use_ngpu << std::endl; + std::cout << "Number of iterations = " << iterations << std::endl; + std::cout << "Matrix order = " << order << std::endl; + std::cout << "Block order = " << block_order << std::endl; + ////////////////////////////////////////////////////////////////////// // Allocate space for the input and transpose matrix ////////////////////////////////////////////////////////////////////// From 68f1a94a686045775ca394e9d4d9438fc91dc1ee Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 12:44:52 -0700 Subject: [PATCH 41/66] device queues class and related methods --- Cxx11/prk_sycl.h | 150 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index 2335c1c07..599d424c7 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -4,12 +4,17 @@ #include #include +//#include // std::distance +#include + #include "CL/sycl.hpp" #ifdef __COMPUTECPP__ #include "SYCL/experimental/usm.h" #endif +#include "prk_util.h" // prk::vector + namespace sycl = cl::sycl; #ifdef __COMPUTECPP__ @@ -83,6 +88,151 @@ namespace prk { #endif } + class queues { + + private: + std::vector list; + + public: + queues(bool use_cpu = true, bool use_gpu = true) + { + auto platforms = sycl::platform::get_platforms(); + for (auto & p : platforms) { + auto pname = p.get_info(); + std::cout << "*Platform: " << pname << std::endl; + if ( pname.find("Level-Zero") != std::string::npos) { + std::cout << "*Level Zero GPU skipped" << std::endl; + break; + } + if ( pname.find("Intel") == std::string::npos) { + std::cout << "*non-Intel skipped" << std::endl; + break; + } + auto devices = p.get_devices(); + for (auto & d : devices ) { + std::cout << "**Device: " << d.get_info() << std::endl; + if ( d.is_cpu() && use_cpu ) { + std::cout << "**Device is CPU - adding to vector of queues" << std::endl; + list.push_back(sycl::queue(d)); + } + if ( d.is_gpu() && use_gpu ) { + std::cout << "**Device is GPU - adding to vector of queues" << std::endl; + list.push_back(sycl::queue(d)); + } + } + } + } + + int size(void) + { + return list.size(); + } + + void wait(int i) + { + list[i].wait(); + } + + void waitall(void) + { + for (auto & i : list) { + i.wait(); + } + } + + template + void allocate(std::vector & device_pointers, + size_t num_elements) + { + std::cout << "allocate" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + device_pointers[i] = syclx::malloc_device(num_elements, v); + std::cout << i << ": " << device_pointers[i] << ", " << num_elements << std::endl; + } + } + + template + void free(std::vector & device_pointers) + { + std::cout << "free" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + syclx::free(device_pointers[i], v); + } + } + + template + void gather(T * host_pointer, + const std::vector & device_pointers, + size_t num_elements) + { + std::cout << "gather" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = &host_pointer[i * bytes]; + auto source = device_pointers[i]; + v.memcpy(target, source, bytes); + } + } + + template + void gather(prk::vector & host_pointer, + const std::vector & device_pointers, + size_t num_elements) + { + std::cout << "gather" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = &host_pointer[i * bytes]; + auto source = device_pointers[i]; + v.memcpy(target, source, bytes); + } + } + + template + void scatter(std::vector & device_pointers, + const T * host_pointer, + size_t num_elements) + { + std::cout << "scatter" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = device_pointers[i]; + auto source = &host_pointer[i * bytes]; + v.memcpy(target, source, bytes); + } + } + + template + void scatter(std::vector & device_pointers, + prk::vector & host_pointer, + size_t num_elements) + { + std::cout << "scatter" << std::endl; + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = device_pointers[i]; + auto source = &host_pointer[i * bytes]; + std::cout << i << ": " << target << ", " << source << std::endl; + v.memcpy(target, source, bytes); + } + } + + + + }; + } // namespace SYCL } // namespace prk From 452c0a30c8db5b17be06cee3d5a02301c64b24d1 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 12:45:07 -0700 Subject: [PATCH 42/66] use device queues stuff - currently broken --- Cxx11/nstream-multigpu-dpcpp.cc | 148 +++++++++++--------------------- 1 file changed, 50 insertions(+), 98 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 5a717683e..99f5d68a3 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -69,12 +69,14 @@ int main(int argc, char * argv[]) std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; std::cout << "C++11/DPC++ STREAM triad: A = B + scalar * C" << std::endl; + auto qs = prk::SYCL::queues(); + ////////////////////////////////////////////////////////////////////// /// Read and test input parameters ////////////////////////////////////////////////////////////////////// int iterations; - size_t length; + size_t length, local_length; int use_ngpu = 1; try { if (argc < 3) { @@ -94,48 +96,34 @@ int main(int argc, char * argv[]) if (argc > 3) { use_ngpu = std::atoi(argv[3]); } + if ( use_ngpu > qs.size() ) { + std::string error = "You cannot use more devices (" + + std::to_string(use_ngpu) + + ") than you have (" + + std::to_string(qs.size()) + ")"; + throw error; + } + + if (length % use_ngpu != 0) { + std::string error = "ERROR: vector length (" + + std::to_string(length) + + ") should be divisible by # procs (" + + std::to_string(use_ngpu) + ")"; + throw error; + } + local_length = length / use_ngpu; } catch (const char * e) { std::cout << e << std::endl; return 1; } + std::cout << "Number of devices = " << use_ngpu << std::endl; std::cout << "Number of iterations = " << iterations << std::endl; std::cout << "Vector length = " << length << std::endl; - std::cout << "Number of GPUs to use = " << use_ngpu << std::endl; - - std::vector qs; - - auto platforms = sycl::platform::get_platforms(); - for (auto & p : platforms) { - auto pname = p.get_info(); - std::cout << "*Platform: " << pname << std::endl; - if ( pname.find("Level-Zero") != std::string::npos) { - std::cout << "*Level Zero GPU skipped" << std::endl; - break; - } - if ( pname.find("Intel") == std::string::npos) { - std::cout << "*non-Intel skipped" << std::endl; - break; - } - auto devices = p.get_devices(); - for (auto & d : devices ) { - std::cout << "**Device: " << d.get_info() << std::endl; - if ( d.is_gpu() || d.is_cpu() ) { - std::cout << "**Device is CPU or GPU - adding to vector of queues" << std::endl; - qs.push_back(sycl::queue(d)); - } - } - } + std::cout << "Vector length (local) = " << local_length << std::endl; - int haz_ngpu = qs.size(); - std::cout << "Number of CPUs and GPUs found = " << haz_ngpu << std::endl; - - if (use_ngpu > haz_ngpu) { - std::cout << "You cannot use more GPUs (" << use_ngpu << ") than you have (" << haz_ngpu << ")" << std::endl; - } - - int ngpus = use_ngpu; + int np = use_ngpu; ////////////////////////////////////////////////////////////////////// // Allocate space and perform the computation @@ -143,55 +131,26 @@ int main(int argc, char * argv[]) double nstream_time(0); - const size_t bytes = length * sizeof(double); + auto h_A = prk::vector(length, 0); + auto h_B = prk::vector(length, 2); + auto h_C = prk::vector(length, 2); - auto h_A = prk::vector(length); - auto h_B = prk::vector(length); - auto h_C = prk::vector(length); + auto d_A = std::vector (np, nullptr); + auto d_B = std::vector (np, nullptr); + auto d_C = std::vector (np, nullptr); - for (size_t i=0; i(d_A, local_length); + qs.allocate(d_B, local_length); + qs.allocate(d_C, local_length); + qs.waitall(); - std::vector ls(ngpus,0); - { - const size_t elements_per_gpu = prk::divceil(length, ngpus); - for (int g=0; g length) { - ls[ngpus-1] = length - (ngpus-1) * elements_per_gpu; - } - } - - auto d_A = std::vector (ngpus, nullptr); - auto d_B = std::vector (ngpus, nullptr); - auto d_C = std::vector (ngpus, nullptr); - - for (int g=0; g(local_length, q); - d_B[g] = syclx::malloc_device(local_length, q); - d_C[g] = syclx::malloc_device(local_length, q); - q.wait(); - - const size_t start = (g>0) ? ls[g-1] : 0; - const size_t size = ls[g] * sizeof(double); - q.memcpy(d_A[g], &(h_A[start]), size); - q.memcpy(d_B[g], &(h_B[start]), size); - q.memcpy(d_C[g], &(h_C[start]), size); - q.wait(); - } + qs.scatter(d_A, h_A, local_length); + qs.scatter(d_B, h_B, local_length); + qs.scatter(d_C, h_C, local_length); + qs.waitall(); - for (size_t i=0; i{size}, [=] (sycl::id<1> i) { @@ -214,27 +174,19 @@ int main(int argc, char * argv[]) }); }); } - for (auto & q : qs) { - q.wait(); - } + qs.waitall(); +#endif } nstream_time = prk::wtime() - nstream_time; } - for (int g=0; g(h_A, d_A, local_length); + qs.waitall(); - const size_t start = (g>0) ? ls[g-1] : 0; - const size_t size = ls[g] * sizeof(double); - - q.memcpy(&(h_A[start]), d_A[g], size); - q.wait(); - - syclx::free(d_C[g], q); - syclx::free(d_B[g], q); - syclx::free(d_A[g], q); - q.wait(); - } + qs.free(d_A); + qs.free(d_B); + qs.free(d_C); + qs.waitall(); ////////////////////////////////////////////////////////////////////// /// Analyze and output results From e068ccfabc0d9be85cfa9a43c88c18f6a637dfdf Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 14:13:26 -0700 Subject: [PATCH 43/66] trying to fix --- Cxx11/nstream-multigpu-dpcpp.cc | 119 ++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 30 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 99f5d68a3..d06cee517 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -69,7 +69,32 @@ int main(int argc, char * argv[]) std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; std::cout << "C++11/DPC++ STREAM triad: A = B + scalar * C" << std::endl; - auto qs = prk::SYCL::queues(); + std::vector qs; + + auto platforms = sycl::platform::get_platforms(); + for (auto & p : platforms) { + auto pname = p.get_info(); + std::cout << "*Platform: " << pname << std::endl; + if ( pname.find("Level-Zero") != std::string::npos) { + std::cout << "*Level Zero GPU skipped" << std::endl; + break; + } + if ( pname.find("Intel") == std::string::npos) { + std::cout << "*non-Intel skipped" << std::endl; + break; + } + auto devices = p.get_devices(); + for (auto & d : devices ) { + std::cout << "**Device: " << d.get_info() << std::endl; + if ( d.is_gpu() || d.is_cpu() ) { + std::cout << "**Device is CPU or GPU - adding to vector of queues" << std::endl; + qs.push_back(sycl::queue(d)); + } + } + } + + int haz_ngpu = qs.size(); + std::cout << "Number of CPUs and GPUs found = " << haz_ngpu << std::endl; ////////////////////////////////////////////////////////////////////// /// Read and test input parameters @@ -123,7 +148,7 @@ int main(int argc, char * argv[]) std::cout << "Vector length = " << length << std::endl; std::cout << "Vector length (local) = " << local_length << std::endl; - int np = use_ngpu; + int ngpus = use_ngpu; ////////////////////////////////////////////////////////////////////// // Allocate space and perform the computation @@ -131,26 +156,53 @@ int main(int argc, char * argv[]) double nstream_time(0); - auto h_A = prk::vector(length, 0); - auto h_B = prk::vector(length, 2); - auto h_C = prk::vector(length, 2); + auto h_A = prk::vector(length); + auto h_B = prk::vector(length); + auto h_C = prk::vector(length); + + for (size_t i=0; i ls(ngpus,0); + { + const size_t elements_per_gpu = prk::divceil(length, ngpus); + for (int g=0; g length) { + ls[ngpus-1] = length - (ngpus-1) * elements_per_gpu; + } + } + + auto d_A = std::vector (ngpus, nullptr); + auto d_B = std::vector (ngpus, nullptr); + auto d_C = std::vector (ngpus, nullptr); - auto d_A = std::vector (np, nullptr); - auto d_B = std::vector (np, nullptr); - auto d_C = std::vector (np, nullptr); + for (int g=0; g(d_A, local_length); - qs.allocate(d_B, local_length); - qs.allocate(d_C, local_length); - qs.waitall(); + const auto local_length = ls[g]; + const auto local_bytes = local_length * sizeof(double); - qs.scatter(d_A, h_A, local_length); - qs.scatter(d_B, h_B, local_length); - qs.scatter(d_C, h_C, local_length); - qs.waitall(); + d_A[g] = syclx::malloc_device(local_length, q); + d_B[g] = syclx::malloc_device(local_length, q); + d_C[g] = syclx::malloc_device(local_length, q); + q.wait(); - // overwrite host buffer with garbage to detect bugs - h_A.fill(-77777777); + const size_t start = (g>0) ? ls[g-1] : 0; + const size_t size = ls[g] * sizeof(double); + q.memcpy(d_A[g], &(h_A[start]), size); + q.memcpy(d_B[g], &(h_B[start]), size); + q.memcpy(d_C[g], &(h_C[start]), size); + q.wait(); + } + + for (size_t i=0; i{size}, [=] (sycl::id<1> i) { @@ -174,19 +225,27 @@ int main(int argc, char * argv[]) }); }); } - qs.waitall(); -#endif + for (auto & q : qs) { + q.wait(); + } } nstream_time = prk::wtime() - nstream_time; } - qs.gather(h_A, d_A, local_length); - qs.waitall(); + for (int g=0; g0) ? ls[g-1] : 0; + const size_t size = ls[g] * sizeof(double); - qs.free(d_A); - qs.free(d_B); - qs.free(d_C); - qs.waitall(); + q.memcpy(&(h_A[start]), d_A[g], size); + q.wait(); + + syclx::free(d_C[g], q); + syclx::free(d_B[g], q); + syclx::free(d_A[g], q); + q.wait(); + } ////////////////////////////////////////////////////////////////////// /// Analyze and output results From fc1a39dfcca5467669fda8bac7803b4bcd02dc09 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 14:13:48 -0700 Subject: [PATCH 44/66] trying to fix --- Cxx11/nstream-multigpu-dpcpp.cc | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index d06cee517..391b417f5 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -166,16 +166,7 @@ int main(int argc, char * argv[]) h_C[i] = 2; } - std::vector ls(ngpus,0); - { - const size_t elements_per_gpu = prk::divceil(length, ngpus); - for (int g=0; g length) { - ls[ngpus-1] = length - (ngpus-1) * elements_per_gpu; - } - } + std::vector ls(ngpus,local_length); auto d_A = std::vector (ngpus, nullptr); auto d_B = std::vector (ngpus, nullptr); From a99fee73722e896f24b0ca53a5809303e8ca7dd6 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 14:15:44 -0700 Subject: [PATCH 45/66] trying to fix --- Cxx11/nstream-multigpu-dpcpp.cc | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 391b417f5..b2a981b30 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -156,15 +156,9 @@ int main(int argc, char * argv[]) double nstream_time(0); - auto h_A = prk::vector(length); - auto h_B = prk::vector(length); - auto h_C = prk::vector(length); - - for (size_t i=0; i(length, 0); + auto h_B = prk::vector(length, 2); + auto h_C = prk::vector(length, 2); std::vector ls(ngpus,local_length); @@ -225,13 +219,14 @@ int main(int argc, char * argv[]) for (int g=0; g0) ? ls[g-1] : 0; const size_t size = ls[g] * sizeof(double); - q.memcpy(&(h_A[start]), d_A[g], size); q.wait(); + } + for (int g=0; g Date: Tue, 21 Jul 2020 15:36:08 -0700 Subject: [PATCH 46/66] working with inlined methods --- Cxx11/nstream-multigpu-dpcpp.cc | 114 ++++++++++++++++++++++++++------ 1 file changed, 94 insertions(+), 20 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index b2a981b30..716615044 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -148,7 +148,7 @@ int main(int argc, char * argv[]) std::cout << "Vector length = " << length << std::endl; std::cout << "Vector length (local) = " << local_length << std::endl; - int ngpus = use_ngpu; + int np = use_ngpu; ////////////////////////////////////////////////////////////////////// // Allocate space and perform the computation @@ -160,49 +160,96 @@ int main(int argc, char * argv[]) auto h_B = prk::vector(length, 2); auto h_C = prk::vector(length, 2); - std::vector ls(ngpus,local_length); + auto d_A = std::vector (np, nullptr); + auto d_B = std::vector (np, nullptr); + auto d_C = std::vector (np, nullptr); - auto d_A = std::vector (ngpus, nullptr); - auto d_B = std::vector (ngpus, nullptr); - auto d_C = std::vector (ngpus, nullptr); - - for (int g=0; g(local_length, q); d_B[g] = syclx::malloc_device(local_length, q); d_C[g] = syclx::malloc_device(local_length, q); q.wait(); + } +#else + auto list(qs); + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + d_A[i] = syclx::malloc_device(local_length, v); + d_B[i] = syclx::malloc_device(local_length, v); + d_C[i] = syclx::malloc_device(local_length, v); + } + for (auto & i : list) { + i.wait(); + } +#endif - const size_t start = (g>0) ? ls[g-1] : 0; - const size_t size = ls[g] * sizeof(double); +#if 0 + for (int g=0; g{size}, [=] (sycl::id<1> i) { @@ -217,19 +264,46 @@ int main(int argc, char * argv[]) nstream_time = prk::wtime() - nstream_time; } - for (int g=0; g ls(np,local_length); + for (int g=0; g0) ? ls[g-1] : 0; const size_t size = ls[g] * sizeof(double); q.memcpy(&(h_A[start]), d_A[g], size); + } +#else + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = local_length * sizeof(double); + auto start = local_length * i; + auto target = &h_A[i * local_length]; + auto source = d_A[i]; + v.memcpy(target, source, bytes); + } +#endif + for (auto & q : qs) { q.wait(); } - for (int g=0; g Date: Tue, 21 Jul 2020 15:36:17 -0700 Subject: [PATCH 47/66] working with inlined methods --- Cxx11/nstream-multigpu-dpcpp.cc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 716615044..3f9bc9489 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -264,15 +264,6 @@ int main(int argc, char * argv[]) nstream_time = prk::wtime() - nstream_time; } -#if 0 - std::vector ls(np,local_length); - for (int g=0; g0) ? ls[g-1] : 0; - const size_t size = ls[g] * sizeof(double); - q.memcpy(&(h_A[start]), d_A[g], size); - } -#else for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); @@ -282,7 +273,6 @@ int main(int argc, char * argv[]) auto source = d_A[i]; v.memcpy(target, source, bytes); } -#endif for (auto & q : qs) { q.wait(); } From 63b2d557398e31fe68a5d03aa0d97aab1868977c Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 15:39:15 -0700 Subject: [PATCH 48/66] cleaned up --- Cxx11/nstream-multigpu-dpcpp.cc | 43 +-------------------------------- 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 3f9bc9489..876c07d6b 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -164,16 +164,6 @@ int main(int argc, char * argv[]) auto d_B = std::vector (np, nullptr); auto d_C = std::vector (np, nullptr); -#if 0 - for (int g=0; g(local_length, q); - d_B[g] = syclx::malloc_device(local_length, q); - d_C[g] = syclx::malloc_device(local_length, q); - q.wait(); - } -#else auto list(qs); for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); @@ -185,28 +175,7 @@ int main(int argc, char * argv[]) for (auto & i : list) { i.wait(); } -#endif - -#if 0 - for (int g=0; g Date: Tue, 21 Jul 2020 15:39:18 -0700 Subject: [PATCH 49/66] cleaned up --- Cxx11/prk_sycl.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index 599d424c7..94f8de9d7 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -164,6 +164,7 @@ namespace prk { } } +#if 0 template void gather(T * host_pointer, const std::vector & device_pointers, @@ -179,6 +180,7 @@ namespace prk { v.memcpy(target, source, bytes); } } +#endif template void gather(prk::vector & host_pointer, @@ -196,6 +198,7 @@ namespace prk { } } +#if 0 template void scatter(std::vector & device_pointers, const T * host_pointer, @@ -211,6 +214,7 @@ namespace prk { v.memcpy(target, source, bytes); } } +#endif template void scatter(std::vector & device_pointers, From af851be66c170b0a52142db48580ca5e401e2e6a Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 16:49:49 -0700 Subject: [PATCH 50/66] fixed bugs --- Cxx11/prk_sycl.h | 50 ++++++------------------------------------------ 1 file changed, 6 insertions(+), 44 deletions(-) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index 94f8de9d7..ec61f0679 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -140,23 +140,24 @@ namespace prk { } } + sycl::queue queue(int i) { + return this->list[i]; + } + template void allocate(std::vector & device_pointers, size_t num_elements) { - std::cout << "allocate" << std::endl; for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); device_pointers[i] = syclx::malloc_device(num_elements, v); - std::cout << i << ": " << device_pointers[i] << ", " << num_elements << std::endl; } } template void free(std::vector & device_pointers) { - std::cout << "free" << std::endl; for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); @@ -164,71 +165,32 @@ namespace prk { } } -#if 0 - template - void gather(T * host_pointer, - const std::vector & device_pointers, - size_t num_elements) - { - std::cout << "gather" << std::endl; - for (const auto & l : list | boost::adaptors::indexed(0) ) { - auto i = l.index(); - auto v = l.value(); - auto bytes = num_elements * sizeof(T); - auto target = &host_pointer[i * bytes]; - auto source = device_pointers[i]; - v.memcpy(target, source, bytes); - } - } -#endif - template void gather(prk::vector & host_pointer, const std::vector & device_pointers, size_t num_elements) { - std::cout << "gather" << std::endl; for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); auto bytes = num_elements * sizeof(T); - auto target = &host_pointer[i * bytes]; + auto target = &host_pointer[i * num_elements]; auto source = device_pointers[i]; v.memcpy(target, source, bytes); } } -#if 0 - template - void scatter(std::vector & device_pointers, - const T * host_pointer, - size_t num_elements) - { - std::cout << "scatter" << std::endl; - for (const auto & l : list | boost::adaptors::indexed(0) ) { - auto i = l.index(); - auto v = l.value(); - auto bytes = num_elements * sizeof(T); - auto target = device_pointers[i]; - auto source = &host_pointer[i * bytes]; - v.memcpy(target, source, bytes); - } - } -#endif - template void scatter(std::vector & device_pointers, prk::vector & host_pointer, size_t num_elements) { - std::cout << "scatter" << std::endl; for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); auto bytes = num_elements * sizeof(T); auto target = device_pointers[i]; - auto source = &host_pointer[i * bytes]; - std::cout << i << ": " << target << ", " << source << std::endl; + auto source = &host_pointer[i * num_elements]; v.memcpy(target, source, bytes); } } From 45c3a0bba43ff2a979e97cd8a4471ffaa4e34140 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 16:49:58 -0700 Subject: [PATCH 51/66] working but to be replaced --- Cxx11/nstream-multigpu-dpcpp.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 876c07d6b..2ab474853 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -180,7 +180,6 @@ int main(int argc, char * argv[]) auto i = l.index(); auto v = l.value(); auto bytes = local_length * sizeof(double); - auto start = local_length * i; { auto target = d_A[i]; auto source = &h_A[i * local_length]; @@ -236,7 +235,6 @@ int main(int argc, char * argv[]) auto i = l.index(); auto v = l.value(); auto bytes = local_length * sizeof(double); - auto start = local_length * i; auto target = &h_A[i * local_length]; auto source = d_A[i]; v.memcpy(target, source, bytes); From aac97680b2d67f10169f0364136f3b3a5284fb99 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 21 Jul 2020 16:50:50 -0700 Subject: [PATCH 52/66] working --- Cxx11/nstream-multigpu-dpcpp.cc | 104 ++++++-------------------------- 1 file changed, 19 insertions(+), 85 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 2ab474853..3de7a5d01 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -69,32 +69,7 @@ int main(int argc, char * argv[]) std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; std::cout << "C++11/DPC++ STREAM triad: A = B + scalar * C" << std::endl; - std::vector qs; - - auto platforms = sycl::platform::get_platforms(); - for (auto & p : platforms) { - auto pname = p.get_info(); - std::cout << "*Platform: " << pname << std::endl; - if ( pname.find("Level-Zero") != std::string::npos) { - std::cout << "*Level Zero GPU skipped" << std::endl; - break; - } - if ( pname.find("Intel") == std::string::npos) { - std::cout << "*non-Intel skipped" << std::endl; - break; - } - auto devices = p.get_devices(); - for (auto & d : devices ) { - std::cout << "**Device: " << d.get_info() << std::endl; - if ( d.is_gpu() || d.is_cpu() ) { - std::cout << "**Device is CPU or GPU - adding to vector of queues" << std::endl; - qs.push_back(sycl::queue(d)); - } - } - } - - int haz_ngpu = qs.size(); - std::cout << "Number of CPUs and GPUs found = " << haz_ngpu << std::endl; + auto qs = prk::SYCL::queues(); ////////////////////////////////////////////////////////////////////// /// Read and test input parameters @@ -164,41 +139,15 @@ int main(int argc, char * argv[]) auto d_B = std::vector (np, nullptr); auto d_C = std::vector (np, nullptr); - auto list(qs); - for (const auto & l : list | boost::adaptors::indexed(0) ) { - auto i = l.index(); - auto v = l.value(); - d_A[i] = syclx::malloc_device(local_length, v); - d_B[i] = syclx::malloc_device(local_length, v); - d_C[i] = syclx::malloc_device(local_length, v); - } - for (auto & i : list) { - i.wait(); - } + qs.allocate(d_A, local_length); + qs.allocate(d_B, local_length); + qs.allocate(d_C, local_length); + qs.waitall(); - for (const auto & l : list | boost::adaptors::indexed(0) ) { - auto i = l.index(); - auto v = l.value(); - auto bytes = local_length * sizeof(double); - { - auto target = d_A[i]; - auto source = &h_A[i * local_length]; - v.memcpy(target, source, bytes); - } - { - auto target = d_B[i]; - auto source = &h_B[i * local_length]; - v.memcpy(target, source, bytes); - } - { - auto target = d_C[i]; - auto source = &h_C[i * local_length]; - v.memcpy(target, source, bytes); - } - } - for (auto & i : list) { - i.wait(); - } + qs.scatter(d_A, h_A, local_length); + qs.scatter(d_B, h_B, local_length); + qs.scatter(d_C, h_C, local_length); + qs.waitall(); // overwrite host buffer with garbage to detect bugs h_A.fill(-77777777); @@ -209,8 +158,9 @@ int main(int argc, char * argv[]) if (iter==1) nstream_time = prk::wtime(); +#if 1 for (int g=0; g(h_A, d_A, local_length); + qs.waitall(); - for (const auto & l : list | boost::adaptors::indexed(0) ) { - auto i = l.index(); - auto v = l.value(); - syclx::free(d_A[i], v); - syclx::free(d_B[i], v); - syclx::free(d_C[i], v); - } - for (auto & q : qs) { - q.wait(); - } + qs.free(d_A); + qs.free(d_B); + qs.free(d_C); + qs.waitall(); ////////////////////////////////////////////////////////////////////// /// Analyze and output results From 02591d6bbdbbf5df7d5bdf46474fb056a9ef6c5f Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 22 Jul 2020 08:46:23 -0700 Subject: [PATCH 53/66] fix input helper comment --- Cxx11/nstream-dpcpp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cxx11/nstream-dpcpp.cc b/Cxx11/nstream-dpcpp.cc index e72a14cd5..268ab4b23 100644 --- a/Cxx11/nstream-dpcpp.cc +++ b/Cxx11/nstream-dpcpp.cc @@ -77,7 +77,7 @@ int main(int argc, char * argv[]) size_t length; try { if (argc < 3) { - throw "Usage: <# iterations> []"; + throw "Usage: <# iterations> "; } iterations = std::atoi(argv[1]); From f01342163b6da74bace01c58596e8fd990865737 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 22 Jul 2020 09:02:37 -0700 Subject: [PATCH 54/66] add broadcast and reduce (unused and untested) --- Cxx11/prk_sycl.h | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index ec61f0679..6e4356ab6 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -165,8 +165,44 @@ namespace prk { } } - template - void gather(prk::vector & host_pointer, + // UNUSED and UNTESTED + template + void broadcast(std::vector & device_pointers, + const B & host_pointer, + size_t num_elements) + { + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = device_pointers[i]; + auto source = &host_pointer[0]; + v.memcpy(target, source, bytes); + } + } + + // UNUSED and UNTESTED + template + void reduce(B & host_pointer, + const std::vector & device_pointers, + size_t num_elements) + { + auto temp = std::vector(num_elements, 0); + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto bytes = num_elements * sizeof(T); + auto target = device_pointers[i]; + auto source = &host_pointer[0]; + v.memcpy(temp, source, bytes); + for (size_t e=0; e + void gather(B & host_pointer, const std::vector & device_pointers, size_t num_elements) { @@ -180,9 +216,9 @@ namespace prk { } } - template + template void scatter(std::vector & device_pointers, - prk::vector & host_pointer, + const B & host_pointer, size_t num_elements) { for (const auto & l : list | boost::adaptors::indexed(0) ) { @@ -196,7 +232,6 @@ namespace prk { } - }; } // namespace SYCL From e0fe457d61db6d8fe18c31775bfb5cbe69604aa6 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 24 Jul 2020 11:17:16 -0700 Subject: [PATCH 55/66] remove unnecessary preprocessor --- Cxx11/nstream-multigpu-dpcpp.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/Cxx11/nstream-multigpu-dpcpp.cc b/Cxx11/nstream-multigpu-dpcpp.cc index 3de7a5d01..315fca060 100644 --- a/Cxx11/nstream-multigpu-dpcpp.cc +++ b/Cxx11/nstream-multigpu-dpcpp.cc @@ -158,7 +158,6 @@ int main(int argc, char * argv[]) if (iter==1) nstream_time = prk::wtime(); -#if 1 for (int g=0; g Date: Fri, 24 Jul 2020 12:30:26 -0700 Subject: [PATCH 56/66] remove 2D indexing code that won't work with USM --- Cxx11/transpose-dpcpp.cc | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Cxx11/transpose-dpcpp.cc b/Cxx11/transpose-dpcpp.cc index 2dec847f9..6d742f1bf 100644 --- a/Cxx11/transpose-dpcpp.cc +++ b/Cxx11/transpose-dpcpp.cc @@ -123,15 +123,8 @@ int main(int argc, char * argv[]) q.submit([&](sycl::handler& h) { h.parallel_for( sycl::range<2>{order,order}, [=] (sycl::id<2> it) { -#if USE_2D_INDEXING - sycl::id<2> ij{it[0],it[1]}; - sycl::id<2> ji{it[1],it[0]}; - B[ij] += A[ji]; - A[ji] += (T)1; -#else B[it[0] * order + it[1]] += A[it[1] * order + it[0]]; A[it[1] * order + it[0]] += 1.0; -#endif }); }); q.wait(); From 76a8bd5484a5a870e096fffcd8b669d84836ab74 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 24 Jul 2020 12:30:34 -0700 Subject: [PATCH 57/66] fix a bunch of problems --- Cxx11/transpose-multigpu-dpcpp.cc | 123 +++++++++++++----------------- 1 file changed, 51 insertions(+), 72 deletions(-) diff --git a/Cxx11/transpose-multigpu-dpcpp.cc b/Cxx11/transpose-multigpu-dpcpp.cc index adf6c49b3..ef055bece 100644 --- a/Cxx11/transpose-multigpu-dpcpp.cc +++ b/Cxx11/transpose-multigpu-dpcpp.cc @@ -57,12 +57,14 @@ int main(int argc, char * argv[]) std::cout << "Parallel Research Kernels version " << PRKVERSION << std::endl; std::cout << "C++11/DPCT Matrix transpose: B = A^T" << std::endl; + auto qs = prk::SYCL::queues(); + ////////////////////////////////////////////////////////////////////// /// Read and test input parameters ////////////////////////////////////////////////////////////////////// int iterations; - size_t order; + size_t order, block_order; int use_ngpu = 1; try { if (argc < 3) { @@ -84,61 +86,35 @@ int main(int argc, char * argv[]) if (argc > 3) { use_ngpu = std::atoi(argv[3]); } + if ( use_ngpu > qs.size() ) { + std::string error = "You cannot use more devices (" + + std::to_string(use_ngpu) + + ") than you have (" + + std::to_string(qs.size()) + ")"; + throw error; + } - if (order % use_ngpu) { - std::cerr << "order = " << order << ", device count = " << use_ngpu << std::endl; - throw "ERROR: matrix order should be divisible by device count!"; + if (order % use_ngpu != 0) { + std::string error = "ERROR: matrix order (" + + std::to_string(order) + + ") should be divisible by # procs (" + + std::to_string(use_ngpu) + ")"; + throw error; } + block_order = order / use_ngpu; } catch (const char * e) { std::cout << e << std::endl; return 1; } - std::vector qs; - - auto platforms = sycl::platform::get_platforms(); - for (auto & p : platforms) { - auto pname = p.get_info(); - std::cout << "*Platform: " << pname << std::endl; - if ( pname.find("Level-Zero") != std::string::npos) { - std::cout << "*Level Zero GPU skipped" << std::endl; - break; - } - if ( pname.find("Intel") == std::string::npos) { - std::cout << "*non-Intel skipped" << std::endl; - break; - } - auto devices = p.get_devices(); - for (auto & d : devices ) { - std::cout << "**Device: " << d.get_info() << std::endl; - if ( d.is_gpu() || d.is_cpu() ) { - std::cout << "**Device is CPU or GPU - adding to vector of queues" << std::endl; - qs.push_back(sycl::queue(d)); - } - } - } - - int haz_ngpu = qs.size(); - std::cout << "Number of CPUs and GPUs found = " << haz_ngpu << std::endl; - - if (use_ngpu > haz_ngpu) { - std::cout << "You cannot use more CPUs and GPUs (" << use_ngpu << ") than you have (" << haz_ngpu << ")" << std::endl; - } - - int ngpus = use_ngpu; - - if (order % ngpus != 0) { - std::cout << "ERROR: matrix order " << order << " should be divisible by # procs" << ngpus << std::endl; - return 2; - } - size_t block_order = order / ngpus; - - std::cout << "Number of GPUs to use = " << use_ngpu << std::endl; + std::cout << "Number of devices = " << use_ngpu << std::endl; std::cout << "Number of iterations = " << iterations << std::endl; std::cout << "Matrix order = " << order << std::endl; std::cout << "Block order = " << block_order << std::endl; + int np = use_ngpu; + ////////////////////////////////////////////////////////////////////// // Allocate space for the input and transpose matrix ////////////////////////////////////////////////////////////////////// @@ -156,42 +132,48 @@ int main(int argc, char * argv[]) } } - const size_t bytes = order * order * sizeof(double); + auto d_a = std::vector (np, nullptr); + auto d_b = std::vector (np, nullptr); + + qs.allocate(d_a, order * block_order); + qs.allocate(d_b, order * block_order); + qs.waitall(); - // copy input from host to device - double * A = syclx::malloc_device(order * order, q); - double * B = syclx::malloc_device(order * order, q); - q.memcpy(A, &(h_a[0]), bytes); - q.memcpy(B, &(h_b[0]), bytes); - q.wait(); + qs.scatter(d_a, h_a, order * block_order); + qs.scatter(d_b, h_b, order * block_order); + qs.waitall(); + + // overwrite host buffer with garbage to detect bugs + h_a.fill(-77777777); for (int iter = 0; iter<=iterations; iter++) { if (iter==1) trans_time = prk::wtime(); - q.submit([&](sycl::handler& h) { - - h.parallel_for( sycl::range<2>{order,order}, [=] (sycl::id<2> it) { -#if USE_2D_INDEXING - sycl::id<2> ij{it[0],it[1]}; - sycl::id<2> ji{it[1],it[0]}; - B[ij] += A[ji]; - A[ji] += (T)1; -#else - B[it[0] * order + it[1]] += A[it[1] * order + it[0]]; - A[it[1] * order + it[0]] += 1.0; -#endif - }); - }); - q.wait(); + for (int g=0; g{order,order}, [=] (sycl::id<2> it) { + B[it[0] * order + it[1]] += A[it[1] * order + it[0]]; + A[it[1] * order + it[0]] += 1.0; + }); + }); + } + qs.waitall(); } trans_time = prk::wtime() - trans_time; // copy output back to host - q.memcpy(&(h_b[0]), B, bytes).wait(); + qs.gather(h_a, d_a, order * block_order); + qs.waitall(); - syclx::free(B, q); - syclx::free(A, q); + qs.free(d_a); + qs.free(d_b); + qs.waitall(); ////////////////////////////////////////////////////////////////////// /// Analyze and output results @@ -208,9 +190,6 @@ int main(int argc, char * argv[]) } } - syclx::free(h_b, q); - syclx::free(h_a, q); - const auto epsilon = 1.0e-8; if (abserr < epsilon) { std::cout << "Solution validates" << std::endl; From b5666fde38b1e4fca2b57523e99fed339793316e Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Sun, 26 Jul 2020 08:46:16 -0700 Subject: [PATCH 58/66] need a unit test for the collectives... --- Cxx11/test_dpcpp_collectives.cc | 135 ++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 Cxx11/test_dpcpp_collectives.cc diff --git a/Cxx11/test_dpcpp_collectives.cc b/Cxx11/test_dpcpp_collectives.cc new file mode 100644 index 000000000..4a1472a44 --- /dev/null +++ b/Cxx11/test_dpcpp_collectives.cc @@ -0,0 +1,135 @@ +/// +/// Copyright (c) 2020, Intel Corporation +/// +/// Redistribution and use in source and binary forms, with or without +/// modification, are permitted provided that the following conditions +/// are met: +/// +/// * Redistributions of source code must retain the above copyright +/// notice, this list of conditions and the following disclaimer. +/// * Redistributions in binary form must reproduce the above +/// copyright notice, this list of conditions and the following +/// disclaimer in the documentation and/or other materials provided +/// with the distribution. +/// * Neither the name of Intel Corporation nor the names of its +/// contributors may be used to endorse or promote products +/// derived from this software without specific prior written +/// permission. +/// +/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +/// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +/// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +/// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +/// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +/// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +/// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +/// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +/// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +/// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +/// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +/// POSSIBILITY OF SUCH DAMAGE. + +#include "prk_sycl.h" +#include "prk_util.h" + +int main(int argc, char * argv[]) +{ + auto qs = prk::SYCL::queues(); + + size_t length, local_length; + int use_ngpu = 1; + try { + if (argc < 2) { + throw "Usage: []"; + } + + length = std::atoi(argv[2]); + if (length <= 0) { + throw "ERROR: vector length must be positive"; + } + + if (argc > 3) { + use_ngpu = std::atoi(argv[3]); + } + if ( use_ngpu > qs.size() ) { + std::string error = "You cannot use more devices (" + + std::to_string(use_ngpu) + + ") than you have (" + + std::to_string(qs.size()) + ")"; + throw error; + } + + if (length % use_ngpu != 0) { + std::string error = "ERROR: vector length (" + + std::to_string(length) + + ") should be divisible by # procs (" + + std::to_string(use_ngpu) + ")"; + throw error; + } + local_length = length / use_ngpu; + } + catch (const char * e) { + std::cout << e << std::endl; + return 1; + } + + std::cout << "Number of devices = " << use_ngpu << std::endl; + std::cout << "Vector length = " << length << std::endl; + std::cout << "Vector length (local) = " << local_length << std::endl; + + int np = use_ngpu; + + auto h_A = prk::vector(length, 0); + auto h_B = prk::vector(length, 2); + + auto d_A = std::vector (np, nullptr); + auto d_B = std::vector (np, nullptr); + + qs.allocate(d_A, local_length); + qs.allocate(d_B, local_length); + qs.waitall(); + + qs.scatter(d_A, h_A, local_length); + qs.scatter(d_B, h_B, local_length); + qs.waitall(); + + // overwrite host buffer with garbage to detect bugs + h_A.fill(-77777777); + + const double scalar(3); + { + for (int iter = 0; iter<=iterations; iter++) { + + if (iter==1) nstream_time = prk::wtime(); + + for (int g=0; g{size}, [=] (sycl::id<1> i) { + p_A[i] += p_B[i] + scalar * p_C[i]; + }); + }); + } + qs.waitall(); + } + nstream_time = prk::wtime() - nstream_time; + } + + qs.gather(h_A, d_A, local_length); + qs.gather(h_B, d_B, local_length); + qs.waitall(); + + qs.free(d_A); + qs.free(d_B); + qs.waitall(); + + return 0; +} + + From ee43a2d71f7641462e7838e422441fe490dbdaeb Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Sun, 26 Jul 2020 08:46:39 -0700 Subject: [PATCH 59/66] whitespace --- Cxx11/transpose-dpcpp.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/Cxx11/transpose-dpcpp.cc b/Cxx11/transpose-dpcpp.cc index 6d742f1bf..7fe69d290 100644 --- a/Cxx11/transpose-dpcpp.cc +++ b/Cxx11/transpose-dpcpp.cc @@ -121,7 +121,6 @@ int main(int argc, char * argv[]) if (iter==1) trans_time = prk::wtime(); q.submit([&](sycl::handler& h) { - h.parallel_for( sycl::range<2>{order,order}, [=] (sycl::id<2> it) { B[it[0] * order + it[1]] += A[it[1] * order + it[0]]; A[it[1] * order + it[0]] += 1.0; From b94896d9ad896f00239aea9ceeffa556dac76aaf Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Sun, 26 Jul 2020 08:46:56 -0700 Subject: [PATCH 60/66] hoist invariant; start alltoall --- Cxx11/prk_sycl.h | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index 6e4356ab6..0c667deda 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -171,10 +171,10 @@ namespace prk { const B & host_pointer, size_t num_elements) { + auto bytes = num_elements * sizeof(T); for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); - auto bytes = num_elements * sizeof(T); auto target = device_pointers[i]; auto source = &host_pointer[0]; v.memcpy(target, source, bytes); @@ -187,11 +187,11 @@ namespace prk { const std::vector & device_pointers, size_t num_elements) { + auto bytes = num_elements * sizeof(T); auto temp = std::vector(num_elements, 0); for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); - auto bytes = num_elements * sizeof(T); auto target = device_pointers[i]; auto source = &host_pointer[0]; v.memcpy(temp, source, bytes); @@ -206,10 +206,10 @@ namespace prk { const std::vector & device_pointers, size_t num_elements) { + auto bytes = num_elements * sizeof(T); for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); - auto bytes = num_elements * sizeof(T); auto target = &host_pointer[i * num_elements]; auto source = device_pointers[i]; v.memcpy(target, source, bytes); @@ -221,17 +221,51 @@ namespace prk { const B & host_pointer, size_t num_elements) { + auto bytes = num_elements * sizeof(T); for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); - auto bytes = num_elements * sizeof(T); auto target = device_pointers[i]; auto source = &host_pointer[i * num_elements]; v.memcpy(target, source, bytes); } } + // num_elements is defined the same as MPI + // each device contributes np * num_elements + // each device receives np * num_elements + template + void alltoall(std::vector & device_pointers_out, + std::vector & device_pointers_in, + size_t num_elements) + { + auto bytes = num_elements * sizeof(T); + // allocate np*np temp space on the host, because + // we cannot copy device-to-device if they are in + // different contexts. + // we can specialize for single-context later... + int np = this->list.size(); + prk::vector temp(num_elements * np * np); + + // gather phase - contiguous + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto target = &temp[i * np * num_elements]; + auto source = device_pointers_in[i]; + v.memcpy(target, source, np * bytes); + } + // scatter phase - noncontiguous + for (const auto & l : list | boost::adaptors::indexed(0) ) { + auto i = l.index(); + auto v = l.value(); + auto target = device_pointers_out[i]; + auto source = &temp[i * num_elements]; + v.memcpy(target, source, bytes); + } + + } }; } // namespace SYCL From da594fbd588a1ceae6e8b22ce32ed1a73fc53907 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Sun, 26 Jul 2020 09:00:51 -0700 Subject: [PATCH 61/66] progress on unit test --- Cxx11/Makefile | 4 ++ Cxx11/test_dpcpp_collectives.cc | 76 ++++++++++++++++++--------------- 2 files changed, 45 insertions(+), 35 deletions(-) diff --git a/Cxx11/Makefile b/Cxx11/Makefile index a20cdfbb4..e97e44d86 100644 --- a/Cxx11/Makefile +++ b/Cxx11/Makefile @@ -99,6 +99,9 @@ sycl-explicit: nstream-sycl-explicit transpose-sycl-explicit dpcpp: nstream-dpcpp nstream-multigpu-dpcpp stencil-dpcpp stencil-multigpu-dpcpp transpose-dpcpp transpose-multigpu-dpcpp +test_dpcpp_collectives: test_dpcpp_collectives.cc prk_util.h prk_sycl.h + $(SYCLCXX) $(SYCLFLAGS) -g $< -o $@ + tbb: p2p-innerloop-tbb p2p-tbb stencil-tbb transpose-tbb nstream-tbb \ p2p-hyperplane-tbb p2p-tasks-tbb @@ -278,6 +281,7 @@ clean: -rm -f *-sycl-explicit-usm -rm -f *-dpct -rm -f *-dpcpp + -rm -f test_dpcpp_collectives -rm -f *-celerity -rm -f *-tbb -rm -f *-stl diff --git a/Cxx11/test_dpcpp_collectives.cc b/Cxx11/test_dpcpp_collectives.cc index 4a1472a44..4168f4426 100644 --- a/Cxx11/test_dpcpp_collectives.cc +++ b/Cxx11/test_dpcpp_collectives.cc @@ -43,13 +43,13 @@ int main(int argc, char * argv[]) throw "Usage: []"; } - length = std::atoi(argv[2]); + length = std::atoi(argv[1]); if (length <= 0) { throw "ERROR: vector length must be positive"; } if (argc > 3) { - use_ngpu = std::atoi(argv[3]); + use_ngpu = std::atoi(argv[2]); } if ( use_ngpu > qs.size() ) { std::string error = "You cannot use more devices (" @@ -79,54 +79,60 @@ int main(int argc, char * argv[]) int np = use_ngpu; - auto h_A = prk::vector(length, 0); - auto h_B = prk::vector(length, 2); + auto host = prk::vector(length, 37); - auto d_A = std::vector (np, nullptr); - auto d_B = std::vector (np, nullptr); + auto device = std::vector (np, nullptr); - qs.allocate(d_A, local_length); - qs.allocate(d_B, local_length); + qs.allocate(device, local_length); qs.waitall(); - qs.scatter(d_A, h_A, local_length); - qs.scatter(d_B, h_B, local_length); + qs.scatter(device, host, local_length); qs.waitall(); - // overwrite host buffer with garbage to detect bugs - h_A.fill(-77777777); + host.fill(-77777777); - const double scalar(3); - { - for (int iter = 0; iter<=iterations; iter++) { + qs.gather(host, device, local_length); + qs.waitall(); - if (iter==1) nstream_time = prk::wtime(); + { + size_t errors(0); + for (size_t i=0; i{local_length}, [=] (sycl::id<1> i) { + p[i] = i; + }); + }); + } + qs.waitall(); - const size_t size = local_length; + qs.gather(host, device, local_length); + qs.waitall(); - q.submit([&](sycl::handler& h) { - h.parallel_for( sycl::range<1>{size}, [=] (sycl::id<1> i) { - p_A[i] += p_B[i] + scalar * p_C[i]; - }); - }); + { + size_t errors(0); + for (size_t i=0; i(h_A, d_A, local_length); - qs.gather(h_B, d_B, local_length); - qs.waitall(); - - qs.free(d_A); - qs.free(d_B); + qs.free(device); qs.waitall(); return 0; From 0a74fa00b311ab326024f84285f7cc72b9b44710 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Sun, 26 Jul 2020 11:15:45 -0700 Subject: [PATCH 62/66] work around issue with CPU-only in "multigpu" tester when i use 2 devices with CPU+GPU, it's fine. when i use 1 device with GPU, it's fine. when i use 1 device with CPU, scatter crashes in memcpy. i have no idea what is wrong here... Signed-off-by: Jeff Hammond --- Cxx11/prk_sycl.h | 36 ++++++++++++------ Cxx11/test_dpcpp_collectives.cc | 65 +++++++++++++++++++++++++++------ 2 files changed, 78 insertions(+), 23 deletions(-) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index 0c667deda..38daa35f5 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -100,10 +100,12 @@ namespace prk { for (auto & p : platforms) { auto pname = p.get_info(); std::cout << "*Platform: " << pname << std::endl; +#if 0 if ( pname.find("Level-Zero") != std::string::npos) { std::cout << "*Level Zero GPU skipped" << std::endl; break; } +#endif if ( pname.find("Intel") == std::string::npos) { std::cout << "*non-Intel skipped" << std::endl; break; @@ -111,15 +113,20 @@ namespace prk { auto devices = p.get_devices(); for (auto & d : devices ) { std::cout << "**Device: " << d.get_info() << std::endl; - if ( d.is_cpu() && use_cpu ) { - std::cout << "**Device is CPU - adding to vector of queues" << std::endl; - list.push_back(sycl::queue(d)); - } if ( d.is_gpu() && use_gpu ) { std::cout << "**Device is GPU - adding to vector of queues" << std::endl; list.push_back(sycl::queue(d)); } } +#if 1 + for (auto & d : devices ) { + std::cout << "**Device: " << d.get_info() << std::endl; + if ( d.is_cpu() && use_cpu ) { + std::cout << "**Device is CPU - adding to vector of queues" << std::endl; + list.push_back(sycl::queue(d)); + } + } +#endif } } @@ -130,7 +137,10 @@ namespace prk { void wait(int i) { - list[i].wait(); + if ( i > this->size() ) { + std::cerr << "ERROR: invalid device id: " << i << std::endl; + } + list.at(i).wait(); } void waitall(void) @@ -141,7 +151,10 @@ namespace prk { } sycl::queue queue(int i) { - return this->list[i]; + if ( i > this->size() ) { + std::cerr << "ERROR: invalid device id: " << i << std::endl; + } + return this->list.at(i); } template @@ -182,21 +195,22 @@ namespace prk { } // UNUSED and UNTESTED - template + template void reduce(B & host_pointer, const std::vector & device_pointers, size_t num_elements) { auto bytes = num_elements * sizeof(T); - auto temp = std::vector(num_elements, 0); + auto temp = prk::vector(num_elements, 0); for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); - auto target = device_pointers[i]; + auto target = &temp[0]; auto source = &host_pointer[0]; - v.memcpy(temp, source, bytes); + v.memcpy(target, source, bytes); + target = device_pointers[i]; for (size_t e=0; e []"; + if (argc == 2) { + std::string a(argv[1]); + if ( a.find("h") != std::string::npos) { + throw "HELP: [ ]"; + } } - length = std::atoi(argv[1]); - if (length <= 0) { - throw "ERROR: vector length must be positive"; + if (argc > 1) { + length = std::atoi(argv[1]); + if (length <= 0) { + throw "ERROR: vector length must be positive"; + } } - if (argc > 3) { + if (argc > 2) { use_ngpu = std::atoi(argv[2]); } + } + catch (const char * e) { + std::cout << e << std::endl; + return 1; + } + + auto qs = prk::SYCL::queues(use_ngpu>1,true); + + try { if ( use_ngpu > qs.size() ) { std::string error = "You cannot use more devices (" + std::to_string(use_ngpu) @@ -68,7 +80,7 @@ int main(int argc, char * argv[]) } local_length = length / use_ngpu; } - catch (const char * e) { + catch (std::string & e) { std::cout << e << std::endl; return 1; } @@ -81,11 +93,13 @@ int main(int argc, char * argv[]) auto host = prk::vector(length, 37); - auto device = std::vector (np, nullptr); + auto device = std::vector(np, nullptr); qs.allocate(device, local_length); qs.waitall(); + std::cout << "Testing scatter-gather" << std::endl; + qs.scatter(device, host, local_length); qs.waitall(); @@ -122,15 +136,42 @@ int main(int argc, char * argv[]) qs.waitall(); { + size_t errors(0); + for (int d=0; d(local_length, -10); + + qs.broadcast(device, host2, local_length); + qs.waitall(); + + qs.reduce(host2, device, local_length); + qs.waitall(); + + { + double correct = -10 * np; size_t errors(0); for (size_t i=0; i Date: Sun, 26 Jul 2020 11:23:04 -0700 Subject: [PATCH 63/66] broadcast and reduce tested --- Cxx11/prk_sycl.h | 6 ++---- Cxx11/test_dpcpp_collectives.cc | 10 +++++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index 38daa35f5..a5d1d52c9 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -178,7 +178,6 @@ namespace prk { } } - // UNUSED and UNTESTED template void broadcast(std::vector & device_pointers, const B & host_pointer, @@ -194,7 +193,6 @@ namespace prk { } } - // UNUSED and UNTESTED template void reduce(B & host_pointer, const std::vector & device_pointers, @@ -206,9 +204,9 @@ namespace prk { auto i = l.index(); auto v = l.value(); auto target = &temp[0]; - auto source = &host_pointer[0]; + auto source = device_pointers[i]; v.memcpy(target, source, bytes); - target = device_pointers[i]; + target = &host_pointer[0]; for (size_t e=0; e(local_length, -10); @@ -163,15 +163,15 @@ int main(int argc, char * argv[]) { double correct = -10 * np; size_t errors(0); - for (size_t i=0; i Date: Sun, 26 Jul 2020 12:13:43 -0700 Subject: [PATCH 64/66] there are bugs somewhere Signed-off-by: Jeff Hammond --- Cxx11/prk_sycl.h | 9 ++++-- Cxx11/test_dpcpp_collectives.cc | 53 ++++++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/Cxx11/prk_sycl.h b/Cxx11/prk_sycl.h index a5d1d52c9..01bd531e0 100644 --- a/Cxx11/prk_sycl.h +++ b/Cxx11/prk_sycl.h @@ -189,6 +189,7 @@ namespace prk { auto v = l.value(); auto target = device_pointers[i]; auto source = &host_pointer[0]; + std::cout << "BCAST: device " << i << std::endl; v.memcpy(target, source, bytes); } } @@ -198,17 +199,19 @@ namespace prk { const std::vector & device_pointers, size_t num_elements) { + std::cout << "REDUCE: num_elements " << num_elements << std::endl; auto bytes = num_elements * sizeof(T); + std::cout << "REDUCE: bytes " << bytes << std::endl; auto temp = prk::vector(num_elements, 0); for (const auto & l : list | boost::adaptors::indexed(0) ) { auto i = l.index(); auto v = l.value(); + std::cout << "REDUCE: device " << i << std::endl; auto target = &temp[0]; auto source = device_pointers[i]; v.memcpy(target, source, bytes); - target = &host_pointer[0]; for (size_t e=0; e + template void alltoall(std::vector & device_pointers_out, std::vector & device_pointers_in, size_t num_elements) diff --git a/Cxx11/test_dpcpp_collectives.cc b/Cxx11/test_dpcpp_collectives.cc index 9f1c7b12d..001920d36 100644 --- a/Cxx11/test_dpcpp_collectives.cc +++ b/Cxx11/test_dpcpp_collectives.cc @@ -93,11 +93,21 @@ int main(int argc, char * argv[]) auto host = prk::vector(length, 37); - auto device = std::vector(np, nullptr); + auto host2 = prk::vector(local_length, -10); + auto device = std::vector(np, nullptr); qs.allocate(device, local_length); qs.waitall(); + // device out vector + auto device2 = std::vector(np, nullptr); + qs.allocate(device2, local_length); + qs.waitall(); + + //////////////////////////////////////////////////////////////////////////// + // scatter and gather + //////////////////////////////////////////////////////////////////////////// + std::cout << "Testing scatter-gather" << std::endl; qs.scatter(device, host, local_length); @@ -150,9 +160,11 @@ int main(int argc, char * argv[]) if (errors != 0) std::abort(); } - std::cout << "Testing broadcast-reduce" << std::endl; + //////////////////////////////////////////////////////////////////////////// + // broadcast and reduce + //////////////////////////////////////////////////////////////////////////// - auto host2 = prk::vector(local_length, -10); + std::cout << "Testing broadcast-reduce" << std::endl; qs.broadcast(device, host2, local_length); qs.waitall(); @@ -165,7 +177,7 @@ int main(int argc, char * argv[]) size_t errors(0); for (size_t i=0; i(device2, host, local_length); + qs.waitall(); + + // reset host and fill with input + host.fill(0); + for (int d=0; d(device, host, local_length); + qs.waitall(); + + qs.alltoall(device2, device, local_length); + qs.waitall(); +#endif + + //////////////////////////////////////////////////////////////////////////// + // THE END + //////////////////////////////////////////////////////////////////////////// + qs.free(device); qs.waitall(); From 0193c93bbac5645f434414050fc258f7684fb76b Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 28 Oct 2020 07:22:17 -0700 Subject: [PATCH 65/66] never commit binaries --- Cxx11/stencil-2d-sycl | Bin 218944 -> 0 bytes Cxx11/transpose-2d-sycl | Bin 90472 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100755 Cxx11/stencil-2d-sycl delete mode 100755 Cxx11/transpose-2d-sycl diff --git a/Cxx11/stencil-2d-sycl b/Cxx11/stencil-2d-sycl deleted file mode 100755 index 1ede69183b5049779fe0bb80e8a6c17a40a983a5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 218944 zcmeEP3tUuH{+|(0GHsx=sBEK}3d#)BOnMFqdM5{^w4$=W5W_+x9MoRB7*JlvY3aFc z+qUd(+gks2qwR&33cj$HVNZKXnRUl8L*0Cq{J-CG?wz?a%&4_1>INU3dw;)s-sgAD z`Tf4<^_y(hur5}s#rW%Dxx_-MWuQZ_bX_E9h9k;}?E& zgglG82-0Fa<0WZfX!JKv|30sa0~BcoAC^AiH~2JC$S3^;{~Y?OeCC}hN#kirkTm_p z@ik4-mNbWa(Vu@)WDLgB>`%m3p{JlCQ~J}NJN%L~o@Re6o1{Br29rEPghS)0bkl<| z3PevQUDMy1yQIH0Ug^(xCP=o$cpBcBM*d%>e_z(c0gAMP4@%WiC8E$Q7UOC7yBz+Q&i(zDmH?^--36xBOVEYDrA zu_meOnF|}g+H$uqf6c9VeXPZCPcB&Zz@10VtjS+Fv->+atqa!N=Dg*lA=&S5OwE4Z zbz6^(JsT%BU%v4lJBQ@wUFtb*)O&aKn%iUOV}%QsKVN=xmz`hyt;C2Y{5cmXKRTQV z+H*u?{-r3SNbX;V;{IM#awPYcb&t&d5^+azKQIdak5Tw{qw*q!vnC3^>d46MCr8O| zpD5+_n<(j-8zubdQTTaL%I&Er>G>c^dOkn{k5mpXqn$+JpNRGviSLNQ-xwwQtx??n zDN25iiNg0p;XfTkel|p@->;&SPoF5|a8;Ci`J&Xz%~9N67A4*%qVV5~lAdRyl;@LC z%IELsq$1U0Y?OFcM{(a8rCt9jO8NgIN_zT7DWAKdxW6$em&eUapSf{(>m!yda8uj`xw|e3v# zOZj*U=}fSkXeoF=hGTr(tmmsj*Fz2dN4opD4~Dw$hk9Xp>hyRGePb-bA-_h~zw-2e z=Aj>8It%pl8~HjG2xjuh8R7(9=oXf`1q3 z`7KCy2-^6WqnBrbo=(I4e3U2C?+6S3kEoYKlxKxro(V`F|NfwtL%kl3Q4SyL;TMF} z?{xTK`b+ijlNkvApodL$k1ss?Z0cTvEcy92O`0+}f2w<2nL9rpWU0HjtZ3Z${9B4{ z%P+*6yU3D%?Z^?M-RI}$7tWZGo}S+S#&J_87Q#!}#K|`~3rD-t(+i8omF2t3#!Ylj zbrxoi9*3Xo^9!d;Djio=l$|@G5J8M_%I9cz|FrykL^5T1eo4{fo7~0uMP+4E%H-FP zKRup|{G9x8rKLra$1|o$h-$Pu!(CK1Y2sugei~B`hD`IsDM(-Y1ZSUDSW+~uEE`|? z=R2oN<$t%#`4}fW7tJUvDs@ksGFb*Ts1T{2JY_)sRsGZZPrYrbyJ%8=Vab%jTLw&? zGCfSy__f z^T(ASyE1SH#W-1=Nb;DgM#3ZIWZaDtr=_P;W=h6Qotp2yt+WU^nJ^_=lnFmb5x5be zCPHJeg?Xo-Or01h1g0ZUDOvrQNs*2li*A}YIlpk6yRaC+jW5YHK9rW173EKxSmrJt zS0Ym|$ee=0aZ}xfQ;`}}etsYVHj)`L@=J@#rcRkWu4JP7w)|;nfl&0SGwLalcS4=|KqL#{v zMBO7M(YgX60LEzdz``lzZrveTPM|W8@(iQON~YX|4(LDYHqg?Mv?-I(@X8C_*^^N0 zB5@PSip*7y?sAgsDW~3A=AK~TucdU#boQe+6`~j9$8<~nm6NAWL~5@nnp!@oNOZEd z6_)VZ0Flj+{R>M5P6aC{wrmLyc;myM@kQ=&6HC(5M_)74l~Xin)QvZfp_6b2gGPJ@~0M+ z6cxIsl%)@lZ~2lR=KQ={%Ztj3($knClLJ%Pqz@=YD+>e{t>kVjpD>}QEPvvp(vnCe z*FOzC(Uii8Xxmp$bQde*&@UI2WnaPkpI@v~xg$rX=cfNfpG6qDadh zG)`~_VXQ=2L!!|0kUq8i#z_-VuuKA4(S-8Jg&a4W!={BsgFbtN2vV>00fke>7v-ZS z$I*F6(7Iq4^`5TR`!L99Hv2bp7DnyDDa$=l-RhsR2c{3m`1v)fhmzi(b!CVWt23Xn zjq&29i5Lyb^3gc>WVW3bB9^-DDf^h?4;GR;# z2Q*)Oyg?RbOmtf&6-}C2Bp!vOw^`6YiY8B^!@^0WaL9mk9evWciSSy4tdGNo8=+f? z$^&cic+2piL-PmpOS24jIfo3*@87R~M)0-Yd4c!zf$}vk53UFHOHU6WGb}^%@-l}w z^V2bSiI8-6{X^ZQ>w(L!FfCxBho(hYC?8fSq4gg=<4 zg5L!^e&Js?OLu_@)4jptJO9Jn0WTJA7$*P2u(R|4Cm!!~!9VHCitkq8ivIW}nY57{%lNL+)2@{cpc*|y!`$_sY zx_?NxKf>~3i2JT4_gxW(j2{onxE;SkBD-7Sdq|%8n^e%^8($Z?;50xzOYuKkOEn?=zpJ7*$ zPwd?IGwgKoiN70vh8<1*6$TSe!!9OYBLDKEVb_yCQnJLafme_}%)~eBV)6~!n4b+h zoqPlTC*QD_$-jmS{m;Nj$j>&IcpA6_`7RURz)8p-ZsHsEJNf2$hNK!<>VTC*y&GPV zO?*^%;4j6*2P6^rOEd8S83g_YoA`{|_;Z-}Ml0br#l&ZtjK5qHU$#{FCC|iH499pD znD}hd#$U0C4~Q`ES8C$xO-Fp2VdCo|EBF;Ae$PNC7RxLX|0ol`#>BUo_;XEswt3^P z*2L#rlkqpt#5dz~btXRNlZ?N56aN^4iD!d}-^;{bY2x=b@taNjV@>=uCjN0Iev653 zV0QehnfS+>+_#$eCz$vU{oA@mz{$LYdGx0eWZ2YyF_*n)M z&mAUyhKX;{@mThe7nu0*CjNyceu9aAk%^yV;$LjyC!6?}nD{9s{-q{fzA@>EP>7v=q!QG66h>}&JyS>fzA@>EP+F-1h(3K zwM(ho7_WHZzR2%lQK}o)ucZxtr+-YE z5VWsZ(C?5Y1nFxK^y{PvJ^Jbdy@WI&M_;X=7my~@=&KR*zep2e^i>G@Y0@;CeWil_ z3u!`%z5+o%N}7Ek2+-HM2aMBiBu(hg*COa^NfYw(H4FL*(uDea4T8RmG@(3SouG%1CWPm!74!w9 z3ElZ>1l^xBAv<4%pwA*rsLod^=u=4(qVp99`UKL1=6tz=KAJQkIiEw&-ANOQ^Q8&8 z3u!`dzGOk~x&btyH(!FFw~!{}=CcTT18G8SzSbX^|1U`=lWr08$D|3Z`I-g&4rxMa zz6L?RPMT1fuTIcQNE1Ty)e3q6X+med8bSYyG$Auzg`l4%O{mOQD(JtECPd~d5cH#@ z361%31^obNLSjCLpl6dN6y{44^xdQhf%%dJeLHDFU%muEmyssq<+BL-X3~VZe62r- z`X^0@%hw|4Ye^H@@-++k3etqMd<}xWj5MJvU!9JxlgqnQ0fhcv-nUxT1uCq0yOouHSH zCS>NT74!nq*`#X({V&qPNLPT)U3@=QdTQ$-ii^3%auLY|c|e`=M1AKjF8F7rMSB=k zmABM(vPGv~MA8M#{w@7mr@HzEMg3B#{Hir~jI*hsW=5t(X=+RskEVu;(69CW_}VU( zf7j6c(h2=+Cs!c_7E4Wi0isea=CAvhda~F>9xerpI%U-GwwIxH8(@ zaoR)3g-lgTAXSt%JyprtX(U2ZMR|*os&m?>>MtUI2&p<=Pt`ahRU$z#!ueQxicrp*zt+vLY@s&HQ?gYQnGc?|-w5cOVmtUxO7(-ibLIfW@q>tk~Lwg7x zq|1*md?YEJvG&0o7hB~r$90)EWacl`)*$2ByZAw2`jh!{B7bh@Pa=LqRf$5`#G*nn zMWI|9$YPIRq4?c|j^=o?k&5pLVOj({v#&?kV$H54cXNL$`7W$~lWofAS>z69(QdX@@$}u7-C9K+S z-DBHVZFW3rD_e+qju7?1iYFTx`_3%ufx69R-DZor%@JbWJ>WWtCGjEz$#ybEh0)px z+fW8*Stl_$=o~HsEqVfUR*Jer`xvF|PlmqQcs2A@bU%u>p52$Ht`@(QtE^g*kjl6} z8{a*1OEGA$C7mDEGd<3l6D`!#Gs!OQnD6mPZMx2PE#lXf!=Xsu7F$AGbC%VDv8DpG zX&Jrv2lP)`H3LGTPlT8BZ421VX`8`&g^FtBjw9~s=I>VTz`MHCJG8BOsqLYL>Q8JB zH&!*eW7Rd+EtOqe_bLSSH1n@Ep=T~d6)duY$6|O;vb7ONmv#?+Cdl$q)ZL4Q(BC`% zZfipdhA)a2KGtfDk}+semd>b?jMEnl(;0t~j9!aks~O2!?LNUk;;vg-XT6T&X?^UA z7K>>>w2g_ID6=mI$aeJAui5%Fmc?zgUEZ+Nw+BTK9E;d~CfKfiZGv#IMYy<#ISCB6 z6Kb~D&U)K-uPE&b6qhA;tL@qbGPN==w}8kqrDj6Sl^ozQugo07L2iaq{XSFG#|;IU zTtGckn};}L0e=NMLOUJ-De7{4@KQXF#RUd0D=IEbMDBp+b|L~X*S2gC^`u>ZGLxa- z!WgNkYJ-HQxRbBcB{U#`wyMWrs)R+66e$WRg_sBG(EyKff!J3V*B8&c+> zXZAGJ4YvEd@?(PWaWX!FU-diiFOEY^+A9BvS65+6ofUq1)YFckZ>1gzhj>duG_kfh zS75+IG)YECc1APTmC>T8jZWL}H>(?*wyZZ5)n!+NhpJQHA;(jmtScE~?a4XS+iX?i z8J)VGo$E~G#i=f1bh(PU###A8LK&P>4S}AKoBocrSkDVuw~KCE_OzVpuiPiXskOXY z=IoJoL^M#0Nh;28yynw&R@Y z#$TU}VoEWJ=|?6>ZB!b+NOpNJBG{^)hsY^&nVckloZvCuB*ogGR5n;qptgH!;6zH0 zigvhwwx0HH6qev+shhN$AY!6(mi?XQnHVXt-`c81!EeP~Lfy(9>OYQx`5I+L_Qj@N zjm`VgR)*80-MiTk**^y&tTZBYsb6bX!9kP=KZj`^7GYl02v>b4BRn<`;ru{^M;j3; z9=Bc5)`_%X0BWDdTiQpto(YVY$LsVc|BCQrp{@=@`6u|Q_kmn=ADU{#ov2~Omfb3x zEueg8cZqQ0CS2aW;jRZp5!P_?h3haCQiP_xw@K!_Paqka0?9~%H{psoPttrM#SwFU zE8`5UT|dhyV$R3uQ9i}+Aikxg*z0Axy1|y3Wl!UeXl%J`Y^92Kj6GRfi{>xX4HpuU z=E~S=t9k&Xkhz%Jt^cIp5FPBHmCB_F&Wz8o{M~a)JXM2DIn{0M@w?l9j9QkN^-HCZm(71LaBvS$!~*Iazda0lXU&# zfY!(TVfFFrcJ=Y?w?=(D2%+A4eXM5IqE*&1jMU6`2iM13Y6>o*$p z5yO5$v=k>=in@I7b#op7Pe;|a?vtZd&}pYwEdCxlj36Fvllp=sj=2H25ukdA?P*^KKl$7Wtn^Y?n#zkc-3RNS>qOjVQRdJY*} zv5XW#O<9R-`0l}X>F71-=s^rACP#mTBma2m;_uSMH=!8X8ePT_j5v zh0?`2p)M|l3oV7|JqvZBt%4&zQWcXa) zbeUd};WpJW#DR*=F}?>hT6cEXpm>?6!HAW-_!|U*O74baYC{lWJ!Hhv44QyFY;S-R;%Y3C>^hbyvO+m=yI`H%+ zv57FlRHl~S5kwc_cxXn?(w_&4NK@P82_k?`L;xbtzXSsiI7NR3!0!kEZSh9R5Cgf{ zhm@vtaOGM$au6Su(l?wKcH4p;>P*!PC&jn5-UUAj$Hfyl9T z=2!TJGMK`op{Axolk~MPd7ilIjNrq2gnzLyd*UuMz6W{)Th)Ac6}`f6V!(ND)!T3t z9!iM}<@@aL0mOZ$2%$ei7|KM6`iqR!+_X-n|5@b2e>NR2XHFtJzCd?;HympVQ7XbH z^L-0cK>Hltw2%1n9)5hYpnkI((jH-b)Ed+*QV)PSID(uiG9YAb)0fC*m>{2k&t^e> z8u<(t72iE^iCIwuxmZ+I z+~%Q%4D3~qH!%C{3?M1UH++F_5acJBG>E}JF~%5z{D2XBxCr`?Mu=#c@qMp?>@r-1 zhw@h$N?+r9Ae453Eb1?^Y(Cy1)BiQ{AqDvd<|MM?TXo0t;aK}ON<|9tLs0j$-XOIE z{v3%P-(sLULXh7g^`Sv6CiNPqgCocVA_GE@Pv>ixAYTriS&+X#KEnn1B>Iac$O*v! zM2}p{0D^+t%wdx~@|v}JhqszLd17-hB4WtwrOKnz_7>(fTC%9 ziFCQtJuY>#b~97s;p7h#=AN8yRQD`^)?&f37-OodL#Af*c^S;eQ4*d%gUu9kZ+lPK z0o0|4;r0|Y(WIS@)QI^96aGGPV+w>ko9dL?KG>>Pv5Tpnw4}XT!cc&gso; zQ!01FPVae{C-$wm4|CoD-xT#-#rEvSfQ6Mt?P6Osx4XK4{|f!EP-$G(g{|OEs2U*1 z&P8qV?_?BA`Zh)VSS$LJm4@i_fQV(?y99v{-Z{hYpDzTB>YnsN1Q1N&HbR)=Skc#?{y+wa&d^T0tiO2G9MU{OApUBKRO5>ME z^+Wbr(r~j9PlAk#lklUx%eb2?bN`r$@KfRjo^8c5(E;UB0NR*)e?j{6J|ui9UYEVp z1!y~0Q|@P)Qey3LG9|SK?4-x+YY=KMjU72j4IHq;ro@5uDS!^UDxPkbXk>$&;qts- zuR!L5_@y)x_Zz?b>Sy7XU+q7BIU@^#Rl-sdAZGo?6jrRo(4`D58WqRtq?{@0V59(9JaKIDBvwfr?LQu# zIDZUJa-D?XNvxBIXY5!y0tB07r2_>g2F-b6K~E&S^uJdBK)X#-+zV77avH z{lYbD^;Mz(#{jY6`#dE0CcLv7xcE#_fmQZ8FkR>Zn)y_*_y#Q>og1o+qgGPxW(WL2 za1yrR#rvu~$;z7gQqgM;W0`ij5VI)5#bfql`slER3t5x-MGlP27Mvk6%M_|#b5MT! z{kAqt&JDKMW<874v3YE`PmYCUPrB*lEY!gi8m?|8s>LS>vxLkgNl-q`a_QG*{_sB zAFw1P^>viZJ4(g|+dW07G{vh}l|na^8{4y+IcMciynUWi8e6*18M}e5r(qgf{SNCd z#G)5paQh4ZL_AYgPO$Y}37;z|e1pANmM&Obu?CSYLBnYjjYh%mWA+B5bc=25 zQp6T_TfU)ufw!W(-j=l?|ycNQwb zWqY=TRrV&TtmnNbX1H%gwe|Gi9ktVlYD4Yh;r}&pH;Hf($JhtM)e=Utp1DJ6USZsv zcx8U-L=su#h(KE+okS|{f53wN_aI~w^MJ> zIcu@ZZIX221p0)OLtpWCH9X0V&V2yQFQG`dJmab6tixPL2%7(C$Xtkd4q>lzA!djB zoNS;LW2^cO^+Yk)imLT}3Bo9WFwGpwNiM_+>Z0t6{qfRa`X}s_4{$lbCV0YLf!GgH zQE||-Rl%eaMYVDjit1M&{3xsyTz=%RlqZN~NI3tAX7x*uZqYup$KY0eaeqxtiZ&Y@ zx_N+ZAl*-6Z-~DOgObAC5t0`iP7h$kVL??)`9;j?@Hd%N0eke|D}lj3P?qFW&0z3E zPpqh7s%GU|C|`a@2wloAU_YCH#vpcf1ksFL1$G^w00`kC~e5pv^FhMmQ5u ze;4$+FwAQXy^8Oixb${T@EG z6F_Rc`ICSj-|QdoQ;i=qJ#7^^cj!bTiOD+g0*P@TSiHlrUCU@*(tfg4Jt)4J$Bvo0 z>*L`%V2|t@53B2{V_p&Pu!e|u*hRMLo#A*G3sn0pvTw%2Y)HIBwytCWh2vq1On8{^ zKT3#G5Dz=q@O7Z@FvJ>)hs8r+B$%<3aR-qM8npY1hYkL9Cmwb<<6-lfD6IcE9#(;H zLh-QAd+Smp@vwa1z`(;|l%I@;QCy+%2Jo=QE2-eLQy7UOcj94B`1g&6{bM;>eU-fy z1se$uJExbZz_a)LbU3C=?I-D<$DD!NWdG3@e*s z8~2WfecjNBhegK2#<53@gokYg9u^b8!vKe(-;IEW0o)LHmaN2@888fQ9u# zIK;vRiRaJ5!fv-QjDB$+9~B7;y9knC#==}|HQ`v;ac~6vEr^AkAzeknX7&#YTMhw( z&i>Ej{ldcjE?h^{$@^e#l`8P~#SC!YSeOI#r=3a1;aFIIL=uXHC4&>d!XE6oe^}TZ zC}sl-`wia1v9RkzIA$#DBjzp~3wuH&(ZIr9l1?I(cLXeKhA4|jSlFglWoZpUsYxu% z71iw$-EDW4a1aaoKac?|>~VgJf`ye~4rH;w!iw>x4M0R%KmMfRryUkn)s>n`lQtGP z{fStZ6J8DLr=4m;ENlnj@5I7PSlBnH+D z6${&lkGg+>h22U2?Xj>I83v;^VPVgOdCjC(9Sb}CgeVZyovc$&T%7TJ?^xI#<~Yh) z&$5^C4W+VxNdp+N1cRzXku)Ym$HE>vK1xEi!Vwa3r}5o{VztM@t}q*#*HdqF-IATK2O7m%#qYlVM^jqq=*=PPiktFYEftTJ=UMGhPbZKDHtHZKemOVv7RVPJtgs)Ob8=pL36wvR?j8`{QIqK!0Zli}7}J&wTo z$i0?lE#~4-p2=|`<(Vi1FIsulJ)xIp^D`04^Lf@r)HDMPI#}g-*^m1!&#t;`{t9k? zMtQz0%d;gWq&yp6V8qeNv*mfcJdQ%VUiBT@_c&NzRPoh5Tz#V*KqqY%5%Le&jM>md7kpG zy_ILe`+9kneipGjm$Ehv#qzv%=f2DHSRqPHT0giAEzgkoo>`d9Q`GMj&&i5sfPy7o zxq`7l8wTK5@l3_IW31HMpnZwGj*4d}*05SQTTQ@{D_OZLj|(V|o6pUJO7&~*_}0wI zVewXfoOmdnId(+^iz70)n1%}{YzcGhgZWIwVj`|0O>=pk5wW{~PUndr+rokzE`!8_ zL2`i&58Oy*kOg{>rFxJFT%B6Euo$GpR(*OLoK`mMQPgU#PQ-&tk1;+t+VDuMw$N8f z+z?5%@EZ;xNUUz!a!RI-vPD)6Q>Rn*`?rs7#x zE7DOd9)TslNsKkp8sFi;HNL~;8sAaI8sBbMSQ&YZ@9^Ln-{EqN?i%}X}dmg_4Fo|V;%YD!}F(ZV@<(bc*Bk?X|J5)RmJMftQTXdwE z;*g#6ZCd(2q3)8Sb^8aqoa#C_`QI+brfT*X_=ghd=ouD7$d0e$)_~4jHVg5Y*RVN| zaqa1+#Q3tP2WlQV&H~JHv}xaC&A_74*;uSs`iKMUwc1ojkH5D#A8D<2?9(8Q1mPQq zAi16t&8;A?6u2kUc^o>3CH2GO{YPecLRwme(-Yp(OzwLK_t6_!q)3K|NJ5tEBa-kX z`@u+tiAbWZ)(;B80bEQ(c{8g207gSNbSEk+u>J$b1LjulikY6{RIxE>x7yrhU!Hq6 z?b4WnM=~w`^H6Y#8sk4l+N5vI!^Dv1C|iQnmWVdugiT4KG{)ak@mweuOU{N9aYjWv zY`&%9xHBAfHgh9KIvD1Py49)f;RuUirUE?9(Xq4QpW+z?{bD4J*vqmPb9gVMtt+~F zeV-AxSbd;@{CBfy);xDw^hI(9b8_TOFKJ_ zYnf+|mqK1KdDxvXMr1dc%!NS7@!){Z18!15MZV$o4j1<0(&n?<`Fzf3||APL6= z#2pFY7Ls26D>I=l$ngc1Jb2|pot{`bs>3|Om_3RirkB4DJxcL+*~K8!J>U^(9h!tS z+0e;g0FIX3QV*?%YS*a~c(#m+X)lQI`OnBdp`CL8!ugdfnrzhnb{4~K^> z%EfRWXy6a9s?{d8_4A)Z^Ns=G^xPm%&h(m)GO}8bMy|utq}V^-GAgF0c2($1gx$(~#q!J%=;JT=v*(by=qRmZH9& zscr+Ay*JWR+cg^Zh2VIroYg(2% zRZ-Mi5X~)lS!&@}IF40^7R;^^yZ&Yk9WUYi)y-#LE&6L!!EP{1 zvxogoUV>9S*{KeXS1OlbqzIg5L|A2eEo*$OHcEe4Q7m6u1Ls zJ1R4y2|E{l)JyO@k(0HutZ(pr4YoXwH7GwAqURxlv@vV(%;a+IAB=q93_k2L~b-FLYXb^?mvwq5f;&(2p>2Xb9RnJKNnN1 zhyGYgLTa3tArpU>>h>{)OU2W}&I2-C*tHdJt9lWWXNYal=am?xC{Nn6kGHjDsk^s- zl4*0TT}W2t?(SvT3pp;=Uvu2ctJRMxcNR|TJ9FnZ@H;Ja=FS9cu`jc$uVd6~RvI^) zQ@K2L=FUIeW3{NOr@vRZ{6~L$pMT&F%vmTOw%5v*Jx*^S#-ZmA>1=FK#yiz9e>Yn~7h57$5qNS@-w(;Xy(${8gR2THD;x=yEDV}& z{E3w==KFFHlS}m}XU~z2Ga7Ao_Yx_3RR%8)@7nnooI#*4P2q-|gd_#g*gnlfjKQwF&=gQKQfyWAsC(u+dC}RbAsi1N8pS)7=dzB)MxOT3fG4BL^>-C{ zjWJ?T!eZg{Zn7(5I~vq8EKOik6(|B6JS~pV2K!;bhHyw(5p8d#;PtS(P)(>SZb`-p zXXP?)Y?sA0;oawsW6-FC9n*U0yw4>sm%Q!}ckT5*U@$w1xpSm%Lo0g}Lhr;>4>tGU ze0#-{7O2TpZuF^~TtdG1F^T!=YD?_Ge35}d4XHmy?ocw;A_MQSv~U=OJWP89sB9Ix zIj9ZSGOZpf_h3U~&O}II{^80YprxZ<&NbqbWZjorY?Q)L(nv}8<7wBmKS&% z8Ffl^U~pL4Oldc5>T?#1J|J9jQzTYSbu&FQ?Ca4@f(oRP<;0gOYdEeiu7tP zQoMrco&dHSe?o@OES#n{f}a8hY}brdJozwhK5-{fA-H6&22M{^`xZ8*?^uvH z{Tu$%8$v?F`S(0;Tb#C?=JJl`VJhR%4tUhF9i2ud&+YqO%*t-pYDDPh%4kTmV6aLk zJBjTM&E{(~yw#%YIsb}riNhPFR-+GULw}E?dN3HGwLr6iG0CA!*u7wKJXO>$AZ=Od z7H#?ty2T*J9n=#7CQooHw6G!*{3Ak6r^PXG=$%6kun&3>_G5xSDP;Gw=@ffsQ@2`} zd!qkwWqgBXuo~G1cLuqYJYruMjn^-gM>U=%3a3eX1w}2UZ}sFJ*rWE@JId4z5bhU)fKG^YJsY%GCT!o$XM2{qOB?(>-F9^I z>AD6QnW_0az}&ZQH~xoMC{0?WkPIw)=-_swAZ`T; zCZp;wWZM#k=ks9I);-*F3xLlGOY3v zOt$?6AAsN~m)C5{Kegj?e}Nv@$6fPfmFNSB=0k#k673m%<7 z72!L3UWc(OpA(7A^phaZII<-?CMTZ~u|3CzRkq2tMB;eB@7R;)!hYOEjTC!Yi!JVS zbl|qCS_Y26goVknzMmSTcd{q>c#l4I$WHHlg@T}A7$SXDT zV#*#PPa>GQ-`KKQiyiQ%IUb;BEL}9y48;9mGnOIGf@lSRxFaZ7M!NbkUBUEle=bt_ z2ts12e+8$(!ISo=;vF7u8WC&Q|FGJdx^Zp~gDYyPgAPLicqm5l5$y*!<1(T&GkREl zYJ24HxOy6iyuL_`Z^!Be?a~ksaf9@ye`_m93Kq$skpB)@a|LP8^Y7bPBRcach#V9i zal2L$^q>2%DFosHrQVi+woJ2~P(E6V)NgFI1V@t2huIb2k4nM^bOOSI!~7vlX&SCD z5*{F1^$)x#-rOYYmI%g>hZx2(1|Bk`=Eu|S5+Pk1`T-q{Rerc8^uu7pkt^fq;?4=A z+z40AM>nMhc?En5o9Al4`+8`cuwYKqQ4R!@Cj*MHReMFMyZlB`TX6#Ke4qsu+wfKB zFO`f&_YfR1)5KPt{*F@lntM`S3KJ7UULDurm6!wXOc zN$fXdgCBKYga-fgx8V&w>3%pfDnrkr4B)6p0Zc>y{)KX{KKg>$aKpafXoO1bXmW3S-()MxR~YHm5SBp^h}wdr20Xvaap%IJp!&&QgTTDLie=z5z9Ot~KV>uqc)J+nV% z<;`MmU`wp1#RH3Q>hOSa1+AiU5l*y4IG8!G%uo3(R4=&*=Z9HT?m`yfJZWBp^UQ3Z zX5?shV>MFeA{@Yu0gS`Y8l53p<9~LAWpEWnhp1^#}xxN)qg;z7QfNl_tG4f^M&E-Z|d$w_s|s6{JZw+ zZ?LXvU+ZtW!-;AA&7ouOuYQ+U>D6&Og@MMNut>Cx@|ClL~L{C;p6)HU~R?^Cz4ikOJXgW~d&gE)0m7;7P&~he{ddD)rw1!EFk!L7 z>2bga%!2vlQ3ks_$u3WEe2%AXF0_{%PyIYRoF1X?DV}Qv)1=oF&*d-(KUO^HiggdB z*S9Lx52QhOYq3k+;8IKM4I{jL&UUIlx-ju!#mQ7Zx>A3HRp(QzpE=b}8A`K~x?His zQe2Y*vrqNY*Gi}LZAE<@$1&`5TCp5$rz>?ce7yz}5^1PKQk2w2o-+tTtc5<+&s+#< zjnn$M)B1r^eHT7AJFP1?+uQ;lG-MTZmy)^!p{(XP%(P6Mp1D+JmDG=2)^#v!xhZj_ z%c>zjIBQf8uBTvzEA<;tJ6+c8a9<(ZuT^5k+8g+TG;hqYev0V6a9Tel#DUVgrI;~j zPMF3nZy#bJmK>`eHY*Va%P6mFp(V$Arxz{Lh17Y+dSS=99>z~eHGb|~Xu)*x-{=%+ z$x**&K6hlNZ-b2*SM;`p=4{V#cmk}K_)hveq5T0ZTU-)PGdH2n@9?q)_U^kN#y$+s zr1CzdoBO4kv4VII1TEipw`S_TJ`%o2(IF`K0@X`*NpE?59)X?Q=MklH3GC!|%B!7~ zD_|#Y%CRoZu{P$=T3$+1*}7a<&J8&kU)!p4faaN$A2VJ7m^!Mx{N(%SBE$Bl3|T|o zVQ4RplJ@ehcJ^}Xn;b9=d-*StYJn_tpuJ3RUiAM57R*>|{9#pXS#Fjp&zeqsA0sL@zWO(Kp=zBbprT;@jb} zb0^C$!-#H~#>x&eqIq9Ws1f}Xq$glRyQX&<(dvl9#E3q3MyUCdCFZxo z?d|Cw%lBzdFMyLn(w?5VjN?X!+tUlyp;bk)r*B0;1?*PCo-QFXXutkf?P<$Y*}g;U z>FaRPdg%Q5Yvy!Cwx_Q`NVKOvhtp8~Ir{uLb&p@NJ$=yf-_iE;{97a2)0pLKR=7JZ z%f3cf)|WtUb9ts=26b2(xpVOX<2cLSklp|rEAG`sX|UDHXYUvAq*c1omkfnQuD24? zn{pZ%0a%p_5xA}DE22OMXa_eFz_c6#o7FP}5nbkRc@m`!j!hpfd#mEPn?^bv*W~M#1s@Op4n7XU09Ilw6ejn8RLaxWSisWCI!~EgpsG>QUal{~YD*^$3|QIo>|^jPhQz zh(v9Ux5qtB@4$bOsK-<~h+c~@!+jr1qYhKw;$Tgu zdJ9bVPrR3hbSIU02S}Bzyso5 zVdaJ40SS`H4qC?p01})ine1)BY{3H{K`S1yeC?cRj|vj-&ZYTE4U{xF)BeCj^ar6J z!Fy(qV8|^13CPisZl+$@dECV>Vy69-39Rfekf7`8aFC!1(h~p)&b+A;Bsgq9g6_p; zn~v+B!{^&uB|cF1XnTC%v!Z?C1CPSVA&C!+!zJ1W0UvnuT{Ngj_`u~TsCM{(i_8Or z547AUn|263aN&}0eBdc_x+3EPg9;@+un11~7ayQ5@=L}C4qE;@8Xx%mxTyHR8XX^K z5%|CmsBC})#0>jN10<+0K!Roo5^aswPU|*U{ac;ZO~gj( za6wK^>UwbeIryCVK5+x<4^Hd9!CIGNeUJM&XE{}@$X~+f=Q&fC;ERM8UdO~UBO^lb zy|lnrimWL-hv)<1D78xJ2l%oTCO*9bEkN?Hl3Ze?SP=SQJ;k5c(p6BKYk3SNunCnVqT8K}$Jo9Vu z0m~o+4zj|@ee_xOnjGub@VXU+0T;w2a1ujK>JKu<%TPx(enNTa>LrE362hsxRLo|wY2bMP&ZWuAykPW4nFZH-qbSBdS4uKAx zs>h0ir^p7{l4YOe0vnwB$(cpYJeRlcOssWyjzlePik+!=d)*Hws^abY)JSiS=SF&a z{duJKqWMH?=DNK0`7ZAHcH3(Z7JRwwv#`QJxBs^e1+-8H^q_@reP6(v1{`6%pk9_# z-bUf-U&57btfZa;1sEf5{njD2gpwBtiTH#^kbM~-D0l>Ar$-_ll6oE#feRT0sSQSi zG8mA$$Csp2-6K;2I3;hrz-Jh8Unas&-(=OBS$P}DY7p_$6+Dg=tQ}zG!Q&8HV&CV) z5{e99JGSbrI11UfXLlGzqVI3Mk~^IDf{#plf!#sq{(_@lChsuBM{bk&2#zZYJI~Yd z3>6*&A1V7kLA5~eI}jg9!fIaa@7Eoo_((HU+`|GNS^pF(FAN`Pl}vWHL40JlWHw8t zsO^-VU7r@-TP0KM8f~iJ0i_Tf3|gJr6QZrGz+J7D`N{VOK$Gk9&`E@XCVn$$((_t? zCgf=A=&}9kmiOnJ%Bbr^@F^Q{?mXTk!Ptv0#mbuyAqYB)91Us}T5! zW~+LHvA}@E7cA<~@e>yeG;A2S6&oKi9XXyPC-4&&9IedpJV!*t>6zJr2mRILc;*up z$?@DzoioSt=N))BJx@{f%<()Gj|Z_53CVB?!d|hG1gB>{wN)oiy|ccqVC{kgPEt3) zS{FMjQgNMuSf%k{4t7sivG`*Z?0+n=!0w6?nC|eB`UM4O4oqW^Vyy#qXwY=8A;5sl17ATR zu-QV#ZT1kisX^vfiqcu>#DRvX8es-_bV^W3hay^$I+yh`#VQaH1?xjR^XplF>Jn$_ zE=2w=QUWgxEKPtebiFpX_5&qLL?#F4jRRFeQfB3(ZiKIOPV0N{MMJ@v`X+p#j9luP z;4+jP&rBjRIqKG&)TQw7iPQQnJk$ygEHbCHL2R`Wt6y53I7|b1BA&Ap;0at3lo3dU zlDbN-5{N?$xv49}4wlU>t6r*&IY4B%LIbJWgCGEHeE`Q5A~HA@xYb|7!!fNJst9Mb>*rjl%dvJjjR6uwJvMoky1|3A~1<;7DQ~KkYE6T2eK@ zX)5{BD%jq>{|B(5;*=Xsa+A`i}Zc%C9X7Y9~)2-!He z{zRZ28rOsjBtX&%WMh+{2#|ov`<|40VO^5C0s)&=X=J* zFbT*=Z$whI2yeCox4?Wr!J7{G5Xcb(g8(j*w~4!SYVmm>Biu@#jL-ZjQR#slG{MQ=B&oak9v3!&62;3B$oa_2@#_0RcW77Vm88>VU$Qqi%Or zHYF&Ya`Z0Q7H3m7{hY81 zL3$;;@f^lxta|gG0H)%Jm=>~k6YuGQ=!j3%uQoI($bSYenkMmm8;W(Lw)efo)sAR=+dxCQI?eWZK@$SU5r@_hJn;Y8Tq7+52kWAe;U!>F5IK5I<>3)&gL95D*4e5aLIGqh49acbz)?0iXU3=QGMDEL zY>{nqZ{lUg$Jb*%O~1edmp9m|IltyhU?8EFnKT40GoiPmgWkl|4S(?`z`gl06R1i0 zjY0lultv1L01aiWmX%FI`q!aiJWQA3u2P8q^W)U?d>O~7;ixLT2M;|^O_K+z1-{}) zH6Cf$Jg2Q~G&&xp2A11@e7IZiU=MkC>2;y0Xw_2@a-f^-$Y9?XY5%*a>(u-Ip&|W$ zE>8SXydyCa@*5nRKD|vjVwD_*SCsY8MrC0XKr`ub&C*D!oAGp*d+J>D-6M8(+x8k5|-1UUf8g3DZ=g(CP0uw{KUlD_qdi1+LlASoqvH~%1 zT$$m-1aB#-$E9x5p3g)imCFibPn`#F_0{~iL)@*A2Zq{tribq1&bVXq+eE5%{to&Z3-zM#TJn@kl&)29!Y7sZ+ z?xR&F-CPhNz$gO6Gaa(fv>UYW^2i>&CGkGYoI0(EOUKI6w zo{?esRE#xO@IFQ9``-kERYcBZOWv7kceq zJ>2lz&k8(wU?)c%%FH}IAu~8U6o=<$RxU24wqmP39;+Cf>b=b%@K{5hPz>(X@Cy05 zm&z=koMdx0-P-~-ed2Wo9{j^A4QKx0gi#w(N%tvW1@E+g9GNe@gY5F8L0PZgUMgqq zjt(4cet~|Q;rTRll4%|1nlAM-t}jHV5dSACxp_Qbodbi#ujuz5?w>bau@{Im{0c=x zDzDb#X^kn2RKHXYhvn6>Fw}eb2QEVr$Z$aT^Yc zu0!riW7V^i4ybzFrlld5?fSb`G{e30cR~9Zou23y)b~ZFH>^Ox(9w*P8^<8C?2TiP z>%@DzV~}gfPL&sC@cPUOLG!kdeIAC~LZ>ig*tempdSSLC*^%*ZqpId|S*y^;Mg^gu9xO}UC>ttf@=GmMs>nC*O#PK7pjPGq# z8{s`W9l2|RzE1GhcOQP}U$LFFLil?HP)HY7#!mNi7j!A>o*Ac@KWDvyBP_8RY>!P{ z0d2&W0`E17xNG#$F@8!d-Y|8 z@^ZYfi2w;%27m)|aSjgFl-bl+xN+g$n$;UJE5EQhk63LK1mL3vAN3d3NwM}r2Hs{a zqhtrv_3Op4)f8=1@kw+FNn(w|kvP4YpRBffy6B&3!al{=?*0awv*?RMxY<{%?d~`6 zNp?=9=fe?{qLBHcH*k=9=ziRc3(Vp)iW!4FK0};G3ndNItGYNn(O#SYA$W=b!+#{j z2}gQkN8ClaJne!!y)TyQg~(G0<*DyKkaWt^nIpV?&!oVlH<;u}?A9~MQv>A5FOGZ_ z^3-apdJ?4r6}fV^Ax>U|C*)Dc6U6XA9DUOTsEB(S#0keA%{VnEA&FTJLYg*#Yg4f? zt<9GT-#lL!(DLff>yh)slosd9_oYw&;kRfchp9fjxaz0s(|;WJv-RnO+MlIQ8{U2?`t-n{K3&(p zoj(1d(5H_?Uju!*0F|+r9Bt5mPJP;eLeUnc$ssF5pN{`SxITRk$}6Bxr>6gp>eDTd zpLY864m88P=+nmaO7cz-F)MK}?-ZHz8;q(QzlP_@$00^y&y%g{V(3Qzj%`)1;)T;# z*FeU&mLOLgzuyg9q3985tJ(=6bYe@0%i}0^VSRz(DUDZRUeIBdWS94fHg$F77colZ zmp`~-p0HQIJ87BD{rX}8uhhS)YlhC_yMl3 z-TPf%x=xsiBBW0ETtDEAN(6mS48aFoA2g?;qxHdi&NlSH+{G0SV6xzidob!&fGbJs zy&!K5kQb7?5hP*Z6VAdM^LmtRV7_p)Qn|6f33Z(}Ho*j9HNEk|yO9(xZ%ctoT+F#gTgrxl z#URO?0I`L^^IQ_qanpo=?F%Fb#`3wgOtj$WX#?bStZ3y;T0giAogb3=`98|C77N-@ zo-H#%%5!xpBaT*{&B=Oswx&cZ&#A19L$N%&-@5PeTmhM6*?*3ynx9dgFU#_*ogPx2 zGgJ0fo|ZJdJRO50mgi|q(xF(M3#RP5Jf{g!YSQk5+n-UMt7Lf=ObaQ`^fUKXo(T@U zJPVYF<@qja<4`QmnimuJm25zDiZ zwQ(qx=LwVcU7jDDK{rj>rjqdT48hN5QJ0N$pV6>*D!HNi4ZrM8-eI`k@boz9m<<50 z!}l9brMy%V5XETGMN%!)9|kR00k20_+{}#}>3}aP>Q*eB{l1Y3d$<^*&fUXZ3fL&n(Y~ zWZO$aQ@H%6K%Z_$NVK!l$eTePE*M3G;zkiOd}=-?P6M6-M)wi_sypY zkXa8c$)a5_gM;^1?y^7 z??aDr#i`3K7NK?zctl!y=5-<@KXuBgpGlcZJLAUm&EqybeF^ z7a^~$C^WhHldFvm`hD=v713Vm@cq{tsvsVb&a3+~{%qbDbPN1ODwAfD49 z_k+iD=>6a^I3)dpk>1$l&?0b~TCY`DEA~0=2j7(b$rj9&uE~d;8g@T8_&t1Ea6dR+ zdTDrR-h%tVNoRcSuItMh(v#Z%2iw@QZCUlrwQ zGl1ll`Llu`+owIy%0of6cg!H$oTC7;k)z$%liEY)spY?jQ_JJJv$Deu;L3G}gKVoH zJpqvIR`}@z*$xwsZOoDKJOS$G;pdg-N_=h2U)$qr=Nz$5d~HK~2)=gc+z_n316h>5EOiY>ZQ9J3vYRSK7gIv=Yx*8 zAAH4n^rT_-@{JmKhoQY3^?vZ?T(iA=x1?Gi3ms@LuR(O$%a%jbUfux}>9DYuzaJTH zFXQSj%P7IDV~uv+4-S2%Ef-Uk`(Z?j`@t;_23D&4?#DghM)W$f5k0(ZPupU0w3GJ0 zWv3DS%UG$>u#1%)W<(!JeYU7 zp$fy}{YQo!4wYdX_7t39?irYs9^ON^Hy;jlgyCMC<`F59VIq=Ya$&$oVcH^+uA-?# zie#9GWSER(q?j>BB+@;a)?@NafdhMT1Ea~~eB*(QhR5pIe%KRY{&0V81bezv+SAJ4 z+S}8g?cB4k^(v3TiD|vcp<_>v!v?Q|FrWJ9P0+I<+0&P!pxW8fE;0|0J>9ZRw(pSn z)C;c)x2K;nrz^5OJ!tzLVNWlD)BUxlseAmA?dgM-|Bkk&fB!v%!dMSyvZryDb@P7T z4-OkEu3)G%*z{*_J`cb9!DDh_I`0SnuiX#+IqnDFgi}j!S(&^ad>LF9;HpeM|LpAx|SzJP$kYnE!7xNO0}& z*0*0B1s_O9LAApN`jUBo@PUS}Wz!D92TmLojt{8jbVbGoj{QdB1CPULP?jQ{M;=^n zO=l$S^@BQG7 z+-ccB1fmX?MhS}dgX6w2)BWI$;NV^{)BWI$SnB|Y1Rw@Z{cXg(Q>)?RHQW#G!^x$a zaoyAxxF38cPqgLz;5$wCgTq(I{oq@8KX@z9gBHH^{WspQYJco{L2Z{*-bUeSi*SW& zwgt5r6kv?J^;>v9`1gcFue{2W$g7(UW0nW6Z| zCz5HxM}VY!Et##X`@{+hP5#dN!S`ong}5KQ;-SFGii;z4I8KB98kECT40-8LBi)%txR-6l&2!JNobN@(OMc${;KWSMKUHET z`m?wGJV}3^&gWrpKX?EuN$9*Eyz_qWtO)mm<90vO{osOHX@E0=5>U-NKsAD@*C7~5 z)hQla0w6cLT6lhx@@n_xS|JEiA=t zJ6mx-_!~Hyek+j;-VgqUydNCR5#R+u8mtK!NPq-b%h*kVB0vHv?|V`bOCgpc$N(fG zof39IFuF&gKA@z>zJzNC<$TY$7$yN3>5VbtGrZw`aDn-Nf;S!VA&?^o1_4|~zaJc* z2QotZ*Y5{6arFDa83f%B&5`$m8=Q?sI1Pp`Bv*!mRWCARaP<4ZO}_N|!SRW)C!#3# z<9_gc-am8QivrfcHA)yw53~CmFa6a|y?^GURT63W8TUCVP+xz>eU65=U&{S6vmPd7 z1k-+nmi8u;`_TIwj}>5mexGB@TL1&d(O#m<&ifoK+QqL&FqCIer4HGG&=n;YxPRs; z`1zl_&#?ef(eD14w5RrZ|BPw9?N7bWajR`g*9o?SkqP*@Dj7dFq)o7;K4z~4bcGjr zt0P|K*ee7F7Z~gJIz9~=$4uhx^4Dxte+1zfR=`^$18dM=C2C;fROnBWq|wNAaXhIq z9#|v(NyvF0_c|hh@?uA%P2TK?7kRZKUgX`56U60?_#IE!*61G?Y^mobX_9qV6%H5Rzv@fNso;j$B4$cGDRt{2o3_FAMl;{A>@ zaCc1}F63JsxR4K1Ip-P|JUVU9zH7RW51T)i#zSlP453*@cn6y9cAQ|F5_jowEcuk4 z!{xI#X&-cROVCD?H$A?=n;ykgDZJ@%K3w3YN8D+h}$ab5#;%ZtT1Diz3VGGPf;3BsekvJzX!{*mx(- z6SaPbZ0=TKaEAu2Yjjz6nFIUC9HG;SWvIAikP+K=`Ebb$XkOcxy;7L%DJbp8*ES9l zH`_$Kw()HJQw?$s|KdIz!rizJCqe%-H|!Ix#Q8pOZR7mq5zP8tIQ%~J*nzu|#GsUn zLehCb4x|KaU_Y*Hlv46qKuWMsIdE-bWGU%uNJ-$v#%%0fhL|MvmSU1DpS?-@PDB=Q ze~!5RQ{LQ2`N+xGVypTqN`W^w?zFl`xxBH*MwbVO!@E#=@9fRn$tSOkC=aN)clC=N z%b&~K)@XHmMtCo}fQm6hVF&WERN>8?yx!FMp*froQDCcpmjY;Z_GVu;1xm$*@N2c# zmq~2o3fwl1`{TpT>*IZkLd7`f_v@Hb(a{jW87L!cx!``TeciWs%hQLizB}repQ`Vk zv-D@{ySW%>e}=wmc>AU3yAK8RT}NX(efKn>@9Ot0wl0CbOOCdJE<5#IiR+k=~i&e zOdVq{g_euc&GE}K(#6%~PvTQR#r-`9lZx9wZ%}bp@|hC1v!^82fTW5`&6j$JSXCi6 zaF8u{bb<{Vd>3b_BUy5D243{t8qjcC=o<>IoSz?yyHSGbEtFiT-sVER?RO5bVtxFr;nbbwe1aX+BV z`+H-&Htz3>a^E7{y4C>I$^-V3DBeI5^pZ5*K<^09cuVNPLs#euiezhy}YM1w7|It=#_IIiuWM1i1zt0?#ss3=K z;yq_0*rQ#kKjKQoc+N z@z-7d|Lna9U>4Qc_CML!gb)xhxZncF4eSdv~@wHO08C0YSCK7rAn*j`Twq&bH|e*3iR#UuiyV;$l<=ub>HXA zHTRr3vt*tp!;T+z!mtyE6+b=ohx<{j?`a0Qtol@>>f4rW8Q7MAZ5h~>fo&PsmVtj? z22#Smo)`q#dykm#ZMW8i>bdO27l#d5l=I%=xg!s__-_x_ee>K3J`}BG-O$LccD8=# z&;>`bJUjHuLu4Nw+I`(AyF7o&J; z^$h1X8|R(djiMl!enDYKo#O0VXRme^C;N83#?t|J-?bOzuKfoup0YpO{?xI-{x81p z0%h#}7c6WgykDC)ZrKliI40KLQ%Fb@2)iZ`9 zf+-cnCr_^~o?109KPVb^Nb#_-V@4fz=*Yt+1jkg)N>rDfGOMhnwyZQ!Qc*r_=8UqL zwTa%ro{8G&CAEq2n#9b?+C)jBs`Avb>O|$#MD3}S-GbU+Zg5OcR8n11QBhWr7*|$P zR#H7>dg73>>X~H~HHq0})ivdnGZVpqefs3)?h^_cUt2bFN_j=1Y);vfS+(-=lQ$wU zt-8E4II^Z>8shicJyBj;R$U@@%>jx1N~)_%<|azZ5vBePNF2A{c6_Aw0u_00Vf0_$BjFD+z_wB#Ee-rwTa1Pi39deF|>O=NF2F7U$}x4n@~D1O4+DtexpU@wL#4!!V7PcSabN6d{g z+$p1qj!cw>BZtkIa&k@LprV2KiJDneRh8AX{esCQHRV%^Yig^@XHFY3r1;R{DeRZp z>RD526EkO3RD@$=Hp8ZrRfVhGJFj%_nz>Ud`c0V>3! z)pLxNJ^&O>Mt6K~Y6n31QR9W@@a_dU_dq=+wqmvueW5 zV5d};HpW#}w+TvSa+s8sGN`mJwVOgud)J&=Ud!+=pBYl6)$Hl&enG!|oQ&cyCDW^_ z!ab53jG0+lHYZUzt5$oZx@6|GvRI}|s;bInmPWg1@~o*OEE*0Rj`PZ@EBhrTR3@fW zmq~kx?Z1*KQ_5;;Dy#b@!aeIZvvEHyY{CefUNST4C>T~-TQ;LA+?txQT84i`ZFyA% z-A)OQ!30xCw3(GNW@zK5Raefc>X$gsuSN%rh6d@?CXs&bZ%^DtDOo3yP1OFup)THMcmgbj;L|VUqHGpz}Ps{||J3kIw&r&iCj6KhOgl zJ*8x(CN!74n%dGKLuQvvsYPLUIJ?};D4A2Nms#@A> z=Y|t!c{M{zE+)wGnM_?87R)dN%`Pu3D{X9ERX%fOS!vXZOM#O@H2s$*7(&yVbQOxv zFP~XntE4DP{TX_Of5=#A0S3VJD$tS}6_(Yf)M?`CLm2hp~!}6w7>gvjBFDAD?hrJF^^Kjl;wP<*F{u%u8OS1<>ch#=H%t{&&kgjkTWo6P)Z8xXJGEY zyn+1(<_{b&aNxi}0}BQY9+WdEcTnD-{)6%d4Hz_V(4avDg9aDm6yz4<74$F2FBniT zuwYO@LBZg`NF2=S2h;Unnhk~uH_rQS-J&R8tKIglb9RHXS<9k4B%B>zn>(O*d=-;8 zyNJQaXdYA4xOWGZmerP&SL6;H<|dLcGpZ^^GqgvJJ#27s@wAz+>v=BM+S5|(C?O$M%kFnx5+I&bXINIoZ>ABk2bpQqG;Tl>$cm*&*84VGRAMb zD9X<&$2V?jUXBgq%E>2j^xIf%49LxkPKxG;)^F?Ua&hzHc%xfSf%Dt0(w}tIV&gOG zn!>Pd<1?)ApS82N`X7l~wK(dp@cO8qoEsxsXtpk@H*SvY+8pi9=o)RzvBwSw=UnEK z+2alypHn=5%fMM>BWHw_fzjz>%!J~x<#{6y8=qU8J1D$7WY}=cS2?AGDRaz}kt1i1 z965e?@yNptIkF;bIEYhAX+>G}$dScUDn^bUQ9Sa<3iY1P70!$?>TK-f+>tEB<41*k zGk`YHng4^%;S46b5wrcj#BIOrkM%CUD;-ZNgFo@wlP$spR8lCdC{-Swp@8MNoE&n&@v~5|9Uc+lWyw1bx!CL8$B%2_*Mce@jOKArhEYoYg^^vIUb(x;cdrPG#9P(a#HW%ZO2!XpPw2( zHr|$ad0Xb;Iu9@R@CpzARQ|31)n5Es53lpE<_(MA;NguP-qPc1#k@jVv7BBQnxZoz zTX$w;H=N~ci(#R^uvKK2w2th$Hj&L~8`&mjdb^nSPWQRRo1X6NlHteC`onZ;=bWEg z?49#NJ$9bnaAmaqkmSG7S2z2I#g6kH${-|3TZI@kt zMEQ0K|AYCSQi*(I*&q1r6jPg6*>{DBw@BaTq`Ob3N)imEu;eRmStDO$Z zckgSWeAl_>%1d5~@>^&2!Z9DI$PvyowF;P9bcQ*-+wT` z^M>N`nN>V!E*rDS9G@HAaTb^J)M4cKk}>1^7jN#tgU!Fwoi4l2H@!vIM|zk1Ez+Cy zX=LZ;Z=Ky#tDdkPS~lbG$tR8B=G4x2Ic|MC5E-oPo>6jgS#iyDo==w+*Op8(S)162 zH$*xUPG63*h0Yc^Tj%TwXX~Af>&qMPgT7&Qvp%}AqO`1f%un3PH+LsKxS5{pcD!-B zcXIk|f2?+S8149D;{tPYf9Q5XZaxp5i>H>)EU73iEvYRTQ^aG1!^YPRE}L00xuUGN zeCn8qHP!ujRGXVqUc-Z@@Hs0_CPt1Nz|%+`SBT^LgHzm4n?XS2SG>YK=JdL5!Co$*!zWjIQAt!t+v}K8~ z@s#6q{>ypDiAIEthkDna6iVb~WbB zR~Q>XdhGYVbsw9Bkk-Qx1=em`E=$JNdD-ST|rjrs4%_wt(~-A(iD6#fVEJ>BKk z@?C#xq^r)oZ?I&DtDD_-%k$m;zbN0?Zo8W1+bR4H=6kKvVfoJCTb8ucyZ1L%9PH|5 z583j3=l>Vwdm-Ppt)*$cox=ZMzDK+KhGO`y?v6;;2KPS9y5n5k?D1Qk?*acs`QGTZ zvuVDa!vA2tSGxRKzSk{}@|iW=<+&oV!HmexcXl)9%kXL@HhceDub-wnosIeXVU4lu zZQ1qHM%SO^Vf~$MyK9_1HL_XGE^+qXnTLPn`l-t4{Fn0(yM7Aping<+s=S(4%7PK$ z3q)hfCyy&T<4<*xx7mg=$6lWW6_$nBj+o{+g0|_R7eQ`R(12AM@KwBtPa4Utr;liP+Lq-%t4MWsM(eZ|`pW znBQLIi1~}${xUkn0kc)kzu|5-e(#Cwl2|)yZ+69I{Hr(P->?~fmeXVFi`$&d_?KT0 zZAZMF)gRuBzi2c4S8c|>WHbI1oAIySj9*^^^izvIHr?Ek8bojOj<)=t@8ZnH8ZEgm z@^2g*y?;^Y#z)}zELS%^CHzeYoj8B2yI}f7O_{XR-COFnCz(Nu;DHvAuLExvOQmdv zvvR54#@X$h?d5EO-}7%rbPDyud4cZ0vHf^z5YTU0_^xs)e0$yiP(SS{jmM?Ec;@-1 z_JyeLTxQV-)QHv*TJmM zQpY!WHO$+l2YtW2Ddzcmlki4O-dtq;Kkxg!xybyB*-?${y}8J?=`0d2f8$$b-Zow3 zeD=~=7W+*5Xg7-R6b~!+#w*W;l~cpYkzwVf*H>I*xXsjA9#A*B6wJ3fhHuPwl%G;E8DC;QiQs%Le(Z#F6HM>atn$>F|MeS17e zVjrb36Ga*n)5^t&+l6{ttZC{>tmb5K!W!?Ys~z8K5cVUk8^=N>R{g6#`K-k2>f0~s zs~eowFJAS2Shch57~<-R>Bg2!#{Mho^4BZejnS&#`~N%o+O+STgHu!fclzFRUAqMb zK>lxRM|@rJdN=mmKJ2LG)~&Htg0GMqYrjqgbGU?@#>^DgPP~m5*?2v^PW5#GZ8S#I zzsAG^|6*V9zBMNH;K0%L#C62$@wS_*cMEn7p8H?wJHGC2oZT)a+5a2c5norlt}$^Y zD{gMx`?BwBUeZ|TLrur^{uGv>R$m-e=O1A82g7>MSZ~F#rO%w1zi(K-Yk#XBL0z9k zv-(Ta13s5#^)I9LjrDA81TxG&o4P)`X7y{r`q;+$Q`GgDHuHZF#qVeJ){IAe*3Ih0 z)LB2BpMs9T@m!f2aOMN4EG@N}_)A~CN&OWzkTpgT!hSRuRmGp2~ z(b(T`IQwj;#Z7CBt80vV7S2BNX>lJla(6byWwPV#v!E8&A1-Qpbz|H_IQxvK#oebt z6D}Ku4kTBq>UvR&&M||SQ*_*CIjs3Y*CFDCbvVjcVlldAF|2DBVRmJZ z?cpvS-pRxIyy!-vb&Yips=8sjQ=~!XZ=J7I5|sVf67{<~MKQwK4r|u~EWLg^VQbeD zteAMaUU2bt2{>!F2Us!jcD>=W?f&`_)UBPa4;2${w-=o9}NxqT+OTxzLaacEUd5*&b^IcqjuwtdtI39Hc z;f<2#(|#UI(HN9Z+oo%peJSF#ZJGo3 zbDXv(o$@o_) z;^O)af|F1B&UKncfu&nM%gbo6H0v0$JdXj3_w#%(b!pb|Wo-`ui&wm99t)Oc`AqYn zVDYB;FtE6|=ELFSljgIX<|43k%V+sJ0vy*o4oU~zHH6XE3Z^Lz|->6XuObu3t8Tyuuy`8cq6%|8}*JXqs-505(mEMD=Z`9yFQ zMLyG93>I&iCxOM8X3J>_oP5%G0C`r7IE(w9BesBIbNv*2S|(F$?k;t>9k$HFQ$0M* zVI6DJJ-*z-Cwcf}hoz&!!!tZQ)5Db>uJZ6H9 zI%ZQ8BdqPPcBg{1eSSM(Yc~h1n0ULnaPf8tIBPc#teAMa)8LdVe|-t+*6wt$V&d)2 zfKy)mcD~Lt@hK+W?yP3oCE#p*^TCR-b{c~^H=Iq$peQa4dyeB2m!R0W;asqMDIT}L zahS&A;b&mQN~awo=Yhp*jM%YsK3Kf+U~vmWzv@457l6fUOj+E8VDak1j<1Wr8ng1* z@wEsnUfb3Zdoe|vwk@4v=Y~tb@-3#=oV6tMH`RP8oOo%rxXZwCeV4?W{u*HJ8gH-qDvZ-Eo<=lNFZam}~EiC4U7 zz8x&h@|os4z~W8wayR_w#%&b!pc8W^L~S zi&wm9z8@^j@|orpVDYB;0kF8Z=AXmK_aMdc{18~W<+D6L432Al1Wvr4=U-5dYhDQ_ zUh$^+QLr@2XPO@ai#N@WgT=))KLID7G~0OoC0M%Uvs^t1mgZ`T<@qVFct6ke)TOzG zVr_o~uBRy8H2)eb&GMP%r@`V)^D|&^am~-d$>-^a9r~$IPrd-e?vX4 z`L}T56>pk<2bN~}O!Et1@uvAju(-J9m*C{{^ZYV(>6XuOwHkabWe&yi{0dn7Y>LIb z8v1$G>~XJw#Vg)4uK|}*-+(qmbDbS z)_K$6cGy39_$?2=?QkpXpFI8@53lp^yADgodmet@!ykC~&mR8J!+-JcdJli(;g3E1 zR}X*U;lF`(ZS*PDu5msCYa4f^?B+@WtDH;s28v>YwcXb4bFlK^w-dH@Uw{=8Z}%l! zyj=p$+Ifcf_D2hwNe&;yFB`9`n^gUR<6pw3g9H#O3NrDwCopwyAk@)v0cI>67BZ~51 zajDeRr?eWE1}9!)(71H!;?;*8j~QT%arx|cYylRpZEK0IC0LxcEuCW5My=uG)3ZXG z)YJ2>%*H+`94>66d&wFiqgeOX{}aeW=&=j!xVYxs;pFr4+=IGw z%V)Xj36^Hfd6ws1aN_+u??GLfH6L2r-qgh_-ZbwCmS*`(^Il-_rnwJTTwL?saPsN7 zw&l4WSi0r2JnsXJYtDfa@8>y}dR%iJoOs2X=Kf%5md`ZjgT&`&lV%&w zgTT@)pXI6mEX_KHSe^&NiTCq7gt|2A{9-;}kh*lsXL%k5j%ywcC*IHVfz;!gN5F|!ylEZ@mS*`(^Fd(org;=tTwL>LIQjfM zkD)Hz@>#A921~Qf$(H9s;KciR9!p)Cb-uQ?hf)`>c+-3sSeoTC&4+`1o+p6gnvaAN@8|g_>T%6S!--eCX`Tp{X8BC>F<|kg`B<>HxaQ;F zpkLz|t(AX`T!gZ3XW@@1}EOn^K|NQ&E;_76>pkP0!y=eruk&Bc+*?~78ln% z15Q3a&oik@w|thXO0cd$bWLY@u7VS<>qCn>g?a;ZUBeky4JTglrnv^J>uvc=b1hiB zX`TfZXPRy7&W4juI(4nD7;zRCzt*3FPs^!TyVjrUu&(vzdH6IBpYFw<;qhmB_$&|4 zcUU^k_V76#KG(wwJp3~cpXcH8J-pDv7kKzW4`1ZrMIOG`!;3w9iHDbfb#HJf)~@j{ z11l%GUf#`>1XlY<+j%)Y#R&WN23No-e||e*Yj-6+#l+h!ZKhqqYj+iX#l+h!gVVn7 z*XOsp8lPg~?XH2-e)HS;ItjR-e*p-0fiT8nedT0T!=5 z?D$;{)-fTU9lv*i#oO_F7g(IOEuCWb26w~B_aL0jiT6;CYrYpwyfj%|#1jjW$1t;Fmb3OIA z=3l{ySG;NdHCUSEGtEzf#hd15z~bVXpM{gp&+~KCrCUDB)$?F!{u#ycyb3Jd&+~7n zOS9%rYx`TUc*UFM-+`rBKGXaHSiEU|5iBmQ`DHlyUZPl@SA(TnKFjkf;JD^j;l%rS zevNuu^BOquiZ{)_2TQYjrulWSc+>m_SX^B5S~&Tn*~ar9z|t+B%ijTn%{+!&(HIF)TLWK%k%r-xaJSw z#QS;vGxfOU58=cs-ZcLOEY0$n=JjClruieVxVYwz;pFr4{8#GIEuZD;6R6XuOl>|%k zMHI`kIuq~bIR#Fd7g4NjDx7%5o8~mIG|Ok2)4}3Ra|T!(KS4Uh&Uq~$z3*p0}qi&5J44wiBFq#hd0Gz|t(AY3>XbZ<==mi{qz><}MKO>77Cw z&pUyoTRzM4&fvJ_UEsv~dES+JTys}A@rpOiyMd)yKGU24i#N^Pz~cC6qPaVSe9~;= zd3UgM%V)Xj0hZ=VD3<4*aN_+u_o6P%ODNWM4><9PH_g4l(k!28-V-d|H17o#$4?W@ zeIVrX^W2xZbjxRX-Wwd(+z(E?pXYt3$2I4`iC4U7&IL=ee5N@MEZ#Khk4TH-r-|l# z2>JXx51=mH@>#A1g6mkvl@!bKAUN@t!&zJb^_!?)<#B`I#4Fx34*}05Mn2QLFIc>3 z-VZE}pHQ=n-TfitlTO|1Dn=YXVO;!PSD)9>qIc5mUboO;-RlnX@Nf?w=*5rl_>mqy z$it%?#ss51JjTNZd-xC!kM;1O9zM*&hkLll!$){{oQKDIc!GzI^zcz0KH9?*J$#IZ zkM;0zVBH%ZkF|T_6Tr%$fA5$eAKItT!HN8*7-9e3xEMnD_S^aQoRjb=hMy+wN+9Cx z62#d0CgWF3yxkNC?K^*ce!Eh9is7fp`pO`*pZ#{e&Z+nm!%vfT(;(vQ{Pj)8uNZ5m zV?y`Fj5dZ~9ILOP_qE?+mbb(|0CV96wF;odqGE^x3&;K3KZtv%H)Q zmS)WXmgjT8;-%T*&IL=e<_F^zfW<4`H2(}N&GMP%^T6Uw^Z8(L{4~+L5JEm_wt4je zuyo63`MVGt*L)G2_);%!5jd{-VmR@NH_eN|(k!28z631ZG%o>*6XuObvam?$5JfMSAfM!v&CHrmgYk}ZYfy2;!X2aU}=`mG%o{-H_car#qrZb^R*E2 zT|==vUk8?M`7F=ZgX5ZSfDjAD7d11!FjVsXpC(tNnb-3b=2c+-3rSeoTC&3A*vo927K z;`nKz`CbV5{5;=BUApD7{M`?ZYhD2-UYafL0dQRNgK**%Z<>D&mS*`(^Fv_qrukv8 zIDVRFegr~3KhM9QF5U83{#JsexrkzUeiSTTnl0`zurwdxagT$=E8a9e0hVU@O!F_n z;!X3DU~&93(OeH9-%}JD&%Xjow|thrUxVYCpN11J%@+3zIIj6wIPr=%&Ch{HP~dtmXV`F*fBewt|hGlYB}P;5MZ2$pX7EPsCi$2G5q6EDpc_YpX* z`C~ZoiZ{)F1&^S}XPQ3&i#N@G1B>IQiRMosGC&F$gDE8a9`fu&hK)7$|p-ZXavi;HXC9!@@Kwmf$NOSgQM zza7BRd@RNC+!;>1G+W$`)TQ}2kIRM=uXxkk1uV_-ndY6q;!X3;U~z;s$@4C7^7(n* zmAZ7xXZh<2j%(fxPP{Z*T!MOBb2m8giZ{*O!O|?BY2Fs{WxcIXkeV>gMeFuO&>&bOk&wBJ-9pZ)cogIet-5q+CF#v1NG6sSb zf#iq_V$e6066*lX9PnXM_A*2Erb)V_$t<+cUp&m#oN9g?)=&o8zTpT#VaosHv+7_ zq}{lYVDXB#F>(-C`$;|9l=%3|PFj+4kSDV6~Snou}bbOuXIc&9w8^ zcLsjNSUWoo&V=)igR`iMQ=Hn{aWEfFKL0p4+i`?7j^%UU6f2!JzRv}V*FLvnV*yya z@?dd41FKJIHSRpHc#RR`&IgNEA39z%{uWX+2IbSXU4XriB3@&o1bdO=EC+feHJ@ipH>UA&*?o2kb&-vTFI@uvA!ur$kOnr{P(H_f+$#l z0VkiI=jGI;TRzLxox&8&8J6d}z~VLkSlr#lGu;;^K4tgZQ*OfVH{)=MHPGf5^iRd-xGA{uds<(!-B>_%Vm2 z<8co^;o)C;_(=~x<>7h{|H{L^2J77LG}g`y&w!=bpF0z*UpbVHXYnb?=KL6bCn&Sv-9Q$kF6f2!}Jp3LkUSq_L zrPsmYl?RJ^1FSx!)ws1_@fuUc{Q)dqec18!CRk%uK0Cht2o`V0*IQt5mIFIC{0UCJ zw<$Jfy#tPGUI!;$nl0{Ka9rPeaN-qj`rZdipM0k81F(40_h+!UxV{hJ^iZ{)lfTdYJ)BHEEc+>nTSX^B5XK?aK zv&~~0z|t+B(6rH_d+sOS62Y`D?Iv)BFusTwL=;IQjfM z|AV@8%V)XzCs>*_hgqJ#1&jCd{2g^^*8FB|zXyv~ylHL#OS62YISCeTnuBznwNa02 zPKB3G-#cb`P6JD~e3s{Qa9nc+oOnOaEvUyew}ca~c+=bpEY0$n=GI{GrnwDRTwL>Z zaPmpBjpw#t>6XuO)ebDpIwx43GvUPhd2UZ#nsvUgwpnoE6>pk5fTdYJ)7%j(-ZXCy z78lpt2~Ivg&pS|;Zuu48staF}m-QmP5-ZbwH{y8!7ndTl~ z@us;aSe$9LvD*tyKIzo8j$*`FT>M%`-yN()-vMdYI(s^-YaM-OtUcq-m9-dszpP<>&#bU?) z$!Gdb1dBI)#b9x9eUsqilRi6_lz^pMKFiBwur%u!wLDJ&i}&+fN?n?D{94;Guz1Cr z=BZ$5md`X#1B*A!)4}56n#T(Ef4JWoDKT=VI0@|{MpJf8uUZuu;0=XpN$xaPCr#4Fx3 zp97X=`AqYxwz8EYnu6Z$>e14uUp)TF>S)P}G7m&1uy zylK7yEY0$n<}1PCP4iN)xVYx4;Na zydTBd-VYY9c+-!MDV&d)o0;hf9ug`C{9-m_3?LLCje)HS;IzPszn0ULtHq*{u z-zWGLW9>A?b#L%DIRD<@Q|jUrr}lPl@EM$Z{=LBl#}O9T@%A~KVx`l#FTmn82JKk= z5-eVMu(+?l>Qh>c`#V^?#;kE)gT<>4JAS_b>zI(wj^B-7@pkuZ1$uXxj!1WTWMrca3%Z~9WSBPelwsc`a1pPhTss7tqe zmX~y}H0zkQJZHd(_w(F>x-{!}x3(?e#4Fx3w*pJEe5ScISiEU&0~Qz8yd9i;(roi& zTk6s+pXIL|IIcMpPQ0Jz_SEB=v*5%l-ZXarOS62Yxg%J-Y2F?zF0Q!~oP2(scc3oa z@>#ArgQa;5#qzu(oOnOa+0><3^QX1#0w-Sarg!mb2FEo|gcI-Q`55YP&BwxtSG;LH4lK>`ndalc;!X1jU~zHH zC&J0+=ed}=bjxSCngo_+U1M6FOW?%&d7eyNnsxnYZKuGASG;L11xvGhrnw9(-ZW1I zi;HWX4kzCD9X`Ts|X8BBWC0M*^t^$jT zYd!@|K54e`Tn(0P`7Bp8U}@GhyXCnSPQ0JzS=6Oj*YnnPHk^3Ho90u&(k!28o&y$d zn&*PW#Wl}^lh4odY1E}#KFjmz;JD^9;KciRK9hP}^I34>6>pm7gQZzM(|k5qylFlM zEH1A3TsZmsJTIUw-SSzkeg@Vx^#>Ho^LcRMb-!bA=Tq19v+hZZTL>p!@uv9#@G4^D zGtC!*#hd1fz~W4^jon3X@=2%ebrmDd;^OzZi}7i>7;E>smpH6@-6bBr)Wer~@t1r2 z6&}9Q!%H2Oj;lPp%)?iE_!*4D>e7%Qn@bHZuuJiCs9=_Sbw|Mwg58vkD+dX`T zhnIW!P7mJ&*1hrFSi8r)2do_W_l^njp?#{n+>1{!wy&(+eQ?UR-_F11ydR%p;_X&6 z)6QSt1NapaZ}%Xa_MN{zzunLADJI_TAvo=4zn!o1VSI{-w|k_ScK-T)fnPD!PRE4q zjaS0)6XuU!0|$>%5vRBW#qNzCgOks{H-6l4gf$+EPrxZwI_>!UC0M-1tR34=g2gKj z7WWibeM+lw^YKUmw>z~U8en%97(Sw7SJd$4%Z{5n`%T=N@n@=3GJt82m1 zEuZD@58$}wH{rxfv&H=p9M}97oOs2X=C{GpET3uq6Ii@yeg`Zru6Z4te14war7qp_ zS+3p#OY^l9%k%qS@zQK@AAqI#I*Kf&Tn^S5Aean0Ys z$>-pkTsY|nbra28P-ZZC!LyqS9jD^i> z8SwJ?d2T^ny5+O{wFFD^jTFmsD>(6fo?BB_p6e*qwhf$k#hd2sz|t(AX>JP^Z<^bI z#l`d79!@^}ol6_fSzzgw&+^v+9M{|tPQ0Jz?WxB#cY+hIc+6XuOwG&vHZ=zV9cZL)1=Xn?E(tI<;+U^P`Uh$^6D_EN4GtIk! z#hc~?SX^9lH#qtHJa?xq-SSzUcL&Ec_ka`c=eZ~KxaMAP;uUY2_W(h5`TnH!L&+{kuXxjZAXu8^ zGtDEw;!X2Nu(-J9gW%-z^E`^WbjxRX9u1Cb9s?)d&-205@SiEUI8Z0iZ`4~9)CQ@uX9}AXl`7F=Jf#aHwhZFDT`2^~5%_qW%SG;L121~Pi zrg;)rylE~0i;HWX3@4v7+jyP=mTvhhSEXQSzLR2kE`t;A=Xol1X}*hMZKuJBSG;MS z4wh#5OmjI{ylFlOEH1A3WH|Z!JXcVcZuuNBe|;C? zSB$k&F7=GyA~@0;>e4fUMbyP9E^Q<2_vX@gi`RY^mImAQ_EmqGC)?+6TUy%Gh zxeU83Xh`b5yC9_@*_L&V#wM_IeUD{Dy3haZi-LUt;;WT(N)U(WVYUwIP!QD0l#kj!rlU&Hq7#QJ8TQ*~^W z?Cgf*t+J;!BwxU;pWBcep}2Dzl38uoPV6%3a~3ot3slFBmHip(lRd8?xg5Lk0`>=S zqc3AWQ(unlyEFUiiiTvT?WkjW%U;R)WtWmyZ1%Mc$qM4;+`@KHUyH4$zUnr%Lv?I= zTkP$uU-k~h3AWX}4ar~^bl=yIyodT|Y&~`&wg`SXb`ABF*!7CPAHDFaRuG3>_H(w6 zNi6#z@{R3{9f2*tPQ;GJR$wP$=VO;+ufwjyuE4IsK7(D4?ZkwX{R_6AdT;C}*ooK) z;-5fgCjDWTP+#~f;;Apg_Qq!an(>2Oj{N}t^yk?x@?#TSvA-iv@?)1_d%wVV#jeDz z!*0Zu5;yv#hGd=MUv5aggK>xCTZb&Y|uKkdF6SwhGjyv(6v3=cW_c`NKb!^)#>=%qz*)JQC z)3L3-rmnaK+EZVSy@L9(AemgFI(EHmN;0`i2W)CGIRcyA0zYxnGn2`M)K_79cW3*v zlF1dSW1o@jkW6lr?U+m^I^NTee#=SxS8)_6h3iyCsv0RL9oIcBg;Y-IK}h zu)X)hDsJy&a`g7Bzh5$0usi;|Wb%0Gt@d7;2Pc!OsJGfb znS4Qh>^kf+Y`{F#`+#J!ka=?%b|F>^bhxC<8yBR^$x7L|ZOc~ro^#TnAs$Tp`}fIx z6fvS@mbe^BEgaKnxTu>El&THiC+DXFGP4fK%s#mNcBi(Q9}L}T-@W?xV2h&uDzF=A z%aq8?Xl9oBpHI1!dHhh`T*AKKR>rs-xQF1_U5#-g;MAYx zQf()~y#Qyr<*R^OWpT(Im)R;@_X7C&gBz0jsz-R`bQ#>((abmEhG%9C%gi2@nK(CX zacb&B!K~q#t(cm_zaKM!^F*iz`rOnadL?HR{rwP~OYQ>4`nbXRP+WQ{|8Z`)dDFP1 zeq3MT783VU{b}3v-HchBn^=9gh?@@A7S6_v_S5Nbt>Bn?+ zKP%wUX~QLKxJdsqaO;l#Xy*}mn}!^ zpvHZp^&L-}Ih=F#w{T-^{%UOsaV;ROkaO;AB`n;v*~2pv!!vsi&&(N?S#WO4MJ+DQ zSe$-I+IgvIlT$KtLJxkT7#FR=eXZ|VxQ_GhB)W*Lt5K7-O`q3(=H#Z^mY&Wyg4>#H zE1=Dyip_7^B;rbEG$ijM_nX_cYf_uvwp-~VYtH7kP47$;%xk)B>)`U?wr1NpX~&exV6NUlhe&@+mUI_Z`*wOm~j5)w@uHOPUkvD(zIwy>N&(5xUJbX{k~|#g`3~D zY$nHDE^0{5Ab*?Nwqw$p-?roFWBFB^-!|Q&ufDqJw&_0O1-Px*wijr#?3$n6w$W_P zSHx9a+mPHx$=vjK8Y4rsvQzZT^*pbWYM6RyJK>md@wd%y+jQcd;C{u%ulC#Na4X@q zX4_WKX2FKdZ`)epj^}>pe{ycxg?rbv-)(-|Mi3Vyn{L}gxbL{n+L~=!M4MIgv$-+( z5OHZ@c7XkGBN((-Yb!o9wrIxFWc%*|zDl*@gSXpW3$3q;UyxUlI53 zI5)jTU(0q%Hkn5^z+J)psEtF-qv=fMOW=NNUh9E>1O5w%{i%6$EOFUAo33vXTt4@; z{`zX+a^QY!eOKULg8!%1r~9cVh@11__sN8Gah^JsGp4pjOUk^WvEdzU%e2+~3xhPf zug%A&bHn_Y*6qkWnfk1lyhVK${6wDx^tp;Y@7b#%IUJi_Kc&g` z%}DSRV8kbG?X?P z^f@fE_o&SJlyfr{MMui1DVe=F1ftu~aHQ3TB?T_)_^jI97-h7Yi&@=*# zwe&fEWJB`z(zqhlCwC5-Raz{vJNSOw!IQ&q@3=d7<`?zz1pUn6nUDS+Zu;8I_0y}_ ze)glEbWOGgHzeOvKf%;6@62VRGV4Fcf z_juP8Hzad)Ow8XhO?z!dlkR8NPji}9ZoMY`E-AZ``#ouTVaqfP-HayveyR3Urzw5f zR_$x$DTjN)ThHP=B2C$}--3NNVKbT*&}Y^{r>SV`H0hYv{cLb4zf;rrD{WR&{FuMA z`6IM2(0JDUY{2h?)OY&AMmo_Hor&KpDWen8tQo*C#NaQc$Qwsu}w zk=mGy{Yh`rbCb?zT7Pfu{TA_zAv{+vZ|sBM5t*C2zR>+_-TkiZnyqWA``L&1Jz%)r zpxoOJn_ur+v|aUpYuk70+GcY<_|}82?Zho_8}5JFZs0jeczv;ew$1JT*!;e^>x<>| z*_Pi+hSwKQH0rT^xJB0&pU}sgCtV+(Y<(X(u5#Kkp6cCPwaZCcJ%3iwzQ=R!+?PZ9 zEy&I0&VA3&XVFVe)AX&^l+Ha~J?u^Yip~v|ZaKnbc=z)35lE?TO9PzuL?^UBtuM1w4-k_1v@NdUQRj z-`N-Jkc{T(b&YN97~kwXO&8(sr*Gw*f)%fSpJaE3_xXo#pWg}i1pM+Cul25hU)Lp> zEK~eJnOT>lj>*hkoW}Y6;`EW3y%%M0C3{|r(fZBWsLaBd;fac~XyX+*=i%@f`Yybq z(2D0Ly1ca|nM@fT@NP?&c6{SfuTK2RX-KmIB=t-}?a z(_Tq?i|f04+jP2erO`bE}@yJUTq z2;SZ$m!5acdN&c=uxnpFg12kdTZ!PGyB6d7N7oGKr*_ML`Ea*B;L8(zz)vRV>cvC` zA+IJB@HLlES$Fjc{?eV!FW)`uwqC*0yBFj8 zb&m|_g*`K1?(f+L{8rCC;PZRY`O;n)gk0N8A$7eJa$hfX{;OW<{GC0r&g&a|wg;Wp z_0IaXPq3mloxjyP>!UuwqCJc8U9?vQ^ecO1z--*B5BRn|eZZ^xa8`b^PX;0H^ijwM zeH8LhAJUublf@b?>DwV-NjW3+jI=X2G{dF-;^DyxO4UWf`R?jHQ@RE{`S3;uZv6_| zJ)4qp=*5V7Go{^YsVRR+sf}Ls3)>~rR;Q*sm|EUyNtfUhCX72%T78=xJdkn@@jp*( zbN4PSUr+6R&MqxCrs^BAzi!cH{!T5gYT4$~E-hbenfP6omS42I41U3OJ73?W<(=EL zTiB)bBireHkLTKMx2#Lc+uF6?*tO+r?Y8@8*Oqr=cK$=pmd|Il`?PE8HJR5E^I7}t zzV6!cpY8Qs-d}D1R?7WdgHLy;Pr0{iux7`-sLapa3%oYF4cgwzZiB8*vfIFaot**y zZT3K3E$-6p=`O9G?D80w_!|>h&-4rycFW-@c(<%y^bA&Y>%#!=mUUOp;Fj*i_|DnA z&22q{dv?#j`Ni%T@Yna~qre_*>Usum^ymX~PEU63r9CqUU)ocF*Y;FkT~A&Jd#q>2 zS9-SoThDgMo?J3E*#S-S?{d1RF{j^nIlUvyX~WJx~$K>?RuEFXZ_oDLMj(dUEWVb=vpR(Ja zYkhVb_%E_E;J;B$@9om=sV=P_3v>EaBCEb*2Pien6HDzgvM)fVJ?H|(+&T1Uh8vp-S#U@kw z>qYH;S#P!FkKDBRpe=toMkn2GS`G(4+Db23+}XC{%WZ?l+je}hZSZ2-$6z$)?|DJW z2`hDW_!*h_Yibr>d;V=Ir?n&zi@nqT;IRBM+^4KkWb7)ygn^#g&ZcZ7+ zY@d?BbDYmpGTu#1`8uV>l8%-`hP7YRIw&gUMckB*kGAGd2e*H|HD7k#>7mw~riSoF z=5I4P)wK?8XqkO!>)?Y{ZHW1_RU5n;TeZP^PU|*!7qwm_we;7mu>Fgv!QrfPY0CEe zUD!KQuEFzkY8%)WQx`UF6LHTH$I0)?ljh^efaEfT1Wn@#T#imMjG6H`8?b&|ImKX7rHHKH5~j>xL=-Z z+mU^=x^2hz+6M16?w97)<@!3Wo%ZqKcJ!r}jc;l-9Q=OR*K2J%((j+!cKoJo@XeO= zb#2(!E$T}jvVEl0aPY>kuk~#^((gapc3jjhSk%tO%TMUyO;B{LR9_F$*Y8>l2VdM; zWAFR69qIRyb{%hO7u@udeeKq@&2_24ahZ&`@4HTN(Xklc7B^x87XxsD@c44oJ<>* zGN@P9IqAW`U&CxnY5$kBlohF2{H=@n)Zu(4BdyIlX(@}+y6aE;bmZIi>(e^odo^u> z^$_m+eZoK1enD!Rk6QBQ*co7axyo?xT^WnOkF@OgR?A>j%Z_ih4Bl+{q1KGX4#E3e zK)v6hBku|Sy@mdWO~n6eef@$w|01I!?@|0F;}O=WzW#5UyS7H}f9pJ#O!Ocu;NoZ(9boWnfzdwq;;j2DW8jTL!jeU|R;Z zWnfzdwq;;j2L2zlWpKD0>x4O(fvs-`=Qk5QL8G=L(ic5d z=Dh~jvEFmgqE{ld==U62R_t6DTH|weit2jxP|JLmNPh1bF1CK&nhUq5uuEac^L9p9 zx8F-|aPHXCHC)IcHyY_CsgC zcD6-sVYvFv&h~P4ptA=#dz7)*{_{#(ci7#*&|}Y?AOk=aObnm&h~P4ptA=#dz7^6TPuH{R;ZgydZDTp5E%3g=x%o%To(C@#!xq zKhrFq);JIOjnabjz~-N(d`9Cu)s$}$GzJnT?HcFtrhFz7Tr|Ho<=Y4I?`w|F3YIj>cL+8P-gKTx3pxhVhc?G= zAL!#^@g@`vHp_g*2~$=oj02Hvr|xX zS#$9_^Im&%yq)!a=tZZG^x*U44@`uj^UIxBX?Z zLUbps_3mvalsr}^!$Llp|e|gaHdIj2Fo^!ljZ}yir9j{lE{pC}~>$PKl zNe$1t+Xs4;*k3w3UatuI%U+JxYrg(+faCT0uD^_Pyk5cem#L1|tF`_z&++=$p}*)& z9KMyAFSl&U-{g3`4(l%uJ6^A}`pa(|ua6-5i@tkC{p%H0f7#%8y`JhX>8Yx1ALtcR zf5~>dKC0+1eH^dXO8sT1E?ysj@|Sgv*GHTDFaoXOvqn` zI$j_D@t1Lq*GG8#rOfgAXpXL)zhh~U-ZX6JMLeWDcJcDD25^o!gIxY0=oemI}neZ{UJpBYSY=hK7>AME(R@sk`s z(fIQtou+5bkMLJIe)L^YyxrgEyKA+*RgSm&nR>@BaXhbnhs$e@FT5w}r{1kte^){M ztZ?xiT)aLn81{d66mNV_$1jQT!yUgO#uqz&b&NmV@trS>`nUU3eUGWuySwAf5iHU>3A z7&f_%y(a8u)9cAw!uU>`T$euN`1p0~^I<=mUT3}y-<`b8uP>B>qjHAbwIx zG_Ljty?h54w*=iZ4zG*&0>`g{SA6})g(0&#HFD^CJUhA=-(>JWr8&MIy!tP?IO^Z# zhw<>C&Tk9DZaDnI|DgYp9N|D~S4e z&pGC1MeE(i%_lu9-til6kK%264`rg0&Lu8yyFM0|9;&filYY3gVGQ-xvNH&J--K74ce+>J|8#V=_c^)oyrYA_c*+`m^xyv-*) zxgqJO`2C{(?RnR}@akv9tr4&9xz+MMe23tN`!CyGZq=ggy|yrnZk70!jNHihs$=IBb`-FCo;mN8@zND^@;lbyW?Mi*Y+;EHsWnO z&*K4Lb9uWKUi}1rDGYniXQ#DHpf{G_WQJr%B_!+DCD zAkXpp?(bdy^Pi0L*#5nX3zhDv%mbfCJco0*jOW7u9ayhD-*D4IFd1HY7Ty-Ece#tt zOhkM^Y~Jew-&~%LaPbpeKbFt$9bbP<(>&kVEy~X%Hy$Ph(Q;DvXuXFoisJJfe>1%D zUwC$5$h36F-NCy@eElI&yv+mOz&EFJ2PQ7{Q6#@z@F$~JXg8# zTFFklqF+Km%C zzGgW-sEgt){~H{?(2Y0iKbxnB(vx$4Q$5c*e*Th(w;ZknZ^!$m#=G%p^WF)4l{e-GH;?H%jkHV-`3U>}-}NK>%dVfoeG9|zd9KlV z_;7!%j`;99zVLmxx8v7GJg4t)Il7$P!p9s{65>Q2GL*!+Asd^ir@jpD=a z17qEn=1OPh@N9`tRcAB~BmVk~JV2KRJU7!@9mpOUqz*^^<53 z^<(?xVaFHk8SyqgFCG~6Q}4#l2~jt}A45I}Cgm17CNkf_tN-~=M7;I0W>CZzwu<_< z@zbxs@ov1?JaZ*{bNl_5Kf>&}Lko}yiw z=B?lUQU7b*^>&5p{|3izbms*Qt8mFaAnIp+yQm*KPu0Uq=V*7GZ2ja6)%Lax);XQ} z?l>*eLO#O&|8swYe-&Q+=iJ#e53LI$e!QE1xO@tiJ>Z)gZ`vK|XM;OmFOR|l9sc3| zb>~gX=cA6#Sw=&a-=~Bo_!_>s{#y@^wztrY+g4F{&=p?&7hYc&GIkt&4KF=~?)u#J z@6-cbyt}?NJ=r6gUvD*hsQ(y*u}lj8=Wlk6Y(D-_c&)c?X%ugH_y@fDS^ail*n{bO z`W+SOCNa=vINpDP}Gh1(Io=9qE(to1w|^QX@9h-s!EZn zh;1rWsVdS1Q54Zqdd_#wxxT*lv&Z(1_f9R%*gpQA=R4ndedjy3-J|?NTz+wt%404> zd_4tl9OtDcY5X$ZV;|;uJ(ShuPU`;<-*JS>-@^FW&F}{p{_G>v&NRbc0yx^UyFUpV z5MI!oeqEcV_RoGdtG^5Xe>cE!JTH8S+L87@$8hT@f{Q-?58TdW-jA}5UtXeiPBK52 zdHof@5pP+>o6y709wqqmO==(1X8rmi!_QO+F7$TeF`Cy&o|n+)B*3x#-9OCg`qIuf z0gm%p?!AM@!}3DL^UmW0fA&MvugKrOVR)9?$DCLAs(*yaU*dVaYZt-43vi(g zg3$lP6IA~4UDUqp-(UF{l;8D+$ocq>x%}?OvburtHPlZ1PILR`0gifO*slLdkMgg- z%iPZU0FHROh2@Cg?cW%FfbB$~lVf*N`7D>O&@ioUGkg#0$x?pu9&^8s0vy-z^4)|V z(QhA7ye{7%Jji-|;3V}s&HEzB{r(xlpJzG*cBfz07O4E$Pf`27$nf_79Q(a=BJ zUPC|BxcoD$_saO66gamd@V^GQY2Cli<&!*~o4MZ`md)kg-2~vSnx^4~Id7E{f`GCy# zT>v+Y^8tWkJIN1IJDArHU+*nbJI`=C#|dne0IvIGJzVhcwtESF=})q{q{!=U0vzY{ zTTCY+r@qJVGpzqy;P$_GirTq+OZU9)fsKRxKK~VJ|0I{cxr*}w_yoaU%kcLC9Q(b* z{)8mM{~6%e&gBZVFXO+nX2#D$0LSv@|DM{H^_r{$9Q5#$_Y?e$-2NeeqyI4M*E)ga zah#XB+j(8X-0vU5I_!pZKgZ`O+`N^B@;I*x?3WO{eTU&UG5rglf5UxL{u$O=1P@OG z+(Zw5+XMbe5BRPowf`LNccHhh0bJwxFKND*#|&S`TIPN~4sea<+h|<`&%f$1{C~2# zzW7!C0N{v+8`*vlJRE3K`Q2G+NBX@CaMOI>0D>OdIly!x<9{09IG){W)DEUA!Pi}< zsr<9&37#Ra^;v*p`NfyB8ZyuDYd2^e!*b`%0LOlltdB{*-vGF2oZr`QX#f4xzLdFc z(_H>mfMfe>U!rk}-1#EIFS4Ip;BUI$+|CNXabC~PQ#;~cDt(m4&v=08>(_Sxj^+3K z1&#A0fvxkuPVlq;O>jW3e!T>QnQuEdZ!1a2)LghhS*RQ{0c#`4QF#JahzqCr@l>POIGt`dt z?^#_EbD-ku$7cwhJ)gy|meBK4ze(e7JWTD#_@4tfj%W8dDlhyb`3dv790#~A|4&q2 z_%=L1nCA6qf@9uhd{r6#{2uC8&UbG3ZElC{7g?821040_lZ>}ps2S@iE`R=PRzsd* z_&)+%Xp%wt?LVNoj^1ef()`CY=#2I~X6buIAY0UX=ME*e}?HW z&E+>4-e7%0_Qekv{v5CSEnNOB57GE9vR*BC_!hvC&R=G}eSyn29;Wh*7qc32lHv9v zX8hmZ1O9jq_?H2W_}s(vFXOrHlQf=J*)FSazZ(EIt^0#L;Qz|)oM-!5=>M@t&F%aV zz;Qg;C$qZ8S9oDw1US;?gCY9-QIB>OK1J;$uP6H7%k5kMIP#yf%ztFQ&j1|p|0>&$ zckq1Q`WUtIY>3`$fMfaV7(XIsU*z%^AENOK9e(C4m4A@w`~bKAF90{;^9KM&Je+?& zjbGvizW-@z=h7z$F8Dn3IKkIgPuR`ve}mz#a`|6i_)7rC_Ah^&#wqxD|7XnOxm&{l zZ||e>U*L8=2XJi1Vtyj{yy>$9zxWE37x+5BP4j(1!(l(Nyt|Iue+uBJ-(oI6e963? z({MP?V*NSE<-I4U-{e=b8uAja`~2^9&H}ioe!COkdfnM>l70O+z;Rv|UrTttjVrwZ za2)@cOqL<^_NL#bcBYvgWdHs-z;!#!XZCZg+s;w>Eb|Q+|I+~1_-s+X;(xyTb3D%T z1Q$F!!SIW0Zwma{&+~Ym>Ms8QhChFf;1wRvR{@Uu3-kWrOX%v`0LS^B4bAsI0FL9l z%=i)dv09#6E0!y_rb@TAHj9-+v1q|v0_AQL-G-;U!g9s6qmX3M0LvI)lTny$XwOa= z%B2l3Lm&2pp`5ATX+yc0C@f`|TguQ!YARYcZJ0;eFy^$OUqi8s0aY2p)G~&t8Ad!| z!03djoMCi^UMCHsi{d1T5d(r#(WX*{iqY|>qF6Jmm0_t;QIw^k(>APUCQ2Te=(sY` zaTykABHGqOblMY9q)tSKIvMS8GP-C}QAAHgM?V!s&UCcmbhO9mC^f-JXqchQOk3*q z`NMl>=ap);msL(N7gDK`5-y}pEToE+41S#~xn7}MnYX7-xy_neQLXZ7t>CqrZp*RM ziM%&ItyH;Qu9ZEjzOquO7fOlK%|fH$HWT&bd)=aEC9oaEKbC8&aHpwM@Cv=)iB|c3 zH~M4AZ55m4hF5Qzep@dWT~)3YR^3+g=X%3yC5m*(Dv|8*d!<}EW%|)8G*?{@e#w7i z-@JkwT8+HxEflJ51dy;Tz-eHv%B^|LvfEOHX4CZ3rmA5(hD!8H!cO-4(b%2TO!cdo z?)PJ+-;Wc0e(cw3zefADIoYSbsXo5}l}DF?aWfPCtpSUwTCQhYsIDIqDGR?_dZTnG zQZ?Px2&`R0XtC+Fy!OgUqG**|0F+_7HI-LY#R}X3Y{5`Ub+uZlFBdAR1jG$vXm40Z ztQFUDOF(zV5^4o@9MV-`Q&ZWdwbBGyQl)max(QVR-xTi6uA)lqKQepBQO?4gQc(Ez zg`>(jz`z4@M=Uk>k%ieqxqa|kSLISSpapxw^V!Ma{rmIIl3JSGJMTcN5f)-657b-U zLCr;SCr(UY?R0L%SwFInPpXOg+HTu*s`|_1GGNSY=9biaIpr+mZDmgtD!_Q$k}6ax z^c!aNQht>}tVYlT_~Zc;i9;Md8|Dd%VfThEj|x0=Jo=9g^;UMhdP zZrhn@wrhuLIIn2#VGAnC)#`E; z#-C3sC#f7esSc@Xp$06j=^TMks@dA6LPmPzV18CXchl++{jBQ1eGAwx3}oq8-bXmYF(@8mcvN9#V6=ar&-0I=wdza=?&bmfOJ>6(~PBELkeLlB$-|^!rodC8FrIX3o zV$p52V1;1}xH5CLZegi2+ElV!gC!`J6v-InHks?FcB-`rTv4;c;;D44e%e{@B9ep+EPKcurO})T?%0J~s`N>w|%b+)g0brDHzlvQs5jTLk_@{GVpXy8^P2 zmv|``kiN3qOn4%KHP`5f>jAVIEoVYlxnemp6j8>lSj{C#?02KoPM$7+HVR7g#-^s& zq!PkP7waWgY2^-92dR`rrAA|SGg?o)0+tY<73to}uhZt6FP?rdx`tZe51b zhpJVw0c}`Dw7$GeRRL4qfo78JS{aO6G`oN(QmQ}Eq^+7(f)b7=g+ z3wxn)>Ry(VGuJr0lmt2DYEK&o{9Kfl><;lR*%|PEpi=?e5MWd07QwnU+QN1cjNt}2 z{6J9v13h93=O;$?d$$PNR(VZOHd^2)0!IeW^Ax((U^k<1={7Lo39_q74$J|@0tKh7#8a&a`8N%Rzkd24Ty(E?J@WvSZD{9bqiaE6wd9ZW#EUyTM)rD zjL|AWlW3T~^FoeyE zw$PV*#O+$@K5vO4Y^NADapOrZDk#Mjx5iqRNQteeO*2aBKe`i_P1A7O6k z=??goeAmSldvrc@v*(od#s*cvFLi}6z zA<&LiE5;ZKG}i5{BZD9q&1=>-JMJGoypp}NJxA%lsFI4UP|zw?y)`4i)txFY_>&*~oF!f68B!Bub2{2V^)j2+G>t)*^is87n>QYWqa<@G;ya_Bm zVxMuxYWpwvzG8cV<09k`)=twW*em|YSHKNrX4gY)-GMDo(ho=UQV5qb514hExXrO~ z5$zz_a{M7WV#{?RkD^XaIt0gK$?3I1ORY2}GkEYMtIINMYc4e&3hfaDV2W#wUy&X` zJtgf84#--rTCUeCZlR`+{f0jA(Mmj=-uZrN4B!oElmmGn;y*zh9*sdmDnz)WO96>v z9IGBF^qsF$luQf47;;rO`pwsux($K^#pBi|Rx0h*nqh4!}03iPHppn|@D03pH{CqZ8_RftZ6z^&9IM8Cr|kV(21=A@E#1|o2<UrR_C2wg{J8%F~MQ$y$gNUWvFfuSiEfOxX>H^}#qe9p?>u%Uh{8 zt0rv`V=B8%_JDmZ`!6-^Za}n9(VK0qwq|RkBkp~z{mpuHzEE8*6`aLlby1?G@H(tL z!?$5|%sh;uRa73XoJ5nGVwQ<#8e>5Mygw9qs>#7O;HG7w((hb3eHt`#&waGzu{rX$l&ZW+QY!2+*%7SM_>r&sAk!)l0{qx z+bdbL)4JDn*4~%s=pe##H_8#7J{SXw|JZN<2Bt|eepWle)`rW*9kV}d@{78_OiQ-q z_%^=gs(5o|C2(^_BX|2X7P9$dPJLL7?5FrFTTOvG#jCxAiWw1t0gGqJo@_VZwt|ay z9O1Tky-DX^kviRKyCwk9m*ZOncDIx&i+y}!Y`>PSuKlXJ8`ge>SX+|oOyIH=#it}{k%FqwC)dMsvB-`N=5WBABkPpL0WQFU=csQQ2hg}?I_VSq+#kjq z4RCMOM1rX#gag!zW$<6zQTEmjaOT49V_{0-p3z@6X*`d#y`D9v7q7AIr6q*#j z_z^}gQe0>zGD}zzJR?t)OOU!T&8O@W%|dMz(syY9M5;&5=m}*(CBi9}bft`jM^QX%tw+7vmA%ecUJml~mR3DGuphm(Mb(TYV zWZB25ylM4$`y=3^S|2KXGCy|Mk9~d%!yo-X*3Lta{}ZA=n(dJ8nA;pLu3)%ne>*%_2^lqnb`UYGPFwgIAn(%3{o z+65kx;Ci0|54Y8!HXI*94uvgqK$_WXzp*hkM6RUtEhx97)dQ!BX}%1HlwFd_0#W4F zgyZ+Tt}8v*+{1nd8w2&lIN~c>o2^v*p=fMi|9RJfn~7U<`1oy$tHzFkRMJa<>fZj0 zp`FCCNQdeqF)0>Wp3r*le9(qz#eaHt)fV0``}kuR#81I)xvO=c54Tw%iMd36bSP#h zhlt)6LO{MtWb|OgZOIJXFKnW7e+yV4Mgy6G|M({-W2a|^4gCX!atMc##-CV`+l#*~ zVgW$o3C7gpRn%5_tFZb()Y|wj8w3@*5tc*Fdav$spi_8AeJY|knyieSVYZI245E*0 z?=p-SDmO~*CXp;VWC9q!H8G19tSLmf;}-9u6v|%kz4BMxN*h3Z9ZGK=^z@*MiHI9Q zFg1lXRaYW-&B*^hC1@b60~vm86;fwaD$36f3eGxATZ@-o@R2kF9>m418~0M=qidDM z^qJbe8;FAnBZ_wq7>r$`(;Us#fws0B)5ke2aG@2ejZXYN=hKQrY^U&shFsTxrGt82 zU0lDuVLxbGMckCCS=#VwqDb=Xl_y)Mi4l!&8gLJZfy*nK4n8JmAKv8C$>qa5(qL=F zuNn%I7z+D=6E`3Cp>y#quxQx4I}$x3Qzjjq`q}aC@V9C@QgVbTR4PJEyJa7QCkKR2k|E|HIhNVMQMu6 z*Y0V$!Koi}A$KxRTZ4Q}<$6my1tlNZ^boA{zi)p1pl%S9G{BU+H ze8=7&AYs_l>YA3CuD)3g=L-;8+8s!m(0T16^3cG?<0q+cIaIflV;wHhx&7BOkZZUj zjG;uf=I~;?Zm(Dx*@GZNFGgfHp0~>U4heHJ|@GW`y zSkPzW%-zeT2`zq8YHS-mYDxy91JyMClN_;y+=!QV%)SBHg;8gaNtM8T zIf9piY+HXJ&14+VIDBXOm2(+A;tq>3Nm8G3ID4}}b&cOv4eqWj?aLj)5i}R?#ej`B zhxEWD3(5BC1$~)s@9Zzg$soN5KI;$MMaHD|6Gg~d+QfSpDU}1468u3PJlV+-GNe7R zCO<4_Se2Gj?uxiKSf8UB3^r1Ze_JF6smS?Ik1l+2RFdq0F-6EjLX-ew)2TeWd$jFV zd$|gaX~taXfXd+GA-k`q$a5iuiA0~_yrmV)R2I@`;yANET030^&E<&OYFCk9_U%*Y zMAESO(+hJ)<{;(U5@gTzoHA;f(@-&ZF$Gz3fYGH^!%sdQ-r0w0BZ(#FNY&)$3u3Z+ zL9g&hoE>KjJRlYi&hUvP<1l!FxbDslEoN8;r$TZD0U^PCzT}F7nmWeKwLu5A{vKT$3!&Jmf7(PJ6Bq?v+Gm@L#U&W{FZK@MISRHrSiC<)liWyYf& z2erzsm|x5|I_fU8F%IQeq0)BqoPL|Qxju1(+x+F|qu~&5Iu~W&gl-Awwu7QDssp0V zj4(|G5?r`b*m7?RBmR*Necr8BtMwY4BP%|=>`Nx)u(?&uBE<36+m*na!gvJ$P$s#jQscL=c5?`!f2WWL*MSP9^1 zZUP>#dn-CQ6RWj$0U`9nZ214fpZugQ^Gk)nu-jBoY{~M^EeS22d$iAH_UZyt-$B#IW@iW+3 zWw^ALyeX@HO5Q{z&hUbT@h9`YkL&NH;?~7?Wc8+nskFa4k8`e(WR+9Ohv-oyRB6srH~3{k?XGt^=Q2`}&p&;K&~ zhGUZc*FH$~*FH#)h|<@)Dn7}CA9$NxkhY^aQT zN9w=uD2@Mxk5R?lTwj(!`jv6LFVz3bk5T=XKhE_-{Yia!|6r)Tb(ZQ|XQ=)nz4@(3 zJ$XM6s=xR+)nEJsRg`f`15!`EFF<{S$^6f7{gYfD*C+VO2EV}PJoldr!KJ<%f=hG3 z_s|q|jpw-k=if^W{5-rp1b;Gqp{v&eOvX+n>HoQ}Q>)Gwf>5HBAmq*P#Z>>rLj=7` L2FjH}U#`|| z-#Pc(bI(2Z+~wSR-^;b-u1QHIlcHZq%GnB`z4;D-(tZ`C=AvF2hf<^*qc{|+a-z}) zR4KT1I){AH!|^ zp<>Zsrc33cO@kGiLwk^6d7B36p)sAvuS<(px*Xbr4EN+~pdK32Q8!tKwD_!JO2mU6 zy3;g#o30QkC)L4pJHOWAcMj0f>7j)Lxb#p@P8oS`mhqdD9DoQr7${cp+Z&RWm-}5` z6`!}gEtE-(~m_M$r>h!wW zhStTWFD@!Py)b`lOXJvFQEu`l?c`~*L<-uM6kWiX_$`+>s(x9@`Bjbqefyt!N`L

q4HLSIi!6s!6@)(YJqc)iFyuJ=>P{dw%NPM|Uo9UjFdJ@|QjwUH+15N&gQAY^>dW z{)fN+VxnjEIf0?)y?E8&RsBovsC<0OpIR?V`r^&s*6I1ghN~d?z*r>UK;QWIbD*Sn z^baPW{|{7hJoBLV)y1pGfsK>zy$23HJdi-24<^vh@6dV0 ztH+cC`R+(Szc)d9KOuqs|B^tS+ywdwB%q(4Am3{e=;w_D@|=`_{<8$_=!pdQ-zDI) zFF`-xPGE;s3HX;Ms9#rtdbuzG{e%Q^PDnt{c|bfnc_Kl*aQw95j=fF<;w0RA^Bhr8 z<*3I@#*cFg-B=D>Imk3Oj#J50B(sxOE9e=yDpS3oJq|}04kkg_JQ)&ik zeDugqWw|=#{8Y!kFX>keil!fH(&Sky^VR1ebHRslcFK8~K9;XRKR`L>N%{4170C9r zCq@r1ihPehOwL`%m-=5FBj*<$6zKP#j@EmBUM z{t@Vt@;hSq{}T0*34OLneOe$NuiK>_)=56P9$uCF=f%|TBBY`GO_INrOn6~_*5a$G z95aSkC8m zJ$HUZOKl~RG}ksPa8}On+3l4z70n)Bb49JM#aUTCqXIwW<0~8Mn<|>UXFq9U$M{ITwmJ& z;TKYRAT*h48zJA}0+$z5)_E(M%Mp|7aW=N__Gu-b=|p1h;!1CmuePy4V^&xR=^GmJ zJQw8Jb6b|Q_`LO=%DTqN%kvr<7nN%{HrBP2*9uwkAz5AAT&E+yJWVj<2VK`YXL@GZ zJmoVAQ4^5ZG z>}{&6sPvZC*GtW6nym6v)Sx8Yl;yIJLXP>~1+@*H$_ig)4cJxHx%G&q z)@H9~VQsUowW3awpwK8mWkrimCl%75@;#9Z*hm&H_B45$TN)cG>S}#UJPU0RF0$(M zdWvUW-&WD?DKqjslnK?H*VO7OXJcV|E2yN>(30$VsES3kRo({VQR@>mI`aZy(^7X@ z>VoRJ)|MI(J;PVm0>h1Mn6RM3EUt1U?5)6~HNS>MY(LO$v>BED%VtCa)_Rfh>Gj|( zYN^>P>K-|X))g@UAZGaTD;ryVk`OH?QkjsvSg*3W#s%nr{(Id9S~^5)Y(T?nt@M@G zL+wJi>SnL8>d{@+YdvMl70teCg}+Ku<0AH>3o6kIGSaSiW;HCTg|ySXEv@xl(aA2U ztYciBDCV@>%DVg(pnA=gEdd6vM-*0heHFEJcKeKrN?lXE_2T<^4T^ck>$C0w!-rA z@@X(ldoDWhih8|wpONpG4T~wa+fj?H%?+YuYn@Ca6MI3GcVTU%*VE#y^H%yAo9%hp zlSjkHkQZFh>TUJfZIsABV9<>{uLi9w!Y+~K=C@W?dz(GA^-Xp0G?!~bPt;ghi?)4X zt*=I{K)>v5E}zcwkFSxCd)f@U$6i=cS?L9F^F;KdD)NRZY@Wgjfi1*X30b4EkY%v9 zw9c=ug<>fJT2XatLnX%z=cI+v*`Uv!B3Q~=&#P>#@_JB{6(o;hEe%7jce||jNwCv$ z_HXDc^x6d}^&YQoWmI{-J+Js*uUW~(p3Az@O^Ma%p>1QlSWt`6u-Svg@hzr19;wfp zF}1O(wa%;60!HrWlJwed{-T-+z0ZrZ1$%B~Q>)P)wESBe5=o%fZ*GxyG3F_izLMqz zEhPxc4!+Kj;gsGONskw3Sl7HHe?BmJU z-P~ARizT`RPBi=iG5yn;rJj>6(&5y3oN9^c(rGoRN^^Ts!1c zsBBuIpt*P(7LvZQz6pe6D`)KW6}3p}g(4~tF&{H6QI$Y7R4J27OFemGZOUYqb7H9{ zcWiEP@8j5l$g@3Pdz?KR)cIrW_9(DeDV;sLWTMk!$519XNxIxnp7oE!dD3T6y+2@O zs|LL8&l9iv?u^c|X!^%8lc;y%cv%g2&3ZL_jscG)ipXU%;IV`dxrz*U=B-~2172?> zj8hGG%A{Xz172&XTFh(%-l-EKoM*tZP3u>U0gola$kk-P%cdjZ78~%=WCgy>fFBUy zqA1G^_zVNS(||V{@T&}Xz0EOtwE@pPDg9bwz#G@?)*A5I&X$&9odG{cXM(WHfFEqY zZ!_SB81UN-_+t(Dod*0+1AdPIudm@TI%vQjXQ1C}z#nhGA2i^H8SqMGuiffH2e z;71tfEe8Av27HzQ&oxZ_vKsJK9f>fq8T?Rb&9QA9P0biga5pFl&3k~?42K;yf zevbiPWWWav_+kTouL18i;13$`fukBYs)3^#_@C9lXXX>WRNFsHR|BbU?#43RvMygzc)QxZIenAJC|vLs z(BV_w!LRv52c8JmFfOzwjO&zF37ZIS7x43hxwIJS67W-mxuh6cE8r&xb15;jTELGI z<`QD4Q^0>D%%#Inn}F{lOm{fcB;dOVbEz;iPr$z*%%#APTfjdd%q75(L%=r>=F(rt zCgAG`bIC7c74VgWxzrc32)LOrm-s@8fG;D=rM=MJZ-6*uK4C8Dh4u(|4q-0kg|-WL zI$p*8`ZN|;M4ZvJd3t0rbk1&_gLW+Pt zAbcX>yU33DkcqzL#j!d$`%?fpvBKVdFih4u(|4q+}?g|-WLI$b--LA3RwiakMP-q6#;)h_#DD} z4~hCG>>#{Hz^@YK5>RNnfS)JKrJqojfS)2L@@X>i#MVc+@-@Rep4u=xF6;(HTlleqN!VjS+ z0%m__|H$dzajEKmOKpE=uY0DmyUVuLp{U&(yB=WBz5Pt|b3>kbRZ;%lN%~FIW6dXa zKmtYST*m{c>RJ5t4O&MOyUe~DhzReg?%Y(}OU(Xqy`W$Eu?YNo)r}vYqi#HyteUo} zJHGK{MmVHN4zcN(f7@B_LI+CgpjoQ_-Qbm|gsrJB0Aspj(_#6MzUPKs8Qj4+&{{8C za$(5@Gtl8XRR1^X%DLG#)xSHq;LC7WU3pcuRrS9SoB-8rXhU(-iZAts z($bz|nO%`!kc`Lm?5D$Sn|b|Sm;XTUt}mEPcklq^37nbjf=m`yaoCp{yalDuq{@j% zMf;OdS%W7Nh?a`>mq4o150~n0!61%QeWg?tx>Q1-WI;YRJ4ao%%U89I=c8!}nd?wJ z;l1XKrRnVlO+HJHGf5e=Wt(Kxu!Gf^chJJw=upn44X6 z6iS&BDPsR#rS$ZT z(nq0Qx?X+s689(Zx2xiN%!39wR=1!tv(=Sl*%r09+q~>pRB(ttY~QoA=7eS>niE>k z?qJpdXkld?$o;2ftLt!p>GGOpPqpd?hwifMq18==`6?dLIV;&(mOV^S{qHg1wFkrD z5>20K@h7nQJAsJk^N1K(8TzR9%{J8hC7V?LCRVK`${-<19jo>u-`vBiwmcoRRW3|@ zsxb8;HBgR%ePXorNZppRZp%g8P8DX}FG8I~O*{lcGM|W1Vn(pve&_%#YXrqX=P&`V z=n2qSss1N|uR_~BR?J`j@HceE=zi3d>)3q-)~?2H`?IECmaxjyoAvm>swNB?Y)Ru| zdZs&BbE1VBdM2%lOa2^D{_cxIbC7><3kZdL`^=WqE{qKrYuZqo%8U(V=%0ei$Ou9Y zLlXP`N7>BKD z>11^u%kS?-&)kG6SYHPm)$mgd9Xtba1sCF{TGN;6KeYY^rhDS=;V>kayxuKR><<1x zLlmxGArY%J#3}1(h5i^yOzkoy44Hb_(5Ig0~a$*hu@9OPy@{rgM&a@^=dAs^+uG`I$NXbOG@ zens#&FjDc7Mc}aEi-xPGxQYg;Ux2oFTquE2Chlhtt^1VPXFfeW$&18+@-}? z^teV5x21g9N9K+<5m&PG>r_;yxqUx?b@XXO9|guKR}BTzEIpx+f0c;Tinja-6pcZ}gw_Po!0_CMWL#edWky^HVy~Vi#=s7VV zp#9zWsixESP;4#ZUqv#03n~yem;amKLqaccGv*t1F}eO3Nn>)A8IF({wnj31G?L*J zy%~NSEK8i>*@tI1DPe~9@6}3}LVg$%+)a29$9!Rzd30H}gFiNHVuqQQTU|L5)35tb zPhu+I3iz@euHw(k9Y04!lswKUNN5NijqOKm-(+zXzu{|C1DB_>>;8BuevVrwXDQ|E zs?Ez@=8(pjUT0Jn{zRlxwlGy>Zc{G0?BDccM0Me}M5wcRmP-ja2%?`&XJzi7ZaK>g zE~1(62{z;Mf9djvg8A^EV^HT?wSpeO85S$RjW?UVwE{*%Q85*;s<#5@+4ojJ=MPB# z=hM`L{twU`qxJtF3l)vpal*ev|EuuE*|+NdUM$x9Z`A*ek9GYoVq+II@i&9ER(4`2UC*{VIQ6+I} zt?mPg*B#u9?1N#@twW_Nq5s7CNYDx}RD`)_=mtzOoc@Y zxY5&BZQp`{;uGJ+YTyzK6uUT3yyJ^Z*h@LRH+r(qMLf4?x>%>}cRF2G&mAIb3_|9P z1rUhi|KkulG)Dk4voZ6Bo}-$!uSFZzD4#ZvT@p2rMdUlo{eXPB`@ylRmoV1zxFj1r1r^LzA<+po&bN^U({YYn#s zX@Eak)j0k8tr_B{<}gLn(#&6@`oM{PvOZU-~{e zwJ{|2B_@95U9GTo5C`j^6)~d^oe1wWxDv^N%lXrepU~}?DvL%REMtW{903am{!W1Z zHk;lk${~#JdOpV3^dEsU+Vpf3_i&rOiYXJ>bYm|IVbf-^Fxd1xDKrg^HU+u>!e+G= zVUJ0@1C^xP^l!H&FslBd-cpz8@!xLKH?rQfe)KS#-uJdv&F4}ySj2P;XSz*~(%Hva z<4Tke*4jspk65F*<8Yhavn4?xCTm=}^!Nyu!)*E)3K!p|bEW(@yrbLn>cqsUlK2u3 z2UkHWnoa)|6Y1b1NEUpMKM&w1)Q!DQ&8C}JA#X*%3W6^HG>*jRrN8Ipw37C3BHi@I zZ?Ud~N&ktDH0irUFDbg|-M|@5`dXAW#CY(C^wRe+VU$I`CO8PLzwG58Ecz^RU_3aW zlYWWgs4(a%J$r*e&x=W{FtLaaq`tT*fi0XNDlPRvJ>Fo@V-N47A7?!#FzAtQQaRnh zB@_(?y_6&W9+oYTI!k9CYtWBDM#LBB@exxrcRYxs(Veter;6pWP~-B+6S9yIF1|%E zo%B}}F1|rm?9t?Z1m(~=>E{v?=S$)hAP#mzE1`v0iV1#%6u}^W-ey|P$Ab3?ov{}> zkzxwmt-$s4)1Lj2_%NTK!9QcWSgwca`%N}s^oO{kV|Dq%!55i7+-)popv>5c^Y6ww zj?4dP@Pplm32?C;v+Due-|>I*Xfmj9Ea8Z{C2}1cJ0L96+wg|PUdOjf_B~FxxFNyz zMQLV18nNDMSnpi=VHOM?diMLWEl&T}&JA4T!F8hQpNVyQ|8}+SHg#jr>g(t7@3lX# z9{98;4eR~(=MQ|U!n;x1cZOB-0~=L;M>b{)sFjZF?IK)}&3#JMzaqN};Z*Yj`==Iv zWBw0MUjWPYn>zx)p1bliEG3falVF-Ku9R#@YTvK=KL~DpgLO)u()_?KSMee9e~d?3 zdKs6h8{bZ1YkOfgOUC)Xmgv>s%_33%>~mM9{6Q_==9`RF0ejbHX3Kz~4M>c=KCDN7 zX3k9M^O=Y~JIIo7xlLW^0>+=89o#pZnbu|3aNVuRRs6ZR;}%erY!LpFHR9&1-7XuVtSAt;gsH@SZPFwf6`mx6LjISdP`i5 z^)9-R-%ojOj4AJrWqGkFD$CpOdN>S!z@nI!-;TC`QtmTnVomRHP=;MAAI#nf`Pm2A zTq}Pgio5Ki)?vZPyRe%BTGf0i5?q>k;4($g5~|&r-bQXhRpObtE5x%oC-tPukwIoh z_8OozfPb;f5O%I1FxW3wwQ{!u)pkv zZ!mQ9jZ{tWDMX5n6-^5j>EgbBpej4vR~;!zL@2Rb4WXt)EmNNr@!;c>L#$OJNmkTa zwQ;*KaDVn*p}mieRFfWV{;fJ5?SJK^;cC^~u&cVnJpa{d^W4{w;_Yhlkhf71+!yUZ_$E5>OVD-C*|pC+ z_qA&C%r}61mC4>?vbT`zT_pPo$-Y7i?u!m0`~u0UmZ4QYkEAam>EDp_WkkH7ws$9~ z?OT&n(@Sdc7v^OP(cCqatTAEhiT#9stJD0zKJ1QS2-@i~z3^Fy*>dI`U=&tO-x#tk zG0)wDydl+)*T5S0MLQ9`g6s}KxS3yQ8UPBpQ|X~6VbH9!SE{Mqv8dLCXNg(#`U&VM z9;M!b1u$|<6EooCXsfinO9>~#BM?go*W6HE8d?czEpY=AVLZ37bUB zXwKhAgRxu_(@F^oiSut+Ka~=pJVK%pFk9~?1`GCr{WxGyd;6T`Q&DY~4!OxoGoL#% z+YJcD&e;Ly1FVj}R-5M!cmo75h5_%1FlQqwWg|4ty9*VQ+8`KYq87m536|hPmH@iB zl=*T;iz5G7h_@T+q57WGNaAVc4kUilsz@3rr1v`l6-~Clu3jDl_*aq}?KE zM}U^=rRgsJ`@t>&gb9R$f1-MqZsruuuoAxEZj>Xq=p{_Zr-ONLDt@ z0;yQgYfv<&|8Jr@#bF`#qgdsAJ9ZWIHQ$k1MZHD{wmbL|q!m*3N{4w=73Wd^7U?k3 zV2zjN&zc-h=_UZ^5mS83Akvjqv)X=Uw$geWr`@AO#nJ`p4^k1~d!J`xf?uIxNs&Sd z)WBnC?&QKc=s8g@tt%VD_LDv{cXW7ENE^6!uo+Pj-}mS>;5>sI-oDptaAqL6^-OBw zhUcjX;WM4bM~-W^i-suH7=8?#agAXZ6c}PWk2ADRAP1GnqE;A!BE8^wMsIqt!m#TF zq-Q+RV};=ybjG4S^sGPYO{&BsUCpE-K9K7B6HFoABJeJ@+(7C~J>IasFb5VB(_35u zmWevhjoT3i6FH8e!Rkh0))|@Y1WT!hH*ww!nZ5tV1j0}o^0#$~AQR1pYc~5K6y?n(c~S*k}RQGhq5F{?P6i$ zr~ZRX-505YWk?iJM(7ouIIKX@;9UM(C^Xg`^q@lz&B5u>&@Ao1X>b7Znj!&(z}XV` z3~54k095%XB-GAFeHVY({5|>0t{8v$b0}Mki+>ZAVepr4qiUl3 zFqir3BCW(2dUcn1MQ@3R^_EyaP_FySRo}zqXIG>_SRPmIT`wFh{i-|jKj zN8HRo>tGPQXSWCTOs_ry{xVhb-W;KtUXOXpQwEQDIX&inz4a6P8DfvR%$m#m08E#D z&y@-(G(? zUHHr5q`=|+a;HxJuldU;;3X*KKjSMS$;>TGlfX%CU8cFObdbl#bCCDZL4NJO>m0u( zo#V$Yq$1-x$5}9E%vV0WmwhMp;SE!)f9xFZdNSHM?jf^S=lEd|(MSp*1qaWq8c{qK zFtKyI6igD$?cUwWT)Ts3Z6i&@Id-7hgU1mU>l|l;S+sLJ5E!F#+0`~4HaN#`11GZ#6OM)u6Hp5%H~ zcp!DW9&hNm<2c6-uuRl}{$?ZMU?N{pH1yCZm|{l!!Qd$5qdUh>{W^g#c1@W3^mv0? zd$@Dlp;N^csYeKzTBXMuxWsXeMSaG1Z=dhdN^~kp)LW9dRIBbBj}vuCch0c1C@LX% zi%k6?^b-6L5=B^O&T$!%2G8No1TulQVCgL2wO+a^!UnZbC$M7sMj>s2W z-NC1jbFdXZvHQ#IA7(lI&pQ1Fu(u3f!JL_9wj`M|CtK9+$yV@CwocB$0PeA18P}7w zAq$w`*P>dtxcu0Z!!h33e|2!{>_=|y4>{5|Y$MGppurm}Vk1~=U|>dv6*RcE&hC`# z)mt5yalP+s@5bwNb=m1IY?`G%k^&N}nsT2K=vSxu_XY=I-C3M1QL$IrM-8NL+th!x zGBwbeKGp9{$4<5HvdCP2pf=YpF>jp2S0H=?d#05HCh_&P68|*u{@U5jKp%8o#@E*j ziMR!eM8_c!-(o`|u_@bEPn3nz16hWJTrpioA88PdOTdpq&piW0q<&hpCmEnq9>V<| z7Y5lePlIjtI^W!{HoQzF>OV9L684gG;&n$iH&MDLTdX*!&r`Wh_InJLlBaH2W@a zRv2<|229Rwnwr=Q&zzjDcBfhK@{Q`BoaMx>iGQ-y8CYy_j#-SY^sm*8U#FNeOLNrj zQn>gi9puIvOhKHdIK#Yb3HSsvg$_3MB-x)-{Zp{9JH@7Md^bfkJ)`-XZz%IliI$~g5?+yaiSV1QvJnhfVnOa=CVhH<^F9iDSKWdSJC)N z9T3Njd(PI<%SYCi{3m zd$G%(*Mq|YKgPStY1sJ1o89_~hgJ2rw=pL_L<-Dkf_1q3`-4|wXI?X!$l?DD4v+w5*4oe`ggTH$ zv@ZzN+cBUT**~GQ$!_pd0~d?;l)mTYwj0f%*E9|Nt9V-xXA-J-II4Ikuw7~c9QpC;R&XWY#qwkOUb!A(*wJ;2H zrq;9@rYg(RdorL()sG|CuLhH$DK)?~$1)te54f@|TT@J;DU1dZubYAEn^0+)e_wDi zn==;5V2(#@kKEavc2Lxe_yoZFcp3U;6d%~_#J2Ml<+$o%WI;s;dh|3NpaMxOz9xCUEywdFqT+e=i zt6-VVmFwB;BU@10oqz+`QbaGsbXbUE44;j<*-1E_4cOUjqAQyeql*k+=ax6wW;bZBs zMV;$n+&V53lBQ2lsp)4$puQYdt3Bc@X}; zBTcXTVm4*DI9iImQVQhWTVLj549}RaMCK!DO&H}|fkj0}dY%WnQCF5`;nNW41wMM1 zz0dC8DUXOypAWqZbe#Psj?p|-xtfN>E#t4CT~QA9zb-$&u`*T-TnY=Bi{8Bbka;2c zYw;NtD?0quuo(^p?7gk*@L40R{d!KuV9mNo)b*!B8Yj9*ah=1{M@Y?`B#avz1Y9_% zm2U2MD4Ei|zkbw447eDJl;BMdhA|!-Ien|CRPH zDi@x%^o#eAcH!uyUs!&mD9zdar{EH7S2uoedi$1?rC;22y-D%!SoC81malu#+qZnv zlia=~T<-s-WaGytmaNPl%TH}!f*_7!Vw>gB)0y`m2wjc|NixvwhpL*rNQm>FD`%Pf zuc)<8p17>%e05{!l*>P@^6wsf!2EE>IhjwwJeH?e_-w(m{{uouw)Bu0V)_5N=!y0( z`Zb^I-=?nYue3jDy1`GflI5!Dh7}4Qd-}0J{aRjc+h{9U4*is2uc&>`H_nw)!`K=# zcicgrgIhuCYpEnaO7OBjp;lJjpDpH9{`<4rfK&q*O|j*Md0z2TzGGGYEfVbp{uV*n zqj^*1pmn3ZZ^_pBY0OF;>ENJjby$=#%y9nl5G!4*AlQ%--kny*tk8&yH=3^-EF^kN zW3PQ60?;b5>eS`=hIlQ_zO)2>2@5_2ZpCki1>4M#~p)5tM*&`o2xt?PGe z(v71T+3<@(ZW^m2PBy@4*|>Z92+uKD&qY;nTGCKXx^Y%l@c}fbJE+ZoEKl(#;yOie!j`fB#xE49I6j=TL7W6C4*62a8kKNx;b4h-L&Ld=!-4kq zgZKRbQ^*XK&SBq=R(1o7-if2YaDMT{T!!20tMlpg*0EZUZTx89%k0b1+N;pnJ=MQAJCp%N&ov;E5?-yj^O_YH-kge8wO z_frnV-nB^>Mj9Se|9JdH)?4WB2H%Hy!V^iy>WI;IrGNYb)5n0dnm)>DzPi75mrRn!wP0zZ$-3GL@x0RFA4a*z&{+ppVSU(9}PSOaj z6TR_Xb>%cP%WE-BSrk_LK8u60CakVB_YXeD+|aQkU9?Gi5B{6`f#_Ba;zj%Q{0m5r z^3M))>h~`6DEw8fMZnq*g#LuIn{eJYFxg^|0p~ZqaflBaspiR8@QD(Z`tDtiQI~$W zoWqF!Gc|yPo>mMfk7c_B=GEXVjCsI86?ib)3TjRPheoE!>3d7sC!x1Y6@eP)$W{eMycb}$+>)IGz-+lAyNKav3|&0c zi;nkxA&d)S&uqaG>pf8{xf}zQT)zR!Hp$XTmh&XbCdtxb9z8g_{Vz2D74tF+ylHc0 zU^{@%%-6ODiNsfuG6M>EbhqyTf|S_kyQT|kN!TN}6))uk4w5i4z#V1;R*l$(fYT1n zO;Fbz*>o*=OxR}5!Kp*@Xl*<(kM0n&jB0aZN;YFG)#ht4lQNHXVffT$fG7mgZs1R_ z9EKpY7#VV6wYWm$#M!G5C`aRO&e8K*kmLb8;qZ!u!wdl_2n7^^WvVtTL0eUSjq36* zcK;&*_b_|Ex)=)7WFaF~p*d5nK{YyXivKUGMP}_Hh^#e1#Bkh&XW?3E3(rwLB^I8g zPZ_bd7{^juc#d)`vGA-V5*j&@>)e6%<48rJ#vplgH6@af{X>m#;j9Z=zJ zoWu`S--~``qVXJb3Lrtn5wee}VZcMjdU@_o+Tu^xgK9 zT=azj1!u!1dWLYz2*4qv={W{KC?6~l(oE(>TF!vTKF{Rzo?~KGZ;JIs)4M`qpzkl} zjjK1&jVxX!lSC#_`$)(nb|0xXlSv|zL|aG(L--z7rj6DAJ!Zoh{~NzEPEPdsSNoUA zi{N;0Q^vnNoV{gd7plKIc`f>jp0l7m)t}r`puJ-E``>bzEW>OunJrF>Ig{NlCIn9O z<9v-Sxo3bHID>0wsloXm6<-$OjubYRI$&XnBGzNE+`jZfu8HC}2<9JHn&p-X2AMYC z_?jPkFkWh461YzjAEv3{3JW$_%CZ-WRtm2S1Fj2itpGJopaRFb0@YYWd?VylEucQOW;~d zodIquyl>p^G$s^Fbc1__GojWdT-jntvtJ}3WW_p|`GHcbLv{I1f@_B@o=4xnhgpI{ zenVGpFML(1T`t{k+TftT{=2nSKVG)_x3Df1eCIu!kBV&du%WX)>2cyQ?bm4Y*e319 znke=p@FvGIU=%5DJu;(GYa*FsL^>8IvS+|y z`ot)Hm_ChZ`fF(tsgi#2S}3h|zU(f81*DYwH<$Raq4-jXe?PW`w0(GO&UUk=M>!KL zg|Xl~sW~&5wlo_Xg{U5`U-@Z!($xLfMFdq7Vr^yq78M(n$ma6dW&X;!AWrd@&by&Q zEG{iCt2qC9E4>D{!U7;*N$A)CRU znjsE2K(tI2;F~)|0WJ=GjuMIQql&&K^d3NMKbyXO#2dqUrBGKvvEG5P zBpp?!`d{E4Utkh?b?Js#Cb>DPtcG2gEtBAkNhb-5gqCR*v*iMKiz)frZa<43b`rMS3;xV)rmD9OJzVGx4c-GtQD)&6ymBF#d3b)Q$G~ zCB_~VugujbKD+3PSA(97V&|XAwaplthuPJA_HB&o&Muw||I%WRG$BcpPg`m+Lgj)hmxY zrE(8AQR=ZEH8x)d7Ac~ZG{kurZEKgFDLM!D5t~J0@WXcDe{TzJs5Xk>bM<0;KUV9k zFOA-a`#>86Wn9#D+z+ArN5n^hHOTUZ`rZzsh|DL&UsK4!d5s%0*_c*xx!J zz|@qpHZ{QW7-qA@ktOjt2K;$g%yOU;5D6T{2pdjWsb~-izn#?adp3=gZhSQYD;IhW zvysDG<}erA5PnZO_f-U^Infd5XiQqfMbQyO$iuDWk>s0t7+=2!-IQcG9Vx}0#)X)k zN^VZzSVI4K4ltUnF-kTsTPdVYx)!UC`25r(*bP+7lb=O@sTObaO;o#6#K|jsgQor3 zEX8~)UfrJAorRX&Z6galh=Su(6i!DSOm5w#R&q6Q9_j#_)|bke<9Vy%H2C|1u?;>4 z6&9%sSwyYJ$cV?F77ThG*IorikM#@CurD}?)gJwQan%LiU@CiNme)l;ugfLBv2!e* z?aw>$X&wI#j^Nz38PZczMr_G}Pb0|IS!$rc%7Zm77iP0A|4y|pPnB7HWAMzc^nA`S z8`GiZTTh)@ym3*gtN8VW=R*;y!GA6zX)mSkWvzT;f2JvD#1`~TaH#RxLpbP%ovsf) zV{oX`enACIvZ;ZEIDE!ULh}PV>`&rz3#s>yLUka2SS)86IDrXg2m%RnTss5*FCq*vv18=&LtGLg#D4qLsR(n@yBnTqy ziE~tksCG<1vYe)%dIk#bTgB`hoRnDbd5e%Z!y(qA`Th;wp)*azzUrp)Rt_KREdH$J z1DAgzU%<=4C#oQtvwdsW_p)02Y0IZ-;6hZ|1gGgah;n5SSFc@x%hIQsUY~0ES`AFE z`D~)u(*Hg+kfoY>poQ5*YVz!K)$~`qn1W@#+5K1HgG2ZcRDiePDYO55J*zf2pyKsQ z@$<;b_<58sz6Z^>i773|y_tBV6}<*)>K#$4JGK2E9ISxDYk|Z2L*M^FF<6{((QqRB zVf$|tU1FJHT;Km!&I>;SMtD~y=DhHMlQ{#?&kNrR8tuGrvbqjx784q7l77p1;dGF} zr81rue(NPwEpA=x}UQhecsXi z9PuK-|F->{nW#F91>DcM?I!f4(fc`9ZZ+)ZOu8BSIm85qcd~;&x}RhCnx9yb{h$3q zVLy=loCj90vSaphzNAAQwV$&b))U#!8M6|=(fu4O)3^T1e;lp1`oHJ>oQxY|=L0tunQeUck;UD4&TptJ;24@@3f!u6OjHN+t1;wHTdQG+FQ{-sQsMs8xrs5JhEJC z^-=pdgYl)0==~hlr`*pO??;>e|9?Mc50v%;S})#sEWN5DzFzzk`v17=#W&M#^mW-6 zxvC6Da|MyT>$CdW@c+nqvE_QrP@~q1FMm09`6y(R>rBekE00_+UIGk4QI>h2g2RieZ^^qU^`r*gdX)Ppb{m}UlT1eFTVfQkS9ln0Z zFRna@6J7t8)(`h_We^)C@zxLV*|q-omTV?|1QpsotJ zBrN;r&ruB=)xc2=9M!;44II_LQ4Ji`z)=kx)xc2=9M!;44II_L4^;yu@%E^qWSw3% z^U%n?4$A3$=9v;r-o58&;hu9QtgyXxH}NXo~_~WNBP3xYh!MJ0w+Q& zKSW(kQ4j%JDem%UG;zFaBEw(FaK=TYE^C>0VQr;%g4JqO1njP>@KrZ9*H5r2m35xWYnnaPO@;Z2d-@bl ziOV^8+SKxCGnI=PTdmFBD_XrRK5vz^qONv9L%p}bXU$PYTYWVZK5K1@wV}~xt*|yV zF7h^88>_9pMU5jBpRz=`NO4y*SJc&c>#WngE#8Xe${OnwZ*zmUuEo00+uTyy*kDyk z$BePt#|eR^R`{B07h8SJ6%8#-jV%y$qV+6m$)z)uSuGU{0G@fWwbtituJ9pc%h}d5 z>$L=p%~js!v#l4OIlr#4@^Vj0?UmlMFHy>;Pd{(^1VaLAeQS%)I^SzO`z)(nkCu6! zM-~x+1x3J|jU8XK*370nC0*3+$xC{T4><05NwZOi4#wAT9h znlgIjwW2JF%uYy*ijWr~CQ@`OMc0uG5g{})**(izC8`e@R$ks>o#Zaex3;u4H8nQ- z#wznGT52mjEzn-Wf(a8mQ$3Z?kFUA4(r0aGt*a9iv=DVyr9^aldT!O}ElVov##UA; z&8-c}jK;cFs?{p!Q2G{SqE(rW2W!sMiQ`&E!%D0r3l~^@D84mEz*55{3!twBq-?pO zwW1kj;%#njYzDn|ai!N=)dD>>ds}K6>#AUB6%90(N}*wEi?3?JgoWNpAIOC!rOxXs z7K>`k8LiB$@mgD3yq>1oh6Znyr>3#R=c)JBH#RS^N?xc@5L8(g)%t3zjE5=KHlY4u z^it8(&!-7)C#=|z zEiH}Br&&eS7?bG+7G*zzky7IGdFz{eR$rsF#p{Em)%j|h>b%y9N@2`aG(@P$Yprao zudisRvMy+DY;78AEj5CgeX?$AQAkK%0cjf+fXn5gRawEqySS;Ywz3vEXgX?alI>+I z3NGr_3QMb~)5^(Q7t}S*m#~tPGue8I)iru7wU9H~de&K1+i2^kQPz_rmC%>o<|BoG zD1zxp%DBcR-#9A2Zd_x7x1yDg_;Ixnwro%O_;!_2@?eF%-pl(Q;x86trE{H)7vF_hEbZda%f4(ZpiysE%)uqc}(_n$bk1n@P*V(4Uua$H*o6T;^wdL9JZ3VVM z+jv`%t=Mj}+wHmbJbS*qz+Pw{Z!fYJvu)<)=H}(*=N9A^=8n%T$}P^b<=OLc^YZfY z^9u3`^Ty{D>1@?m6g1mzKf`Wp=g7F1K1;vH7 zLVICuVP0W=VL@SG;rPO$!s79^@%Hh#J3gL9^*yu1K?Vym}&VR^o9Nt3sHMxm#CR^7sKkEUVW zeqX`o(|DoX6tmsbS6uG(wA660^Y|(jh&ba+#Y5%zez4l*H$1wvVzlw>0x7j&uhTUTyFJ_p~DyEHEtP@p<9xIlU zy*Lghv?%S;>+My%gvm>9@0}7(m*o4EB=UTky!prefEwtULg$_NuO@j{;#m4Nlxwmu9 z92y)3t%cL4&9He2Fu6wysI9LrpEhMyoxqL9#J38Ac6qs{5@X7EHVV>?Z>aKMMqBT6 z)nXVy?2HnR_IP*&<@aVe5yOk98@*nBEX$$SOS*(t%P=CpJs&xWNk?zvK>77_+jKrL z?ST9Fymb3seY*z7wgVGr(n%<W9)nW#{B(UR#_1X3 zRfEHYyqfOS!g>Fxg-v>RzXnJ2q>nFg>bdHFww@tXum1N+x%GO}^dNHlm-Nsk%iYpc z+YDD!(Ogzn?fmK9E2dV!v5L9a2aMAO{<_-vbX2YNMluoK(&VkIMZa>T@Ef&>VMT+l z1-uTQ$3_#`DoaN8#H z{n18;fb}potIi!-{I=J%uuJDF@w&V^-lUgI#>et2>K~0sPC7Y_+Q?A)C~Yz-6Kgb- zl3}V0`^d1L43ClFC>dJO7LNfn^?Dw>xKh_x4CV^>CWDLi%`h`sGWnQs$J86<=frFA zuEXO+y;{JFX-~w>XVURG8UHBnYJBH4RY(E;9$M%^&&1Z+I-j#)vT&i`UvbES6YYd( zpfxLf5mwe!w6u(+M3L57AbK>}iH_}cZ!~XIq#dYG7je?cmR?ZN++4Bba2A?RFkym- zRwhI8Yr%(lq@JAkB}_bZPdyJ8aI%1Lh7xfD#Peu8f0uHyPIP{!;n|A&T+|Hn8Hjrj z?!map8`Y>};XV(y8~6FRXW*WRdlv2?kc)CUaPuNe*{uX{je%s0r`!yA>9|ugP~x<7 zKax0|CI>L(M}4+&!1r|%=A|PPu@&_24G!ZCd#ggUE@0 z>~0BUr9MaCrtD`yD(dwFiPLmR`_$}>{Q%=kxOJRW;t(inK{pcNNw`VNvgo*z@obg2 z-$}Y`z@%l|mym_<@wkWKri^@^gp!R$cq(qnehTg%$vX!jWu}~CaFd45)P0V>Dpvn76QkzTXA#~&LnK)gRr6>nwEEhDGb0It%IMNWO)9L9a0mt&`cs;!e9OKyjnVvMn>2zP< zNw;r?U!ye@;nw>E7jV4TC$Y^k-BkI^zG#|!Wg;A@13@DI=ftwmlEels%E{S0Iiwl-bxP%m*Bsjyzaa(pKUoO>DX< z;HbAm`FRmXn%H#Jz_CserCWeF(!{2#iIdJKXD#AM6PxZb;Ajtt^1B>yq|wvqZK)18 zmYsE?x5Iiovs{eR=^F4%-LgJ)TqB-|Cq3ICZLbM8?T>M^t1A#T<0hWE=D64*aj~-a zfM@xbhb~Jio+&%y^n4fMnfjy5I&Kl3i6^}-%VIpU?J!Q4WeJ{%*JZg<#P#ZhZJILv z7&ql)JlizW5vR+nr@sm~mQTm)>8}BfaaZGJdeRW5)3Hy`?VF+ACzysMpRoc&tjpQc z!{P0C4qgxrp9W)2zc3swGGlB*=th`>a1G*TBYX^D*6eWjAi^AkRiIytZ+F~*aOd1` zxIY^7;!D5>;c|oygz1-t!}AcbZ4&io$I}yHvFo^IV!gNnK z{37_TY7U2U`l7A2fFHt~_Hg(?gtPIodU`+fh4|QkB?U0TY=kukX9K?uVG-bM2we!X z@Ii>>#Q!WD-idJ4&%%p-ADM3gem24mgf$3PAzY4d9l})zpGLR_ z;aboyeh6|Stod^|ya)Kj2>;G-9rTI;YBBx@Nznkr|0Nu*LAVZo^TQ4NPK0#`m#;@X z0l&Bl@@4?u20a0P5aB$)YyKJz{|4bIyu0@p@M{omXZUP5oPzpW^Bn3G@dtN+KkTaJ zdB}y3*I+GBOjk}zY z4-6$EBNf}w^nvYqO-lm*${28a_WuHVsrme%BH7guO(J_!`SjfK(_vVYj;oA`s z*PfKrnUP+eL3-Y+ahJ^rhhc8wVwwkWJAh-`m1)W|(vcV-<6p$R8u1)+Wc-C9zBD79 zG=Imv2k}AAGZEey8_C;B>heR|i~B zr-QiDGtx!g9l+c0zC8kQG5sn4rvazynYjCbTMQgbQ(V-27jQn{CJ`g_S1pP?f7G%EVs1Ba*S(dnq)&7hn2aD02& zgSaZheOvu9uXHHl!>)g!qHRv9khVco{ ztdlvY3)U4c)0J+Q=(W?5_vAU0WWDdsM;y!a82V%vDki1#BL{UpkxH|cWz1_&xiR^s zq~z?R3@h5Sh(Y%`kLl*(ei8GMi(%9$JCP1~>gn7}cYRXQb9youMoETS%w=vty3;U^ z@ee|}4_a?rNT-#-D9hxG?MYFxZ0Z|d z7S7Myki!R_bxXqGq2xLE)9=J{?YHoBf#){xRPiSATL@F~_r>E`mf?;PbDSx@nC{cS zFAK7!^M!vrr?|l{s-0b`H<|JbEBX@9sGwgC`kN7^)WprdB*PJv{~ZbPca(wOD)8C4 zA{>5+<+&w>A1a!b*Jp)Z`(sVC3fP9R>gTz^XAk%^;fod2ead56IfQx8mcWNhR%#}E zTatl;Fag`FXiRB_jXd+w4o<^-H4E)2WoL|Rnte{r*b{A^YD#=%??L&( zr1C`3ITv8PTG8h8bO6F)C0ddOkQV5 zF6ul7b$#&n_)~b)W!B&0)lXSQTU4DVT@zmsZOBsS_A2n)jB!kq!IpsMcWkq?p{K#G z>W|WfTygpR(>6q3rt?v0LmlAxPudXcstP>5dQ6t{L5;_Ett*!6H%R{&##5pDZE?%> zPw9Rzc;4S1edgqF7_N}QK79(tLgF_A-xY%|1O7GOmp>5>dr3bj!*WxSGb8K9Wb}V4 zQ_3@PR;0pmuJ5B}6t$;K&Tuq{nm6`HXjEd3nvD$Cf)A=Ha$z<#kqhp}4d}*(%;2X} zlnDc#NKxDu{jrbon2GRq(*XRz#rI4X?MP8JB>f!*^rw_m#I8$81?KUTn_!hZmu<@6 z;^L%ZUNb4@-=N`=cP5+GB`eP(!{<{6zdAVWy;Od@{jN0I_F-wyrO{t_A${<=L22)& z^TUdF^|O67B<;C=0)NclUkprp{}_QE(C^lPY4;BJrRg^#)3#>Z3esI>^GlZG56l>) zzZQ=xEO@-jLhOI>aie(LZb?Owmn=CTvs~LbQ28no<6dB3Djqur-T=9NG1Rhgm~!9H zJVkkZsO68tl#hmvRg~MaEWaP7{58vixTlUw1-kS2RABym{1`mHbNm=Q-!u%Y?i`j1 zl3x!a$(mt;WZ2-PKkEC}VZVly9}lcc-(viwtCwLHd z=ZUF6zjI8k!SmZzFyCiQ1<3&`NxrfQl96P7*T}oUeB()$k47mwPRhmi zdQY~zIZFBI$*049PPRNVO4)m|2XUWdrvm-ZDXGA`cFGt$`$vtz^Mj-M;ft)JQbDq5 z6iK#?BFQtOUFT7bldnc^rCmJZv=(<0&J2c=N1IOID&8rX=uRC2 zwwO#)S3;^+O#@y?GQDr|^={*V_KqY|*mScg{f5EH&rRq8x0uqe8>~EI!iKH#Lej9c zL()D@dKu4~Q-=R!aN3@fdtg7;r}evSXxeLOnRgCNJCHVD#n8ST>AwKYoqdnFWoX*H zeLqI+{mi`E^z5<9A5<1DwHplmtLrYN5uXIXxNvh4UVXeY;84vkRu9PdHghG7}|M<^c+ zOGV_LhNlAm#_%zC_K(Q;bcAxph%vx)kATs=IwBS1yGM}loe?AqjW`MRcEZ5BPk;k$ zRA;m5WaKjij-zE+NDP1EztG z^i}>V$=uync|LhC))*g8necR9<*C%ctNSWHQFUUEh9) zUD0<1IYOp9WgParEPv~-{3^NM?*7VuCZmg1_N7h4^MmPoQBA+;Kk%vk%ESE!ZtIT& zSK_Z;A>C-d=@Lv)-ZL!&qn;$oUz3$XNhhMKOCGpBS-C%XAQ0=5_er_Yjz@}BnpLKL zJ4~j#P0&Hel#02@r>4|5lT7V=HdEl;O<+fy$ZAP5I z|BmdAk{#8+5!HY;o788Dy&?A*c9AzBdm#Y|VbESh}UY!?2xWX$y*UfA9i7w5pdb`EohP{+?cUW2hTE3P)FNPRz6 z&;OgSR-T&UH2ScFb?1Grgx6+i@fx=y|GQJZ-xr&6p{(agGQ2>B)iPWt!|P>un+)%f z;lncAD#Mp$_?`^+%dn4KEB~=F93{g-8BUVn1v0Fb;X)Z+FT>kpc#jMpmf=DMVYd+rFYqi>KV{^yaPA_N`p#7R$+gMxvXo<#K*LWE^ zXZmO>-iI!6JFR7IeDQ$qb&I#6t?uSVtM+EJH3y+>Y|&UGFUlQ1IyTQ(I2pa(nj}Xo z8e8>A%I3KE(Z9;XPp97dFQ=Gz}_JL2IjivFIAF;Oy3+UoDkMB@i4 zv-iZ~Gf3eUbL^Fj)p>oKF~%jM*Vo6FX#BB?LykMq_@PR?{x2D)MS^B+oQS5!C@05{ zX#DZY?1AyH|@ws;y3d_4P{Cg^+HyZ$~~wZQlG5BhwmUEnj6d9vTI z%6#vVctzspN&I?=*Wbs}<#{6k{*c7$@AK*Nwjr1Rvb_5Hh59^gl*H@r7oH&VEtYuw zJwAuT&y;xmeZzqg-yre&dxkpx28nOlu5qwR`rk=>r^MSNeyham?@4A#Jl2RsJ?eg` zKL6e=@w#8CrV4au8H_7KK+U@Vd=t8147Q1o%G# zPyYIMFLZn6Ib^~AW`|$^GZ5D-_&M?T-vB)6_4oMo_O%6g@?ZXxL!{5v*eD-Kd~lnF z*Tx~dvxkO8KJM!@{N0j18~Awn&J+0l%IY&Udfk6oDEa8`>+?JWuj9~xWa8~SeH_>G zEd-wVHp%kxED5jK60fv71bmNg0a!DTko8$TZgjA5RbOCBP?RN*9m*0^o(7@6q_zNdB(@&-&8e@7MkB zLy}MId;G`2VUEYY0eJG)zvlpR64%qflm9_kt{jPf9S>IMA$I=rck)T)&V5?fFUIUCSv&37Q zG`!yK#$f=8$A1p+myp{kj>hVd3fS(kR`H9o+Q7$?e~O^bP`aLV2uhozf8zfYcdap!TvxfX0<+{{ zZ!8D_q-0HO9wF{@b@#j+De2jr9#6Bgv!m_t?t~!S?dh(WY47wS-PN-@8w4B0C`56{ z<_8cd5n3rf0+zx1BSQHDtb|3Zkbv`t7y*(E!CFv2`v)Q%Q25Td=Tz6@);Ken-jp;` zU8nB*o_k*R);R)KKO#DhuL(RS=@tKf8uLTr(0|sJ^BpPYGl0`Ls2?S82)_OVaGKxB zT!o+MJS<(`6!?oWU;K>$;E#Y1QTs1>75pDxivk}*?5_sk)E@PhrR(>FjP!Ri;BSHf zjoMidJOtV2>)i~Ohsdo0IFG|0R~V}6>-gL3yxfp^S(iMI0Z#o&UM;|>oV!m~_!XU3 zr0Zv3kaMMfAK+Z?A5}P=;w||iq48IL8$*7JZ}~8xlwSS6{FlJrSVsBB()#b$k)F!A zC3uTv!q;8E9|V0nIaT3j9p~eiXr5o0N04{E-U&F>d;N12e%8KRl=OF{96DD`*Y7ag z{*L~?nF0SFz^R?;|48kvKZPG!SG1g82b|iYeqNrDm!B8-^-+Z(bmo?>zt1S=+YASt z5j;6=ljq(52E_f9dE6HG#{qY>=amfjcLAq$eMj&oTz62RX`F8*`1N}Mr*Tk!Hkc;9 zRsiRD|Gf%7f5(Dzf5qjoZRfv9`ufOlc+$#aC&WLU((`M8Q+oB!<4OAG1g`#S6#gy1xnI{RoN|}{ z;n*D<#x*#Ezg0e>z7e&54(eAGYJ zuSj8Q0#|=gXdYkJ0H^j}TCMQ2@|{1^a=v2WA$=3L`YBTQ+a7VZe;#lufA#aW9Hqn0 z0#5jFL)Hb*iLbxSDCb`?;BWbPTaNlyQ+)Uk;8btr4{W`DQ&+e`NUmp_+z~4{w2kpQ8oGstyTJSwO z;MA|Xmn@vV7ow{JIMKZmvX8fF!MPif{^gHV7;;A7uL}IOzy-~74?b%9b^Pl#y~<0U z0G#Soe`-3O_vLN+7rtuKD;_=!IHgzre{Yg{p9WmVL_mD|lBDl{M9Ps09>M})dM^C} zy7KiH;8gz2kJ|Ll2)qfntNh=j^jH_Gl3ww4p`50zdiJHocC+_W*bG zcd6)xU&?@A0i4=@OYlMG_wO>&znTG`FS+aeAmCJf<@as-|5G~rX~1cIe>*Y1&t{ZE zAMVk*DBQB;Xgf~;PUEKjA1}(dT?U-$RsXI!FQ1U~OER8IlKz`qzC1*3|1Rm(-=*St zb;{PO{%{|XJWm4tU?ji({3OGHw~tjA^f`I)6~K9Xp0aT5*Sn_O<(y(T##89baVh5_ z;KZlsdjPr=e@1|xWJvtqQ&RqoawYcS3xM;u34Sih+ec>Xe9<|1x^x`g4> z>m344<8U)kzT2NVc83kQKJN!`4h#RYUM>YGr$JL`&~zF!lLpPDK*cn~0ZLa`OjlS; zS6E8-r<~4Mo=r8PoNnnNbKAJl~Ud@OrR3YfKxShucA}w~0^z zjaL53fPB&q^46zyE>Fc8c?rkvI0-=BjI-eEq&@wz7?JP3(GGENmbB?EUJaz^l4N|aCm!Qo-FbohZAm7Zv zZ?V4xXZ-N$9CY9+7iT<%XiBSRtiOEI!Ze@_qpLX@)*bkNZefI+Dae0yn?|6q$K$b= z+a!YUpJ+EnO>;ZS@nmTa>2adaS3Z+9OgFa&y>72$F2FPUa1SWP_|?2z>h`WwcVivTdvi9^ft~uxct%{4 z?&GkW=06h*w0zs2S2DfT{Fcs$mK_97J@in*9-|(GCYX;a=&8Bz(W0E>>61G-EwXq*y4}6`%#k$Q{soV z;%eQmuI_ep#LL4E4}x&W33^6r)Je1$O#Fme~(W#A_l>0ErLnUEMZrIRr(6L7pQiz-6i_9cVF7o4-RTfJw< zKPz!6sx)YbG10s11AU`&n=`d@hzZqdcMXL{qWK&%y)^meJ{d%6>Rmy67YV?S!u7Q} z(j7Q-3@wg^*?_3jHO)rQ+^$r>v*%CMj7OqddeA0K&gT#S;ar#Kb`1+4BUZ=^LWK+= zfgw%^CrX40Az~3_2E1U>U70lTL!7T2zbk9S6mb-Jil=(=l3E0v_Fpc_IWkk7t`(Ug zCjy};FC?U!Y&@h+qus@*)q^r3gzdPBqlp^z~#N(F(MuwA*T zSOOrwsAd@Y!H(%4pO4{GiGggO*|4BwZy=}ub^j$(+&;#N1?E4 zPgbkDtIa17<%iO>x4?gCWsK!Al{zC!6hWB~I1P$pd_grf5N9tALXpFsaAqpD#$AY$ zwNPgArfiUkPzdx&Rvbk8Rf2N0%s~f_IGhKy{7@u4+FYk}ptGi@psZPk4%@IMP_>DGP zCWmn(Dq{^kY(cRGlb!Yk4YZE^y+V^ovHlAypgW`lG&b719^^xqgD|Zo)a$DW`j1tI z7=2dY+Uac}fG5Yab5O$Z7#);8dj)1MO^R5F_MgbW@j~KlA059KF9AvD~3%I&>TgTJ}+N*lrz{hq%mnq8rgR3-M`|dVz>J-EyXIh~f zLQu26Hy%unt*dN{YitZ-h9S@~r!t&vs<5bgq^64Tcmr_>QM?i{cucj8)qH5)ZDn}` zJ#D8@i>%>eww_?=5yRzX#BdqWnUX8whqLhA=ERuM-J2fuVcqjdT}J3qL|$g>YwfHi z<(LRJ7f#jsel>}s;Z$7RK{-`al69{Xo|+@Y^GvG^EOta((u8=@;pYpnY!tm0OB4LAF& z>`DY+B5^>00Q+jGYnB1+t0E&}Cq%M%)Jfu65o@uI)>yB0XG#uhlseQ@bRff^GdAwg zO^a%H`%_^k~*jB{ky>{EFqfMWyKM^4C_zWbPbM>>m)@aAKJ%jK- zs@~e!)4CH4n;PuJ2Hhc(8RQ?Nf)$E+L{!e-qvE%&q0E^J2}&SZIjVZj486E({IgFbmwY^w;PoTO&?TbpvBy> zeGZYEq7u7TH*wXpHWx$p9PruiD%ym$^yFk{XFTc5B9vT^6(`$1EC*s*f~9jn$=VtR zos#wINF2#d#%a8j675nRZxJ;c!$3zXJc0omdb2lDVL1d7s_i2zhw74u?=EFYURBFp zY3`t^68LG(o4%i{mPqECg?cD0}R%z!aak;nd%s*mtCw zKM;+Ep}U2Rxt$#o?s5Z6JNl&L(}AFrXyxcg3415D8|iiCBe5z89iFyW6SlzA z>T|T{NT+=oBf1(4ERIgt!kiTU%?8Y(nRpeU$ZHPGQ|Rnrq*$_E*mEtakrQ5H7pdJF zV#|BzmunCkPN&WDl<2@x1D&}T_-rKwql5ku*H-d$&I9L5Hkzh69I?QH*6^NJM)-8% zFJsA9Q8y~xIt9?F@$ot4x(%F2Tw=4Wm2ll#d(~Opza;G^z4~42In)I1IX1gG?m3pw zk?x&Emr#UoC(dgrBcGUB9o@%`?cw0OT-F|rid^G!J0d+E7|p9~2nB^9_Fes!XieA! zfV2#Y8BSEiaduo?Y{D{+1z3HL$tGBf`{YQLjP%1eio=91>zvg>_Bkt}FHYSVNMiyR z(yLS0C~9oQM^h<)1J93?RbZ~n*43*9C*p>^E@WPui`y+^F)+6QO=hwW-sj_N(Pe+n zH1R9&Q@nq|yhv$O+P>zZXbJpvFKtM*Lu`Y9i8#-VOf|80Jg8!Zi>3xfgFoz>XvbAP zW+Zn?2KC;~u-cBPvJQfLSUtldDi-pE8jejsB*1Cg$h`2buTl)FYkD5OiRj=AFP*bM z*P=%&4ka=UAiDZu z=7_Hkueg;wb$f6&PFj=2)av=pz8suR?^6TF4mJT|A03hSxrVe=Oa}X~I8n{fM%rcs z1;Qg`6Y5z+F?Z7s<`+;P!>wdm$Eh{J0v?GwMf{>Y!!S9_bOFv*46#eCm^?0v3y&(G zl=QHJ2dwjh3yqzTUvFRYNxrs19?QEp6VXl`)nk>hM7wGcv6R*2u=$CenypT+Yb9v@ z*n^In_3@#R*=-MkQ3Hxm{20=NY3l2L&kzxh;=_*9;`<%*$}mSKMfUWG8v1mpgKzI} zhpNc;ZT$tr@CW@|zS|4@JkFp#N}M&n)g9%rW4N8mxAwZoZ0`ZSp`tgt;C(p25PL9q zhrRFyWsqO}ogkMd+hZVET5ZSFsGhaS-^KS_1dpi z?f$}}!+v{!uh@(`A@4zGn!LX)Uj%Kp&=7q>3!6Ad68ZShHpqS~WE-TvsZrDew2yn+ z=(L;2-$T3cFVCIfhUcNM<>&rsrmIg)6;-aQq8Y^!I$0)ATIR-ZlBWqr|;j% z2e;-|f4sM(f-g%(ZNH||`#Ro}Pj7uzzrEY=yF=Gqdt<-lRED-+z#Mm;;F@3k`>w-( z4qex&)3~&L&9D9cu)I6Psm6ZEUn0c7rGCsQKQ$?t|094=73yCi^cP$J5SJ8VAMjt> zp@3cdh~`(n!Y3s^$LQ9?xa)VodjDu5|1J40_!hpert7z;fssq|Dd^+)5v^bS&d=Vm zDc+(9<(_Q+rzF3Qzxtoc-L@I7zg05IEm{BX0#5box82vjQQ?%=zrnX~yzP@sOTTp< z{v5w4zt*q*k#9(T^_xn0=t|cA&wxeqt3S&Z{)v<4p4NixV`~2HCi1Jl<@J|s28t6O zy|mp5dksIL^{Zdz8~`Uqxtsl+FXj~T87rI_qQYe1Cjjd z=eZ#HljEiN^!_mNQ~R}k^&?r4{1jt0dYz0u;5YdPrnRH_)o=7m|7R1Pko@}nzSgVl zdMr`D`j@`*J;`rx;eXAq_YWlUkNr?nui)=8(SFUR_oYOB^#`+l%;wj2Y6CQ%KCd7d z!L)BHrQO7sE0>r#Iq0oVL~0 Date: Wed, 28 Oct 2020 07:23:11 -0700 Subject: [PATCH 66/66] never commit binaries --- Cxx11/nstream-multigpu-dpcpp-after | Bin 59936 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100755 Cxx11/nstream-multigpu-dpcpp-after diff --git a/Cxx11/nstream-multigpu-dpcpp-after b/Cxx11/nstream-multigpu-dpcpp-after deleted file mode 100755 index 5e6cb97dd3315eca7bd9206e136f7023375bf7d3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 59936 zcmeIb34B|{wFi8?B_@mIga9t90tOr)#%Ltd=lW&u9hcxWt*R$MB|&GbpTq|Feo`km&z*Gzis4*vrW-uJgJTzT>XJDz*}+Nf_z#nREdzGI$$ z^77S3uM2E-&$xX`)!m(+&FpqxcF!!=;~qp$d0hjBrl$xg2GgnVd%$EW{FhPTsqkfK z=vSn{fBulv={yS_Q>8yA4gPCs@V|%fQ>D{5DK-9*G;+vEBR5Z)bgoXr&y{KDFG_>I zJqzP)4yGyB>NI?|rJ;X5O+DV9Mh+YF@^v@p^Nk~oH9MUNLF)m{G!8fRsd0u~gzRRg7rPBj_n1*!rD*6Hy>Qn7>*q%uLh_0`hiTGYs z-u5K?y(-`2avg_!j>m7`35n^ff}E!rM;SdkB|abd@e0A7kk8GE&mxI7j#c`m4op{FjVh9^;C{A<{3WUx|X(9UoceH5%T!}wuc%!0(JGi%K{sGe)vKG z!*^lz(%R4&KA*q4+u?AOuB{6;`4Od~sdb&(UmJ2b{Ec-TzEDS9Qz+>6yK3ujbDiOD zYiY0R2)MjU{YYYkTe`KO(h{E!nY69J1zLRm<~ILjWvy-NT{45V=Af%dWIYFrG^fm@2FaI6K9D>_+u~l~TT$Y3 z)y{+9!DSnZ($pFZbcEb3E>}%$+lui4_11uoT`7x76~m!&$ifG10(0E0O`$~&jsLjS z>?i@#9f9`dI)A{`(juz>no=k07zJCgsv1O8p1QS7T@D9xXs!zeeW4BQ0hG6)%{88i zN)l{JMabF=bo&GCp{BN0Ssszy+Q7P|R-eBvjG z_QWlQAa3XgSZfg`u0>k6;N=~m27`B_y=}dLfOURYE(SUb-^$kY$ktaA2zIsv7#Q5( zZ&qO!R+svl=MXW6sk^*gHA|~&OMG)L?+kPXps+rMLGjl$!60<_5L=X8$C7jo$GSks z=WlBb1-eyzYJz$7flysjv*QdCdgbkCYiPm*aGe{iVXiR7vi6gy%ca$AYS`eS(X48# z4+y&x<2HS=WwemOS}nDt+STL_I7-PST5zbLt)qoBvT&WPX^V$!7;|1z(6_F=GflE( zjdejZu27&gh^FeI06(_DqG^Z~u3YozD6?`LOq7)teXw(FOH;_#)Y9Gz%U<5l+3Kgs za4+tvVNIUVSO-fT=y1WHimKJ3EAvCDK4BZkcB*PEVfIo4sq&uJ5@_+aZ-CNxmv*sr zzyxZU$8A?a(Hvzh!XD{VTVgy>&Wb~HsZx^?c#2C!Xq#j!=wcb?(7HMt5+&?yVsTV8 zma{k{b$ep5RFG?qqpSiVs)q4$l%hYcYtbEj?Hu1~7#Wwt(GduBcC^a$Q)(FX#IN*Z zh5H}LLRDbNdWEiH%Qb=3Jol^yGXm96zPfy;x7JK9=2buDY_ z>s($xdMcJxS9--|fsWQdvrm?=J`nW#LJ1Y52h?j`(N(T%HR{&3!Op@!h1#0wV05R9IXc^Tk4ux4O(8q7pSib)gg2(h77_q0@Ye?EUBvUl@*s5 zOFZs{Rld^V(u%mZcy7$^m?OQbSA%*^vBQx7Rv1;QS641{`y9n(2?3>vbPkocOa(*f zOtBKDht`=`tINhZ>;&NDeFFRw#Y!D9m}KC>ltNHs3Oq3rjY+^vfDgm<*d<|11Q3pC znn;s5OcoSV;L8P$R2c~8pPVFQ14kdz%26q10P_Ko((^9wNgI&N6yp)hOL%2~?l~1+ z0yup0ZAf^s@tBym7)IxjO<+6Mc-lh0P=%5H7D0cA5lx_V^@g_2snSDLZ|c7Cd>Shoc~B}Q!+Jr?{@3x1OYKh=UCvf%S9_}eXb zn+3nwg4ept;5#k&X%_k|7QA(RaH|DxT~r^o;19K=v)6*3VZraS;19FlpS0i)x8V0% z@JCqi5er_geK2^`g8z(#{-6baqy=y6a0-Yza+C$1Yr*ShUkuK-;E%D;7g+GeTJUxY zo@*!iDzf118VR?=f}d%@ms{}1TkuW`{sar&wBQRZc&`P2q6NR&fvT z37@w;2%o%_?{o?e@zN(i!?{yJ#8*m`xSKnj!X>^&`2OE< zd&73@yJqi8x#mDl^uG+l>>mzgj_oshcjxXB1sI$AEYM>oK8L&QI469B8yObeKZfhX zKN03|D!Na=j}gu!JS^Zx2xk%ABH*2b+0{lj3-~_5>|&!s0=}E@1j0Q6{yt%fHQFxV zZxUu#8eJpcuMlPz8ubeJ3xwH4Mx6q_o-n({Xo-NYCCn}{Y8UVogxTdq^99^NIFGO) z;Lj3f7Z)A<5Qr1k5@y#H-7nw^3A0O!?h|kgVRmKFVF8~@m|azLi+~prW)~IREa3Ts z*)>In1YAm(T~f41z^4#qR}^g*@bQG%1x42g_(;O+dZJzdPbbVSC+ZaNB*N@!q9p>( zB+M=*Y8UXkUkA*tC7Lhb*9jj>*bwlGgpVUU`T@)TEMYt0{Q~|IVRjwSeFA=rFuRQC zuz(*Sd;;Mu0^UiOT|{)VfbS#Bt|2-k;JXQ*M7T%5-zR)B;dTLklQ6r0=o$fkg)mKj z)GOdG5T?nGIt6?^VVe4AiGZ&qOcNiq3-}7cH0{xR0e28CCTs}!vxI5NqoePO`X@{i z9^EhC3klP7NB0T1hA>TbbXdUW5~it+ZV~W8!Zgv*%>tfJn5H>8B;ZoQG`-Ot0iQyc zrZw6w;NuC?q(;{W_(;MurBSbdrxT_LjXDK9i7-uPv_!y}glRIPb^*WpHNZ5L(R=~F zPWWuXhJarrd=BB!_eA{@b`st%;6D+jDU9wD@MDA*5*`-tBZR96ZxQfL!ixxR7Vv$9 zU4(}Od^h36gnIycx8JJG}{Q?w|b}3SS{+`yS*fg@LN$;t9K0z4Jr6R zu)cbi?Kp$>&GE83{OF}-_*t{}xl!*5_nzU=pwlq-?C25B9{XvqYll7i6~lOQi1fP} zifzaBfdf1i+PVf=nP>4HI&>>hG?9mFCt_@W!_cmVI5F+xHbH;QE5Zqnnmb-O$J}u+ z%glJleDuT6v{(ugRSL=RnSak&Z$Sn|=bKl%_c#vRM{^iC4>I}%4QKDcLq0!oWa3sQ{NY`LRb&0TY_|;eKV`dh zN8gT6cKFGQc5Uz3iL(5LNgc_g!h6DhiR?oH+akaLjATi4HtNZNCyRo{3aS#Jp`2u4NL>L`o#$#BI3>ahycV*!F}%_Ew&QvXT;4Zq7Y-8{c^1gm0j#!g(=g~_zN%{EaAvnvB#;H~HY&oH zx5Tz>)DwO?ayt1yU0(MNb2zim1D^6d6=R{C$PC1hTpbzXY7NtoT$MzAA`(sD$_Z2& zu4a$t>Ufbr3a-BVw&dzHa!0NNL%o8>iwlcT%254QJ`bjXPuq&-F*a)3QI*?!Fe8*d z;vT5V9W=+nO9^)_Fb9~!m^VK~j43k45d&3IL=3{6hv*mu=D@{;<^Q?ddUrc7s=Tz) zw<}_zsF5n%Aj}avZ%6TV1#i=EiwmVyh)4C(mJ5htEP;*S=^ zCzuBna+@qP3uq25D$F-4_SpIlM+HZDV|!gGn-iLmXijKB&qVHg3yLz>4D#@4h2~Z~ zZn~_o!`EPjk;0xug-0~B&zobylgm-yLyHQJg7tZx5pB@4N-0mX;#Fwhr=zxvEBv0Mn5fqPU^}79&O0 zu73l9fRTFkO`ZG&%#tIx$ZuHl135px6~+&S&>XnEeHZNl?BIIlk!T0E2r-5Zo$06193PZ4$t8I zh5JG5fhO8L;ZWfk>Z>RGswez>q(8>|9erS)iWr$D*nx$oVK0(=d;mH#{9sIIcjUwj z`$QP#>kqsiboVB-%7ixjxA=JqHyFk6|mrRz^H<|4HEEJNp^_>7ExR1u!gkZJ^ zg6AVozeX9}CEMRk=Ued@+Kj3%R}0*E&PJ$`WaJ_y=ZY<=HFnWSW%qh!Pg z%11^}nth@)uS9+%82hP!&4F7Ap9Eg=HJ2f&>v0%!F)F65kGf^M2ZDrDF}d!+UDRi; zNH$4N*+{M)WvY83e?Y0t0mX;t9RnyjDj@PhMC}{4^>G`WUlu5_N+HK0?UD86EK+k} zH*IB0%vQ$61}+*%j16iM4CbO(?Z-RAcoa1_I4;3zPK!k;Q1#Vwe_;_CzY*$!g)6kf zOCFli^H80@gEN-kNAPe|0uNJTQNV+N{0|~~mH#Mw|JnShblp$`D%~(f#8T;w?$=6p z8dY{6=P=Y6K4T>8@fP|D@7oVvZhDy2kVszAT}_ z&xzIEM^yNc2^Bsi7NwwJ=>Lko+4>$6^0!Ec% z-hz0iLi_Hi7{NRV;mZa(bC>lW2)(G;w)JsgOy*x{+p(B-!`8o-15mEup}kcOEasT2 zGF;8D&Mi8Ncx|eZGrAaQ4MsZ=AMrT)LOfZh5YxOEH`!nwrIntJEF$l{BpS?fH;ivE z|AkdTF{C-0Q@iE5+{<8aa@h{oAgbdjj!xEyK}bShXzL%v7%$2j`_ylXY*IF~&%9{q zQ=3N+>3fJIMm`)Ho9>D~d(ioC%wy|AXJS-t<7{l?7>l097EsvtdMf@FYIMUs=h8O4 zOo_+qSc>>@ii3-;Rt@Rg8A~yrkrIt_=eI|0v}#1>Ez!>*6~X7S{^ux?7b45Rey<#^ zX4-D#va5UG>Ri1dat?(M?kg-uQTm5%H-1)z=j-s}5bh4|cJ)7J>uX2YR?rzLx$GueG?g(10EFd)G3ALj%aqpUUztp?WPM5ZJGZ~xBs^ujPwuLYwn2HLk;jd9&?9( zZNB{)^QS1y{>M5Sq%t)2LK$EjjmRsg3V~Y`{xEW$;5OxedL?qBNJGpE}nCD%USYbgo~C z<$ATw)e}AtSqp+R8IGc*CS|xHZHA9LFN=A6EW@9~GR)H%vY6C@$V-B~l*PPye6D{X zk8;|VB-|A$*SnBjkrchic_z`bSye1kLcDB1a+y##>Vo zNYr4{3}dwBI6$&PLB?55cO)!o=B~m!!GA)e#B_Ipqw(p=(ViK;p>PP%-TvR3gB@ex zeZBi9nhyx>@c7N_{cvJ1+i_rIq1pTFLC1l(HE$Tglpjr~`L$f$ppVV3VLsdZ8p_%H zS`lw0yp@~159OK_uXpalP}0_i)&d=$@|xo@#(&i8-I-xl?CX5P6aH!8?W~1+sI=)t z^!DFBx+p!#MbQPQOfJB`pZ`tB+toj2+q5!Yj>s{UU4$tS=CUOeD8fRva|h3SnLuP2 zDrF?oG0e3R$ZSzz(Yb@CMIf@m5)z(q?%>ga5bab}`_U*mgNxKGc@gXYlJ+t+_ABor zeB?-xQVI&0k(0ru&dAG*&Kj4|Po9$*nJSq+VX{~$!^{g8vm0b~g7Nfp%v8mOyA?5! z8OnHvNQt@6ICpSHyqJ-4WIckeaToPrGvrBH#?wo(9BBxfjdrjUkFpfr$#$_o!#|Y_ z164%Sz|mO1Wfxd65?Z-+0I4+xoI%DZOR^Df%I=Uuda&%0>->i}t?=;b(zhQAcIOYjZrV=4+7a*EfkoYrY+qqF0AV`q;QU^>`2Mi1k0Z9x zUT1DblxtF4?;>Chz?hsnkX_E0PckMDPL0?n5VApP++O0?An8KuhmhK@dY!_5JNz%f zZ)pLWf!fYTz>z^J)Zh&=VmOyxkzv>revwz$%qO1JX@<=L`elBmoeb_nI?OWy*L#u7 zb3gq1E`}fdVubO~Kq(Ioo*^PS*^9i+wuiYQX}Lf#72{1T^{KT94$emr1A}v{MS>(g z#rO;%j8w-)F-MToqdtu6@%$FIN;v?o49i7bryIRH#`e}l-RbejVv%C{$dQO8XBTR5 zHx>&IG44{82R>|A_tJcXl4oeI>+aiOZ1pUpfx^i>-!KZ2Jt}Z&qvr3*@i48P^_$e^=X=v2Vnw7I(J5Y$ONrz zGjb8aF%p;g;u!$@o;7C0gSNiY*g()C8$|_YKMm!A^+iX6^a%v9N6CISb0&-UU8zi8Ti z^gDCMvsq@Au-&5lOtDk_sB2U|Cg_2>d67%xp+B=;?kHs2;1jAQCOkvxg? zF@pIsms_dfD6f?tTmSzcL}w#-ifNYM$<{x}sGrBp%9^dei|7UsIo_?(T(T~0eKq7E zbr<^)`jw)8J^LvtS5p5P;qDgQpGGdyfb2vgO9@+gPFvrK^xRw8-e>0h6LG2?bl|-+nx8D{pjkr!^j-W#=M=m{JoqK z!+r-OH)S7Uu}D!lk$AN8MPQ*#uiGwu5J5RxL5$MgCB}^Ctbr`f$pW9YjXy!Ea?<}= zFXrOV9V7A%D|sO2*(+F`c&N3fXVXrnksqU+!L_0zN%>n)ir6y0$abo0c-OY+q!6OR z^!ocC+IG5*!$I@y%pm*>$|08?BG<`yspi%^$3U(u6d^&bESv#XvV0j6#iZ1TES4;v zu35G`hx=En*CY`p2#eIf4=eqn(6i<$Oy}U63;%+)JTp7vy|N_^EM7XQwDkq*n-K*R~s& zA2(r;jaFm{Nh$HJktC_al*3Z;iDnZ-H1atvd@N+#lymc~Psl>%#|n8aGqtmj?*oU* z|9iKpe2XMG0u_>+!{t90^Z1LP7AvU9^MB;w;*@Pqw;$rReH-$KWRa`z3N8bBG4EPY=rtc;rQkXT zD9CR`p=n}BzQ;*P7!Nt~QTm8^DyIHs@4=btYqtM4)A_~k|5xecudkBn1ewkt6m*-b z7mZ0+(pP`SV%8Uyc*6BiRqJ|XC+5;}tuniCWUY*NG$T#}uaSGwN3``n2#R>*=x#>1 z71D?d@ixHQMkO5NDTPy_MN*V360qE!vi`n_+A;?`*h_%EugJt?8I$@Ih27^4W@m1v zu6ru}8p<)zn((OmVAl!e;2niMK#MT5;xABPv|wToY}@z?dXSkt^2f)}SneZ;D*DG9 zGW%T5so90@9g%FNkjyT0?Eppt|3+Sa6XSI@dHuBlvGRz6|30#&&hq$P6>Ao+*n;xJAYgs(FhXIVWjn#a#m3IZ0~NOysRc;M-n?u8J-- z@|8!4zDv&g!gtB}qgAzR{bX3NftB#zC9wY3_jH zQyb5FnZiomb@?RkvRtWn-wBQ-?|1%|RWP3S!UWz=1QZ!$F_VSYw+l0$NQVj%T=Z=t z%BN6*mA-q4@(9#8er)|)5GvkCFvIy=gxXIbbEyP=S*2J)(dw-hB0*w2Bv(9)ZMoN> zMHn-kh_xs39aK!T1-thmPqA?0IQ}(x8CF4(W6laAKU65mu_)J91S$o`_mN}X{w*B8 z0l`FnaS+`so|FAS?5b4HK&XPZqNLHU;Wm;va%4PQDRpRSEKg`y+i{&4{;dg#!|w-_*GBzvTA>bOr_5U}b+?bi?u zRvC`rQ!CU7VKKJFE@LyiGx80_v}16&sg+udnZ@ANo}Er(V9RFQdw-J=`K=IrPNNQw z4L(DY#lUkC2cAbo?nFWyPOhPwdSBM#za+_?$ZYTsDMzwuY>CyM*!ZvCIDjg+2{%YT zawv$Rf58wp`ahYtU57$N_rND=MvgmeeFF^lAV^_3{IvC7O)PJ~M%OF%O6Bt4MdEv< z%kf>DgJH~Wq9;$l?Fwev$=m6?UB+7jZ*{maeih@3Yo%jxKS26#1qjyk$d3^{$rb)Y<;3T9#mE{w!ta-Eo8!|8&gENj$z}Xj08!) z3iMPyY!z0!qCU|bq_IsYP1a3NpLCW7Apk7)tf6>OA)}NBS6i0@5nJVq?|@C(FKg>R zSws|*C+qTRJVw%en-NLE>8CmTi4|T($;q-@*254vOqAZ4a$5>S0Y6UC9R1As3>o2NK^Aw)#)v( zcp`&>qnURur?T~LkBLFoeg7VTvRZ`xUlB#}3Wz7s1aZI+{Wfm@898)*oysHY6m6wa zl=IcdiF;XN2`XGc(M%P1Ygzi{0RNxk6uSTxiLq_8|6l(s@@q#B8dS&3P zhq(LU?Z{N9PJFeI`@^_henqXfcN>?Y4Oki0c=!!-V4^u-VlD>_HuF6L73Kg2uFhP~ zfZZI(FvAf zMb?orq)SVQamYc$U}f|Gh1Wy8SRG!!(jCU)*L&f8V}%cHhB(YUSzEyL$XVF1XNI#z z=1PGrL+b82<)~)Somd)V7Ki8xyF?VQfws<{9YP+-%P&WbB(o;w_fYSyz0Duo*6|{>N-LJ_;Al zy$kJF3Z2??yjXt5wluy(hAlhgwjwz(w#}LmLwdQ5qmc=4&tu>9@HE(_Vd~G72h0RQ z-10Gti6{{K7Rg7U3Xdlch(tx) z)eEkGr+kPtaX!D8@(ynY@hpb7e9wn?WFi7Q@4&a(a}f7wxMMR#rdjcB=dJ_0GD1g# zGMg#nxRET#vYARYQ^`I^8Q!-OWw>Vd7H}coZ%g211|r1`DBrjIU-ENHjGue40Gh;4 zG#C@@3BxHwdm~0_+csOx%&*9OcuJwvTv;oT?J1(xBI?L4Oc%va4_0X{GGO^%L<;c| znuAMnE$>(jMSlRYc<_+reXHJ=R&#|;w7n8@z-B`A#KtPNY^-(du=SZ#{2ld$xmkEl zZ4TmnMJ{(@i$imv8O55M!ROVeFFYP!z*orxMfff31YkrgF6LtgMiUb)z{Bb$kvw#L zICOM;j|bm;k&mot`o;bZZa#WG;@Czph==eSDNGC>kN~EdFGdarjEC;rii`o>fzSp$ z9=ARpj%~2+I|2zu`OvmuC~1E+_CLri-0{r^@$C;{e|2d3$We)#t2-y_NO)e)Hi8kv zQ=zUn3kAq&D-KJ0-^^JVb!ylTRG<`yp<;+JXht zeC&2>qz4sgREN(jinrYhF|yIgTlb&G9&S{CK)mb4{Sj9FZ2iY_RHUDA=tSZ>JJdt; zgV=(BogJ1}J8k{X0jFaYM4uqkyL+|e5eF9$FnRQH$Ws?fIyT}S_xj;kDfWrPsgtHs zols^Mj1u;>6pTP%A}bE`{zF+o$NvK_RgR*8B(!I2h~kPndcFeBz{$Ej%!t*-%uO9!%>&2V<%(t$M>`$n|8r#vUY-YxMH30j`&?t50wm!S1ovPPJUS?Z3 z5_sKzS^2T`|3q0Iwe^3|8nqzWN3eHyxur#jc{z73!J2%+ z??K;lKesV)e?fx&P-{E`ZMn-TUb6MQ1S#NUTqw$CEb@S@=YBr<4&%cl5vqc5oLlQk zK6T}&n68Z9kL2cD1tw|`gSg03(2Qk+b5Kbaqtijrzosh)=V4flXlLKZjE02gueSbH z5NInCSCLqCDEnlBD%n19mZ@VFL{B2rySrd~Z3vb=;o2zr1z(QtgD|l}F!KGMV+9sBvAJafkj*^xD4A+^kUHzy;+nSyy62E8m5@O-Ny3lS)$6AKf%3cu`cgK_Z}>>vXqyK2DAt7yM$C% zu6nx8pN!ZN^r!QiS_AS03ycyoFY+ONPqN-LLkv1rfru5o04TCa(*|StYNQ;MM?0A7 zh6!E9(xRR)C&vi_ZivO5@!+!XX@yDpw%v{SXk_X=*3lZ~`#hpW&jC3{7aH(k-*D(O z+pK;ka<bV#S`u?0ooHDEQjOfd#wv?X7Bz{ z_pCjh@cZUqe?IVL*tO3bbYbe`+G}pLBexQfqiY!0VGpLB;U~=SBX}ui?02UBN%!F5 ze0LbL-AnfOzL($e8FR36KL(_Dgt|^B0%rZo_PfK?q6rtbs5uyjh{60Nma%Mdd${JMy9$)rT77Pz8@Q~XR-VZkNJFt+7wRGB_B5->zs|&#<2dvO{PvL6XA;t0a z%M8V_iJ=xaK1Fbh)ELLhI3~Gv@0qye0jM#C##jYmhlBb?l9+v)VV?NV`<|`qWM-VJ zGj^(sL8>w?K*pVuMovIG{U{c`hcdZg!o_tCRZTF1!J(%xSBS8pz7SpcevZPKnJ+&7p0%1g=cQZx# zY@1K?HUXKi7b0{xpOH#5w=2k}<0BJo`I*8{sRzA7jF--Pnq;q`#MFahTm zuy_+}(=u5+z>sa?z8sXU(X4m|B{*8}$wnXwI~AZSyuJu4lcO?@T}_v zL>Qrvui3p9CARd=+Z$*tmm^H>rFeXc_Wd&E+Y$5q96svLb`}MXK&}=;Ey+{o1tS+L zP*lJPkqs2U(WIND0&-C^ovsm+xYGqLRz2X*O%73E<16KK5h7YVX27zdjsgy*G~#DF zWHI?rXSEC)1D~AIAAvBnUzY2QSZ6eqbs1P+&iU?VV`C)CY?W`iVzyeHG{rqz&sO6D zBugYQh@L+3nWQQ1AvzMq^`C>ms#4rvA|&|?`Yx=hUK;&6sxRJGbN={$jNc#rh&BIA z3Y;Gv>tkc4(_~#krr5ILCOOUY|?Y& z@K!eSJ7c9y^=?m-178Dk`D)MbNw4N(HP#q;jrK@4cawC9JIvKJY% z>>+$x&E6EWx3-1sb#{E_G01*KzFl*3pxKUZLgIt4{zki$V9?%$Z-U}$fOezm)KeXf(-(Q++G|(TxGI;~ z@gdy0`uX-s`&ssd_EYUae_eB3hy4_Lm9a8dw=OW>e&z}GCVV8aj-MY4o^3y~E8q{c zb=c+SZ_l=0bY^ET;A_Q)6wkiIaMjc-ubFS5x3_c#L-w@+``KsN9XdE6g%|}t$QEkq z3J7`m>sl!iByDeL>j>D@cLBA`TJ0Mk)W*8505B@k#8fHVU}IZnb3JpfZ|Z6aHmz+A z*w=2bpJ2zA=lns$tnO@CD+R98hfE;|OF+EnB4CxZ@FW%&Vk8C>@dJ#_vzSyXJ|>5+ z<<4&DYz{R+O0(lz@_V!z)I--=s|k7+_TRzW%Ny*O4C|2sqY(yG0#qoZ!a zBtvE8>TU-Mp!GKf{FmX|pHK=Fw0tc-zT1@;q+}CpYwi@u);HA)UF(uL1&mr9qA@|E z24B3h7cEh#_CE!ym+UN;%1)67H80cs-P7!`P{q`l^mGwKT&L)SZ`Rr1T9GLfl5?Xr$1zG?QCw2%gH#gxzi{ZhRy7ia^mUqcViAGPj3V zTzp~>4Fa0d4qe3uAlc^Z_^>JB1nl@Gbql`MXkXXS*4bWcud;%&8LFSP8+xY^lN%GR)uM+@n+^foq+f{juDp!9eJ(hlHSyQWM5b~3n_{3v}H7A`W_*P- z7(#jLT0=oZ`?pn{;=`WQUuyP}U1u6Pe*ckIfeJsg&naN-3czw(6PDqd?vU;l6>pz% zhm?D#a`~M|UOHR=mRk{bl3A{-OrC?CVdUYPlXocmG>o~(CEXn5^5Hn)Ny?q9+>?}h zGTh0aX70=J5%(%jF`B!9bh8jke#_E^@7^;Vrt$Brmx;zq<94K}KK%;IB0gbT{O`)6 zxW26u9p)*gCAu*XS)-I$LS~yc@>`yi1j^{6mUj6Tb~ZJK+^tKR+t$`KyIZ^XwQ^W9 zH^c+F8IX#B(F?w$p2Tr%keB>XT$eB~us4>GUU3~d7N$4V;Qpg{dhwPVqb~M0&!5ll zUeBN31SWEf>Ec^|DdMPBIal<&YE)^7Ex&y|f4&GtI{5}puHffuHODIc85BW%pkppR z5$o2YQ}Nwi7D0S{7~fycF%A{qMDtx3H?|)kgX*O;vx!om{hjL*ay`_jlDwRu>cLZ5 z+Jw)skFN$%t{BS}p}JO-__`>cL&Uc|`I&cJuo#uUi(lFX&EW-0d zcS|5^>b*)|%ll*LzRY`usI#PWVygDfOPQ&Wufs*%C8d|5O@F+Umly>?*HyazR(+Rz z%erNq+wnUWH9{RJ$8QmS>lKdrk%+Si-M~(F)QADiH4Gl>^%?w*!_UO80l!B4HsW_3 zeqX@vi})P|eWI>7@#96<4wpK|i|`3>?F4Y03Z6($-J=fhI#tmWN-zdfPNdP{v*Bl4 z4RhwqwxH8&gI`}qAWepmo~|7*+Xr=@@SzBq0QYb}=*R{Cq#@s=A#Nt_hrlHr@%#}_ zm$aPITVW0n2=m8qZuHf0jzubLW5n@C8oH$8dH`XD9}V~zxLQWXg1@5x6GuP$D8h_) zEL_GTj_^zc+ZBAGf^|L^_6dGSKNIxx;IeGQGyZ(I7YOd7_&3@+pAGPNj}1^ye6#hhOur zaRUm+bafs#!%rTFTYxmR9S%=cvc$cs*E z1Ja^i6sa)s#khnujZX6v;B=a7_d3l};b&Piy-t%hj$urbaS3Z0rb(F=Xe z+3EOE7Mh-Ekd}Cj(|kFA(`l5zulXv4pR&{Rny)h87{)Xim#~$F?O-l`tiKA-P|qsx zI|D!BnGW050{ob71t>`4#P2Nph-X|~PnGbKE(7&Mza761j11WK476{VKGWvK_D($} z548W((S>j|A1?T-R9JEv%$pZ&0@EOmXDdJT@f`e^-eS1a@g;Cg{IrhJPkJp2%8_Zf z@ne|AQ|`1WOH~;4OX~x5$)mzFJ>|vnEW?jHuq+Ha55ML35x*EX#;w7R@foIdp;qB_ zdOH6Vz>_!Tuj`#@ti+FDnqKF>3OH;2=L2T`#OwT5!_W8()A?TjKk+)fe(*|}U4$QP z62q@ixQpQ@zYNoSUIM@7Q@2I(c`1Gj)9GqHeZX1yTmzVV60iBJgP-x&;HUXq3qSEX zJO5(P({#ytME&Ar)x|}CsXLu;wJvtU--RE;tZ6V^@|%HQHGW#3z3?-Q zdbrd-_B##uQI>?)!EID9`D7U5Xg*m^>QNJZ4AXes&OQqqGpAP&Ort>6!%!_!9)3ib07^d->zmUSy4$x2jJ_Hr% zt+s$Ny7YR>-stcQ*0F%nTDp*cKRmZZorT9q_N70a%32<0sL4t>d&C^ zGp!r(W7&q_vTQfQ{Q`d0vVBp(EC<6FU-Lm6!+6?*V;(vk=1E%OHBOg17w%rTMc0E4ZaG{Y#A3qbHuTkSc_L3Y+;X_L!}Y+u6Yd(g zd*OD&eG={>}dz6Fj{lZssT|4>Y8E65*#k zFgDf*zadg6sLZqXX5E;Xxh4-@Fp*0-HmJ~}V`JYOPj{P@t_XDRgYNs|>AtS%3J~T6 zUEgC$&d8=W(+g=yo|+N131J_V24#2)=<;!bfz~nN?gVa167GKBB1n&-;KjPy2V8dF z*qEIN#Le&2@u;3*r~CEs?c`W5lKjJGi}^S3&htVS^|xCQb4 z40~|`8dvtBSUjsNIHsJV=i^9>QrB^C{P@Y0_*@Nb>G9`rFR*_{L&A?As>FYj2wL65E53COz+&lp)gD zjn~p>_p_fwda~Y7=E}Sxw4&ae8)W63mO)sxCsBVTUc^5dJke~#e<>wDOY(X$ z6ACzzm0D&!NQ*qR!?ok}*OQp0=X;tbpE>E*=NZG9lfPMG{4(>djPI;6wq+mo+IdD#P6_;5a+YDNlQX0DLgVHM z`I}c6Kc3LIca`yjiCw_#&pqb)TI2ddrvJ7k=Z-_B-d}6{_aSd1&COF7`uNmi{!n9# zO`X1{Cg#ZY>eycj`fr9I@ zfZBaj&V%O}4^Y~j>eZ~O?qXO@!>4{8z{pm zQ-@dOY(3>!u<@1Ilb>H@d}lVT_Xowte4)np%IQx7|F&atPmM89dddsu8P}Cf9;`9` zHt+V#_s%okE59}4rSpt?=ARAVD+^}B|L_6_{O>Nf1MzMycQ&bGZ#o%P4F55*T!g!esqZ#;+CubngVwd$;G=N$cNb=KH9Ga!O5Iwzs5 zPdO(&S)KK*%1KXDXZ^5p2K>)g&Vc{B3nwAFEemsy_0JaOz`uXtvD>S&eqA-`>(yB| zEjsFf>a2GbO}e%^Ym+Mvn73Rr`>M0{EY1Vwr%Pt`RA)WDWH&|Pnf~0etY0ph{_L`> zrC1m+^P6vCB)_KT4znO#cck{zJ#v>UUq2JKwDPPUV|7Di(gN&Py3gF3~oAhN2 z_A`2s4dHy_VfY@-oc?mA@l@uC0(bG^Y4_$D=c6~>o-u7lE|k8`Fn*jl=}>e8h@T}p30ghP>YTaE!}HMkLco2P;PM zDwymP9(4WU@@6@OtJuPLRN|Er%kMCWj@>a-kk_zNxagt9b*D<0<8xkOFaq}o0by;X zz$tthfA9zib56`_Sls0^Kzem~ag2~CPp+cJ9(n;QaT}uwzVvXXO5s22?0+D2zVlT5 zE>`X;`v2s@_w?VmG%H62k+m!oV<=&&*hm`w-a-Uc3 z0p(6m_VaM%o}}D)%3ZA7RmyEpZkKX5D)%#Rm0q`9zBGV6$L-jv-eE5-DRmS(X0Hu|>PpVA7Xd6Oo?Bc} zTr#g@4o21g_{xHnyED@{j*vSicc;W>#q5tY5_ceKyOW5|F|>V3#7{6%wQKGq(e@yb zKG)E8B@ur}%>E?eVf|D)O2kh#>?!e64Bc-g(oc=q??ika9)38jDB;h-js;5DWe|T&znTS8aNM-l3 zV6{iLJFTR`pT&if;Z*n|4e!oW_@j)Tlz83S8L=w^zHH;Qu^2*mW(s$_9OfuImnf~* z5{2gyr1iQ`;kk@xy;>BWOM%v_M*xQ!T+*{%e2&B2P+Xd`Uf&hKp$3=Stk=B?&t)^~ z^(z3G*m0uu(5?dCQ+WOU7T4T(^+73;rph36sDy!3d6 z&uz$$ejn@t5sGmQ%169@k4ulM4paEy9;cwx{C5DKN>9RqKHl#1{_}4Le7v3NamqH8 zeu-+ocE!)16khLZU!(BvDEy&)PJz*K_zVP2{*&?Z6khMYpQzIDD!ks`f2_jSDg2gA zGJU(kU#akVpFbAp#C4m(>wWoJ{yzbp^*dbX6m)uAx<}#lJVE2120oQMBZ}UBqD)65 zhTDq@?@hvgpzz7_k3%O&{TWKqpW_AoFk`P8|LXMJzzhD9(qAd)XBbyXi&^ozt{79i8kf|KOJ~OrK9H)9)VXx7N;c*eh~P4?ATnh(`li`GlewtZ>GUd zLqSr>p&oehzj>WZpU-i4-LB||?M?yf+dXO0nT#O_>t(B|mm(D*f`w>1_)qR1%RoO3 z^m<&O#A$GTnTpRfY49P9pP9%fKZ8kr^m|d-K0FV+C~t?P*Lj_c2_x&vewxIefaiF; zP7`?Px86f(@LyKx>^spZ$i=g9`0h?a|C==UchcaeLZvj%R(|ZjPs=>gSiRF}MH{n$ zpNSp1dM(|qLMsJ6&&XBvrTeKXMEX;VEvLwI_#Bkiw*(%~!jj~CpQ87k

z`SwDQa zFtDlkEJ=f3D)7ixrGw@#uFs~S?*X3W8ddgiwW7Z}4gH_f;9r4aOwSxY?)`xDnNy5> zRgXC;{llQL)RQ$QI|YW%mw9;vUiz)~OVi+g4m|amYZl|Tep9lOjj8}VhIz_tqNf8uqs(i5-JE8VWYA@GvMdjBocLAxtyatbueptzm~ zp8a5QKk^3Yk*`zHi#<7rQ3M4>5yoBL2Lt(#&+WUM31KVJ(08Z7-;f6XGvJwivR>^F z^k^@tooe3xkS3k?)8JQ9&l5P8K!?CjF?xcsTmeOME%2c(xrLF>Spz0#E+Eihs?|x6`Ea zvPcK|aG6XeL+O9R5vlpSDh>YDH24?N;NMPzuVMdUEs*ker@;@U!9OSP7-y>b%~9#R z2|Vj%O+c2H`+9htg`sw;e7$M#mkT`lyVIN^z7m`9!!-22QuM<~{mx&3w~G`KZ==9d zuk`!zU|U>=pyMN-LxoO>(D#3Ihy@lDeSvO&pgrUZ@y$v{i7^i47>_C&kD4#=q9<`1c%6Gslku zBwRIkAK2k5uU=HMC=m3oaCr$j&fswX-j240re>Vs;BIZer*l6sM&{wYT5qTW@7;c4 zSu83^nYzbuhI)HF*tD*-F4Wl(2#SK%h8%Nzi#uDz`4>K%(-Cl#`FJpb59eRtP?`?6 zg%>Y-pV{cijmr(yr8bnd8uu6h( z?I)R8twjpVY}F^0SzALxb6Z_~@%j!Ps?t&1w)V3)2*xPp`};l>h^B!D4y@;N_+lQg zGCr^#O|7G;9j6$j3hQd}soNuia1zC7tDv#p*orZK_5;SNENif*P0>{4mE1`;rtjGje;tU7u44g zSZ5^TI$MeLcZ7nW&W46!zk$OF0_$*o3{E=bKYUp-`o^gFu^ z%AgtNqSP0cm&~n5P2_7p6JR0gHi#l^Fd90*xUasmrDX%6SRk~grceT@&$qaya+%BL zs$S$ntMVk03TZB?F?@?IsIFY*u0mKs1oR91ia;>ESD6}}af3q3Bj z)#}=|G90qx@;jYNJnn^6t5^HVisy)c&^&)*M_X%KnQv98qcpeyVi7X&Usl%Iw%*m1 z5Tn=u8CYB2N1PyIF|!r2)5N(Li%O-5jGeVm-mXsla`4~-pE{t*#k0=lG@(auI3!A( z`jOB*%oQgkIZAj)kq=tYy+L#{B|dF_%KUBh0UwX3;)xt0ZW#_YD#P((?s^x*gFdK< zGSDyeCmpv_RqFCC^?wwFHuy|-PVn*$oPVLxWwNm{YEI(?OzbCQ_&gj}M@hN;=vN&6#yT{j4pc$V?RVAI;pRGnji#<6 zKAJop@=yam3RW5Y< z9K~e@PTj(Baw;!hX>n=sT*~9)bI(?kisg70{822&6+SIhmAh1^QPTNSvOS2S+^9Em z0vJl9(gO}p9A#2lFvR1*aBf(m5}&Jfo-cuMB{6Yk8T3z$uF+f*1gDh7QN|X8v!K?s z@?=5OJrC3C_MzbjWU^#yL!~7?UkK)Hy-%LwCr`IZ)=L8kphroJUbnWbBMF5rBG~4W z11MD73b#neQo+Ri3)LAjO>@{P${Xu~z6P9h$R?{zlWhAgqbu0j-qF+=YDi=@CLpvs z3?J9FwKWIoT1A`x*v(gPQ`;s-e>iF{!#YpFwDFOPy_o0S6r(KE}AxVdR9MY^gQ!(5u1y0uMR4vbsTJ>&2sVg1qnVTO>T z7*Mo|SRJ>mYx1Y5V%aijY0~LfeKST6qPGa3Nu$s3``ViVx+J1%N*zM@s+aQVt}9hV zu{Ri3N4hrk$#SLTfo>R2f2gu!U9hsXz9w*aaB)Xli>Iz-ZGD}~>u>RDQy;?7iH-_2 zk3cQgwSbGD-xp$aS%$+&eVC}snDLBtw&koAiqq|h^|Y}-S7}+vJoFgV80R}mWnRj} z*Us^Y1Dza>jsWH{t+H3{Vgo|g6)&$tj4ecOTemitx>rOu&Hl?#Mtz4I0yxtT-8B2D ze?@17<%J39ZW=Fl!3ewk21~bJ-|S5^q7Y;>zXZ{tiW^;YxY>Dr}*7b?~ ziPrYmpi}C4tZh5WV7{PuO08Ti$^AR42%Rd9nCuqS9gC?)I!Wj1=|PrOH$^sYdF{#& zwCD^}{_@&oZS|ea0aMHdVb|FvS1ha2<4{P#G0*2y#v7_43_*%6Kb2@=rryhmX|qxb zIp&mR1JlX5ejY=G@dD<&%Y5sb>I1D}fSY7ViX9v?SQI=>nOzuk2Lf>olznT8NokU- zTn3~OXw{k!@0DYXMvdOY@Kjd<7ZId{Wbd-Jt{(k;Fr<@-jjz#3`f=Q3f(Q^dJ~mXX zAXS4@L9RKDvWkz_9LE___JhHue`HHzdL>3=7>G(m)P0YrhJUGTjvw=t>mL6j#(fwA z%ZPI;*%!;9UXo6ySVNjCrb42VLEq834i(ef?28R?q30Y})5LSa#Nn47bH>WAO3VQ$ z4*Yagw4;Z^`i%cF*K!~j=^wLEhQX0!Ii^XfLLH^FzOl}ui{rvjn{Fm?4NT6X9CTO= zXlmHt>gZ_esKV^Vh1rR3E&9h;X~f`KOhvSyRm zlIjScD42(|ptDLjk{+k*^O}M_e|x8>BG^K|&mZg*Z691%T5PJWEMZ)NDMOlZiDmE} zpD>8NPzyEf&{xjsL~nu`Ofic0`POxJ`!I(JwzXo|iJ4SaNz$MsWr=YpvM6yoOhy3q z;}Imqa7uH~)imB%ete@d2Wv9*VrrMz7Z+m+ih+Pd;$j%9wL;r^ESk4qRiO!sKMJ;m;sxW|Ft&Bal2G2S53?K z!uwDis>N~Hig_f@$Irmw=s2pm<0DKnq~Oz56JtUgOli>OjGvY85K8gDM6LadtJ_%S zrSM^56Ca&pIK)A35C`FF8OGB>Tb2b8h7Yrgrt$M?CSAtiXWSC{*0r>?N@K~@HmkKs zNSnPvqQzV^&SG*^v;~iW;KF_jw^930kU&SfQH;ag1I3vB&xZON#b|8B80hoN z@|__aU%yXqDV}%p`mS_h{A)Vxu0}ZDLDH~(?;sb?t$B?~NAOP>X#N#ioQFlcj<4TO zIH=+eE5c;?>GEHWaE2{ZboxE2t%^XucSRaJDvsr^<9`k~$^_dt!tb7 zxGs3C@$D+U-7YD5k|OARIoiD@DZbvfzPV7wU%6bm`mWQj%|`*^V=G-OzJ5<-SjA74myV}h zo>|25Yc}