-
Notifications
You must be signed in to change notification settings - Fork 687
/
solve.cpp
154 lines (133 loc) · 6.6 KB
/
solve.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
//==============================================================
// Copyright © 2020 Intel Corporation
//
// SPDX-License-Identifier: MIT
// =============================================================
/*
*
* Content:
* Example of solving a system of linear equations with general
* block tridiagonal coefficient matrix
************************************************************************
* Purpose:
* ========
* Testing solution of a linear system of equations with general block
* tridiagonal matrix of coefficients
* D(1)*X(1) + C(1)*X(2) = F(1)
* B(1)*X(1) + D(2)*X(2) + C(2)*X(3) = F(2)
* B(2)*X(2) + D(3)*X(3) + C(3)*X(4) = F(3)
* ...
* B(N-2)*X(N-2) + D(N-1)*X(N-1) + C(N-1)*X(N) = F(N-1)
* B(N-1)*X(N-1) + D(N)*X(N) = F(N)
* Here D(J),B(J),C(J) are NB by NB matrices - block matrix coefficients
* X(J),F(J) are NB by NRHS-matrices - unknowns and RHS components
*
* Solving is done via LU factorization of the coefficient matrix
* (call DGEBLTTRF) followed by call DGEBLTTRS to solve a system of
* equations with coefficient matrix factored by DGEBLTTRF.
*
* Coefficients and right hand sides are randomly generated.
*
* Testing is done via calculating
* max{||F(1)-D(1)*X(1)-C(1)*X(2)||,
* ||F(2)-B(1)*X(1)-D(1)*X(1)-C(1)*X(2)||,
* ...
* ||F(N)-B(N-1)*X(N-1)-D(N)*X(N)||}
*
* ||.|| denotes Frobenius norm of a respective matrix
*/
#include <cstdint>
#include <iostream>
#include <vector>
#include <sycl/sycl.hpp>
#include "oneapi/mkl.hpp"
using namespace oneapi;
int64_t dgeblttrf(sycl::queue, int64_t n, int64_t nb, double* d, double* dl, double* du1, double* du2, int64_t* ipiv);
int64_t dgeblttrs(sycl::queue, int64_t n, int64_t nb, int64_t nrhs, double* d, double* dl, double* du1, double* du2, int64_t* ipiv, double* f, int64_t ldf);
double resid2(int64_t n, int64_t nb, int64_t nrhs, double* dl, double* d, double* du1, double* x, int64_t ldx, double* b, int64_t ldb);
template<typename T>
using allocator_t = sycl::usm_allocator<T, sycl::usm::alloc::shared>;
int main() {
if (sizeof(MKL_INT) != sizeof(int64_t)) {
std::cerr << "MKL_INT not 64bit" << std::endl;
return -1;
}
int64_t n = 200;
int64_t nb = 20;
int64_t nrhs = 10;
int64_t ldf = nb*n;
int64_t info = 0;
// Asynchronous error handler
auto error_handler = [&] (sycl::exception_list exceptions) {
for (auto const& e : exceptions) {
try {
std::rethrow_exception(e);
} catch(mkl::lapack::exception const& e) {
// Handle LAPACK related exceptions happened during asynchronous call
info = e.info();
std::cout << "Unexpected exception caught during asynchronous LAPACK operation:\ninfo: " << e.info() << std::endl;
} catch(sycl::exception const& e) {
// Handle not LAPACK related exceptions happened during asynchronous call
std::cout << "Unexpected exception caught during asynchronous operation:\n" << e.what() << std::endl;
info = -1;
}
}
};
sycl::device device{sycl::default_selector{}};
sycl::queue queue(device, error_handler);
sycl::context context = queue.get_context();
if (device.is_gpu() && device.get_platform().get_backend() != sycl::backend::ext_oneapi_level_zero) {
std::cerr << "This sample requires Level Zero when running on GPUs." << std::endl;
std::cerr << "Please check your system configuration." << std::endl;
return 0;
}
if (device.get_info<sycl::info::device::double_fp_config>().empty()) {
std::cerr << "This sample uses double precision, which is not supported" << std::endl;
std::cerr << "by the selected device. Quitting." << std::endl;
return 0;
}
allocator_t<double> allocator_d(context, device);
allocator_t<int64_t> allocator_i(context, device);
std::vector<double, allocator_t<double>> d(nb * n*nb, allocator_d);
std::vector<double, allocator_t<double>> dl(nb * (n-1)*nb, allocator_d);
std::vector<double, allocator_t<double>> du1(nb * (n-1)*nb, allocator_d);
std::vector<double, allocator_t<double>> du2(nb * (n-2)*nb, allocator_d);
std::vector<double, allocator_t<double>> f(ldf * nrhs, allocator_d);
std::vector<double, allocator_t<double>> dcpy(nb * n*nb, allocator_d);
std::vector<double, allocator_t<double>> dlcpy(nb * (n-1)*nb, allocator_d);
std::vector<double, allocator_t<double>> du1cpy(nb * (n-1)*nb, allocator_d);
std::vector<double, allocator_t<double>> fcpy(ldf * nrhs, allocator_d);
std::vector<int64_t, allocator_t<int64_t>> ipiv(nb * n, allocator_i);
std::vector<int64_t> iseed = {1, 4, 23, 77};
// Initializing arrays randomly
LAPACKE_dlarnv(2, reinterpret_cast<MKL_INT*>(iseed.data()), n*nb*nb, d.data());
LAPACKE_dlarnv(2, reinterpret_cast<MKL_INT*>(iseed.data()), (n-1)*nb*nb, dl.data());
LAPACKE_dlarnv(2, reinterpret_cast<MKL_INT*>(iseed.data()), (n-1)*nb*nb, du1.data());
LAPACKE_dlarnv(2, reinterpret_cast<MKL_INT*>(iseed.data()), n*nb*nrhs, f.data());
// Copying arrays for testing purposes
cblas_dcopy(n*nb*nb, d.data(), 1, dcpy.data(), 1);
cblas_dcopy((n-1)*nb*nb, dl.data(), 1, dlcpy.data(), 1);
cblas_dcopy((n-1)*nb*nb, du1.data(), 1, du1cpy.data(), 1);
cblas_dcopy(n*nb*nrhs, f.data(), 1, fcpy.data(), 1);
std::cout << "Testing accuracy of solution of linear equations system" << std::endl;
std::cout << "with randomly generated block tridiagonal coefficient" << std::endl;
std::cout << "matrix by calculating ratios of residuals" << std::endl;
std::cout << "to RHS vectors' norms." << std::endl;
// LU factorization of the coefficient matrix
info = dgeblttrf(queue, n, nb, d.data(), dl.data(), du1.data(), du2.data(), ipiv.data());
if (info) {
std::cout << "DGEBLTTRF returned nonzero INFO = " << info << std::endl;
return 1;
}
// Solving the system of equations using factorized coefficient matrix
info = dgeblttrs(queue, n, nb, nrhs, d.data(), dl.data(), du1.data(), du2.data(), ipiv.data(), f.data(), ldf);
if (info) {
std::cout << -info << "-th parameter in call of dgeblttrs has illegal value" << std::endl;
return 1;
} else {
// computing the residual
double eps = resid2(n, nb, nrhs, dlcpy.data(), dcpy.data(), du1cpy.data(), f.data(), ldf, fcpy.data(), ldf);
std::cout << "max_(i=1,...,nrhs){||ax(i)-f(i)||/||f(i)||} = " << eps << std::endl;
}
return 0;
}