Skip to content
Permalink
Browse files

Updated to latest plasma (nightly build).

  • Loading branch information...
sdrelton committed May 25, 2017
1 parent 8b69bee commit 1445ec74b1737e950babb99141603ec5f0860480
@@ -0,0 +1,51 @@
/**
*
* @file
*
* PLASMA is a software package provided by:
* University of Tennessee, US,
* University of Manchester, UK.
*
* @precisions normal z -> s d c
*
**/

#include "plasma_async.h"
#include "plasma_context.h"
#include "plasma_descriptor.h"
#include "plasma_internal.h"
#include "plasma_types.h"
#include "plasma_workspace.h"
#include "core_blas.h"

/******************************************************************************/
// Submit one core_omp_zlacpy() call per tile of one triangle of the tiled
// matrix A, pairing each tile with the matching block of the column-major
// array pA (leading dimension lda). The tile is passed as the first matrix
// argument and the pA block as the second.
//
// For A.type == PlasmaUpper, tile row m covers tile columns m .. A.nt-1;
// for any other type it covers tile columns 0 .. m.
//
// Nothing is submitted when the sequence already carries a failure status.
void plasma_pzdesc2tr(plasma_desc_t A,
                      plasma_complex64_t *pA, int lda,
                      plasma_sequence_t *sequence,
                      plasma_request_t *request)
{
    // A sequence that has already failed schedules no further work.
    if (sequence->status != PlasmaSuccess)
        return;

    for (int m = 0; m < A.mt; m++) {
        int ldt = plasma_tile_mmain(A, m);

        // Range of tile columns visited in tile row m.
        int n_first;
        int n_stop;
        if (A.type == PlasmaUpper) {
            n_first = m;
            n_stop = A.nt;
        }
        else {
            n_first = 0;
            n_stop = m+1;
        }

        for (int n = n_first; n < n_stop; n++) {
            // Window [row_lo, row_hi) x [col_lo, col_hi) inside the tile.
            // Only the first and last tile in each direction can be partial,
            // because the matrix may start at offset (A.i, A.j) and end
            // before a tile boundary.
            int col_lo = 0;
            if (n == 0)
                col_lo = A.j%A.nb;

            int row_lo = 0;
            if (m == 0)
                row_lo = A.i%A.mb;

            int col_hi = A.nb;
            if (n == A.nt-1)
                col_hi = (A.j+A.n-1)%A.nb+1;

            int row_hi = A.mb;
            if (m == A.mt-1)
                row_hi = (A.i+A.m-1)%A.mb+1;

            // Top-left corner of block (m, n) in the column-major array;
            // size_t arithmetic keeps the offset from overflowing int.
            plasma_complex64_t *cm_block =
                &pA[(size_t)A.nb*lda*n + (size_t)A.mb*m];
            plasma_complex64_t *tile =
                (plasma_complex64_t*)plasma_tile_addr(A, m, n);

            // NOTE(review): the tile offset strides columns by A.nb while the
            // leading dimension handed to the kernel is ldt; the two agree
            // only when ldt == A.nb -- confirm against the tile layout.
            core_omp_zlacpy(PlasmaGeneral, PlasmaNoTrans,
                            row_hi-row_lo, col_hi-col_lo,
                            &(tile[col_lo*A.nb+row_lo]), ldt,
                            &(cm_block[col_lo*lda+row_lo]), lda,
                            sequence, request);
        }
    }
}
@@ -32,7 +32,6 @@ void plasma_pzgbtrf(plasma_desc_t A, int *ipiv,
plasma_context_t *plasma = plasma_context_self();
int ib = plasma->ib;
int max_panel_threads = plasma->max_panel_threads;
plasma_barrier_t *barrier = &plasma->barrier;

for (int k = 0; k < imin(A.mt, A.nt); k++) {
// for band matrix, gm is a multiple of mb,
@@ -52,23 +51,44 @@ void plasma_pzgbtrf(plasma_desc_t A, int *ipiv,
depend(out:ipivk[0:size_i]) /*\
priority(1) */
{
volatile int *max_idx = (int*)malloc(max_panel_threads*sizeof(int));
if (max_idx == NULL)
plasma_request_fail(sequence, request, PlasmaErrorOutOfMemory);

volatile plasma_complex64_t *max_val =
(plasma_complex64_t*)malloc(max_panel_threads*sizeof(
plasma_complex64_t));
if (max_val == NULL)
plasma_request_fail(sequence, request, PlasmaErrorOutOfMemory);

volatile int info = 0;

plasma_barrier_t barrier;
plasma_barrier_init(&barrier);

if (sequence->status == PlasmaSuccess) {
for (int rank = 0; rank < max_panel_threads; rank++) {
#pragma omp task // priority(1)
#pragma omp task shared(barrier) // priority(1)
{
// create a view for panel as a "general" submatrix
plasma_desc_t view = plasma_desc_view(
A, (A.kut-1)*A.mb, k*A.nb, mak, nvak);
view.type = PlasmaGeneral;

int info = core_zgetrf(view, &ipiv[k*A.mb], ib, rank,
max_panel_threads, barrier);
core_zgetrf(view, &ipiv[k*A.mb], ib,
rank, max_panel_threads,
max_idx, max_val, &info,
&barrier);

if (info != 0)
plasma_request_fail(sequence, request, k*A.mb+info);
}
}
}
#pragma omp taskwait

free((void*)max_idx);
free((void*)max_val);
}
// update
// TODO: fills are not tracked, see the one in fork
@@ -40,8 +40,8 @@ void plasma_pzgeswp(plasma_enum_t colrow,
int lda10 = plasma_tile_mmain(A, A.mt-1);
int nva10 = plasma_tile_nview(A, n);

#pragma omp task depend (inout:a00[ma00*na00]) \
depend (inout:a10[lda10*nva10])
#pragma omp task depend (inout:a00[0:ma00*na00]) \
depend (inout:a10[0:lda10*nva10])
{
int nvan = plasma_tile_nview(A, n);
plasma_desc_t view = plasma_desc_view(A, 0, n*A.nb, A.m, nvan);
@@ -30,8 +30,9 @@ void plasma_pzgetrf(plasma_desc_t A, int *ipiv,

// Read parameters from the context.
plasma_context_t *plasma = plasma_context_self();

// Set tiling parameters.
int ib = plasma->ib;
plasma_barrier_t *barrier = &plasma->barrier;

for (int k = 0; k < imin(A.mt, A.nt); k++) {
plasma_complex64_t *a00, *a20;
@@ -54,25 +55,45 @@ void plasma_pzgetrf(plasma_desc_t A, int *ipiv,
depend(out:ipiv[k*A.mb:mvak]) \
priority(1)
{
volatile int *max_idx = (int*)malloc(num_panel_threads*sizeof(int));
if (max_idx == NULL)
plasma_request_fail(sequence, request, PlasmaErrorOutOfMemory);

volatile plasma_complex64_t *max_val =
(plasma_complex64_t*)malloc(num_panel_threads*sizeof(
plasma_complex64_t));
if (max_val == NULL)
plasma_request_fail(sequence, request, PlasmaErrorOutOfMemory);

volatile int info = 0;

plasma_barrier_t barrier;
plasma_barrier_init(&barrier);

if (sequence->status == PlasmaSuccess) {
for (int rank = 0; rank < num_panel_threads; rank++) {
#pragma omp task priority(1)
#pragma omp task shared(barrier) priority(1)
{
plasma_desc_t view =
plasma_desc_view(A,
k*A.mb, k*A.nb,
A.m-k*A.mb, nvak);

int info = core_zgetrf(view, &ipiv[k*A.mb], ib,
rank, num_panel_threads,
barrier);
core_zgetrf(view, &ipiv[k*A.mb], ib,
rank, num_panel_threads,
max_idx, max_val, &info,
&barrier);

if (info != 0)
plasma_request_fail(sequence, request, k*A.mb+info);
}
}
}
#pragma omp taskwait

free((void*)max_idx);
free((void*)max_val);

for (int i = k*A.mb+1; i <= imin(A.m, k*A.mb+nvak); i++)
ipiv[i-1] += k*A.mb;
}
@@ -111,7 +132,6 @@ void plasma_pzgetrf(plasma_desc_t A, int *ipiv,
mvak, nvan,
1.0, A(k, k), ldak,
A(k, n), ldak);

// gemm
for (int m = k+1; m < A.mt; m++) {
int mvam = plasma_tile_mview(A, m);
@@ -135,7 +155,7 @@ void plasma_pzgetrf(plasma_desc_t A, int *ipiv,
// pivoting to the left
for (int k = 1; k < imin(A.mt, A.nt); k++) {
plasma_complex64_t *akk;
akk = A(k, k);
akk = A(k-1, k-1);
int makk = (A.mt-k-1)*A.mb;
int nakk = plasma_tile_nmain(A, k);

@@ -0,0 +1,51 @@
/**
*
* @file
*
* PLASMA is a software package provided by:
* University of Tennessee, US,
* University of Manchester, UK.
*
* @precisions normal z -> s d c
*
**/

#include "plasma_async.h"
#include "plasma_context.h"
#include "plasma_descriptor.h"
#include "plasma_internal.h"
#include "plasma_types.h"
#include "plasma_workspace.h"
#include "core_blas.h"

/******************************************************************************/
// Submit one core_omp_zlacpy() call per tile of one triangle of the tiled
// matrix A, pairing each block of the column-major array pA (leading
// dimension lda) with the matching tile. The pA block is passed as the first
// matrix argument and the tile as the second.
//
// For A.type == PlasmaUpper, tile row m covers tile columns m .. A.nt-1;
// for any other type it covers tile columns 0 .. m.
//
// Nothing is submitted when the sequence already carries a failure status.
void plasma_pztr2desc(plasma_complex64_t *pA, int lda,
                      plasma_desc_t A,
                      plasma_sequence_t *sequence,
                      plasma_request_t *request)
{
    // Bail out early if an earlier operation already failed the sequence.
    if (sequence->status != PlasmaSuccess)
        return;

    for (int m = 0; m < A.mt; m++) {
        int ldt = plasma_tile_mmain(A, m);

        // Tile columns of row m that are visited.
        int n_begin;
        int n_limit;
        if (A.type == PlasmaUpper) {
            n_begin = m;
            n_limit = A.nt;
        }
        else {
            n_begin = 0;
            n_limit = m+1;
        }

        for (int n = n_begin; n < n_limit; n++) {
            // First/last column and row of the tile that are part of the
            // matrix; only border tiles can be partial.
            int col_first = 0;
            if (n == 0)
                col_first = A.j%A.nb;

            int row_first = 0;
            if (m == 0)
                row_first = A.i%A.mb;

            int col_past = A.nb;
            if (n == A.nt-1)
                col_past = (A.j+A.n-1)%A.nb+1;

            int row_past = A.mb;
            if (m == A.mt-1)
                row_past = (A.i+A.m-1)%A.mb+1;

            // Block (m, n) of the column-major array; the offset is computed
            // in size_t so large matrices do not overflow int.
            plasma_complex64_t *cm_block =
                &pA[(size_t)A.nb*lda*n + (size_t)A.mb*m];
            plasma_complex64_t *tile =
                (plasma_complex64_t*)plasma_tile_addr(A, m, n);

            // NOTE(review): the tile offset strides columns by A.nb while the
            // leading dimension handed to the kernel is ldt; the two agree
            // only when ldt == A.nb -- confirm against the tile layout.
            core_omp_zlacpy(PlasmaGeneral, PlasmaNoTrans,
                            row_past-row_first, col_past-col_first,
                            &(cm_block[col_first*lda+row_first]), lda,
                            &(tile[col_first*A.nb+row_first]), ldt,
                            sequence, request);
        }
    }
}
@@ -0,0 +1,67 @@
/**
*
* @file
*
* PLASMA is a software package provided by:
* University of Tennessee, US,
* University of Manchester, UK.
*
* @precisions normal z -> s d c
*
**/

#include "plasma_async.h"
#include "plasma_context.h"
#include "plasma_descriptor.h"
#include "plasma_internal.h"
#include "plasma_types.h"
#include "plasma_workspace.h"

/***************************************************************************//**
    @ingroup plasma_ccrb2cm
    Convert a matrix from tiled (CCRB) to column-major (CM) layout by
    delegating to plasma_pzdesc2tr(). Out-of-place.

    The arguments are validated first; on any validation failure an error is
    reported through plasma_error() and the function returns without
    scheduling work.

    @param[in]     A         Descriptor of the tiled matrix; must pass
                             plasma_desc_check().
    @param[in,out] pA        Column-major array; must not be NULL.
    @param[in]     lda       Leading dimension of pA (forwarded unchecked).
    @param[in,out] sequence  Sequence on which failures are recorded; must not
                             be NULL.
    @param[in,out] request   Request on which failures are recorded; must not
                             be NULL.
*/
void plasma_omp_zdesc2tr(plasma_desc_t A,
                         plasma_complex64_t *pA, int lda,
                         plasma_sequence_t *sequence,
                         plasma_request_t *request)
{
    // Validate the error-reporting handles before anything else: every other
    // failure path below hands both of them to plasma_request_fail(), so a
    // NULL handle must never reach that call.
    if (sequence == NULL) {
        plasma_error("NULL sequence");
        // No sequence exists to record the failure on; the message above is
        // the only report that can be made here.
        return;
    }
    if (request == NULL) {
        plasma_error("NULL request");
        // Record the failure on the sequence directly rather than forwarding
        // the NULL request to plasma_request_fail().
        sequence->status = PlasmaErrorIllegalValue;
        return;
    }

    // Get PLASMA context.
    plasma_context_t *plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA not initialized");
        plasma_request_fail(sequence, request, PlasmaErrorIllegalValue);
        return;
    }

    // Check input arguments.
    if (plasma_desc_check(A) != PlasmaSuccess) {
        plasma_error("invalid A");
        plasma_request_fail(sequence, request, PlasmaErrorIllegalValue);
        return;
    }
    if (pA == NULL) {
        plasma_error("NULL A");
        plasma_request_fail(sequence, request, PlasmaErrorIllegalValue);
        return;
    }

    // quick return: an empty matrix has nothing to copy
    if (A.m == 0 || A.n == 0)
        return;

    // Call the parallel function.
    plasma_pzdesc2tr(A, pA, lda, sequence, request);
}

0 comments on commit 1445ec7

Please sign in to comment.
You can’t perform that action at this time.