Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update device ID selection for HIP/CUDA/MAGMA backends #740

Merged
merged 1 commit into from
Apr 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ Currently, each MAGMA library installation is only built for either CUDA or HIP.
set of libCEED backends (``/gpu/cuda/magma/*`` or ``/gpu/hip/magma/*``) will automatically be built
for the version of the MAGMA library found in ``MAGMA_DIR``.

Users can select a device for any of the CUDA, HIP, and MAGMA backends by appending ``:device_id=#``
to the resource name, where ``#`` is the integer ID of the desired device. For example:

- ``/gpu/cuda/gen:device_id=1``

The ``/*/occa`` backends rely upon the `OCCA <http://github.com/libocca/occa>`_ package to provide
cross platform performance. To enable the OCCA backend, the environment variable ``OCCA_DIR`` must point
to the top-level OCCA directory, with the OCCA library located in the ``${OCCA_DIR}/lib`` (By default,
Expand Down
10 changes: 5 additions & 5 deletions backends/cuda-gen/ceed-cuda-gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ static int CeedInit_Cuda_gen(const char *resource, Ceed ceed) {
"Cuda backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP

Ceed ceedshared;
CeedInit("/gpu/cuda/shared", &ceedshared);
ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr);

Ceed_Cuda_gen *data;
Ceed_Cuda *data;
ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
ierr = CeedSetData(ceed, data); CeedChkBackend(ierr);
ierr = CeedCudaInit(ceed, resource, nrc); CeedChkBackend(ierr);

Ceed ceedshared;
CeedInit("/gpu/cuda/shared", &ceedshared);
ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr);

const char fallbackresource[] = "/gpu/cuda/ref";
ierr = CeedSetOperatorFallbackResource(ceed, fallbackresource);
CeedChkBackend(ierr);
Expand Down
4 changes: 0 additions & 4 deletions backends/cuda-gen/ceed-cuda-gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,6 @@ typedef struct {
void *d_c;
} CeedQFunction_Cuda_gen;

typedef struct {
Ceed_Cuda base;
} Ceed_Cuda_gen;

CEED_INTERN int CeedQFunctionCreate_Cuda_gen(CeedQFunction qf);

CEED_INTERN int CeedOperatorCreate_Cuda_gen(CeedOperator op);
Expand Down
2 changes: 1 addition & 1 deletion backends/cuda-shared/ceed-cuda-shared-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,7 @@ int CeedBasisApplyTensor_Cuda_shared(CeedBasis basis, const CeedInt nelem,
int ierr;
Ceed ceed;
ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
Ceed_Cuda_shared *ceed_Cuda;
Ceed_Cuda *ceed_Cuda;
CeedGetData(ceed, &ceed_Cuda); CeedChkBackend(ierr);
CeedBasis_Cuda_shared *data;
CeedBasisGetData(basis, &data); CeedChkBackend(ierr);
Expand Down
11 changes: 5 additions & 6 deletions backends/cuda-shared/ceed-cuda-shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <stdbool.h>
#include <string.h>
#include "ceed-cuda-shared.h"
#include "../cuda/ceed-cuda.h"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One of these days we'll have to decide how we want this reaching over into other backend's headers to work.

Expand All @@ -34,15 +33,15 @@ static int CeedInit_Cuda_shared(const char *resource, Ceed ceed) {
// LCOV_EXCL_STOP
ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr);

Ceed ceedref;
CeedInit("/gpu/cuda/ref", &ceedref);
ierr = CeedSetDelegate(ceed, ceedref); CeedChk(ierr);

Ceed_Cuda_shared *data;
Ceed_Cuda *data;
ierr = CeedCalloc(1, &data); CeedChk(ierr);
ierr = CeedSetData(ceed, data); CeedChk(ierr);
ierr = CeedCudaInit(ceed, resource, nrc); CeedChk(ierr);

Ceed ceedref;
CeedInit("/gpu/cuda/ref", &ceedref);
ierr = CeedSetDelegate(ceed, ceedref); CeedChk(ierr);

ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateTensorH1",
CeedBasisCreateTensorH1_Cuda_shared);
CeedChk(ierr);
Expand Down
4 changes: 0 additions & 4 deletions backends/cuda-shared/ceed-cuda-shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,6 @@ typedef struct {
CeedScalar *c_G;
} CeedBasis_Cuda_shared;

typedef struct {
Ceed_Cuda base;
} Ceed_Cuda_shared;

CEED_INTERN int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P1d,
CeedInt Q1d, const CeedScalar *interp1d, const CeedScalar *grad1d,
const CeedScalar *qref1d, const CeedScalar *qweight1d, CeedBasis basis);
Expand Down
14 changes: 7 additions & 7 deletions backends/cuda/ceed-cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <cuda_runtime.h>
#include <nvrtc.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -150,22 +149,22 @@ static int CeedGetPreferredMemType_Cuda(CeedMemType *type) {
//------------------------------------------------------------------------------
int CeedCudaInit(Ceed ceed, const char *resource, int nrc) {
int ierr;
const int rlen = strlen(resource);
const bool slash = (rlen>nrc) ? (resource[nrc] == '/') : false;
const int deviceID = (slash && rlen > nrc + 1) ? atoi(&resource[nrc + 1]) : -1;
const char *device_spec = strstr(resource, ":device_id=");
const int deviceID = (device_spec) ? atoi(device_spec+11) : -1;

int currentDeviceID;
ierr = cudaGetDevice(&currentDeviceID); CeedChk_Cu(ceed,ierr);
if (deviceID >= 0 && currentDeviceID != deviceID) {
ierr = cudaSetDevice(deviceID); CeedChk_Cu(ceed,ierr);
currentDeviceID = deviceID;
}

struct cudaDeviceProp deviceProp;
ierr = cudaGetDeviceProperties(&deviceProp, deviceID); CeedChk_Cu(ceed,ierr);
ierr = cudaGetDeviceProperties(&deviceProp, currentDeviceID);
CeedChk_Cu(ceed,ierr);

Ceed_Cuda *data;
ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
data->deviceId = deviceID;
data->deviceId = currentDeviceID;
data->optblocksize = deviceProp.maxThreadsPerBlock;
return CEED_ERROR_SUCCESS;
}
Expand Down Expand Up @@ -210,6 +209,7 @@ static int CeedInit_Cuda(const char *resource, Ceed ceed) {
return CeedError(ceed, CEED_ERROR_BACKEND,
"Cuda backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP
ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr);

Ceed_Cuda *data;
ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
Expand Down
10 changes: 5 additions & 5 deletions backends/hip-gen/ceed-hip-gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ static int CeedInit_Hip_gen(const char *resource, Ceed ceed) {
"Hip backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP

Ceed ceedshared;
CeedInit("/gpu/hip/shared", &ceedshared);
ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr);

Ceed_Hip_gen *data;
Ceed_Hip *data;
ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
ierr = CeedSetData(ceed, data); CeedChkBackend(ierr);
ierr = CeedHipInit(ceed, resource, nrc); CeedChkBackend(ierr);

Ceed ceedshared;
CeedInit("/gpu/hip/shared", &ceedshared);
ierr = CeedSetDelegate(ceed, ceedshared); CeedChkBackend(ierr);

const char fallbackresource[] = "/gpu/hip/ref";
ierr = CeedSetOperatorFallbackResource(ceed, fallbackresource);
CeedChkBackend(ierr);
Expand Down
4 changes: 0 additions & 4 deletions backends/hip-gen/ceed-hip-gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,6 @@ typedef struct {
void *d_c;
} CeedQFunction_Hip_gen;

typedef struct {
Ceed_Hip base;
} Ceed_Hip_gen;

CEED_INTERN int CeedQFunctionCreate_Hip_gen(CeedQFunction qf);

CEED_INTERN int CeedOperatorCreate_Hip_gen(CeedOperator op);
Expand Down
2 changes: 1 addition & 1 deletion backends/hip-shared/ceed-hip-shared-basis.c
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,7 @@ int CeedBasisApplyTensor_Hip_shared(CeedBasis basis, const CeedInt nelem,
int ierr;
Ceed ceed;
ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
Ceed_Hip_shared *ceed_Hip;
Ceed_Hip *ceed_Hip;
CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr);
CeedBasis_Hip_shared *data;
CeedBasisGetData(basis, &data); CeedChkBackend(ierr);
Expand Down
10 changes: 5 additions & 5 deletions backends/hip-shared/ceed-hip-shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ static int CeedInit_Hip_shared(const char *resource, Ceed ceed) {
// LCOV_EXCL_STOP
ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr);

Ceed ceedref;
CeedInit("/gpu/hip/ref", &ceedref);
ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr);

Ceed_Hip_shared *data;
Ceed_Hip *data;
ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
ierr = CeedSetData(ceed, data); CeedChkBackend(ierr);
ierr = CeedHipInit(ceed, resource, nrc); CeedChkBackend(ierr);

Ceed ceedref;
CeedInit("/gpu/hip/ref", &ceedref);
ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr);

ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateTensorH1",
CeedBasisCreateTensorH1_Hip_shared);
CeedChkBackend(ierr);
Expand Down
4 changes: 0 additions & 4 deletions backends/hip-shared/ceed-hip-shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@ typedef struct {
CeedScalar *d_qweight1d;
} CeedBasis_Hip_shared;

typedef struct {
Ceed_Hip base;
} Ceed_Hip_shared;

CEED_INTERN int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P1d,
CeedInt Q1d, const CeedScalar *interp1d, const CeedScalar *grad1d,
const CeedScalar *qref1d, const CeedScalar *qweight1d, CeedBasis basis);
Expand Down
14 changes: 8 additions & 6 deletions backends/hip/ceed-hip.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include "ceed-hip.h"

//------------------------------------------------------------------------------
Expand All @@ -33,22 +33,23 @@ static int CeedGetPreferredMemType_Hip(CeedMemType *type) {
//------------------------------------------------------------------------------
int CeedHipInit(Ceed ceed, const char *resource, int nrc) {
int ierr;
const int rlen = strlen(resource);
const bool slash = (rlen>nrc) ? (resource[nrc] == '/') : false;
const int deviceID = (slash && rlen > nrc + 1) ? atoi(&resource[nrc + 1]) : -1;
const char *device_spec = strstr(resource, ":device_id=");
const int deviceID = (device_spec) ? atoi(device_spec+11) : -1;

int currentDeviceID;
ierr = hipGetDevice(&currentDeviceID); CeedChk_Hip(ceed,ierr);
if (deviceID >= 0 && currentDeviceID != deviceID) {
ierr = hipSetDevice(deviceID); CeedChk_Hip(ceed,ierr);
currentDeviceID = deviceID;
}

struct hipDeviceProp_t deviceProp;
ierr = hipGetDeviceProperties(&deviceProp, deviceID); CeedChk_Hip(ceed,ierr);
ierr = hipGetDeviceProperties(&deviceProp, currentDeviceID);
CeedChk_Hip(ceed,ierr);

Ceed_Hip *data;
ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
data->deviceId = deviceID;
data->deviceId = currentDeviceID;
data->optblocksize = 256;
return CEED_ERROR_SUCCESS;
}
Expand Down Expand Up @@ -93,6 +94,7 @@ static int CeedInit_Hip(const char *resource, Ceed ceed) {
return CeedError(ceed, CEED_ERROR_BACKEND,
"Hip backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP
ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr);

Ceed_Hip *data;
ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
Expand Down
24 changes: 22 additions & 2 deletions backends/magma/ceed-magma-det.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,38 @@

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <string.h>
#include <stdlib.h>
#include "ceed-magma.h"

CEED_INTERN int CeedInit_Magma_Det(const char *resource, Ceed ceed) {
int ierr;
if (strcmp(resource, "/gpu/cuda/magma/det")
&& strcmp(resource, "/gpu/hip/magma/det"))
const int nrc = 18; // number of characters in resource
if (strncmp(resource, "/gpu/cuda/magma/det", nrc)
&& strncmp(resource, "/gpu/hip/magma/det", nrc))
// LCOV_EXCL_START
return CeedError(ceed, CEED_ERROR_BACKEND,
"Magma backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP
ierr = CeedSetDeterministic(ceed, true); CeedChkBackend(ierr);

Ceed_Magma *data;
ierr = CeedCalloc(sizeof(Ceed_Magma), &data); CeedChkBackend(ierr);
ierr = CeedSetData(ceed, data); CeedChkBackend(ierr);

// get/set device ID
const char *device_spec = strstr(resource, ":device_id=");
const int deviceID = (device_spec) ? atoi(device_spec+11) : -1;

int currentDeviceID;
magma_getdevice(&currentDeviceID);
if (deviceID >= 0 && currentDeviceID != deviceID) {
magma_setdevice(deviceID);
currentDeviceID = deviceID;
}
// record the selected device ID in the backend data
data->device = currentDeviceID;

// Create reference CEED that implementation will be dispatched
// through unless overridden
Ceed ceedref;
Expand Down
1 change: 1 addition & 0 deletions backends/magma/ceed-magma-restriction.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <string.h>
#include "ceed-magma.h"

static int CeedElemRestrictionApply_Magma(CeedElemRestriction r,
Expand Down
37 changes: 25 additions & 12 deletions backends/magma/ceed-magma.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <string.h>
#include <stdlib.h>
#include "ceed-magma.h"

static int CeedDestroy_Magma(Ceed ceed) {
Expand All @@ -30,22 +31,14 @@ static int CeedDestroy_Magma(Ceed ceed) {

static int CeedInit_Magma(const char *resource, Ceed ceed) {
int ierr;
if (strcmp(resource, "/gpu/cuda/magma") && strcmp(resource, "/gpu/hip/magma"))
const int nrc = 14; // number of characters in resource
if (strncmp(resource, "/gpu/cuda/magma", nrc)
&& strncmp(resource, "/gpu/hip/magma", nrc))
// LCOV_EXCL_START
return CeedError(ceed, CEED_ERROR_BACKEND,
"Magma backend cannot use resource: %s", resource);
// LCOV_EXCL_STOP

// Create reference CEED that implementation will be dispatched
// through unless overridden
Ceed ceedref;
#ifdef HAVE_HIP
CeedInit("/gpu/hip/ref", &ceedref);
#else
CeedInit("/gpu/cuda/ref", &ceedref);
#endif
ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr);

ierr = magma_init();
if (ierr)
// LCOV_EXCL_START
Expand All @@ -64,14 +57,34 @@ static int CeedInit_Magma(const char *resource, Ceed ceed) {
data->maxthreads[1] = 128; // for 2D kernels
data->maxthreads[2] = 64; // for 3D kernels

// get/set device ID
const char *device_spec = strstr(resource, ":device_id=");
const int deviceID = (device_spec) ? atoi(device_spec+11) : -1;

int currentDeviceID;
magma_getdevice(&currentDeviceID);
if (deviceID >= 0 && currentDeviceID != deviceID) {
magma_setdevice(deviceID);
currentDeviceID = deviceID;
}
// create a queue that uses the null stream
magma_getdevice( &(data->device) );
data->device = currentDeviceID;
#ifdef HAVE_HIP
magma_queue_create_from_hip(data->device, NULL, NULL, NULL, &(data->queue));
#else
magma_queue_create_from_cuda(data->device, NULL, NULL, NULL, &(data->queue));
#endif

// Create reference CEED that implementation will be dispatched
// through unless overridden
Ceed ceedref;
#ifdef HAVE_HIP
CeedInit("/gpu/hip/ref", &ceedref);
#else
CeedInit("/gpu/cuda/ref", &ceedref);
#endif
ierr = CeedSetDelegate(ceed, ceedref); CeedChkBackend(ierr);

ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreate",
CeedElemRestrictionCreate_Magma); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "Ceed", ceed,
Expand Down