Skip to content

Commit

Permalink
Merge branch 'main' into yohann/logic-useCollograd
Browse files Browse the repository at this point in the history
* main: (63 commits)
  Fix t568 bug (#1065)
  ci: patch occa-1.1.0 for spurious sys/sysctl.h
  t568-operator: fix input order and make test handle nan; skip on broken backends
  ci: use Noether for CUDA, handle CUDA on Debian CUDA_DIR=/usr
  examples/petsc: fix missing PetscFunctionBegin
  Fluids - Blasius: Some Minor Modifications (#1063)
  ci - newer xsmm for ci
  Icl/magma ntgemm (#1060)
  Fluids - fix blasius test
  Fluids - Some fixes missing from #1039 (#1056)
  Freed orient's array
  fluids: Add STATIC and print-% to makefile
  fluids: Set solution time label for strong stg
  Fluids - Compressible Blasius boundary layer (#1039)
  doc: Update fluids state_var option
  fluids: Switch to StateVar enum
  JIT: free relative path variable after getting absolute path
  magma: free memory used in loading jit kernel source
  fluids: Use newtonian with StateFromQi*_t func pointers
  fluids: Use StateFromQi* for newt boundary QFs
  ...
  • Loading branch information
jedbrown committed Sep 6, 2022
2 parents fe667b8 + dbac3b9 commit 6251c4a
Show file tree
Hide file tree
Showing 174 changed files with 12,722 additions and 2,152 deletions.
35 changes: 16 additions & 19 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ noether-cpu:
- echo "-------------- HIPCC ---------------" && $HIPCC --version
- echo "-------------- GCOV ----------------" && gcov --version
# Libraries for backends
# -- LIBXSMM v1.17
- cd .. && export XSMM_VERSION=libxsmm-1.17 && { [[ -d $XSMM_VERSION ]] || { git clone --depth 1 --branch 1.17 https://github.com/hfp/libxsmm.git $XSMM_VERSION && make -C $XSMM_VERSION -j$(nproc); }; } && export XSMM_DIR=$PWD/$XSMM_VERSION && cd libCEED
- echo "-------------- LIBXSMM -------------" && git -C $XSMM_DIR describe --tags
# -- LIBXSMM 44433be9426eddaed88415646c15b3bcc61afc85
- cd .. && export XSMM_HASH=44433be9426eddaed88415646c15b3bcc61afc85 && { [[ -d libxsmm-$XSMM_HASH ]] || { curl -L https://github.com/libxsmm/libxsmm/archive/$XSMM_HASH.tar.gz -o xsmm.tar.gz && tar zvxf xsmm.tar.gz && rm xsmm.tar.gz && make -C libxsmm-$XSMM_HASH -j$(nproc); }; } && export XSMM_DIR=$PWD/libxsmm-$XSMM_HASH && cd libCEED
- echo "-------------- LIBXSMM -------------" && basename $XSMM_DIR
# -- OCCA v1.1.0
- cd .. && export OCCA_VERSION=occa-1.1.0 OCCA_OPENCL_ENABLED=0 && { [[ -d $OCCA_VERSION ]] || { git clone --depth 1 --branch v1.1.0 https://github.com/libocca/occa.git $OCCA_VERSION && make -C $OCCA_VERSION -j$(nproc); }; } && export OCCA_DIR=$PWD/$OCCA_VERSION && cd libCEED
- cd .. && export OCCA_VERSION=occa-1.1.0 OCCA_OPENCL_ENABLED=0 && { [[ -d $OCCA_VERSION ]] || { git clone --depth 1 --branch v1.1.0 https://github.com/libocca/occa.git $OCCA_VERSION && sed -i '/sysctl.h/d' $OCCA_VERSION/src/tools/sys.cpp && make -C $OCCA_VERSION -j$(nproc); }; } && export OCCA_DIR=$PWD/$OCCA_VERSION && cd libCEED
- echo "-------------- OCCA ----------------" && make -C $OCCA_DIR info
script:
- rm -f .SUCCESS
Expand Down Expand Up @@ -164,14 +164,14 @@ noether-float:
# -- MAGMA from dev branch
- echo "-------------- MAGMA ---------------"
- export MAGMA_DIR=/projects/hipMAGMA && git -C $MAGMA_DIR describe
# -- LIBXSMM v1.17
- cd .. && export XSMM_VERSION=libxsmm-1.17 && { [[ -d $XSMM_VERSION ]] || { git clone --depth 1 --branch 1.17 https://github.com/hfp/libxsmm.git $XSMM_VERSION && make -C $XSMM_VERSION -j$(nproc); }; } && export XSMM_DIR=$PWD/$XSMM_VERSION && cd libCEED
- echo "-------------- LIBXSMM -------------" && git -C $XSMM_DIR describe --tags
# -- LIBXSMM 44433be9426eddaed88415646c15b3bcc61afc85
- cd .. && export XSMM_HASH=44433be9426eddaed88415646c15b3bcc61afc85 && { [[ -d libxsmm-$XSMM_HASH ]] || { curl -L https://github.com/libxsmm/libxsmm/archive/$XSMM_HASH.tar.gz -o xsmm.tar.gz && tar zvxf xsmm.tar.gz && rm xsmm.tar.gz && make -C libxsmm-$XSMM_HASH -j$(nproc); }; } && export XSMM_DIR=$PWD/libxsmm-$XSMM_HASH && cd libCEED
- echo "-------------- LIBXSMM -------------" && basename $XSMM_DIR
script:
- rm -f .SUCCESS
# libCEED
# Change to single precision
- sed -i 's/ceed-f64/ceed-f32/1' include/ceed/ceed.h
- sed -i 's/ceed-f64/ceed-f32/1' include/ceed/types.h
# Build libCEED
- make configure HIP_DIR=/opt/rocm OPT='-O -march=native -ffp-contract=fast'
- BACKENDS_CPU=$(make info-backends-all | grep -o '/cpu[^ ]*') && BACKENDS_GPU=$(make info-backends | grep -o '/gpu[^ ]*')
Expand All @@ -198,35 +198,32 @@ noether-float:
bash <(curl -s https://codecov.io/bash) -f coverage.info -t ${CODECOV_ACCESS_TOKEN} -F tests;
bash <(curl -s https://codecov.io/bash) -f coverage.info -t ${CODECOV_ACCESS_TOKEN} -F examples;
fi
- sed -i 's/ceed-f32/ceed-f64/1' include/ceed/ceed.h
- sed -i 's/ceed-f32/ceed-f64/1' include/ceed/types.h
artifacts:
paths:
- build/*.junit
reports:
junit: build/*.junit
performance: performance.json

lv-cuda:
noether-cuda:
stage: test:gpu-and-float
tags:
- cuda
interruptible: true
before_script:
# Environment
- ulimit -v $[1024*1024*32] # 32 GiB in units of 1024 bytes
- . /opt/rh/gcc-toolset-11/enable
- export COVERAGE=1 CC=gcc CXX=g++ FC=gfortran
- export CUDA_DIR=/usr/local/cuda-11.6
- export CUDA_VISIBLE_DEVICES=GPU-c4529365-8229-f689-b43d-ccd7f1677079 # our RTX 2080 Super via nvidia-smi -L
- echo "-------------- nproc ---------------" && NPROC_CPU=$(nproc) && NPROC_GPU=4 && echo "NPROC_CPU" $NPROC_CPU && echo "NPROC_GPU" $NPROC_GPU
- export COVERAGE=1 CC=gcc CXX=g++ FC=gfortran NVCC=nvcc
- echo "-------------- nproc ---------------" && NPROC_CPU=$(nproc) && NPROC_GPU=$(($(nproc)<8?$(nproc):8)) && echo "NPROC_CPU" $NPROC_CPU && echo "NPROC_GPU" $NPROC_GPU
- echo "-------------- CC ------------------" && $CC --version
- echo "-------------- CXX -----------------" && $CXX --version
- echo "-------------- FC ------------------" && $FC --version
- echo "-------------- NVCC ----------------" && $CUDA_DIR/bin/nvcc --version
- echo "-------------- NVCC ----------------" && $NVCC --version
- echo "-------------- GCOV ----------------" && gcov --version
script:
- rm -f .SUCCESS
# libCEED
- make configure OPT='-O -march=native -ffp-contract=fast'
- make configure OPT='-O -march=native -ffp-contract=fast' CUDA_DIR=/usr
- echo "-------------- libCEED -------------" && make info
- BACKENDS_GPU=$(make info-backends | grep -o '/gpu[^ ]*')
- echo "-------------- BACKENDS_GPU --------" && echo $BACKENDS_GPU
Expand All @@ -238,7 +235,7 @@ lv-cuda:
- nice make -k -j$NPROC_GPU BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="cuda" junit realsearch=%
# Libraries for examples
# -- PETSc with CUDA (minimal)
- export PETSC_DIR=/home/jeth8984/petsc PETSC_ARCH=cuda-O && git -C $PETSC_DIR describe
- export PETSC_DIR=/projects/petsc PETSC_ARCH=mpich-cuda-O PETSC_OPTIONS='-use_gpu_aware_mpi 0' && git -C $PETSC_DIR describe
- echo "-------------- PETSc ---------------" && make -C $PETSC_DIR info
- nice make -k -j$NPROC_GPU JUNIT_BATCH="cuda" junit BACKENDS="$BACKENDS_GPU" search="petsc fluids solids"
# Report status
Expand Down
6 changes: 3 additions & 3 deletions .mailmap
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ Leila Ghaffari <Leila.Ghaffari@colorado.edu> <leila@Leilas-MacBook-Pr
Natalie Beams <nbeams@icl.utk.edu> <246972+nbeams@users.noreply.github.com>
Rey Koki <rey.koki@colorado.edu> <36133157+reykoki@users.noreply.github.com>
Rezgar Shakeri <Rezgar.Shakeri@colorado.edu> <42816410+rezgarshakeri@users.noreply.github.com>
Thilina Ratnayaka <thilinarmtb@gmail.com> <thilinarmtb@users.noreply.github.com>
Thilina Ratnayaka <thilinarmtb@gmail.com> <thilinarmtb@users.noreply.github.com>
Tzanio Kolev <tzanio@llnl.gov>
Valeria Barra <valeriabarra21@gmail.com>
Valeria Barra <valeriabarra21@gmail.com> <39932030+valeriabarra@users.noreply.github.com>
Valeria Barra <valeriabarra21@gmail.com> <vaba3353@shas0136.rc.int.colorado.edu>
Valeria Barra <valeriabarra21@gmail.com> <vaba3353@shas0137.rc.int.colorado.edu>
Valeria Barra <valeriabarra21@gmail.com> <valeria.barra@colorado.edu>
Will Pazner <will.e.p@gmail.com> <11493037+pazner@users.noreply.github.com>
Yohann Dudouit <yohann.dudouit@gmail.com>
Yohann Dudouit <yohann.dudouit@gmail.com> <dudouit1@llnl.gov>
Yohann Dudouit <dudouit1@llnl.gov>
Yohann Dudouit <dudouit1@llnl.gov> <yohann.dudouit@gmail.com>
24 changes: 17 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ endif
AFLAGS = -fsanitize=address #-fsanitize=undefined -fno-omit-frame-pointer

# Note: Intel oneAPI C/C++ compiler is now icx/icpx
CC_VENDOR := $(subst oneAPI,icc,$(firstword $(filter gcc clang icc oneAPI XL,$(subst -, ,$(shell $(CC) --version)))))
CC_VENDOR := $(subst icc_orig,icc,$(subst oneAPI,icc,$(firstword $(filter gcc clang icc icc_orig oneAPI XL,$(subst -, ,$(shell $(CC) --version))))))
FC_VENDOR := $(if $(FC),$(firstword $(filter GNU ifort XL,$(shell $(FC) --version 2>&1 || $(FC) -qversion))))

# Default extra flags by vendor
Expand Down Expand Up @@ -224,6 +224,7 @@ opt.c := $(sort $(wildcard backends/opt/*.c))
avx.c := $(sort $(wildcard backends/avx/*.c))
xsmm.c := $(sort $(wildcard backends/xsmm/*.c))
cuda.c := $(sort $(wildcard backends/cuda/*.c))
cuda.cpp := $(sort $(wildcard backends/cuda/*.cpp))
cuda-ref.c := $(sort $(wildcard backends/cuda-ref/*.c))
cuda-ref.cpp := $(sort $(wildcard backends/cuda-ref/*.cpp))
cuda-ref.cu := $(sort $(wildcard backends/cuda-ref/kernels/*.cu))
Expand All @@ -234,6 +235,7 @@ cuda-gen.cpp := $(sort $(wildcard backends/cuda-gen/*.cpp))
cuda-gen.cu := $(sort $(wildcard backends/cuda-gen/kernels/*.cu))
occa.cpp := $(sort $(shell find backends/occa -type f -name *.cpp))
magma.c := $(sort $(wildcard backends/magma/*.c))
magma.cpp := $(sort $(wildcard backends/magma/*.cpp))
magma.cu := $(sort $(wildcard backends/magma/kernels/cuda/*.cu))
magma.hip := $(sort $(wildcard backends/magma/kernels/hip/*.hip.cpp))
hip.c := $(sort $(wildcard backends/hip/*.c))
Expand Down Expand Up @@ -380,8 +382,10 @@ ifneq ($(wildcard $(OCCA_DIR)/lib/libocca.*),)
endif

# CUDA Backends
CUDA_LIB_DIR := $(wildcard $(foreach d,lib lib64,$(CUDA_DIR)/$d/libcudart.${SO_EXT}))
CUDA_LIB_DIR := $(patsubst %/,%,$(dir $(firstword $(CUDA_LIB_DIR))))
# Probe for libcudart only when CUDA_DIR is non-empty. The candidate
# subdirectories are lib, lib64 and lib/x86_64-linux-gnu; CUDA_LIB_DIR ends up
# as the directory (trailing slash stripped) of the first match, or empty when
# none of the candidates exists.
ifneq ($(CUDA_DIR),)
CUDA_LIB_DIR := $(wildcard $(foreach d,lib lib64 lib/x86_64-linux-gnu,$(CUDA_DIR)/$d/libcudart.${SO_EXT}))
CUDA_LIB_DIR := $(patsubst %/,%,$(dir $(firstword $(CUDA_LIB_DIR))))
endif
CUDA_LIB_DIR_STUBS := $(CUDA_LIB_DIR)/stubs
CUDA_BACKENDS = /gpu/cuda/ref /gpu/cuda/shared /gpu/cuda/gen
ifneq ($(CUDA_LIB_DIR),)
Expand All @@ -391,7 +395,7 @@ ifneq ($(CUDA_LIB_DIR),)
LIBCEED_CONTAINS_CXX = 1
libceed.c += interface/ceed-cuda.c
libceed.c += $(cuda.c) $(cuda-ref.c) $(cuda-shared.c) $(cuda-gen.c)
libceed.cpp += $(cuda-ref.cpp) $(cuda-gen.cpp)
libceed.cpp += $(cuda.cpp) $(cuda-ref.cpp) $(cuda-gen.cpp)
libceed.cu += $(cuda-ref.cu) $(cuda-shared.cu) $(cuda-gen.cu)
BACKENDS_MAKE += $(CUDA_BACKENDS)
endif
Expand Down Expand Up @@ -426,9 +430,11 @@ ifneq ($(wildcard $(MAGMA_DIR)/lib/libmagma.*),)
magma_link_shared = -L$(MAGMA_DIR)/lib -Wl,-rpath,$(abspath $(MAGMA_DIR)/lib) -lmagma
magma_link := $(if $(wildcard $(MAGMA_DIR)/lib/libmagma.${SO_EXT}),$(magma_link_shared),$(magma_link_static))
PKG_LIBS += $(magma_link)
libceed.c += $(magma.c)
libceed.cu += $(magma.cu)
libceed.c += $(magma.c)
libceed.cpp += $(magma.cpp)
libceed.cu += $(magma.cu)
$(magma.c:%.c=$(OBJDIR)/%.o) $(magma.c:%=%.tidy) : CPPFLAGS += -DADD_ -I$(MAGMA_DIR)/include -I$(CUDA_DIR)/include
$(magma.cpp:%.cpp=$(OBJDIR)/%.o) $(magma.cpp:%=%.tidy) : CPPFLAGS += -DADD_ -I$(MAGMA_DIR)/include -I$(CUDA_DIR)/include
$(magma.cu:%.cu=$(OBJDIR)/%.o) : CPPFLAGS += --compiler-options=-fPIC -DADD_ -I$(MAGMA_DIR)/include -I$(MAGMA_DIR)/magmablas -I$(CUDA_DIR)/include
MAGMA_BACKENDS = /gpu/cuda/magma /gpu/cuda/magma/det
endif
Expand All @@ -440,12 +446,15 @@ ifneq ($(wildcard $(MAGMA_DIR)/lib/libmagma.*),)
magma_link_shared = -L$(MAGMA_DIR)/lib -Wl,-rpath,$(abspath $(MAGMA_DIR)/lib) -lmagma
magma_link := $(if $(wildcard $(MAGMA_DIR)/lib/libmagma.${SO_EXT}),$(magma_link_shared),$(magma_link_static))
PKG_LIBS += $(magma_link)
libceed.c += $(magma.c)
libceed.c += $(magma.c)
libceed.cpp += $(magma.cpp)
libceed.hip += $(magma.hip)
ifneq ($(CXX), $(HIPCC))
$(magma.c:%.c=$(OBJDIR)/%.o) $(magma.c:%=%.tidy) : CPPFLAGS += -I$(MAGMA_DIR)/include -I$(HIP_DIR)/include -DCEED_MAGMA_USE_HIP -DADD_
$(magma.cpp:%.cpp=$(OBJDIR)/%.o) $(magma.cpp:%=%.tidy) : CPPFLAGS += -I$(MAGMA_DIR)/include -I$(HIP_DIR)/include -DCEED_MAGMA_USE_HIP -DADD_
else
$(magma.c:%.c=$(OBJDIR)/%.o) $(magma.c:%=%.tidy) : HIPCCFLAGS += -I$(MAGMA_DIR)/include -I$(HIP_DIR)/include -DCEED_MAGMA_USE_HIP -DADD_
$(magma.cpp:%.cpp=$(OBJDIR)/%.o) $(magma.cpp:%=%.tidy) : HIPCCFLAGS += -I$(MAGMA_DIR)/include -I$(HIP_DIR)/include -DCEED_MAGMA_USE_HIP -DADD_
endif
$(magma.hip:%.hip.cpp=$(OBJDIR)/%.o) : HIPCCFLAGS += -I$(MAGMA_DIR)/include -I$(MAGMA_DIR)/magmablas -I$(HIP_DIR)/include -DCEED_MAGMA_USE_HIP -DADD_
MAGMA_BACKENDS = /gpu/hip/magma /gpu/hip/magma/det
Expand Down Expand Up @@ -635,6 +644,7 @@ install : $(libceed) $(OBJDIR)/ceed.pc
"$(includedir)/ceed/jit-source/cuda/" "$(includedir)/ceed/jit-source/hip/"\
"$(includedir)/ceed/jit-source/gallery/" "$(libdir)" "$(pkgconfigdir)")
$(INSTALL_DATA) include/ceed/ceed.h "$(DESTDIR)$(includedir)/ceed/"
$(INSTALL_DATA) include/ceed/types.h "$(DESTDIR)$(includedir)/ceed/"
$(INSTALL_DATA) include/ceed/ceed-f32.h "$(DESTDIR)$(includedir)/ceed/"
$(INSTALL_DATA) include/ceed/ceed-f64.h "$(DESTDIR)$(includedir)/ceed/"
$(INSTALL_DATA) include/ceed/fortran.h "$(DESTDIR)$(includedir)/ceed/"
Expand Down
5 changes: 0 additions & 5 deletions backends/cuda-gen/ceed-cuda-gen-operator-build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -808,11 +808,6 @@ extern "C" int CeedCudaGenOperatorBuild(CeedOperator op) {
string oper;
oper = "CeedKernel_Cuda_gen_" + qFunctionName;

code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
code << "#define CEED_QFUNCTION_HELPER inline __device__\n";
code << "#define CeedPragmaSIMD\n";
code << "#define CEED_ERROR_SUCCESS 0\n\n";

// Find dim and Q1d
bool useCollograd = false;
// Only use collocated gradient algorithm when we actually compute a gradient.
Expand Down
6 changes: 0 additions & 6 deletions backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,6 @@ extern "C" int CeedCudaBuildQFunction(CeedQFunction qf) {
ostringstream code;

// Definitions
code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
code << "#define CEED_QFUNCTION_HELPER inline __device__\n";
code << "#define CeedPragmaSIMD\n";
code << "#define CEED_ERROR_SUCCESS 0\n";
code << "#define CEED_Q_VLA 1\n\n";
code << "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Cuda;\n";
code << read_write;
code << qfunction_source;
code << "\n";
Expand Down
2 changes: 1 addition & 1 deletion backends/cuda/ceed-cuda-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#define CeedChk_Cu(ceed, x) \
do { \
CUresult cuda_result = x; \
CUresult cuda_result = (CUresult)x; \
if (cuda_result != CUDA_SUCCESS) { \
const char *msg; \
cuGetErrorName(cuda_result, &msg); \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <ceed/jit-tools.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <nvrtc.h>
#include <sstream>
#include <stdarg.h>
#include <string.h>
#include "ceed-cuda-common.h"
#include "ceed-cuda-compile.h"

// Evaluate the NVRTC call `x`; when the result is not NVRTC_SUCCESS, return
// from the enclosing function with a CEED_ERROR_BACKEND error whose message is
// nvrtcGetErrorString(result). Wrapped in do { } while (0) so the macro can be
// used as a single statement.
// NOTE(review): two declarations of `result` appear below (plain assignment and
// static_cast form) - presumably the before/after lines of a diff hunk, with
// only the static_cast form present in the actual file; confirm.
#define CeedChk_Nvrtc(ceed, x) \
do { \
nvrtcResult result = x; \
nvrtcResult result = static_cast<nvrtcResult>(x); \
if (result != NVRTC_SUCCESS) \
return CeedError((ceed), CEED_ERROR_BACKEND, nvrtcGetErrorString(result)); \
} while (0)
Expand All @@ -25,50 +28,60 @@ do { \
// Compile CUDA kernel
//------------------------------------------------------------------------------
int CeedCompileCuda(Ceed ceed, const char *source, CUmodule *module,
const CeedInt num_opts, ...) {
const CeedInt num_defines, ...) {
int ierr;
cudaFree(0); // Make sure a Context exists for nvrtc
nvrtcProgram prog;
CeedChk_Nvrtc(ceed, nvrtcCreateProgram(&prog, source, NULL, 0, NULL, NULL));

std::ostringstream code;

// Get kernel specific options, such as kernel constants
const int opts_len = 32;
const int opts_extra = 4;
const char *opts[num_opts + opts_extra];
char buf[num_opts][opts_len];
if (num_opts > 0) {
if (num_defines > 0) {
va_list args;
va_start(args, num_opts);
va_start(args, num_defines);
char *name;
int val;
for (int i = 0; i < num_opts; i++) {
for (int i = 0; i < num_defines; i++) {
name = va_arg(args, char *);
val = va_arg(args, int);
snprintf(&buf[i][0], opts_len,"-D%s=%d", name, val);
opts[i] = &buf[i][0];
code << "#define " << name << " " << val << "\n";
}
va_end(args);
}

// Standard backend options
if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
opts[num_opts] = "-DCeedScalar=float";
} else {
opts[num_opts] = "-DCeedScalar=double";
}
opts[num_opts + 1] = "-DCeedInt=int";
opts[num_opts + 2] = "-default-device";
// Standard libCEED definitions for CUDA backends
char *jit_defs_path, *jit_defs_source;
ierr = CeedGetJitAbsolutePath(ceed,
"ceed/jit-source/cuda/cuda-jit.h",
&jit_defs_path); CeedChkBackend(ierr);
ierr = CeedLoadSourceToBuffer(ceed, jit_defs_path, &jit_defs_source);
CeedChkBackend(ierr);
code << jit_defs_source;
code << "\n\n";
ierr = CeedFree(&jit_defs_path); CeedChkBackend(ierr);
ierr = CeedFree(&jit_defs_source); CeedChkBackend(ierr);

// Non-macro options
const int num_opts = 3;
const char *opts[num_opts];
opts[0] = "-default-device";
struct cudaDeviceProp prop;
Ceed_Cuda *ceed_data;
ierr = CeedGetData(ceed, &ceed_data); CeedChkBackend(ierr);
ierr = cudaGetDeviceProperties(&prop, ceed_data->device_id);
CeedChk_Cu(ceed, ierr);
char buff[opts_len];
snprintf(buff, opts_len,"-arch=compute_%d%d", prop.major, prop.minor);
opts[num_opts + 3] = buff;
std::string arch_arg = "-arch=compute_" + std::to_string(prop.major) + std::to_string(prop.minor);
opts[1] = arch_arg.c_str();
opts[2] = "-Dint32_t=int";

// Add string source argument provided in call
code << source;

// Create Program
CeedChk_Nvrtc(ceed, nvrtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL));

// Compile kernel
nvrtcResult result = nvrtcCompileProgram(prog, num_opts + opts_extra, opts);
nvrtcResult result = nvrtcCompileProgram(prog, num_opts, opts);
if (result != NVRTC_SUCCESS) {
size_t log_size;
CeedChk_Nvrtc(ceed, nvrtcGetProgramLogSize(prog, &log_size));
Expand Down
2 changes: 1 addition & 1 deletion backends/cuda/ceed-cuda-compile.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ static inline CeedInt CeedDivUpInt(CeedInt numerator, CeedInt denominator) {
}

CEED_INTERN int CeedCompileCuda(Ceed ceed, const char *source, CUmodule *module,
const CeedInt num_opts, ...);
const CeedInt num_defines, ...);

CEED_INTERN int CeedGetKernelCuda(Ceed ceed, CUmodule module, const char *name,
CUfunction *kernel);
Expand Down
5 changes: 0 additions & 5 deletions backends/hip-gen/ceed-hip-gen-operator-build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -807,11 +807,6 @@ extern "C" int CeedHipGenOperatorBuild(CeedOperator op) {
string oper;
oper = "CeedKernel_Hip_gen_" + qFunctionName;

code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
code << "#define CEED_QFUNCTION_HELPER __device__ __forceinline__\n";
code << "#define CeedPragmaSIMD\n";
code << "#define CEED_ERROR_SUCCESS 0\n\n";

// Find dim and Q1d
bool useCollograd = false;
// Only use collocated gradient algorithm when we actually compute a gradient.
Expand Down
6 changes: 0 additions & 6 deletions backends/hip-ref/ceed-hip-ref-qfunction-load.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,6 @@ extern "C" int CeedHipBuildQFunction(CeedQFunction qf) {
ostringstream code;

// Definitions
code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
code << "#define CEED_QFUNCTION_HELPER __device__ __forceinline__\n";
code << "#define CeedPragmaSIMD\n";
code << "#define CEED_ERROR_SUCCESS 0\n";
code << "#define CEED_Q_VLA 1\n\n";
code << "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Hip;\n";
code << read_write;
code << qfunction_source;
code << "\n";
Expand Down
1 change: 0 additions & 1 deletion backends/hip-ref/ceed-hip-ref-vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <hip/hip_runtime.h>
#include <hipblas.h>
#include <math.h>
#include <string.h>
#include "ceed-hip-ref.h"
Expand Down
1 change: 0 additions & 1 deletion backends/hip-ref/ceed-hip-ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <hip/hip_runtime.h>
#include <hipblas.h>
#include "../hip/ceed-hip-common.h"

typedef struct {
Expand Down
4 changes: 4 additions & 0 deletions backends/hip/ceed-hip-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <hip/hip_runtime.h>
// Select the hipBLAS header by HIP version: HIP_VERSION 50200000 and newer use
// the hipblas/-prefixed path, older versions use the flat <hipblas.h>.
// NOTE(review): presumably the header moved in ROCm 5.2 - confirm against the
// ROCm release notes.
#if (HIP_VERSION >= 50200000)
#include <hipblas/hipblas.h>
#else
#include <hipblas.h>
#endif

#define QUOTE(...) #__VA_ARGS__

Expand Down

0 comments on commit 6251c4a

Please sign in to comment.