From ebf03873c810261319d27bd7aebb7b7138b2f85a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 6 Mar 2024 12:10:34 +0000 Subject: [PATCH] Replace Divide() with IndexToDims() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaLocalTransformation.cu | 104 +++++----- .../cuda/CudaLocalTransformationKernels.cu | 194 ++++++++---------- reg-lib/cuda/_reg_ssd_gpu.cu | 4 +- 4 files changed, 136 insertions(+), 168 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 816d01be..1bb7ac53 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -420 +421 diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index ce733da6..8e901204 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -26,8 +26,8 @@ void GetDeformationField(const nifti_image *controlPointImage, const int *maskCuda, const size_t activeVoxelNumber) { const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); @@ -46,12 +46,12 @@ void GetDeformationField(const nifti_image *controlPointImage, if (referenceImage->nz > 1) { thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) { GetDeformationField3d(deformationFieldCuda, controlPointTexture, realToVoxelCuda, - referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index); + referenceImageDims, controlPointImageDims, controlPointVoxelSpacing, index); }); } else { thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) { GetDeformationField2d(deformationFieldCuda, controlPointTexture, realToVoxelCuda, - referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index); + referenceImageDims, controlPointImageDims, controlPointVoxelSpacing, index); }); } } @@ -82,23 +82,23 @@ struct SecondDerivative { template __device__ SecondDerivative GetApproxSecondDerivative(const int index, cudaTextureObject_t controlPointTexture, - const int3 controlPointImageDim, + const int3 controlPointImageDims, const Basis2nd basis) { - const auto [x, y, z] = IndexToDims(index, controlPointImageDim); - if (!isGradient && (x < 1 || x >= controlPointImageDim.x - 1 || - y < 1 || y >= controlPointImageDim.y - 1 || - (is3d && (z < 1 || z >= controlPointImageDim.z - 1)))) return {}; + const auto [x, y, z] = IndexToDims(index, controlPointImageDims); + if (!isGradient && (x < 1 || x >= controlPointImageDims.x - 1 || + y < 1 || y >= controlPointImageDims.y - 1 || + (is3d && (z < 1 || z >= controlPointImageDims.z - 1)))) return {}; SecondDerivative secondDerivative{}; if constexpr (is3d) { for (int c = z - 1, basInd = 0; c < z + 2; c++) { - if (isGradient && (c < 0 || c >= controlPointImageDim.z)) { basInd += 9; continue; } - const int indexZ = c * controlPointImageDim.y; + if (isGradient && (c < 0 || c >= controlPointImageDims.z)) { basInd += 9; continue; } + const int indexZ = c * controlPointImageDims.y; for (int b = y - 1; b < y + 2; b++) { - if (isGradient && (b < 0 || b >= controlPointImageDim.y)) { basInd += 3; continue; } - int indexXYZ = (indexZ + b) * controlPointImageDim.x + x - 1; + if (isGradient && (b < 0 || b >= controlPointImageDims.y)) { basInd += 3; continue; } + int indexXYZ = (indexZ + b) * controlPointImageDims.x + x - 1; for (int a = x - 1; a < x + 2; a++, basInd++, indexXYZ++) { - if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue; + if (isGradient && (a < 0 || a >= controlPointImageDims.x)) continue; const float3 controlPointValue = make_float3(tex1Dfetch(controlPointTexture, indexXYZ)); secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue; secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue; @@ -111,10 +111,10 @@ __device__ SecondDerivative GetApproxSecondDerivative(const int index, } } else { for (int b = y - 1, basInd = 0; b < y + 2; b++) { - if (isGradient && (b < 0 || b >= controlPointImageDim.y)) { basInd += 3; continue; } - int indexXY = b * controlPointImageDim.x + x - 1; + if (isGradient && (b < 0 || b >= controlPointImageDims.y)) { basInd += 3; continue; } + int indexXY = b * controlPointImageDims.x + x - 1; for (int a = x - 1; a < x + 2; a++, basInd++, indexXY++) { - if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue; + if (isGradient && (a < 0 || a >= controlPointImageDims.x)) continue; const float2 controlPointValue = make_float2(tex1Dfetch(controlPointTexture, indexXY)); secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue; secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue; @@ -128,7 +128,7 @@ __device__ SecondDerivative GetApproxSecondDerivative(const int index, template double ApproxBendingEnergy(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) { const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); auto controlPointTexture = *controlPointTexturePtr; @@ -141,7 +141,7 @@ double ApproxBendingEnergy(const nifti_image *controlPointImage, const float4 *c thrust::counting_iterator index(0); return thrust::transform_reduce(thrust::device, index, index + controlPointNumber, [=]__device__(const int index) { - const auto secondDerivative = GetApproxSecondDerivative(index, controlPointTexture, controlPointImageDim, basis); + const auto secondDerivative = GetApproxSecondDerivative(index, controlPointTexture, controlPointImageDims, basis); if constexpr (is3d) return (Square(secondDerivative.xx.x) + Square(secondDerivative.xx.y) + Square(secondDerivative.xx.z) + Square(secondDerivative.yy.x) + Square(secondDerivative.yy.y) + Square(secondDerivative.yy.z) + @@ -163,7 +163,7 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage, float4 *transGradientCuda, float bendingEnergyWeight) { const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); auto controlPointTexture = *controlPointTexturePtr; @@ -180,8 +180,8 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage, thrust::device_vector::TextureType> secondDerivativesCudaVec((is3d ? 6 : 3) * controlPointNumber); auto secondDerivativesCuda = secondDerivativesCudaVec.data().get(); thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber, - [controlPointTexture, controlPointImageDim, basis, secondDerivativesCuda]__device__(const int index) { - const auto secondDerivative = GetApproxSecondDerivative(index, controlPointTexture, controlPointImageDim, basis); + [controlPointTexture, controlPointImageDims, basis, secondDerivativesCuda]__device__(const int index) { + const auto secondDerivative = GetApproxSecondDerivative(index, controlPointTexture, controlPointImageDims, basis); if constexpr (is3d) { int derInd = 6 * index; secondDerivativesCuda[derInd++] = make_float4(secondDerivative.xx); @@ -205,18 +205,18 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage, // Compute the gradient const float approxRatio = bendingEnergyWeight / (float)controlPointNumber; thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber, - [controlPointImageDim, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const int index) { - const auto [x, y, z] = IndexToDims(index, controlPointImageDim); + [controlPointImageDims, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const int index) { + const auto [x, y, z] = IndexToDims(index, controlPointImageDims); typename SecondDerivative::Type gradientValue{}; if constexpr (is3d) { for (int c = z - 1, basInd = 0; c < z + 2; c++) { - if (c < 0 || c >= controlPointImageDim.z) { basInd += 9; continue; } - const int indexZ = c * controlPointImageDim.y; + if (c < 0 || c >= controlPointImageDims.z) { basInd += 9; continue; } + const int indexZ = c * controlPointImageDims.y; for (int b = y - 1; b < y + 2; b++) { - if (b < 0 || b >= controlPointImageDim.y) { basInd += 3; continue; } - int indexXYZ = ((indexZ + b) * controlPointImageDim.x + x - 1) * 6; + if (b < 0 || b >= controlPointImageDims.y) { basInd += 3; continue; } + int indexXYZ = ((indexZ + b) * controlPointImageDims.x + x - 1) * 6; for (int a = x - 1; a < x + 2; a++, basInd++) { - if (a < 0 || a >= controlPointImageDim.x) { indexXYZ += 6; continue; } + if (a < 0 || a >= controlPointImageDims.x) { indexXYZ += 6; continue; } const float3 secondDerivativeXX = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd]; const float3 secondDerivativeYY = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); @@ -234,10 +234,10 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage, } } else { for (int b = y - 1, basInd = 0; b < y + 2; b++) { - if (b < 0 || b >= controlPointImageDim.y) { basInd += 3; continue; } - int indexXY = (b * controlPointImageDim.x + x - 1) * 3; + if (b < 0 || b >= controlPointImageDims.y) { basInd += 3; continue; } + int indexXY = (b * controlPointImageDims.x + x - 1) * 3; for (int a = x - 1; a < x + 2; a++, basInd++) { - if (a < 0 || a >= controlPointImageDim.x) { indexXY += 3; continue; } + if (a < 0 || a >= controlPointImageDims.x) { indexXY += 3; continue; } const float2 secondDerivativeXX = tex1Dfetch(secondDerivativesTexture, indexXY++); gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd]; const float2 secondDerivativeYY = tex1Dfetch(secondDerivativesTexture, indexXY++); @@ -266,7 +266,7 @@ void ComputeApproxJacobianValues(const nifti_image *controlPointImage, float *jacobianDetCuda) { auto blockSize = CudaContext::GetBlockSize(); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion @@ -279,7 +279,7 @@ void ComputeApproxJacobianValues(const nifti_image *controlPointImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); GetApproxJacobianValues3d<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, - controlPointImageDim, (unsigned)controlPointNumber, reorientation); + controlPointImageDims, (unsigned)controlPointNumber, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->GetApproxJacobianValues2d; @@ -287,7 +287,7 @@ void ComputeApproxJacobianValues(const nifti_image *controlPointImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); GetApproxJacobianValues2d<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, - controlPointImageDim, (unsigned)controlPointNumber, reorientation); + controlPointImageDims, (unsigned)controlPointNumber, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } @@ -300,8 +300,8 @@ void ComputeJacobianValues(const nifti_image *controlPointImage, auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); @@ -317,7 +317,7 @@ void ComputeJacobianValues(const nifti_image *controlPointImage, // 8 floats of shared memory are allocated per thread const unsigned sharedMemSize = blocks * 8 * sizeof(float); GetJacobianValues3d<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, - controlPointImageDim, controlPointSpacing, referenceImageDim, + controlPointImageDims, controlPointSpacing, referenceImageDims, (unsigned)voxelNumber, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { @@ -326,7 +326,7 @@ void ComputeJacobianValues(const nifti_image *controlPointImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); GetJacobianValues2d<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, - controlPointImageDim, controlPointSpacing, referenceImageDim, + controlPointImageDims, controlPointSpacing, referenceImageDims, (unsigned)voxelNumber, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } @@ -401,7 +401,7 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage, const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); const float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx), referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy), @@ -416,7 +416,7 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); ComputeApproxJacGradient3d<<>>(transGradientCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, + *jacobianMatricesTexture, controlPointImageDims, (unsigned)controlPointNumber, reorientation, weight); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { @@ -425,12 +425,12 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); ComputeApproxJacGradient2d<<>>(transGradientCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, + *jacobianMatricesTexture, controlPointImageDims, (unsigned)controlPointNumber, reorientation, weight); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } else { - const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); @@ -440,9 +440,9 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); ComputeJacGradient3d<<>>(transGradientCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, + *jacobianMatricesTexture, controlPointImageDims, controlPointVoxelSpacing, (unsigned)controlPointNumber, - referenceImageDim, reorientation, weight); + referenceImageDims, reorientation, weight); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->ComputeJacGradient2d; @@ -450,9 +450,9 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); ComputeJacGradient2d<<>>(transGradientCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, + *jacobianMatricesTexture, controlPointImageDims, controlPointVoxelSpacing, (unsigned)controlPointNumber, - referenceImageDim, reorientation, weight); + referenceImageDims, reorientation, weight); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } @@ -514,7 +514,7 @@ double CorrectFolding(const nifti_image *referenceImage, const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, jacNumber, cudaChannelFormatKindFloat, 1); auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, 9 * jacNumber, cudaChannelFormatKindFloat, 1); @@ -524,11 +524,11 @@ double CorrectFolding(const nifti_image *referenceImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); ApproxCorrectFolding3d<<>>(controlPointImageCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, + *jacobianMatricesTexture, controlPointImageDims, controlPointSpacing, (unsigned)controlPointNumber, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); @@ -537,9 +537,9 @@ double CorrectFolding(const nifti_image *referenceImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); CorrectFolding3d<<>>(controlPointImageCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, controlPointSpacing, + *jacobianMatricesTexture, controlPointImageDims, controlPointSpacing, controlPointVoxelSpacing, (unsigned)controlPointNumber, - referenceImageDim, reorientation); + referenceImageDims, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda)); diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu index ebb95539..c3c344be 100644 --- a/reg-lib/cuda/CudaLocalTransformationKernels.cu +++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu @@ -217,49 +217,49 @@ __device__ void GetFirstDerivativeBasisValues3D(const int index, float *xBasis, /* *************************************************************** */ __device__ float4 GetSlidedValues(int x, int y, cudaTextureObject_t deformationFieldTexture, - const int3& referenceImageDim, + const int3& referenceImageDims, const mat44& affineMatrix) { int newX = x; if (x < 0) newX = 0; - else if (x >= referenceImageDim.x) - newX = referenceImageDim.x - 1; + else if (x >= referenceImageDims.x) + newX = referenceImageDims.x - 1; int newY = y; if (y < 0) newY = 0; - else if (y >= referenceImageDim.y) - newY = referenceImageDim.y - 1; + else if (y >= referenceImageDims.y) + newY = referenceImageDims.y - 1; x -= newX; y -= newY; const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1], x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1], 0.f, 0.f); - return slidedValues + tex1Dfetch(deformationFieldTexture, newY * referenceImageDim.x + newX); + return slidedValues + tex1Dfetch(deformationFieldTexture, newY * referenceImageDims.x + newX); } /* *************************************************************** */ __device__ float4 GetSlidedValues(int x, int y, int z, cudaTextureObject_t deformationFieldTexture, - const int3& referenceImageDim, + const int3& referenceImageDims, const mat44& affineMatrix) { int newX = x; if (x < 0) newX = 0; - else if (x >= referenceImageDim.x) - newX = referenceImageDim.x - 1; + else if (x >= referenceImageDims.x) + newX = referenceImageDims.x - 1; int newY = y; if (y < 0) newY = 0; - else if (y >= referenceImageDim.y) - newY = referenceImageDim.y - 1; + else if (y >= referenceImageDims.y) + newY = referenceImageDims.y - 1; int newZ = z; if (z < 0) newZ = 0; - else if (z >= referenceImageDim.z) - newZ = referenceImageDim.z - 1; + else if (z >= referenceImageDims.z) + newZ = referenceImageDims.z - 1; x -= newX; y -= newY; @@ -268,15 +268,15 @@ __device__ float4 GetSlidedValues(int x, int y, int z, x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2], x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2], 0.f); - return slidedValues + tex1Dfetch(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX); + return slidedValues + tex1Dfetch(deformationFieldTexture, (newZ * referenceImageDims.y + newY) * referenceImageDims.x + newX); } /* *************************************************************** */ template __device__ void GetDeformationField3d(float4 *deformationField, cudaTextureObject_t controlPointTexture, const mat44 *realToVoxel, - const int3 referenceImageDim, - const int3 controlPointImageDim, + const int3 referenceImageDims, + const int3 controlPointImageDims, const float3 controlPointVoxelSpacing, const int index) { int3 nodePre; @@ -300,14 +300,14 @@ __device__ void GetDeformationField3d(float4 *deformationField, realToVoxel->m[2][2] * node.z + realToVoxel->m[2][3]); - if (xVoxel < 0 || xVoxel >= referenceImageDim.x || - yVoxel < 0 || yVoxel >= referenceImageDim.y || - zVoxel < 0 || zVoxel >= referenceImageDim.z) return; + if (xVoxel < 0 || xVoxel >= referenceImageDims.x || + yVoxel < 0 || yVoxel >= referenceImageDims.y || + zVoxel < 0 || zVoxel >= referenceImageDims.z) return; nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) }; basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) }; } else { // starting deformation field is blank - !composition - const auto [x, y, z] = IndexToDims(index, referenceImageDim); + const auto [x, y, z] = IndexToDims(index, referenceImageDims); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; @@ -324,9 +324,9 @@ __device__ void GetDeformationField3d(float4 *deformationField, float4 displacement{}; for (char c = 0; c < 4; c++) { - int indexYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y) * controlPointImageDim.x; + int indexYZ = ((nodePre.z + c) * controlPointImageDims.y + nodePre.y) * controlPointImageDims.x; const float basisZ = zBasis[c]; - for (char b = 0; b < 4; b++, indexYZ += controlPointImageDim.x) { + for (char b = 0; b < 4; b++, indexYZ += controlPointImageDims.x) { int indexXYZ = indexYZ + nodePre.x; const float basisY = yBasis[b]; for (char a = 0; a < 4; a++, indexXYZ++) { @@ -345,8 +345,8 @@ template __device__ void GetDeformationField2d(float4 *deformationField, cudaTextureObject_t controlPointTexture, const mat44 *realToVoxel, - const int3 referenceImageDim, - const int3 controlPointImageDim, + const int3 referenceImageDims, + const int3 controlPointImageDims, const float3 controlPointVoxelSpacing, const int index) { int2 nodePre; @@ -364,13 +364,13 @@ __device__ void GetDeformationField2d(float4 *deformationField, realToVoxel->m[1][1] * node.y + realToVoxel->m[1][3]); - if (xVoxel < 0 || xVoxel >= referenceImageDim.x || - yVoxel < 0 || yVoxel >= referenceImageDim.y) return; + if (xVoxel < 0 || xVoxel >= referenceImageDims.x || + yVoxel < 0 || yVoxel >= referenceImageDims.y) return; nodePre = { Floor(xVoxel), Floor(yVoxel) }; basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) }; } else { // starting deformation field is blank - !composition - const auto [x, y, z] = IndexToDims(index, referenceImageDim); + const auto [x, y, z] = IndexToDims(index, referenceImageDims); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; @@ -385,7 +385,7 @@ __device__ void GetDeformationField2d(float4 *deformationField, float4 displacement{}; for (char b = 0; b < 4; b++) { - int index = (nodePre.y + b) * controlPointImageDim.x + nodePre.x; + int index = (nodePre.y + b) * controlPointImageDims.x + nodePre.x; const float basis = yBasis[b]; for (char a = 0; a < 4; a++, index++) { const float4 nodeCoeff = tex1Dfetch(controlPointTexture, index); @@ -400,7 +400,7 @@ __device__ void GetDeformationField2d(float4 *deformationField, __global__ void GetApproxJacobianValues2d(float *jacobianMatrices, float *jacobianDet, cudaTextureObject_t controlPointTexture, - const int3 controlPointImageDim, + const int3 controlPointImageDims, const unsigned controlPointNumber, const mat33 reorientation) { __shared__ float xbasis[9]; @@ -412,16 +412,14 @@ __global__ void GetApproxJacobianValues2d(float *jacobianMatrices, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { - int quot, rem; - Divide(tid, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; + const auto [x, y, z] = IndexToDims(tid, controlPointImageDims); - if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1) { + if (0 < x && x < controlPointImageDims.x - 1 && 0 < y && y < controlPointImageDims.y - 1) { float2 tx{}, ty{}; unsigned index = 0; for (int b = y - 1; b < y + 2; ++b) { for (int a = x - 1; a < x + 2; ++a) { - const int indexXY = b * controlPointImageDim.x + a; + const int indexXY = b * controlPointImageDims.x + a; const float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXY); tx.x += xbasis[index] * controlPointValues.x; tx.y += ybasis[index] * controlPointValues.x; @@ -461,7 +459,7 @@ __global__ void GetApproxJacobianValues2d(float *jacobianMatrices, __global__ void GetApproxJacobianValues3d(float *jacobianMatrices, float *jacobianDet, cudaTextureObject_t controlPointTexture, - const int3 controlPointImageDim, + const int3 controlPointImageDims, const unsigned controlPointNumber, const mat33 reorientation) { __shared__ float xbasis[27]; @@ -474,19 +472,15 @@ __global__ void GetApproxJacobianValues3d(float *jacobianMatrices, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { - int quot, rem; - Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); - const int z = quot; - Divide(rem, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; + const auto [x, y, z] = IndexToDims(tid, controlPointImageDims); - if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1 && 0 < z && z < controlPointImageDim.z - 1) { + if (0 < x && x < controlPointImageDims.x - 1 && 0 < y && y < controlPointImageDims.y - 1 && 0 < z && z < controlPointImageDims.z - 1) { float3 tx{}, ty{}, tz{}; unsigned index = 0; for (int c = z - 1; c < z + 2; ++c) { for (int b = y - 1; b < y + 2; ++b) { for (int a = x - 1; a < x + 2; ++a) { - const int indexXYZ = (c * controlPointImageDim.y + b) * controlPointImageDim.x + a; + const int indexXYZ = (c * controlPointImageDims.y + b) * controlPointImageDims.x + a; const float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXYZ); tx.x += xbasis[index] * controlPointValues.x; tx.y += ybasis[index] * controlPointValues.x; @@ -552,16 +546,14 @@ __global__ void GetApproxJacobianValues3d(float *jacobianMatrices, __global__ void GetJacobianValues2d(float *jacobianMatrices, float *jacobianDet, cudaTextureObject_t controlPointTexture, - const int3 controlPointImageDim, + const int3 controlPointImageDims, const float3 controlPointSpacing, - const int3 referenceImageDim, + const int3 referenceImageDims, const unsigned voxelNumber, const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { - int quot, rem; - Divide(tid, referenceImageDim.x, quot, rem); - const int y = quot, x = rem; + const auto [x, y, z] = IndexToDims(tid, referenceImageDims); // the "nearest previous" node is determined [0,0,0] const int2 nodePre = { Floor((float)x / controlPointSpacing.x), Floor((float)y / controlPointSpacing.y) }; @@ -576,7 +568,7 @@ __global__ void GetJacobianValues2d(float *jacobianMatrices, float2 tx{}, ty{}; for (int b = 0; b < 4; ++b) { - int indexXY = (nodePre.y + b) * controlPointImageDim.x + nodePre.x; + int indexXY = (nodePre.y + b) * controlPointImageDims.x + nodePre.x; float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++); float2 basis = make_float2(xFirst[0] * yBasis[b], xBasis[0] * yFirst[b]); @@ -621,18 +613,14 @@ __global__ void GetJacobianValues2d(float *jacobianMatrices, __global__ void GetJacobianValues3d(float *jacobianMatrices, float *jacobianDet, cudaTextureObject_t controlPointTexture, - const int3 controlPointImageDim, + const int3 controlPointImageDims, const float3 controlPointSpacing, - const int3 referenceImageDim, + const int3 referenceImageDims, const unsigned voxelNumber, const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { - int quot, rem; - Divide(tid, referenceImageDim.x * referenceImageDim.y, quot, rem); - const int z = quot; - Divide(rem, referenceImageDim.x, quot, rem); - const int y = quot, x = rem; + const auto [x, y, z] = IndexToDims(tid, referenceImageDims); // the "nearest previous" node is determined [0,0,0] const int3 nodePre = { @@ -659,7 +647,7 @@ __global__ void GetJacobianValues3d(float *jacobianMatrices, float3 tx{}, ty{}, tz{}; for (int c = 0; c < 4; ++c) { for (int b = 0; b < 4; ++b) { - int indexXYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y + b) * controlPointImageDim.x + nodePre.x; + int indexXYZ = ((nodePre.z + c) * controlPointImageDims.y + nodePre.y + b) * controlPointImageDims.x + nodePre.x; float3 basisXY{ yBasis[b] * zBasis[c], yFirst[sharedMemIndex + b] * zBasis[c], yBasis[b] * zFirst[sharedMemIndex + c] }; float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++); @@ -764,7 +752,7 @@ __device__ void GetJacobianGradientValues3d(float *jacobianMatrix, __global__ void ComputeApproxJacGradient2d(float4 *gradient, cudaTextureObject_t jacobianDeterminantTexture, cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, + const int3 controlPointImageDims, const unsigned controlPointNumber, const mat33 reorientation, const float3 weight) { @@ -777,17 +765,15 @@ __global__ void ComputeApproxJacGradient2d(float4 *gradient, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { - int quot, rem; - Divide(tid, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; + const auto [x, y, z] = IndexToDims(tid, controlPointImageDims); float2 jacobianGradient{}; unsigned index = 8; for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) { - if (0 < pixelY && pixelY < controlPointImageDim.y - 1) { - int jacIndex = pixelY * controlPointImageDim.x + x - 1; + if (0 < pixelY && pixelY < controlPointImageDims.y - 1) { + int jacIndex = pixelY * controlPointImageDims.x + x - 1; for (int pixelX = (int)(x - 1); pixelX < (int)(x + 2); ++pixelX) { - if (0 < pixelX && pixelX < controlPointImageDim.x - 1) { + if (0 < pixelX && pixelX < controlPointImageDims.x - 1) { float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac > 0.f) { detJac = 2.f * logf(detJac) / detJac; @@ -815,7 +801,7 @@ __global__ void ComputeApproxJacGradient2d(float4 *gradient, __global__ void ComputeApproxJacGradient3d(float4 *gradient, cudaTextureObject_t jacobianDeterminantTexture, cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, + const int3 controlPointImageDims, const unsigned controlPointNumber, const mat33 reorientation, const float3 weight) { @@ -829,21 +815,17 @@ __global__ void ComputeApproxJacGradient3d(float4 *gradient, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { - int quot, rem; - Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); - const int z = quot; - Divide(rem, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; + const auto [x, y, z] = IndexToDims(tid, controlPointImageDims); float3 jacobianGradient{}; unsigned index = 26; for (int pixelZ = z - 1; pixelZ < z + 2; ++pixelZ) { - if (0 < pixelZ && pixelZ < controlPointImageDim.z - 1) { + if (0 < pixelZ && pixelZ < controlPointImageDims.z - 1) { for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) { - if (0 < pixelY && pixelY < controlPointImageDim.y - 1) { - int jacIndex = (pixelZ * controlPointImageDim.y + pixelY) * controlPointImageDim.x + x - 1; + if (0 < pixelY && pixelY < controlPointImageDims.y - 1) { + int jacIndex = (pixelZ * controlPointImageDims.y + pixelY) * controlPointImageDims.x + x - 1; for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) { - if (0 < pixelX && pixelX < controlPointImageDim.x - 1) { + if (0 < pixelX && pixelX < controlPointImageDims.x - 1) { float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac > 0.f) { detJac = 2.f * logf(detJac) / detJac; @@ -879,34 +861,32 @@ __global__ void ComputeApproxJacGradient3d(float4 *gradient, __global__ void ComputeJacGradient2d(float4 *gradient, cudaTextureObject_t jacobianDeterminantTexture, cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, + const int3 controlPointImageDims, const float3 controlPointVoxelSpacing, const unsigned controlPointNumber, - const int3 referenceImageDim, + const int3 referenceImageDims, const mat33 reorientation, const float3 weight) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { - int quot, rem; - Divide(tid, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; + const auto [x, y, z] = IndexToDims(tid, controlPointImageDims); float2 jacobianGradient{}; for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { - if (-1 < pixelY && pixelY < referenceImageDim.y) { + if (-1 < pixelY && pixelY < referenceImageDims.y) { const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y); float basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre; float yBasis, yFirst; GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst); for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { - if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) { + if (-1 < pixelX && pixelX < referenceImageDims.x && (yFirst != 0.f || yBasis != 0.f)) { const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x); basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre; float xBasis, xFirst; GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst); - int jacIndex = pixelY * referenceImageDim.x + pixelX; + int jacIndex = pixelY * referenceImageDims.x + pixelX; float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) { @@ -934,43 +914,39 @@ __global__ void ComputeJacGradient2d(float4 *gradient, __global__ void ComputeJacGradient3d(float4 *gradient, cudaTextureObject_t jacobianDeterminantTexture, cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, + const int3 controlPointImageDims, const float3 controlPointVoxelSpacing, const unsigned controlPointNumber, - const int3 referenceImageDim, + const int3 referenceImageDims, const mat33 reorientation, const float3 weight) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { - int quot, rem; - Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); - const int z = quot; - Divide(rem, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; + const auto [x, y, z] = IndexToDims(tid, controlPointImageDims); float3 jacobianGradient{}; for (int pixelZ = Ceil((z - 3) * controlPointVoxelSpacing.z); pixelZ <= Ceil((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) { - if (-1 < pixelZ && pixelZ < referenceImageDim.z) { + if (-1 < pixelZ && pixelZ < referenceImageDims.z) { const int zPre = (int)((float)pixelZ / controlPointVoxelSpacing.z); float basis = (float)pixelZ / controlPointVoxelSpacing.z - (float)zPre; float zBasis, zFirst; GetBSplineBasisValue(basis, z - zPre, &zBasis, &zFirst); for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { - if (-1 < pixelY && pixelY < referenceImageDim.y && (zFirst != 0.f || zBasis != 0.f)) { + if (-1 < pixelY && pixelY < referenceImageDims.y && (zFirst != 0.f || zBasis != 0.f)) { const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y); basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre; float yBasis, yFirst; GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst); for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { - if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) { + if (-1 < pixelX && pixelX < referenceImageDims.x && (yFirst != 0.f || yBasis != 0.f)) { const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x); basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre; float xBasis, xFirst; GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst); - int jacIndex = (pixelZ * referenceImageDim.y + pixelY) * referenceImageDim.x + pixelX; + int jacIndex = (pixelZ * referenceImageDims.y + pixelY) * referenceImageDims.x + pixelX; float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) { @@ -1011,26 +987,22 @@ __global__ void ComputeJacGradient3d(float4 *gradient, __global__ void ApproxCorrectFolding3d(float4 *controlPointGrid, cudaTextureObject_t jacobianDeterminantTexture, cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, + const int3 controlPointImageDims, const float3 controlPointSpacing, const unsigned controlPointNumber, const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { - int quot, rem; - Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); - const int z = quot; - Divide(rem, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; + const auto [x, y, z] = IndexToDims(tid, controlPointImageDims); float3 foldingCorrection{}; for (int pixelZ = z - 1; pixelZ < z + 2; ++pixelZ) { - if (0 < pixelZ && pixelZ < controlPointImageDim.z - 1) { + if (0 < pixelZ && pixelZ < controlPointImageDims.z - 1) { for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) { - if (0 < pixelY && pixelY < controlPointImageDim.y - 1) { + if (0 < pixelY && pixelY < controlPointImageDims.y - 1) { for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) { - if (0 < pixelX && pixelX < controlPointImageDim.x - 1) { - int jacIndex = (pixelZ * controlPointImageDim.y + pixelY) * controlPointImageDim.x + pixelX; + if (0 < pixelX && pixelX < controlPointImageDims.x - 1) { + int jacIndex = (pixelZ * controlPointImageDims.y + pixelY) * controlPointImageDims.x + pixelX; float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac <= 0.f) { float jacobianMatrix[9]; @@ -1080,28 +1052,24 @@ __global__ void ApproxCorrectFolding3d(float4 *controlPointGrid, __global__ void CorrectFolding3d(float4 *controlPointGrid, cudaTextureObject_t jacobianDeterminantTexture, cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, + const int3 controlPointImageDims, const float3 controlPointSpacing, const float3 controlPointVoxelSpacing, const unsigned controlPointNumber, - const int3 referenceImageDim, + const int3 referenceImageDims, const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { - int quot, rem; - Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); - const int z = quot; - Divide(rem, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; + const auto [x, y, z] = IndexToDims(tid, controlPointImageDims); float3 foldingCorrection{}; for (int pixelZ = Ceil((z - 3) * controlPointVoxelSpacing.z); pixelZ < Ceil((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) { - if (-1 < pixelZ && pixelZ < referenceImageDim.z) { + if (-1 < pixelZ && pixelZ < referenceImageDims.z) { for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY < Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { - if (-1 < pixelY && pixelY < referenceImageDim.y) { + if (-1 < pixelY && pixelY < referenceImageDims.y) { for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX < Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { - if (-1 < pixelX && pixelX < referenceImageDim.x) { - int jacIndex = (pixelZ * referenceImageDim.y + pixelY) * referenceImageDim.x + pixelX; + if (-1 < pixelX && pixelX < referenceImageDims.x) { + int jacIndex = (pixelZ * referenceImageDims.y + pixelY) * referenceImageDims.x + pixelX; float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac <= 0.f) { float jacobianMatrix[9]; diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 073906b7..03f6d253 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -57,7 +57,7 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage, const size_t activeVoxelNumber, const double *timePointWeights, const int referenceTimePoints) { - const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); Cuda::UniqueTextureObjectPtr localWeightSimTexturePtr; cudaTextureObject_t localWeightSimTexture = 0; @@ -123,7 +123,7 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, const size_t activeVoxelNumber, const double timePointWeight, const int currentTimePoint) { - const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); auto referenceTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);