diff --git a/src/cudamatrix/cu-kernels-ansi.h b/src/cudamatrix/cu-kernels-ansi.h index 878ba216407..554837049e3 100644 --- a/src/cudamatrix/cu-kernels-ansi.h +++ b/src/cudamatrix/cu-kernels-ansi.h @@ -64,6 +64,7 @@ void cudaF_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, MatrixDim d); void cudaF_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power, bool include_sign, MatrixDim d); void cudaF_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim d); +void cudaF_apply_heaviside_by_row(dim3 Gr, dim3 Bl, float* mat, MatrixDim d); void cudaF_apply_floor(dim3 Gr, dim3 Bl, float* mat, float floor_val, MatrixDim d); void cudaF_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, @@ -330,6 +331,7 @@ void cudaD_apply_pow(dim3 Gr, dim3 Bl, double* mat, double power, MatrixDim d); void cudaD_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, bool include_sign, MatrixDim d); void cudaD_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim d); +void cudaD_apply_heaviside_by_row(dim3 Gr, dim3 Bl, double* mat, MatrixDim d); void cudaD_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val, MatrixDim d); void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index e06dfbe56ac..98e03b02b37 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -1636,11 +1636,11 @@ static void _apply_heaviside_by_row(Real* mat, MatrixDim d) { int j_tempt = blockIdx.y * blockDim.y + threadIdx.y; // row index using to control setting heavyside() in the first rows int index = i + j * d.stride; if (i < d.cols && j < d.rows) - if (j = j_ref) { + if (j = j_tempt) { mat[index] = (mat[index] > 0.0 ? 1.0 : 0.0); } else { - mat[index] = mat[index-d.stride-d.cols] + mat[index] = mat[index-d.stride-d.cols]; } } @@ -3901,6 +3901,10 @@ void cudaD_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim d) { _apply_heaviside<<>>(mat, d); } +void cudaD_apply_heaviside_by_row(dim3 Gr, dim3 Bl, double* mat, MatrixDim d) { + _apply_heaviside_by_row<<>>(mat, d); +} + void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) { diff --git a/src/cudamatrix/cu-kernels.h b/src/cudamatrix/cu-kernels.h index 71493ad8bd6..58432f290f7 100644 --- a/src/cudamatrix/cu-kernels.h +++ b/src/cudamatrix/cu-kernels.h @@ -742,6 +742,9 @@ inline void cuda_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, inline void cuda_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim dim) { cudaD_apply_heaviside(Gr, Bl, mat, dim); } +inline void cuda_apply_heaviside_by_row(dim3 Gr, dim3 Bl, double* mat, MatrixDim dim) { + cudaD_apply_heaviside_by_row(Gr, Bl, mat, dim); +} inline void cuda_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val, MatrixDim dim) { cudaD_apply_floor(Gr, Bl, mat, floor_val, dim);