Skip to content

Commit

Permalink
adding kernel heavybyrow
Browse files Browse the repository at this point in the history
  • Loading branch information
GaofengCheng committed Dec 17, 2016
1 parent c1d1ad1 commit 14662b6
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 9 deletions.
21 changes: 21 additions & 0 deletions src/cudamatrix/cu-kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1628,6 +1628,23 @@ static void _apply_heaviside(Real* mat, MatrixDim d) {
mat[index] = (mat[index] > 0.0 ? 1.0 : 0.0);
}

// Row-wise Heaviside, applied in place.
//
// Every element of row j is set to (mat(j, 0) > 0 ? 1 : 0), i.e. the step
// function of the row's first element is broadcast across the whole row.
// This mirrors the host-side sequence it replaces in
// DropoutComponent::Propagate (ApplyHeaviside, then copying column 0 into
// every column).
//
// Launch layout: 2-D grid, x indexes columns and y indexes rows; threads
// that fall outside the matrix do nothing.
//
//   mat  pointer to the first element of the matrix (row-major, with stride)
//   d    matrix dimensions: d.rows, d.cols and the row stride d.stride
template<typename Real>
__global__
static void _apply_heaviside_by_row(Real* mat, MatrixDim d) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;  // col index
  int j = blockIdx.y * blockDim.y + threadIdx.y;  // row index
  if (i < d.cols && j < d.rows) {
    int row_start = j * d.stride;  // index of element (j, 0)
    // The thread handling column 0 overwrites the very element that the
    // other threads of this row read.  That is harmless here: the stored
    // value is 0 or 1, and the step function maps 0 -> 0 and 1 -> 1, so a
    // reader gets the same result whether it sees the old or the new value.
    mat[row_start + i] = (mat[row_start] > Real(0) ? Real(1) : Real(0));
  }
}


template<typename Real>
__global__
static void _apply_floor(Real* mat, Real floor_val, MatrixDim d) {
Expand Down Expand Up @@ -3233,6 +3250,10 @@ void cudaF_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) {
_apply_heaviside<<<Gr,Bl>>>(mat, d);
}

// Host-side entry point (float): launches _apply_heaviside_by_row on `mat`
// using grid `Gr` and block `Bl`; `d` carries the matrix dimensions.
void cudaF_apply_heaviside_by_row(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) {
  _apply_heaviside_by_row<float><<<Gr, Bl>>>(mat, d);
}

void cudaF_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src,
const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
int src_stride) {
Expand Down
3 changes: 3 additions & 0 deletions src/cudamatrix/cu-kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ inline void cuda_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power,
// Float version: forwards all arguments unchanged to cudaF_apply_heaviside.
inline void cuda_apply_heaviside(dim3 Gr, dim3 Bl, float* mat,
                                 MatrixDim dim) {
  cudaF_apply_heaviside(Gr, Bl, mat, dim);
}
// Float version: forwards all arguments unchanged to
// cudaF_apply_heaviside_by_row.
// NOTE(review): no double counterpart is visible in this section; a
// CuMatrixBase<double> instantiation would presumably need one -- confirm.
inline void cuda_apply_heaviside_by_row(dim3 Gr, dim3 Bl, float* mat,
                                        MatrixDim dim) {
  cudaF_apply_heaviside_by_row(Gr, Bl, mat, dim);
}
inline void cuda_apply_floor(dim3 Gr, dim3 Bl, float* mat, float floor_val,
MatrixDim dim) {
cudaF_apply_floor(Gr, Bl, mat, floor_val, dim);
Expand Down
17 changes: 17 additions & 0 deletions src/cudamatrix/cu-matrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2207,6 +2207,23 @@ void CuMatrixBase<Real>::ApplyHeaviside() {
}
}

// Row-wise Heaviside, applied in place: every element of a row is set to
// (x0 > 0 ? 1.0 : 0.0), where x0 is the first element of that row.  This is
// the same result as ApplyHeaviside() followed by copying column 0 into all
// columns, which is what DropoutComponent::Propagate did before this
// function existed.
//
// With CUDA enabled the work is delegated to cuda_apply_heaviside_by_row();
// otherwise the rows are processed on the host.
template<typename Real>
void CuMatrixBase<Real>::ApplyHeavisideByRow() {
  // Nothing to do for an empty matrix; a zero-sized grid is also not a
  // valid kernel launch configuration.
  if (NumRows() == 0 || NumCols() == 0) return;
#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    Timer tim;
    dim3 dimGrid, dimBlock;
    GetBlockSizesForSimpleMatrixOperation(NumRows(), NumCols(),
                                          &dimGrid, &dimBlock);
    cuda_apply_heaviside_by_row(dimGrid, dimBlock, data_, Dim());
    CU_SAFE_CALL(cudaGetLastError());
    CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
  } else
#endif
  {
    // CPU fallback.  Assumes data_ is ordinary host memory whenever CUDA is
    // not in use, as for the other CPU fallbacks of this class.
    const MatrixDim dim = Dim();
    Real *row = data_;
    for (MatrixIndexT r = 0; r < dim.rows; r++, row += dim.stride) {
      const Real mask = (row[0] > 0.0 ? 1.0 : 0.0);
      for (MatrixIndexT c = 0; c < dim.cols; c++)
        row[c] = mask;
    }
  }
}
template<typename Real>
void CuMatrixBase<Real>::Heaviside(const CuMatrixBase<Real> &src) {
KALDI_ASSERT(SameDim(*this, src));
Expand Down
1 change: 1 addition & 0 deletions src/cudamatrix/cu-matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ class CuMatrixBase {
/// For each element, sets x = (x > 0 ? 1.0 : 0.0).
/// See also Heaviside().
void ApplyHeaviside();
/// Row-wise counterpart of ApplyHeaviside(), applied in place; called by
/// DropoutComponent::Propagate when building its dropout mask.
/// NOTE(review): intended to give all elements of a row the same 0/1
/// value -- confirm against the kernel implementation.
void ApplyHeavisideByRow();
void ApplyFloor(Real floor_val);
void ApplyCeiling(Real ceiling_val);
void ApplyExp();
Expand Down
11 changes: 2 additions & 9 deletions src/nnet3/nnet-simple-component.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,15 +146,8 @@ void DropoutComponent::Propagate(const ComponentPrecomputedIndexes *indexes,
// to use multi-threaded code with the GPU.
const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
out->Add(-dropout); // now, a proportion "dropout" will be <0.0
out->ApplyHeaviside(); // apply the function (x>0?1:0). Now, a proportion "dropout" will
// be zero and (1 - dropout) will be 1.0.
CuVector<BaseFloat> *random_drop_vector = new CuVector<BaseFloat>(in.NumRows(), kSetZero);
MatrixIndexT i = 0;
random_drop_vector->CopyColFromMat(*out, i);
for (MatrixIndexT i = 0; i < in.NumCols(); i++)
{
out->CopyColFromVec(*random_drop_vector, i);
}
out->ApplyHeavisideByRow(); // apply the function (x>0?1:0). Now, a proportion "dropout" will
// be zero and (1 - dropout) will be 1.0 by row.
out->MulElements(in);
}
}
Expand Down

0 comments on commit 14662b6

Please sign in to comment.