Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone this repository with HTTPS or Subversion.
Download ZIP
Browse files

some attempts to tune the performance

  • Loading branch information...
commit b7553d4e2eddc5645023a94a850fe092dd9f223e 1 parent 02fb3f0
@vpisarev vpisarev authored
Showing with 187 additions and 150 deletions.
  1. +0 −1  data/haarcascades/haarcascade_eye.xml
  2. +0 −1  data/haarcascades/haarcascade_eye_tree_eyeglasses.xml
  3. +0 −1  data/haarcascades/haarcascade_frontalface_alt.xml
  4. +0 −1  data/haarcascades/haarcascade_frontalface_alt2.xml
  5. +0 −1  data/haarcascades/haarcascade_frontalface_alt_tree.xml
  6. +0 −1  data/haarcascades/haarcascade_frontalface_default.xml
  7. +0 −1  data/haarcascades/haarcascade_fullbody.xml
  8. +0 −1  data/haarcascades/haarcascade_lefteye_2splits.xml
  9. +0 −1  data/haarcascades/haarcascade_lowerbody.xml
  10. +0 −1  data/haarcascades/haarcascade_mcs_eyepair_big.xml
  11. +0 −1  data/haarcascades/haarcascade_mcs_eyepair_small.xml
  12. +0 −1  data/haarcascades/haarcascade_mcs_leftear.xml
  13. +0 −1  data/haarcascades/haarcascade_mcs_lefteye.xml
  14. +0 −1  data/haarcascades/haarcascade_mcs_mouth.xml
  15. +0 −1  data/haarcascades/haarcascade_mcs_nose.xml
  16. +0 −1  data/haarcascades/haarcascade_mcs_rightear.xml
  17. +0 −1  data/haarcascades/haarcascade_mcs_righteye.xml
  18. +0 −1  data/haarcascades/haarcascade_mcs_upperbody.xml
  19. +0 −1  data/haarcascades/haarcascade_profileface.xml
  20. +0 −1  data/haarcascades/haarcascade_righteye_2splits.xml
  21. +0 −1  data/haarcascades/haarcascade_smile.xml
  22. +0 −1  data/haarcascades/haarcascade_upperbody.xml
  23. +37 −19 modules/objdetect/src/cascadedetect.cpp
  24. +53 −43 modules/objdetect/src/cascadedetect.hpp
  25. +0 −1  modules/objdetect/src/cascadedetect_convert.cpp
  26. +97 −65 modules/objdetect/src/opencl/haarobjectdetect.cl
View
1  data/haarcascades/haarcascade_eye.xml
@@ -48,7 +48,6 @@
<height>20</height>
<width>20</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>93</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_eye_tree_eyeglasses.xml
@@ -48,7 +48,6 @@
<height>20</height>
<width>20</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>47</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_frontalface_alt.xml
@@ -48,7 +48,6 @@
<height>20</height>
<width>20</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>213</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_frontalface_alt2.xml
@@ -48,7 +48,6 @@
<height>20</height>
<width>20</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>109</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_frontalface_alt_tree.xml
@@ -49,7 +49,6 @@
<height>20</height>
<width>20</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>406</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_frontalface_default.xml
@@ -48,7 +48,6 @@
<height>24</height>
<width>24</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>211</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_fullbody.xml
@@ -142,7 +142,6 @@ Thanks to Martin Spengler, ETH Zurich, for providing the demo movie.
<height>14</height>
<width>28</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>107</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_lefteye_2splits.xml
@@ -49,7 +49,6 @@
<height>20</height>
<width>20</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>33</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_lowerbody.xml
@@ -142,7 +142,6 @@ Thanks to Martin Spengler, ETH Zurich, for providing the demo movie.
<height>19</height>
<width>23</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>89</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_mcs_eyepair_big.xml
@@ -88,7 +88,6 @@ mcastrillon@iusiani.ulpgc.es
<height>45</height>
<width>11</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>85</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_mcs_eyepair_small.xml
@@ -87,7 +87,6 @@ mcastrillon@iusiani.ulpgc.es
<height>22</height>
<width>5</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>133</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_mcs_leftear.xml
@@ -67,7 +67,6 @@ mcastrillon@iusiani.ulpgc.es
<height>12</height>
<width>20</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>65</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_mcs_lefteye.xml
@@ -87,7 +87,6 @@ mcastrillon@iusiani.ulpgc.es
<height>18</height>
<width>12</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>279</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_mcs_mouth.xml
@@ -87,7 +87,6 @@ mcastrillon@iusiani.ulpgc.es
<height>25</height>
<width>15</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>218</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_mcs_nose.xml
@@ -87,7 +87,6 @@ mcastrillon@iusiani.ulpgc.es
<height>18</height>
<width>15</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>377</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_mcs_rightear.xml
@@ -67,7 +67,6 @@ mcastrillon@iusiani.ulpgc.es
<height>12</height>
<width>20</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>61</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_mcs_righteye.xml
@@ -87,7 +87,6 @@ mcastrillon@iusiani.ulpgc.es
<height>18</height>
<width>12</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>415</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_mcs_upperbody.xml
@@ -85,7 +85,6 @@ mcastrillon@iusiani.ulpgc.es
<height>22</height>
<width>20</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>334</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_profileface.xml
@@ -48,7 +48,6 @@
<height>20</height>
<width>20</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>195</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_righteye_2splits.xml
@@ -49,7 +49,6 @@
<height>20</height>
<width>20</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>34</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_smile.xml
@@ -50,7 +50,6 @@
<height>36</height>
<width>18</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>53</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
1  data/haarcascades/haarcascade_upperbody.xml
@@ -142,7 +142,6 @@ Thanks to Martin Spengler, ETH Zurich, for providing the demo movie.
<height>22</height>
<width>18</width>
<stageParams>
- <maxDepth>0</maxDepth>
<maxWeakCount>152</maxWeakCount></stageParams>
<featureParams>
<maxCatCount>0</maxCatCount></featureParams>
View
56 modules/objdetect/src/cascadedetect.cpp
@@ -954,7 +954,7 @@ int CascadeClassifierImpl::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, do
if( !evaluator->setWindow(pt) )
return -1;
- if( data.isStumpBased )
+ if( data.isStumpBased() )
{
if( data.featureType == FeatureEvaluator::HAAR )
return predictOrderedStump<HaarEvaluator>( *this, evaluator, weight );
@@ -1133,6 +1133,7 @@ bool CascadeClassifierImpl::detectSingleScale( InputArray _image, Size processin
bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size processingRectSize,
int yStep, double factor, Size sumSize0 )
{
+ const int VECTOR_SIZE = 4;
Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
if( haar.empty() )
return false;
@@ -1142,7 +1143,7 @@ bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size proce
if( cascadeKernel.empty() )
{
cascadeKernel.create("runHaarClassifierStump", ocl::objdetect::haarobjectdetect_oclsrc,
- format("-D MAX_FACES=%d", MAX_FACES));
+ format("-D VECTOR_SIZE=%d", VECTOR_SIZE));
if( cascadeKernel.empty() )
return false;
}
@@ -1150,9 +1151,7 @@ bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size proce
if( ustages.empty() )
{
copyVectorToUMat(data.stages, ustages);
- copyVectorToUMat(data.classifiers, uclassifiers);
- copyVectorToUMat(data.nodes, unodes);
- copyVectorToUMat(data.leaves, uleaves);
+ copyVectorToUMat(data.stumps, ustumps);
}
std::vector<UMat> bufs;
@@ -1162,7 +1161,7 @@ bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size proce
Rect normrect = haar->getNormRect();
//processingRectSize = Size(yStep, yStep);
- size_t globalsize[] = { processingRectSize.width/yStep, processingRectSize.height/yStep };
+ size_t globalsize[] = { (processingRectSize.width/yStep + VECTOR_SIZE-1)/VECTOR_SIZE, processingRectSize.height/yStep };
cascadeKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum
@@ -1171,14 +1170,12 @@ bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size proce
// cascade classifier
(int)data.stages.size(),
ocl::KernelArg::PtrReadOnly(ustages),
- ocl::KernelArg::PtrReadOnly(uclassifiers),
- ocl::KernelArg::PtrReadOnly(unodes),
- ocl::KernelArg::PtrReadOnly(uleaves),
+ ocl::KernelArg::PtrReadOnly(ustumps),
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
processingRectSize,
yStep, (float)factor,
- normrect, data.origWinSize);
+ normrect, data.origWinSize, MAX_FACES);
bool ok = cascadeKernel.run(2, globalsize, 0, true);
//CV_Assert(ok);
return ok;
@@ -1243,7 +1240,7 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
bool use_ocl = ocl::useOpenCL() &&
getFeatureType() == FeatureEvaluator::HAAR &&
!isOldFormatCascade() &&
- data.isStumpBased &&
+ data.isStumpBased() &&
maskGenerator.empty() &&
!outputRejectLevels &&
tryOpenCL;
@@ -1345,7 +1342,6 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
Mat facepos = ufacepos.getMat(ACCESS_READ);
const int* fptr = facepos.ptr<int>();
int i, nfaces = fptr[0];
- printf("nfaces = %d\n", nfaces);
for( i = 0; i < nfaces; i++ )
{
candidates.push_back(Rect(fptr[i*4+1], fptr[i*4+2], fptr[i*4+3], fptr[i*4+4]));
@@ -1428,6 +1424,12 @@ void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector<Rec
}
}
+
+CascadeClassifierImpl::Data::Data()
+{
+ stageType = featureType = ncategories = maxNodesPerTree = 0;
+}
+
bool CascadeClassifierImpl::Data::read(const FileNode &root)
{
static const float THRESHOLD_EPS = 1e-5f;
@@ -1471,9 +1473,10 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
stages.reserve(fn.size());
classifiers.clear();
nodes.clear();
+ stumps.clear();
FileNodeIterator it = fn.begin(), it_end = fn.end();
- isStumpBased = true;
+ maxNodesPerTree = 0;
for( int si = 0; it != it_end; si++, ++it )
{
@@ -1499,9 +1502,8 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
DTree tree;
tree.nodeCount = (int)internalNodes.size()/nodeStep;
- if( tree.nodeCount > 1 )
- isStumpBased = false;
-
+ maxNodesPerTree = std::max(maxNodesPerTree, tree.nodeCount);
+
classifiers.push_back(tree);
nodes.reserve(nodes.size() + tree.nodeCount);
@@ -1536,6 +1538,24 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
leaves.push_back((float)*internalNodesIter);
}
}
+
+ if( isStumpBased() )
+ {
+ int nodeOfs = 0, leafOfs = 0;
+ size_t nstages = stages.size();
+ for( size_t stageIdx = 0; stageIdx < nstages; stageIdx++ )
+ {
+ const Stage& stage = stages[stageIdx];
+
+ int ntrees = stage.ntrees;
+ for( int i = 0; i < ntrees; i++, nodeOfs++, leafOfs+= 2 )
+ {
+ const DTreeNode& node = nodes[nodeOfs];
+ stumps.push_back(Stump(node.featureIdx, node.threshold,
+ leaves[leafOfs], leaves[leafOfs+1]));
+ }
+ }
+ }
return true;
}
@@ -1546,9 +1566,7 @@ bool CascadeClassifierImpl::read_(const FileNode& root)
tryOpenCL = true;
cascadeKernel = ocl::Kernel();
ustages.release();
- uclassifiers.release();
- unodes.release();
- uleaves.release();
+ ustumps.release();
if( !data.read(root) )
return false;
View
96 modules/objdetect/src/cascadedetect.hpp
@@ -48,7 +48,7 @@ class CascadeClassifierImpl : public BaseCascadeClassifier
Ptr<MaskGenerator> getMaskGenerator();
protected:
- enum { SUM_ALIGN = 16 };
+ enum { SUM_ALIGN = 64 };
bool detectSingleScale( InputArray image, Size processingRectSize,
int yStep, double factor, std::vector<Rect>& candidates,
@@ -109,14 +109,29 @@ class CascadeClassifierImpl : public BaseCascadeClassifier
int ntrees;
float threshold;
};
+
+ struct Stump
+ {
+ Stump() {};
+ Stump(int _featureIdx, float _threshold, float _left, float _right)
+ : featureIdx(_featureIdx), threshold(_threshold), left(_left), right(_right) {}
+
+ int featureIdx;
+ float threshold;
+ float left;
+ float right;
+ };
+
+ Data();
bool read(const FileNode &node);
- bool isStumpBased;
+ bool isStumpBased() const { return maxNodesPerTree == 1; }
int stageType;
int featureType;
int ncategories;
+ int maxNodesPerTree;
Size origWinSize;
std::vector<Stage> stages;
@@ -124,6 +139,7 @@ class CascadeClassifierImpl : public BaseCascadeClassifier
std::vector<DTreeNode> nodes;
std::vector<float> leaves;
std::vector<int> subsets;
+ std::vector<Stump> stumps;
};
Data data;
@@ -132,7 +148,7 @@ class CascadeClassifierImpl : public BaseCascadeClassifier
Ptr<MaskGenerator> maskGenerator;
UMat ugrayImage, uimageBuffer;
- UMat ufacepos, ustages, uclassifiers, unodes, uleaves, usubsets;
+ UMat ufacepos, ustages, ustumps, usubsets;
ocl::Kernel cascadeKernel;
bool tryOpenCL;
@@ -592,30 +608,36 @@ template<class FEval>
inline int predictOrderedStump( CascadeClassifierImpl& cascade,
Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
{
- int nodeOfs = 0, leafOfs = 0;
+ CV_Assert(!cascade.data.stumps.empty());
FEval& featureEvaluator = (FEval&)*_featureEvaluator;
- float* cascadeLeaves = &cascade.data.leaves[0];
- CascadeClassifierImpl::Data::DTreeNode* cascadeNodes = &cascade.data.nodes[0];
- CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0];
+ const CascadeClassifierImpl::Data::Stump* cascadeStumps = &cascade.data.stumps[0];
+ const CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0];
int nstages = (int)cascade.data.stages.size();
+ double tmp = 0;
+
for( int stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
- CascadeClassifierImpl::Data::Stage& stage = cascadeStages[stageIdx];
- sum = 0.0;
+ const CascadeClassifierImpl::Data::Stage& stage = cascadeStages[stageIdx];
+ tmp = 0;
int ntrees = stage.ntrees;
- for( int i = 0; i < ntrees; i++, nodeOfs++, leafOfs+= 2 )
+ for( int i = 0; i < ntrees; i++ )
{
- CascadeClassifierImpl::Data::DTreeNode& node = cascadeNodes[nodeOfs];
- double value = featureEvaluator(node.featureIdx);
- sum += cascadeLeaves[ value < node.threshold ? leafOfs : leafOfs + 1 ];
+ const CascadeClassifierImpl::Data::Stump& stump = cascadeStumps[i];
+ double value = featureEvaluator(stump.featureIdx);
+ tmp += value < stump.threshold ? stump.left : stump.right;
}
- if( sum < stage.threshold )
+ if( tmp < stage.threshold )
+ {
+ sum = (double)tmp;
return -stageIdx;
+ }
+ cascadeStumps += ntrees;
}
+ sum = (double)tmp;
return 1;
}
@@ -623,56 +645,44 @@ template<class FEval>
inline int predictCategoricalStump( CascadeClassifierImpl& cascade,
Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
{
+ CV_Assert(!cascade.data.stumps.empty());
int nstages = (int)cascade.data.stages.size();
- int nodeOfs = 0, leafOfs = 0;
FEval& featureEvaluator = (FEval&)*_featureEvaluator;
size_t subsetSize = (cascade.data.ncategories + 31)/32;
- int* cascadeSubsets = &cascade.data.subsets[0];
- float* cascadeLeaves = &cascade.data.leaves[0];
- CascadeClassifierImpl::Data::DTreeNode* cascadeNodes = &cascade.data.nodes[0];
- CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0];
+ const int* cascadeSubsets = &cascade.data.subsets[0];
+ const CascadeClassifierImpl::Data::Stump* cascadeStumps = &cascade.data.stumps[0];
+ const CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0];
#ifdef HAVE_TEGRA_OPTIMIZATION
float tmp = 0; // float accumulator -- float operations are quicker
+#else
+ double tmp = 0;
#endif
for( int si = 0; si < nstages; si++ )
{
- CascadeClassifierImpl::Data::Stage& stage = cascadeStages[si];
+ const CascadeClassifierImpl::Data::Stage& stage = cascadeStages[si];
int wi, ntrees = stage.ntrees;
-#ifdef HAVE_TEGRA_OPTIMIZATION
tmp = 0;
-#else
- sum = 0;
-#endif
for( wi = 0; wi < ntrees; wi++ )
{
- CascadeClassifierImpl::Data::DTreeNode& node = cascadeNodes[nodeOfs];
- int c = featureEvaluator(node.featureIdx);
- const int* subset = &cascadeSubsets[nodeOfs*subsetSize];
-#ifdef HAVE_TEGRA_OPTIMIZATION
- tmp += cascadeLeaves[ subset[c>>5] & (1 << (c & 31)) ? leafOfs : leafOfs+1];
-#else
- sum += cascadeLeaves[ subset[c>>5] & (1 << (c & 31)) ? leafOfs : leafOfs+1];
-#endif
- nodeOfs++;
- leafOfs += 2;
+ const CascadeClassifierImpl::Data::Stump& stump = cascadeStumps[wi];
+ int c = featureEvaluator(stump.featureIdx);
+ const int* subset = &cascadeSubsets[wi*subsetSize];
+ tmp += (subset[c>>5] & (1 << (c & 31))) ? stump.left : stump.right;
}
-#ifdef HAVE_TEGRA_OPTIMIZATION
- if( tmp < stage.threshold ) {
+
+ if( tmp < stage.threshold )
+ {
sum = (double)tmp;
return -si;
}
-#else
- if( sum < stage.threshold )
- return -si;
-#endif
+
+ cascadeStumps += ntrees;
+ cascadeSubsets += ntrees*subsetSize;
}
-#ifdef HAVE_TEGRA_OPTIMIZATION
sum = (double)tmp;
-#endif
-
return 1;
}
}
View
1  modules/objdetect/src/cascadedetect_convert.cpp
@@ -209,7 +209,6 @@ static bool convert(const String& oldcascade, const String& newcascade)
<< "height" << cascadesize.width
<< "width" << cascadesize.height
<< "stageParams" << "{"
- << "maxDepth" << maxdepth
<< "maxWeakCount" << (int)maxWeakCount
<< "}"
<< "featureParams" << "{"
View
162 modules/objdetect/src/opencl/haarobjectdetect.cl
@@ -1,43 +1,5 @@
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Niko Li, newlife20080214@gmail.com
-// Wang Weiyan, wangweiyanster@gmail.com
-// Jia Haipeng, jiahaipeng95@gmail.com
-// Nathan, liujun@multicorewareinc.com
-// Peng Xiao, pengxiao@outlook.com
-// Erping Pang, erping@multicorewareinc.com
-// Vadim Pisarevsky, vadim.pisarevsky@itseez.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
+///////////////////////////// OpenCL kernels for face detection //////////////////////////////
+////////////////////////////// see the opencv/doc/license.txt ///////////////////////////////
typedef struct __attribute__((aligned(4))) OptFeature
{
@@ -46,20 +8,14 @@ typedef struct __attribute__((aligned(4))) OptFeature
}
OptFeature;
-typedef struct __attribute__((aligned(4))) DTreeNode
+typedef struct __attribute__((aligned(4))) Stump
{
int featureIdx __attribute__((aligned (4)));
float threshold __attribute__((aligned (4))); // for ordered features only
- int left __attribute__((aligned (4)));
- int right __attribute__((aligned (4)));
+ float left __attribute__((aligned (4)));
+ float right __attribute__((aligned (4)));
}
-DTreeNode;
-
-typedef struct __attribute__((aligned (4))) DTree
-{
- int nodeCount __attribute__((aligned (4)));
-}
-DTree;
+Stump;
typedef struct __attribute__((aligned (4))) Stage
{
@@ -78,25 +34,23 @@ __kernel void runHaarClassifierStump(
int nstages,
__global const Stage* stages,
- __global const DTree* trees,
- __global const DTreeNode* nodes,
- __global const float* leaves,
+ __global const Stump* stumps,
volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
- int4 normrect, int2 windowsize)
+ int4 normrect, int2 windowsize, int maxFaces)
{
- int ix = get_global_id(0)*xyscale;
+ int ix = get_global_id(0)*xyscale*VECTOR_SIZE;
int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int);
sqsumstep /= sizeof(int);
if( ix < imgsize.x && iy < imgsize.y )
{
- int ntrees, nodeOfs = 0, leafOfs = 0;
+ int ntrees;
int stageIdx, i;
float s = 0.f;
- __global const DTreeNode* node;
+ __global const Stump* stump = stumps;
__global const OptFeature* f;
__global const int* psum = sum + mad24(iy, sumstep, ix);
@@ -107,19 +61,17 @@ __kernel void runHaarClassifierStump(
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
- float4 weight;
- int4 ofs;
+ float4 weight, vsval;
+ int4 ofs, ofs0, ofs1, ofs2;
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
ntrees = stages[stageIdx].ntrees;
s = 0.f;
- for( i = 0; i < ntrees; i++, nodeOfs++, leafOfs += 2 )
+ for( i = 0; i < ntrees; i++, stump++ )
{
- node = nodes + nodeOfs;
- f = optfeatures + node->featureIdx;
-
+ f = optfeatures + stump->featureIdx;
weight = f->weight;
ofs = f->ofs[0];
@@ -131,7 +83,8 @@ __kernel void runHaarClassifierStump(
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
- s += leaves[ sval < node->threshold*nf ? leafOfs : leafOfs + 1 ];
+
+ s += (sval < stump->threshold*nf) ? stump->left : stump->right;
}
if( s < stages[stageIdx].threshold )
@@ -142,7 +95,84 @@ __kernel void runHaarClassifierStump(
{
int nfaces = atomic_inc(facepos);
//printf("detected face #d!!!!\n", nfaces);
- if( nfaces < MAX_FACES )
+ if( nfaces < maxFaces )
+ {
+ volatile __global int* face = facepos + 1 + nfaces*4;
+ face[0] = convert_int_rte(ix*factor);
+ face[1] = convert_int_rte(iy*factor);
+ face[2] = convert_int_rte(windowsize.x*factor);
+ face[3] = convert_int_rte(windowsize.y*factor);
+ }
+ }
+ }
+}
+
+#if 0
+__kernel void runLBPClassifierStump(
+ __global const int* sum,
+ int sumstep, int sumoffset,
+ __global const int* sqsum,
+ int sqsumstep, int sqsumoffset,
+ __global const OptFeature* optfeatures,
+
+ int nstages,
+ __global const Stage* stages,
+ __global const Stump* stumps,
+ __global const int* bitsets,
+ int bitsetSize,
+
+ volatile __global int* facepos,
+ int2 imgsize, int xyscale, float factor,
+ int4 normrect, int2 windowsize, int maxFaces)
+{
+ int ix = get_global_id(0)*xyscale*VECTOR_SIZE;
+ int iy = get_global_id(1)*xyscale;
+ sumstep /= sizeof(int);
+ sqsumstep /= sizeof(int);
+
+ if( ix < imgsize.x && iy < imgsize.y )
+ {
+ int ntrees;
+ int stageIdx, i;
+ float s = 0.f;
+ __global const Stump* stump = stumps;
+ __global const int* bitset = bitsets;
+ __global const OptFeature* f;
+
+ __global const int* psum = sum + mad24(iy, sumstep, ix);
+ __global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
+ int normarea = normrect.z * normrect.w;
+ float invarea = 1.f/normarea;
+ float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] +
+ pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
+ float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
+ float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
+ float4 weight;
+ int4 ofs;
+ nf = nf > 0 ? nf : 1.f;
+
+ for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
+ {
+ ntrees = stages[stageIdx].ntrees;
+ s = 0.f;
+ for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize )
+ {
+ f = optfeatures + stump->featureIdx;
+
+ weight = f->weight;
+
+ // compute LBP feature to val
+ s += (bitset[val >> 5] & (1 << (val & 31))) ? stump->left : stump->right;
+ }
+
+ if( s < stages[stageIdx].threshold )
+ break;
+ }
+
+ if( stageIdx == nstages )
+ {
+ int nfaces = atomic_inc(facepos);
+ if( nfaces < maxFaces )
{
volatile __global int* face = facepos + 1 + nfaces*4;
face[0] = convert_int_rte(ix*factor);
@@ -153,3 +183,5 @@ __kernel void runHaarClassifierStump(
}
}
}
+#endif
+

0 comments on commit b7553d4

Please sign in to comment.
Something went wrong with that request. Please try again.