Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Made changes to allow ml module to work with big data. #395

Merged
merged 3 commits into from

6 participants

Leonid Beynenson Vadim Pisarevsky beenking Andrey Kamaev samuel OpenCV Pushbot
Leonid Beynenson
Collaborator

No description provided.

Vadim Pisarevsky vpisarev was assigned
Vadim Pisarevsky
Owner

Leonid, could you please correct the warnings: http://pullrequest.opencv.org

Andrey Kamaev taka-no-me commented on the diff
modules/ml/include/opencv2/ml/ml.hpp
@@ -796,7 +796,7 @@ struct CV_EXPORTS CvDTreeTrainData
const CvMat* responses;
CvMat* responses_copy; // used in Boosting
- int buf_count, buf_size;
+ int buf_count, buf_size; // buf_size is obsolete, please do not use it, use expression ((int64)buf->rows * (int64)buf->cols / buf_count) instead

Can this field be completely removed in master?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Vadim Pisarevsky
Owner

Yes, removing buf_size is probably a good idea.

Vadim Pisarevsky
Owner

:+1:

OpenCV Pushbot opencv-pushbot merged commit 87b0126 into from
samuel

I use 20,000 positive face samples to train a face detector. If I use a decision tree with depth 2 as the weak classifier, it takes at least 100 weak classifiers to achieve 0.9 TP and 0.3 FP. Is that right? I don't know if it's OK to post a question here.

beenking

orz

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jan 31, 2013
  1. Leonid Beynenson
Commits on Feb 1, 2013
  1. Leonid Beynenson

    Changed types of some variables from int64 back to int.

    LeonidBeynenson authored
    Also corrected some indexes to be size_t.
  2. Leonid Beynenson
This page is out of date. Refresh to see the latest.
71 apps/traincascade/boost.cpp
View
@@ -360,7 +360,7 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
if (is_buf_16u)
{
- unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+ unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
vi*sample_count + data_root->offset);
for( int i = 0; i < num_valid; i++ )
{
@@ -373,7 +373,7 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
}
else
{
- int* idst_idx = buf->data.i + root->buf_idx*buf->cols +
+ int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
vi*sample_count + root->offset;
for( int i = 0; i < num_valid; i++ )
{
@@ -390,14 +390,14 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
const int* src_lbls = get_cv_labels(data_root, (int*)(uchar*)inn_buf);
if (is_buf_16u)
{
- unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+ unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
(workVarCount-1)*sample_count + root->offset);
for( int i = 0; i < count; i++ )
udst[i] = (unsigned short)src_lbls[sidx[i]];
}
else
{
- int* idst = buf->data.i + root->buf_idx*buf->cols +
+ int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
(workVarCount-1)*sample_count + root->offset;
for( int i = 0; i < count; i++ )
idst[i] = src_lbls[sidx[i]];
@@ -407,14 +407,14 @@ CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_id
const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
if (is_buf_16u)
{
- unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+ unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
workVarCount*sample_count + root->offset);
for( int i = 0; i < count; i++ )
sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
}
else
{
- int* sample_idx_dst = buf->data.i + root->buf_idx*buf->cols +
+ int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
workVarCount*sample_count + root->offset;
for( int i = 0; i < count; i++ )
sample_idx_dst[i] = sample_idx_src[sidx[i]];
@@ -489,6 +489,10 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat
int* idst = 0;
unsigned short* udst = 0;
+ uint64 effective_buf_size = 0;
+ int effective_buf_height = 0, effective_buf_width = 0;
+
+
clear();
shared = true;
have_labels = true;
@@ -548,13 +552,28 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat
var_type->data.i[var_count] = cat_var_count;
var_type->data.i[var_count+1] = cat_var_count+1;
work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/;
- buf_size = (work_var_count + 1) * sample_count/*sample_indices*/;
buf_count = 2;
+ buf_size = -1; // the member buf_size is obsolete
+
+ effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
+ effective_buf_width = sample_count;
+ effective_buf_height = work_var_count+1;
+
+ if (effective_buf_width >= effective_buf_height)
+ effective_buf_height *= buf_count;
+ else
+ effective_buf_width *= buf_count;
+
+ if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
+ {
+ CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
+ }
+
if ( is_buf_16u )
- buf = cvCreateMat( buf_count, buf_size, CV_16UC1 );
+ buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 );
else
- buf = cvCreateMat( buf_count, buf_size, CV_32SC1 );
+ buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 );
cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );
@@ -609,7 +628,7 @@ void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluat
priors_mult = cvCloneMat( priors );
counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
direction = cvCreateMat( 1, sample_count, CV_8UC1 );
- split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );
+ split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer
}
void CvCascadeBoostTrainData::free_train_data()
@@ -652,10 +671,10 @@ void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* o
if ( vi < numPrecalcIdx )
{
if( !is_buf_16u )
- *sortedIndices = buf->data.i + n->buf_idx*buf->cols + vi*sample_count + n->offset;
+ *sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset;
else
{
- const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
+ const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
vi*sample_count + n->offset );
for( int i = 0; i < nodeSampleCount; i++ )
sortedIndicesBuf[i] = shortIndices[i];
@@ -1027,6 +1046,7 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
int newBufIdx = data->get_child_buf_idx( node );
int workVarCount = data->get_work_var_count();
CvMat* buf = data->buf;
+ size_t length_buf_row = data->get_length_subbuf();
cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
int* tempBuf = (int*)(uchar*)inn_buf;
bool splitInputData;
@@ -1070,7 +1090,7 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
ushort *ldst, *rdst;
- ldst = (ushort*)(buf->data.s + left->buf_idx*buf->cols +
+ ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row +
vi*scount + left->offset);
rdst = (ushort*)(ldst + nl);
@@ -1096,9 +1116,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
else
{
int *ldst, *rdst;
- ldst = buf->data.i + left->buf_idx*buf->cols +
+ ldst = buf->data.i + left->buf_idx*length_buf_row +
vi*scount + left->offset;
- rdst = buf->data.i + right->buf_idx*buf->cols +
+ rdst = buf->data.i + right->buf_idx*length_buf_row +
vi*scount + right->offset;
// split sorted
@@ -1131,9 +1151,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
- unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
+ unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
(workVarCount-1)*scount + left->offset);
- unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
+ unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
(workVarCount-1)*scount + right->offset);
for( int i = 0; i < n; i++ )
@@ -1154,9 +1174,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
}
else
{
- int *ldst = buf->data.i + left->buf_idx*buf->cols +
+ int *ldst = buf->data.i + left->buf_idx*length_buf_row +
(workVarCount-1)*scount + left->offset;
- int *rdst = buf->data.i + right->buf_idx*buf->cols +
+ int *rdst = buf->data.i + right->buf_idx*length_buf_row +
(workVarCount-1)*scount + right->offset;
for( int i = 0; i < n; i++ )
@@ -1184,9 +1204,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
- unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
+ unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
workVarCount*scount + left->offset);
- unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
+ unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
workVarCount*scount + right->offset);
for (int i = 0; i < n; i++)
{
@@ -1205,9 +1225,9 @@ void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
}
else
{
- int* ldst = buf->data.i + left->buf_idx*buf->cols +
+ int* ldst = buf->data.i + left->buf_idx*length_buf_row +
workVarCount*scount + left->offset;
- int* rdst = buf->data.i + right->buf_idx*buf->cols +
+ int* rdst = buf->data.i + right->buf_idx*length_buf_row +
workVarCount*scount + right->offset;
for (int i = 0; i < n; i++)
{
@@ -1352,6 +1372,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree )
sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
}
CvMat* buf = data->buf;
+ size_t length_buf_row = data->get_length_subbuf();
if( !tree ) // before training the first tree, initialize weights and other parameters
{
int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
@@ -1375,7 +1396,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree )
if (data->is_buf_16u)
{
- unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*buf->cols +
+ unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
data->data_root->offset + (data->work_var_count-1)*data->sample_count);
for( int i = 0; i < n; i++ )
{
@@ -1393,7 +1414,7 @@ void CvCascadeBoost::update_weights( CvBoostTree* tree )
}
else
{
- int* labels = buf->data.i + data->data_root->buf_idx*buf->cols +
+ int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
data->data_root->offset + (data->work_var_count-1)*data->sample_count;
for( int i = 0; i < n; i++ )
8 modules/ml/include/opencv2/ml/ml.hpp
View
@@ -796,7 +796,7 @@ struct CV_EXPORTS CvDTreeTrainData
const CvMat* responses;
CvMat* responses_copy; // used in Boosting
- int buf_count, buf_size;
+ int buf_count, buf_size; // buf_size is obsolete, please do not use it, use expression ((int64)buf->rows * (int64)buf->cols / buf_count) instead

Can this field be completely removed in master?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
bool shared;
int is_buf_16u;
@@ -806,6 +806,12 @@ struct CV_EXPORTS CvDTreeTrainData
CvMat* counts;
CvMat* buf;
+ inline size_t get_length_subbuf() const
+ {
+ size_t res = (size_t)(work_var_count + 1) * (size_t)sample_count;
+ return res;
+ }
+
CvMat* direction;
CvMat* split_buf;
14 modules/ml/src/boost.cpp
View
@@ -1130,13 +1130,13 @@ CvBoost::update_weights( CvBoostTree* tree )
int *sample_idx_buf;
const int* sample_idx = 0;
cv::AutoBuffer<uchar> inn_buf;
- size_t _buf_size = (params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? data->sample_count*sizeof(int) : 0;
+ size_t _buf_size = (params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? (size_t)(data->sample_count)*sizeof(int) : 0;
if( !tree )
_buf_size += n*sizeof(int);
else
{
if( have_subsample )
- _buf_size += data->buf->cols*(sizeof(float)+sizeof(uchar));
+ _buf_size += data->get_length_subbuf()*(sizeof(float)+sizeof(uchar));
}
inn_buf.allocate(_buf_size);
uchar* cur_buf_pos = (uchar*)inn_buf;
@@ -1151,6 +1151,7 @@ CvBoost::update_weights( CvBoostTree* tree )
sample_idx = data->get_sample_indices( data->data_root, sample_idx_buf );
}
CvMat* dtree_data_buf = data->buf;
+ size_t length_buf_row = data->get_length_subbuf();
if( !tree ) // before training the first tree, initialize weights and other parameters
{
int* class_labels_buf = (int*)cur_buf_pos;
@@ -1189,7 +1190,7 @@ CvBoost::update_weights( CvBoostTree* tree )
if (data->is_buf_16u)
{
- unsigned short* labels = (unsigned short*)(dtree_data_buf->data.s + data->data_root->buf_idx*dtree_data_buf->cols +
+ unsigned short* labels = (unsigned short*)(dtree_data_buf->data.s + data->data_root->buf_idx*length_buf_row +
data->data_root->offset + (data->work_var_count-1)*data->sample_count);
for( i = 0; i < n; i++ )
{
@@ -1207,7 +1208,7 @@ CvBoost::update_weights( CvBoostTree* tree )
}
else
{
- int* labels = dtree_data_buf->data.i + data->data_root->buf_idx*dtree_data_buf->cols +
+ int* labels = dtree_data_buf->data.i + data->data_root->buf_idx*length_buf_row +
data->data_root->offset + (data->work_var_count-1)*data->sample_count;
for( i = 0; i < n; i++ )
@@ -1254,9 +1255,10 @@ CvBoost::update_weights( CvBoostTree* tree )
if( have_subsample )
{
float* values = (float*)cur_buf_pos;
- cur_buf_pos = (uchar*)(values + data->buf->cols);
+ cur_buf_pos = (uchar*)(values + data->get_length_subbuf());
uchar* missing = cur_buf_pos;
- cur_buf_pos = missing + data->buf->step;
+ cur_buf_pos = missing + data->get_length_subbuf() * (size_t)CV_ELEM_SIZE(data->buf->type);
+
CvMat _sample, _mask;
// invert the subsample mask
58 modules/ml/src/ertrees.cpp
View
@@ -80,6 +80,9 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
char err[100];
const int *sidx = 0, *vidx = 0;
+ uint64 effective_buf_size = 0;
+ int effective_buf_height = 0, effective_buf_width = 0;
+
if ( _params.use_surrogates )
CV_ERROR(CV_StsBadArg, "CvERTrees do not support surrogate splits");
@@ -179,18 +182,34 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
have_labels = cv_n > 0 || (ord_var_count == 1 && cat_var_count == 0) || _add_labels;
work_var_count = cat_var_count + (is_classifier ? 1 : 0) + (have_labels ? 1 : 0);
- buf_size = (work_var_count + 1)*sample_count;
+
shared = _shared;
buf_count = shared ? 2 : 1;
+ buf_size = -1; // the member buf_size is obsolete
+
+ effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
+ effective_buf_width = sample_count;
+ effective_buf_height = work_var_count+1;
+
+ if (effective_buf_width >= effective_buf_height)
+ effective_buf_height *= buf_count;
+ else
+ effective_buf_width *= buf_count;
+
+ if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
+ {
+ CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
+ }
+
if ( is_buf_16u )
{
- CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_16UC1 ));
+ CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 ));
CV_CALL( pair16u32s_ptr = (CvPair16u32s*)cvAlloc( sample_count*sizeof(pair16u32s_ptr[0]) ));
}
else
{
- CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_32SC1 ));
+ CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 ));
CV_CALL( int_ptr = (int**)cvAlloc( sample_count*sizeof(int_ptr[0]) ));
}
@@ -293,13 +312,13 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
for( i = 0; i < sample_count; i++ )
{
int val = INT_MAX, si = sidx ? sidx[i] : i;
- if( !mask || !mask[si*m_step] )
+ if( !mask || !mask[(size_t)si*m_step] )
{
if( idata )
- val = idata[si*step];
+ val = idata[(size_t)si*step];
else
{
- float t = fdata[si*step];
+ float t = fdata[(size_t)si*step];
val = cvRound(t);
if( val != t )
{
@@ -405,12 +424,12 @@ void CvERTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
{
float val = ord_nan;
int si = sidx ? sidx[i] : i;
- if( !mask || !mask[si*m_step] )
+ if( !mask || !mask[(size_t)si*m_step] )
{
if( idata )
- val = (float)idata[si*step];
+ val = (float)idata[(size_t)si*step];
else
- val = fdata[si*step];
+ val = fdata[(size_t)si*step];
if( fabs(val) >= ord_nan )
{
@@ -578,9 +597,9 @@ const int* CvERTreeTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* cat
int ci = get_var_type( vi);
const int* cat_values = 0;
if( !is_buf_16u )
- cat_values = buf->data.i + n->buf_idx*buf->cols + ci*sample_count + n->offset;
+ cat_values = buf->data.i + n->buf_idx*get_length_subbuf() + ci*sample_count + n->offset;
else {
- const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
+ const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
ci*sample_count + n->offset);
for( int i = 0; i < n->sample_count; i++ )
cat_values_buf[i] = short_values[i];
@@ -1333,6 +1352,7 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
CvDTreeNode *left = 0, *right = 0;
int new_buf_idx = data->get_child_buf_idx( node );
CvMat* buf = data->buf;
+ size_t length_buf_row = data->get_length_subbuf();
cv::AutoBuffer<int> temp_buf(n);
complete_node_dir(node);
@@ -1385,9 +1405,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
- unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
+ unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
ci*scount + left->offset);
- unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
+ unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
ci*scount + right->offset);
for( i = 0; i < n; i++ )
@@ -1415,9 +1435,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
}
else
{
- int *ldst = buf->data.i + left->buf_idx*buf->cols +
+ int *ldst = buf->data.i + left->buf_idx*length_buf_row +
ci*scount + left->offset;
- int *rdst = buf->data.i + right->buf_idx*buf->cols +
+ int *rdst = buf->data.i + right->buf_idx*length_buf_row +
ci*scount + right->offset;
for( i = 0; i < n; i++ )
@@ -1460,9 +1480,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
- unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
+ unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
pos*scount + left->offset);
- unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
+ unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
pos*scount + right->offset);
for (i = 0; i < n; i++)
@@ -1483,9 +1503,9 @@ void CvForestERTree::split_node_data( CvDTreeNode* node )
}
else
{
- int* ldst = buf->data.i + left->buf_idx*buf->cols +
+ int* ldst = buf->data.i + left->buf_idx*length_buf_row +
pos*scount + left->offset;
- int* rdst = buf->data.i + right->buf_idx*buf->cols +
+ int* rdst = buf->data.i + right->buf_idx*length_buf_row +
pos*scount + right->offset;
for (i = 0; i < n; i++)
{
99 modules/ml/src/tree.cpp
View
@@ -50,7 +50,8 @@ static const int block_size_delta = 1 << 10;
CvDTreeTrainData::CvDTreeTrainData()
{
var_idx = var_type = cat_count = cat_ofs = cat_map =
- priors = priors_mult = counts = buf = direction = split_buf = responses_copy = 0;
+ priors = priors_mult = counts = direction = split_buf = responses_copy = 0;
+ buf = 0;
tree_storage = temp_storage = 0;
clear();
@@ -64,7 +65,8 @@ CvDTreeTrainData::CvDTreeTrainData( const CvMat* _train_data, int _tflag,
bool _shared, bool _add_labels )
{
var_idx = var_type = cat_count = cat_ofs = cat_map =
- priors = priors_mult = counts = buf = direction = split_buf = responses_copy = 0;
+ priors = priors_mult = counts = direction = split_buf = responses_copy = 0;
+ buf = 0;
tree_storage = temp_storage = 0;
@@ -157,6 +159,9 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
char err[100];
const int *sidx = 0, *vidx = 0;
+ uint64 effective_buf_size = 0;
+ int effective_buf_height = 0, effective_buf_width = 0;
+
if( _update_data && data_root )
{
data = new CvDTreeTrainData( _train_data, _tflag, _responses, _var_idx,
@@ -285,18 +290,35 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
work_var_count = var_count + (is_classifier ? 1 : 0) // for responses class_labels
+ (have_labels ? 1 : 0); // for cv_labels
- buf_size = (work_var_count + 1 /*for sample_indices*/) * sample_count;
shared = _shared;
buf_count = shared ? 2 : 1;
+ buf_size = -1; // the member buf_size is obsolete
+
+ effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
+ effective_buf_width = sample_count;
+ effective_buf_height = work_var_count+1;
+
+ if (effective_buf_width >= effective_buf_height)
+ effective_buf_height *= buf_count;
+ else
+ effective_buf_width *= buf_count;
+
+ if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
+ {
+ CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
+ }
+
+
+
if ( is_buf_16u )
{
- CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_16UC1 ));
+ CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 ));
CV_CALL( pair16u32s_ptr = (CvPair16u32s*)cvAlloc( sample_count*sizeof(pair16u32s_ptr[0]) ));
}
else
{
- CV_CALL( buf = cvCreateMat( buf_count, buf_size, CV_32SC1 ));
+ CV_CALL( buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 ));
CV_CALL( int_ptr = (int**)cvAlloc( sample_count*sizeof(int_ptr[0]) ));
}
@@ -356,7 +378,7 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
{
int ci;
const uchar* mask = 0;
- int m_step = 0, step;
+ int64 m_step = 0, step;
const int* idata = 0;
const float* fdata = 0;
int num_valid = 0;
@@ -399,13 +421,13 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
for( i = 0; i < sample_count; i++ )
{
int val = INT_MAX, si = sidx ? sidx[i] : i;
- if( !mask || !mask[si*m_step] )
+ if( !mask || !mask[(size_t)si*m_step] )
{
if( idata )
- val = idata[si*step];
+ val = idata[(size_t)si*step];
else
{
- float t = fdata[si*step];
+ float t = fdata[(size_t)si*step];
val = cvRound(t);
if( fabs(t - val) > FLT_EPSILON )
{
@@ -515,12 +537,12 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
{
float val = ord_nan;
int si = sidx ? sidx[i] : i;
- if( !mask || !mask[si*m_step] )
+ if( !mask || !mask[(size_t)si*m_step] )
{
if( idata )
- val = (float)idata[si*step];
+ val = (float)idata[(size_t)si*step];
else
- val = fdata[si*step];
+ val = fdata[(size_t)si*step];
if( fabs(val) >= ord_nan )
{
@@ -532,7 +554,7 @@ void CvDTreeTrainData::set_data( const CvMat* _train_data, int _tflag,
}
if (is_buf_16u)
- udst[i] = (unsigned short)i;
+ udst[i] = (unsigned short)i; // TODO: memory corruption may be here
else
idst[i] = i;
_fdst[i] = val;
@@ -751,7 +773,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
if (is_buf_16u)
{
- unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+ unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
vi*sample_count + root->offset);
for( i = 0; i < count; i++ )
{
@@ -762,7 +784,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
}
else
{
- int* idst = buf->data.i + root->buf_idx*buf->cols +
+ int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
vi*sample_count + root->offset;
for( i = 0; i < count; i++ )
{
@@ -788,7 +810,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
if (is_buf_16u)
{
- unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+ unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
vi*sample_count + data_root->offset);
for( i = 0; i < num_valid; i++ )
{
@@ -812,7 +834,7 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
}
else
{
- int* idst_idx = buf->data.i + root->buf_idx*buf->cols +
+ int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
vi*sample_count + root->offset;
for( i = 0; i < num_valid; i++ )
{
@@ -840,14 +862,14 @@ CvDTreeNode* CvDTreeTrainData::subsample_data( const CvMat* _subsample_idx )
const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
if (is_buf_16u)
{
- unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+ unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
workVarCount*sample_count + root->offset);
for (i = 0; i < count; i++)
sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
}
else
{
- int* sample_idx_dst = buf->data.i + root->buf_idx*buf->cols +
+ int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
workVarCount*sample_count + root->offset;
for (i = 0; i < count; i++)
sample_idx_dst[i] = sample_idx_src[sidx[i]];
@@ -1158,10 +1180,10 @@ void CvDTreeTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ord_valu
const int* sample_indices = get_sample_indices(n, sample_indices_buf);
if( !is_buf_16u )
- *sorted_indices = buf->data.i + n->buf_idx*buf->cols +
+ *sorted_indices = buf->data.i + n->buf_idx*get_length_subbuf() +
vi*sample_count + n->offset;
else {
- const unsigned short* short_indices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
+ const unsigned short* short_indices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
vi*sample_count + n->offset );
for( int i = 0; i < node_sample_count; i++ )
sorted_indices_buf[i] = short_indices[i];
@@ -1232,10 +1254,10 @@ const int* CvDTreeTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* cat_
{
const int* cat_values = 0;
if( !is_buf_16u )
- cat_values = buf->data.i + n->buf_idx*buf->cols +
+ cat_values = buf->data.i + n->buf_idx*get_length_subbuf() +
vi*sample_count + n->offset;
else {
- const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
+ const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
vi*sample_count + n->offset);
for( int i = 0; i < n->sample_count; i++ )
cat_values_buf[i] = short_values[i];
@@ -3004,6 +3026,7 @@ void CvDTree::split_node_data( CvDTreeNode* node )
int new_buf_idx = data->get_child_buf_idx( node );
int work_var_count = data->get_work_var_count();
CvMat* buf = data->buf;
+ size_t length_buf_row = data->get_length_subbuf();
cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int) + sizeof(float)));
int* temp_buf = (int*)(uchar*)inn_buf;
@@ -3049,7 +3072,7 @@ void CvDTree::split_node_data( CvDTreeNode* node )
{
unsigned short *ldst, *rdst, *ldst0, *rdst0;
//unsigned short tl, tr;
- ldst0 = ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
+ ldst0 = ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
vi*scount + left->offset);
rdst0 = rdst = (unsigned short*)(ldst + nl);
@@ -3095,9 +3118,9 @@ void CvDTree::split_node_data( CvDTreeNode* node )
else
{
int *ldst0, *ldst, *rdst0, *rdst;
- ldst0 = ldst = buf->data.i + left->buf_idx*buf->cols +
+ ldst0 = ldst = buf->data.i + left->buf_idx*length_buf_row +
vi*scount + left->offset;
- rdst0 = rdst = buf->data.i + right->buf_idx*buf->cols +
+ rdst0 = rdst = buf->data.i + right->buf_idx*length_buf_row +
vi*scount + right->offset;
// split sorted
@@ -3158,9 +3181,9 @@ void CvDTree::split_node_data( CvDTreeNode* node )
if (data->is_buf_16u)
{
- unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
+ unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
vi*scount + left->offset);
- unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
+ unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
vi*scount + right->offset);
for( i = 0; i < n; i++ )
@@ -3188,9 +3211,9 @@ void CvDTree::split_node_data( CvDTreeNode* node )
}
else
{
- int *ldst = buf->data.i + left->buf_idx*buf->cols +
+ int *ldst = buf->data.i + left->buf_idx*length_buf_row +
vi*scount + left->offset;
- int *rdst = buf->data.i + right->buf_idx*buf->cols +
+ int *rdst = buf->data.i + right->buf_idx*length_buf_row +
vi*scount + right->offset;
for( i = 0; i < n; i++ )
@@ -3230,9 +3253,9 @@ void CvDTree::split_node_data( CvDTreeNode* node )
int pos = data->get_work_var_count();
if (data->is_buf_16u)
{
- unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
+ unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
pos*scount + left->offset);
- unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
+ unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
pos*scount + right->offset);
for (i = 0; i < n; i++)
{
@@ -3252,9 +3275,9 @@ void CvDTree::split_node_data( CvDTreeNode* node )
}
else
{
- int* ldst = buf->data.i + left->buf_idx*buf->cols +
+ int* ldst = buf->data.i + left->buf_idx*length_buf_row +
pos*scount + left->offset;
- int* rdst = buf->data.i + right->buf_idx*buf->cols +
+ int* rdst = buf->data.i + right->buf_idx*length_buf_row +
pos*scount + right->offset;
for (i = 0; i < n; i++)
{
@@ -3310,7 +3333,7 @@ float CvDTree::calc_error( CvMLData* _data, int type, vector<float> *resp )
float r = (float)predict( &sample, missing ? &miss : 0 )->value;
if( pred_resp )
pred_resp[i] = r;
- int d = fabs((double)r - response->data.fl[si*r_step]) <= FLT_EPSILON ? 0 : 1;
+ int d = fabs((double)r - response->data.fl[(size_t)si*r_step]) <= FLT_EPSILON ? 0 : 1;
err += d;
}
err = sample_count ? err / (float)sample_count * 100 : -FLT_MAX;
@@ -3327,7 +3350,7 @@ float CvDTree::calc_error( CvMLData* _data, int type, vector<float> *resp )
float r = (float)predict( &sample, missing ? &miss : 0 )->value;
if( pred_resp )
pred_resp[i] = r;
- float d = r - response->data.fl[si*r_step];
+ float d = r - response->data.fl[(size_t)si*r_step];
err += d*d;
}
err = sample_count ? err / (float)sample_count : -FLT_MAX;
@@ -3633,8 +3656,8 @@ CvDTreeNode* CvDTree::predict( const CvMat* _sample,
int vi = split->var_idx;
int ci = vtype[vi];
i = vidx ? vidx[vi] : vi;
- float val = sample[i*step];
- if( m && m[i*mstep] )
+ float val = sample[(size_t)i*step];
+ if( m && m[(size_t)i*mstep] )
continue;
if( ci < 0 ) // ordered
dir = val <= split->ord.c ? -1 : 1;
Something went wrong with that request. Please try again.