Skip to content

Commit

Permalink
Dynamic beagle instance growth -- saves much memory.
Browse files Browse the repository at this point in the history
  • Loading branch information
koadman committed Oct 12, 2012
1 parent 7463cdf commit 868b050
Showing 1 changed file with 129 additions and 110 deletions.
239 changes: 129 additions & 110 deletions src/hmsbeagle.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,14 @@ class OnlineCalculator
int nPatterns;
int nPartBuffs;
int instance;
int treeknown;

int next_id;
std::stack<int> free_ids;
std::unordered_map< int, double > id_ll; // stores the root ll at each ID
std::unordered_map< int, double > id_ll; // caches the root ll at each ID

int init_beagle();
void set_eigen_and_rates_and_weights( int instance );
void grow();
};

int OnlineCalculator::get_id()
Expand All @@ -47,7 +50,7 @@ int OnlineCalculator::get_id()
}
if(next_id == nPartBuffs) {
// oh no! we ran out of buffer slots! need to reallocate
throw "Ran out of beagle slots";
grow();
}
return next_id++;
}
Expand All @@ -58,96 +61,117 @@ void OnlineCalculator::free_id(int id)
id_ll.erase(id);
}

void OnlineCalculator::grow()
{
nPartBuffs = nPartBuffs * 1.61803398875; // Growing with the golden ratio.
int new_instance = init_beagle();
std::cerr << "Growing to " << nPartBuffs << std::endl;

// Copy all partial probability data into the new instance.
// XXX: there does not seem to be any way to copy scale factors
// or to set partials with a particular weight so we need to force a full re-peel.
double temp[nPatterns * stateCount];
for(int i=0; i<next_id; i++){
beagleGetPartials(instance, i, 0, temp);
beagleSetPartials(new_instance, i, temp);
}

// Clear the likelihood cache.
id_ll.clear();

set_eigen_and_rates_and_weights( new_instance );

// free the old instance
beagleFinalizeInstance(instance);
instance = new_instance;
}

///Initialize an instance of the BEAGLE library.

/// \param seqs The sequences.
void OnlineCalculator::initialize(const std::vector<std::string>& seqs)
{
bool reinit = false;
if(nPatterns != seqs[1].size()) reinit = true;
if(seqs.size()*2 + 2 >= nPartBuffs) reinit = true;
if(instance < 0) reinit = true;


// Create an instance of the BEAGLE library.
if( reinit ) {
nPatterns = seqs[1].size();
nPartBuffs = seqs.size() * 3000;
if(instance >= 0) beagleFinalizeInstance(instance);
instance = beagleCreateInstance(
0, /**< Number of tip data elements (input) */
nPartBuffs, /**< Number of partials buffers to create (input) */
0, /**< Number of compact state representation buffers to create (input) */
stateCount, /**< Number of states in the continuous-time Markov chain (input) */
nPatterns, /**< Number of site patterns to be handled by the instance (input) */
nPartBuffs, /**< Number of rate matrix eigen-decomposition buffers to allocate (input) */
nPartBuffs, /**< Number of rate matrix buffers (input) */
1, /**< Number of rate categories (input) */
nPartBuffs, /**< Number of scaling buffers */
NULL, /**< List of potential resource on which this instance is allowed (input, NULL implies no restriction */
0, /**< Length of resourceList list (input) */
BEAGLE_FLAG_PRECISION_DOUBLE | BEAGLE_FLAG_SCALING_AUTO, /**< Bit-flags indicating preferred implementation charactertistics, see BeagleFlags (input) */
0, /**< Bit-flags indicating required implementation characteristics, see BeagleFlags (input) */
&instDetails);
if (instance < 0) {
fprintf(stderr, "Failed to obtain BEAGLE instance\n\n");
exit(1);
}

treeknown = 0; // None of the tip sequences have been set.
}

nPatterns = seqs[1].size();
nPartBuffs = seqs.size() * 100;
instance = init_beagle();

// Add any new sequences to the BEAGLE instance.
for(int i=treeknown; i<seqs.size(); i++) {
for(int i=0; i<seqs.size(); i++) {
double *seq_partials = get_partials(seqs[i]);
beagleSetPartials(instance, i, seq_partials);
// std::cout << "Adding new sequence " << i << "\n" << seqs[i] << std::endl;
}
treeknown = seqs.size();
next_id = seqs.size();

if( reinit ) {
// create base frequency array
double freqs[16] = { 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25
};

// an eigen decomposition for the JC69 model
double evec[4 * 4] = {
1.0, 2.0, 0.0, 0.5,
1.0, -2.0, 0.5, 0.0,
1.0, 2.0, 0.0, -0.5,
1.0, -2.0, -0.5, 0.0
};

double ivec[4 * 4] = {
0.25, 0.25, 0.25, 0.25,
0.125, -0.125, 0.125, -0.125,
0.0, 1.0, 0.0, -1.0,
1.0, 0.0, -1.0, 0.0
};

double eval[4] = { 0.0, -1.3333333333333333, -1.3333333333333333, -1.3333333333333333 };

// set the Eigen decomposition
beagleSetEigenDecomposition(instance, 0, evec, ivec, eval);

beagleSetStateFrequencies(instance, 0, freqs);
double rates[1] = { 1 };
beagleSetCategoryRates(instance, &rates[0]);
double weights[1] = { 1 };
beagleSetCategoryWeights(instance, 0, weights);

// XXX AD: do we need to free any old pattern weights?
double* patternWeights = (double*) malloc(sizeof(double) * nPatterns);
for (int i = 0; i < nPatterns; i++) {
patternWeights[i] = 1.0;
}
beagleSetPatternWeights(instance, patternWeights);
set_eigen_and_rates_and_weights( instance );
this->seqs = seqs;
}

int OnlineCalculator::init_beagle()
{
int new_instance = beagleCreateInstance(
0, /**< Number of tip data elements (input) */
nPartBuffs, /**< Number of partials buffers to create (input) */
0, /**< Number of compact state representation buffers to create (input) */
stateCount, /**< Number of states in the continuous-time Markov chain (input) */
nPatterns, /**< Number of site patterns to be handled by the instance (input) */
nPartBuffs, /**< Number of rate matrix eigen-decomposition buffers to allocate (input) */
nPartBuffs, /**< Number of rate matrix buffers (input) */
1, /**< Number of rate categories (input) */
nPartBuffs, /**< Number of scaling buffers */
NULL, /**< List of potential resource on which this instance is allowed (input, NULL implies no restriction */
0, /**< Length of resourceList list (input) */
BEAGLE_FLAG_PRECISION_DOUBLE | BEAGLE_FLAG_SCALING_AUTO, /**< Bit-flags indicating preferred implementation charactertistics, see BeagleFlags (input) */
0, /**< Bit-flags indicating required implementation characteristics, see BeagleFlags (input) */
&instDetails);
if (new_instance < 0) {
fprintf(stderr, "Failed to obtain BEAGLE instance\n\n");
exit(1);
}
return new_instance;
}

void OnlineCalculator::set_eigen_and_rates_and_weights( int inst )
{
// create base frequency array
double freqs[16] = { 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25
};

// an eigen decomposition for the JC69 model
double evec[4 * 4] = {
1.0, 2.0, 0.0, 0.5,
1.0, -2.0, 0.5, 0.0,
1.0, 2.0, 0.0, -0.5,
1.0, -2.0, -0.5, 0.0
};

double ivec[4 * 4] = {
0.25, 0.25, 0.25, 0.25,
0.125, -0.125, 0.125, -0.125,
0.0, 1.0, 0.0, -1.0,
1.0, 0.0, -1.0, 0.0
};

double eval[4] = { 0.0, -1.3333333333333333, -1.3333333333333333, -1.3333333333333333 };

// set the Eigen decomposition
beagleSetEigenDecomposition(inst, 0, evec, ivec, eval);

beagleSetStateFrequencies(inst, 0, freqs);
double rates[1] = { 1 };
beagleSetCategoryRates(inst, &rates[0]);
double weights[1] = { 1 };
beagleSetCategoryWeights(inst, 0, weights);

double patternWeights[nPatterns];
for (int i = 0; i < nPatterns; i++) {
patternWeights[i] = 1.0;
}
beagleSetPatternWeights(inst, patternWeights);
}

double OnlineCalculator::calculate_ll( std::shared_ptr< phylo_node > node, std::vector<bool>& visited )
Expand All @@ -172,16 +196,16 @@ double OnlineCalculator::calculate_ll( std::shared_ptr< phylo_node > node, std::
// assert(cur->child2 == NULL);
continue;
}
visited[cur->child1->id]=true;
visited[cur->child2->id]=true;
visited[cur->child1->id]=id_ll.find( cur->child1->id ) != id_ll.end();
visited[cur->child2->id]=id_ll.find( cur->child2->id ) != id_ll.end();
s.push(cur->child1);
s.push(cur->child2);
if(!visited[cur->id]){
ops_tmp.push_back( {
cur->id, // index of destination, or parent, partials buffer
BEAGLE_OP_NONE, // index of scaling buffer to write to (if set to BEAGLE_OP_NONE then calculation of new scalers is disabled)
BEAGLE_OP_NONE, // index of scaling buffer to read from (if set to BEAGLE_OP_NONE then use of existing scale factors is disabled)
cur->child1->id, // index of first child partials buffer
cur->child1->id, // index of first child partials buffer
cur->child1->id, // index of transition matrix of first partials child buffer
cur->child2->id, // index of second child partials buffer
cur->child2->id}); // index of transition matrix of second partials child buffer
Expand All @@ -200,34 +224,32 @@ double OnlineCalculator::calculate_ll( std::shared_ptr< phylo_node > node, std::

if(ops_tmp.size() > 0){

// Need to reverse the order to make post-order.
ops.insert(ops.begin(), ops_tmp.rbegin(),ops_tmp.rend());

// Tell BEAGLE to populate the transition matrices for the above edge lengths.
beagleUpdateTransitionMatrices(instance, // instance
0, // eigenIndex
nind.data(), // probabilityIndices
NULL, // firstDerivativeIndices
NULL, // secondDerivativeIndices
lens.data(), // edgeLengths
nind.size()); // count

// Create a list of partial likelihood update operations.
// The order is [dest, destScaling, sourceScaling, source1, matrix1, source2, matrix2].
// TODO: make this peel only where the new node was added.
BeagleOperation operations[ops.size()];
int scaleIndices[ops.size()];
for(int i=0; i<ops.size(); i++) {
scaleIndices[i]=ops[i].destinationPartials;
operations[i] = ops[i];
}
// Need to reverse the order to make post-order.
ops.insert(ops.begin(), ops_tmp.rbegin(),ops_tmp.rend());

// Tell BEAGLE to populate the transition matrices for the above edge lengths.
beagleUpdateTransitionMatrices(instance, // instance
0, // eigenIndex
nind.data(), // probabilityIndices
NULL, // firstDerivativeIndices
NULL, // secondDerivativeIndices
lens.data(), // edgeLengths
nind.size()); // count

// Create a list of partial likelihood update operations.
// The order is [dest, destScaling, sourceScaling, source1, matrix1, source2, matrix2].
// TODO: make this peel only where the new node was added.
BeagleOperation operations[ops.size()];
int scaleIndices[ops.size()];
for(int i=0; i<ops.size(); i++) {
scaleIndices[i]=ops[i].destinationPartials;
operations[i] = ops[i];
}

// Update the partials.
beagleUpdatePartials(instance, operations, ops.size(), node->id); // cumulative scaling index
beagleAccumulateScaleFactors(instance, scaleIndices, ops.size(), BEAGLE_OP_NONE);
// Update the partials.
beagleUpdatePartials(instance, operations, ops.size(), node->id); // cumulative scaling index
beagleAccumulateScaleFactors(instance, scaleIndices, ops.size(), BEAGLE_OP_NONE);

}else{
int i=1;
}

double logL = 0.0;
Expand All @@ -247,9 +269,6 @@ double OnlineCalculator::calculate_ll( std::shared_ptr< phylo_node > node, std::
&logL); // outLogLikelihoods

id_ll[ node->id ] = logL; // stash the log likelihood for later use
if(logL < -10000000 ){
int j = 1;
}
return logL;
}

Expand Down

0 comments on commit 868b050

Please sign in to comment.