Skip to content

Commit

Permalink
Merge pull request #273 from mcveanlab/lasso
Browse files Browse the repository at this point in the history
Lasso adding header
  • Loading branch information
shajoezhu committed Oct 10, 2018
2 parents 8db6899 + 68ceeb1 commit 2f71665
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 25 deletions.
14 changes: 11 additions & 3 deletions src/dEploidIO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ void DEploidIO::core() {


void DEploidIO::init() {
this->setDoPrintLassoPanel(false);
this->setIsCopied(false);
this->setDoExportRecombProb(false);
this->setrandomSeedWasGiven(false);
Expand Down Expand Up @@ -450,6 +451,8 @@ void DEploidIO::parse () {
} else if ( *argv_i == "-lasso" ) {
this->setUseLasso(true);
this->setDoUpdateProp(false);
} else if ( *argv_i == "-writePanel" ) {
this->setDoPrintLassoPanel(true);
} else if ( *argv_i == "-computeLLK" ) {
this->setDoComputeLLK( true );
} else if ( *argv_i == "-ibdPainting" ) {
Expand Down Expand Up @@ -592,7 +595,7 @@ void DEploidIO::printHelp(std::ostream& out) {
out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -exclude data/testData/labStrains.test.exclude.txt -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CPanelExclude -panel data/testData/labStrains.test.panel.txt -painting PG0390-CPanelExclude.hap" << endl;
out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CNopanel -noPanel -k 2 -ibd -nSample 250 -rate 8 -burn 0.67" <<endl;
out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CNopanel -ibdPainting -initialP 0.2 0.8" <<endl;
out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -exclude data/testData/labStrains.test.exclude.txt -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CLassoExclude -panel data/testData/labStrains.test.panel.txt -lasso -initialP 0.2 0.8" << endl;
out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -exclude data/testData/labStrains.test.exclude.txt -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CLassoExclude -panel data/testData/labStrains.test.panel.txt -lasso -initialP 0.2 0.8 -writePanel" << endl;
}


Expand Down Expand Up @@ -878,7 +881,10 @@ void DEploidIO::dEploidLasso() {
vector <double> wsaf = lassoComputeObsWsaf(start, length);
vector < vector <double> > tmpPanel = lassoSubsetPanel(start, length);
DEploidLASSO dummy(tmpPanel, wsaf, 250);

vector <string> newHeader;
for (size_t i = 0; i < dummy.choiceIdx.size(); i++) {
newHeader.push_back(panel->header_[dummy.choiceIdx[i]]);
}
Panel * tmp = new Panel(vecFromTo(this->panel->pRec_, start, end),
vecFromTo(this->panel->pRecEachHap_, start, end),
vecFromTo(this->panel->pNoRec_, start, end),
Expand All @@ -890,7 +896,9 @@ void DEploidIO::dEploidLasso() {
lassoPlafs.push_back(vecFromTo(plaf_, start, end));
lassoRefCount.push_back(vecFromTo(refCount_, start, end));
lassoAltCount.push_back(vecFromTo(altCount_, start, end));
this->writePanel(tmp, chromi);
if (this->doPrintLassoPanel()) {
this->writePanel(tmp, chromi, newHeader);
}
}
for ( auto const& value: this->initialProp ) {
this->finalProp.push_back(value);
Expand Down
6 changes: 5 additions & 1 deletion src/dEploidIO.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ class DEploidIO{
vector < vector <double> > lassoAltCount;

void writeHap (vector < vector <double> > &hap, bool useIBD = false);
bool doPrintLassoPanel_;

private:
void core();
Expand Down Expand Up @@ -279,6 +280,9 @@ class DEploidIO{
}

// Getters and Setters
void setDoPrintLassoPanel ( const bool setTo ) { this->doPrintLassoPanel_ = setTo; }
bool doPrintLassoPanel() const { return this->doPrintLassoPanel_; }

void setIsCopied ( const bool setTo ) { this->isCopied_ = setTo; }
bool isCopied() const { return this->isCopied_; }

Expand Down Expand Up @@ -401,7 +405,7 @@ class DEploidIO{
// Lasso related
vector <double> lassoComputeObsWsaf(size_t segmentStartIndex, size_t nLoci);
vector < vector <double> > lassoSubsetPanel(size_t segmentStartIndex, size_t nLoci);
void writePanel(Panel *panel, size_t chromi);
void writePanel(Panel *panel, size_t chromi, vector <string> hdr);
};

#endif
23 changes: 11 additions & 12 deletions src/export/writeMcmcRelated.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,29 +114,28 @@ void DEploidIO::writeHap(vector < vector <double> > &hap, bool useIBD){
}


void DEploidIO::writePanel(Panel *panel, size_t chromI){
void DEploidIO::writePanel(Panel *panel, size_t chromI, vector <string> hdr){
string strExport = this->prefix_ + ".panel." + to_string(chromI);
remove(strExport.c_str());

ofstreamExportTmp.open( strExport.c_str(), ios::out | ios::app | ios::binary );

// TODO need to redo header
// HEADER
ofstreamExportTmp << "CHROM" << "\t" << "POS" << "\t";;
for ( size_t ii = 0; ii < panel->truePanelSize_; ii++){
ofstreamExportTmp << "h" << (ii+1) ;
ofstreamExportTmp << hdr[ii];
ofstreamExportTmp << ((ii < (panel->truePanelSize_-1)) ? "\t" : "\n") ;
}

size_t siteIndex = 0;
//for ( size_t chromI = 0; chromI < chrom_.size(); chromI++ ){
for ( size_t posI = 0; posI < position_[chromI].size(); posI++){
ofstreamExportTmp << chrom_[chromI] << "\t" << (int)position_[chromI][posI] << "\t";
for ( size_t ii = 0; ii < panel->content_[siteIndex].size(); ii++){
ofstreamExportTmp << panel->content_[siteIndex][ii];
ofstreamExportTmp << ((ii < (panel->content_[siteIndex].size()-1)) ? "\t" : "\n") ;
}
siteIndex++;
for ( size_t posI = 0; posI < position_[chromI].size(); posI++){
ofstreamExportTmp << chrom_[chromI] << "\t" << (int)position_[chromI][posI] << "\t";
for ( size_t ii = 0; ii < panel->content_[siteIndex].size(); ii++){
ofstreamExportTmp << panel->content_[siteIndex][ii];
ofstreamExportTmp << ((ii < (panel->content_[siteIndex].size()-1)) ? "\t" : "\n") ;
}
//}
siteIndex++;
}

assert ( siteIndex == panel->content_.size());
ofstreamExportTmp.close();
Expand Down
24 changes: 24 additions & 0 deletions src/txtReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ void TxtReader::readFromFileBase(const char inchar[]) {
if (in_file.good()) {
// skip the first line, which is the header
getline(in_file, tmp_line);
this->extractHeader(tmp_line);
getline(in_file, tmp_line);
while (tmp_line.size() > 0) {
size_t field_start = 0;
Expand Down Expand Up @@ -91,6 +92,29 @@ void TxtReader::readFromFileBase(const char inchar[]) {
}


void TxtReader::extractHeader(const string &line) {
this->header_.clear();
size_t field_start = 0;
size_t field_end = 0;
size_t field_index = 0;
while (field_end < line.size()) {
field_end = min(
min(line.find(',', field_start),
line.find('\t', field_start)),
line.find('\n', field_start));

string tmp_str = line.substr(field_start,
field_end - field_start);
if (field_index > 1) {
this->header_.push_back(tmp_str);
}

field_start = field_end+1;
field_index++;
}
}


void TxtReader::extractChrom(const string & tmp_str) {
if (tmpChromInex_ >= 0) {
if (tmp_str != this->chrom_.back()) {
Expand Down
2 changes: 2 additions & 0 deletions src/txtReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class TxtReader : public VariantIndex {
// info_ only refers to the first column of the content
vector <double> info_;

vector <string> header_;
size_t nInfoLines_;

int tmpChromInex_;
Expand All @@ -61,6 +62,7 @@ class TxtReader : public VariantIndex {
// Methods
void extractChrom(const string & tmp_str);
void extractPOS(const string & tmp_str);
void extractHeader(const string &line);
void reshapeContentToInfo();
string fileName;

Expand Down
14 changes: 5 additions & 9 deletions tests/unittest/test_dEploidIO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -837,15 +837,11 @@ class TestIO : public CppUnit::TestCase {
"-lasso" };
CPPUNIT_ASSERT_NO_THROW(DEploidIO(10, argv));
DEploidIO dEploidIOlasso(10, argv);
// CPPUNIT_ASSERT_EQUAL(dEploidIOlasso.obsWsaf_.size(), (size_t)594);
// CPPUNIT_ASSERT_DOUBLES_EQUAL(dEploidIOlasso.obsWsaf_[0],
// 0, epsilon3);
// CPPUNIT_ASSERT_DOUBLES_EQUAL(dEploidIOlasso.obsWsaf_[4],
// 0.22772277, epsilon2);
// CPPUNIT_ASSERT_DOUBLES_EQUAL(dEploidIOlasso.obsWsaf_[11],
// 0.21111111, epsilon2);
// CPPUNIT_ASSERT_DOUBLES_EQUAL(dEploidIOlasso.obsWsaf_[589],
// 0.18324607, epsilon2);
CPPUNIT_ASSERT_NO_THROW(dEploidIOlasso.dEploidLasso());
CPPUNIT_ASSERT_EQUAL(dEploidIOlasso.lassoPanels.size(), (size_t)14);
CPPUNIT_ASSERT_EQUAL(dEploidIOlasso.lassoPlafs.size(), (size_t)14);
CPPUNIT_ASSERT_EQUAL(dEploidIOlasso.lassoRefCount.size(), (size_t)14);
CPPUNIT_ASSERT_EQUAL(dEploidIOlasso.lassoAltCount.size(), (size_t)14);
}
};

Expand Down

0 comments on commit 2f71665

Please sign in to comment.