Skip to content

Commit

Permalink
Revert "Transfer code from internal repository"
Browse files Browse the repository at this point in the history
This reverts commit fe7298a.
  • Loading branch information
edolzhenko committed Jan 17, 2019
1 parent fe7298a commit 1f0694a
Show file tree
Hide file tree
Showing 237 changed files with 2,095 additions and 2,199 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ find_package(Boost 1.4 REQUIRED COMPONENTS program_options filesystem regex date
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(SYSTEM ${Boost_INCLUDE_DIR})
include_directories(${CMAKE_BINARY_DIR}/thirdparty/htslib/include)
include_directories(thirdparty/graph-tools-GT-506/include)

add_subdirectory(thirdparty/graph-tools-master)

Expand Down
Empty file modified COPYRIGHT.txt
100644 → 100755
Empty file.
Empty file modified LICENSE.txt
100644 → 100755
Empty file.
Empty file modified README.md
100644 → 100755
Empty file.
26 changes: 0 additions & 26 deletions alignment/AlignmentFilters.cpp
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@

using graphtools::GraphAlignment;
using graphtools::NodeId;
using graphtools::Operation;
using graphtools::OperationType;
using graphtools::Path;
using std::list;
using std::string;
Expand Down Expand Up @@ -112,28 +110,4 @@ bool checkIfDownstreamAlignmentIsGood(NodeId nodeId, GraphAlignment alignment)
return score >= kScoreCutoff;
}

// Decides whether a graph alignment is of sufficient quality to be kept.
//
// A softclip at the very first and at the very last operation of the alignment is excluded
// from the query length. The alignment passes when the matches amount to at least 80% of
// both the clipped query length and the reference length (integer percentages).
//
// Returns false when either length is not positive, e.g. for a read that is entirely
// softclipped: there is nothing to score, and dividing by such a length would be undefined.
bool checkIfPassesAlignmentFilters(const GraphAlignment& alignment)
{
    const int kMinPercentMatches = 80;

    const Operation& firstOperation = alignment.alignments().front().operations().front();
    const int frontSoftclipLen = firstOperation.type() == OperationType::kSoftclip ? firstOperation.queryLength() : 0;

    const Operation& lastOperation = alignment.alignments().back().operations().back();
    const int backSoftclipLen = lastOperation.type() == OperationType::kSoftclip ? lastOperation.queryLength() : 0;

    // If the first and the last operation are the same softclip it is subtracted twice, so
    // the clipped length can end up non-positive; the guard below covers that case as well.
    const int clippedQueryLength = alignment.queryLength() - frontSoftclipLen - backSoftclipLen;
    const int referenceLength = alignment.referenceLength();

    if (clippedQueryLength <= 0 || referenceLength <= 0)
    {
        return false;
    }

    const int percentQueryMatches = (100 * alignment.numMatches()) / clippedQueryLength;
    const int percentReferenceMatches = (100 * alignment.numMatches()) / referenceLength;

    return percentQueryMatches >= kMinPercentMatches && percentReferenceMatches >= kMinPercentMatches;
}

}
2 changes: 0 additions & 2 deletions alignment/AlignmentFilters.hh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,4 @@ bool checkIfUpstreamAlignmentIsGood(graphtools::NodeId nodeId, graphtools::Graph
// Checks if alignment downstream of a given node is high quality
bool checkIfDownstreamAlignmentIsGood(graphtools::NodeId nodeId, graphtools::GraphAlignment alignment);

bool checkIfPassesAlignmentFilters(const graphtools::GraphAlignment& alignment);

}
Empty file modified alignment/AlignmentTweakers.cpp
100644 → 100755
Empty file.
Empty file modified alignment/AlignmentTweakers.hh
100644 → 100755
Empty file.
Empty file modified alignment/CMakeLists.txt
100644 → 100755
Empty file.
Empty file modified alignment/GraphAlignmentOperations.cpp
100644 → 100755
Empty file.
Empty file modified alignment/GraphAlignmentOperations.hh
100644 → 100755
Empty file.
Empty file modified alignment/GreedyAlignmentIntersector.cpp
100644 → 100755
Empty file.
Empty file modified alignment/GreedyAlignmentIntersector.hh
100644 → 100755
Empty file.
Empty file modified alignment/HighQualityBaseRunFinder.cpp
100644 → 100755
Empty file.
Empty file modified alignment/HighQualityBaseRunFinder.hh
100644 → 100755
Empty file.
Empty file modified alignment/SoftclippingAligner.cpp
100644 → 100755
Empty file.
Empty file modified alignment/SoftclippingAligner.hh
100644 → 100755
Empty file.
Empty file modified alignment/tests/AlignmentTweakersTest.cpp
100644 → 100755
Empty file.
Empty file modified alignment/tests/CMakeLists.txt
100644 → 100755
Empty file.
Empty file modified alignment/tests/GraphAlignmentOperationsTest.cpp
100644 → 100755
Empty file.
Empty file modified alignment/tests/GreedyAlignmentIntersectorTest.cpp
100644 → 100755
Empty file.
Empty file modified alignment/tests/HighQualityBaseRunFinderTest.cpp
100644 → 100755
Empty file.
Empty file modified alignment/tests/SoftclippingAlignerTest.cpp
100644 → 100755
Empty file.
Empty file modified classification/AlignmentClassifier.cpp
100644 → 100755
Empty file.
Empty file modified classification/AlignmentClassifier.hh
100644 → 100755
Empty file.
Empty file modified classification/CMakeLists.txt
100644 → 100755
Empty file.
Empty file modified classification/ClassifierOfAlignmentsToVariant.cpp
100644 → 100755
Empty file.
Empty file modified classification/ClassifierOfAlignmentsToVariant.hh
100644 → 100755
Empty file.
Empty file modified classification/tests/AlignmentClassifierTest.cpp
100644 → 100755
Empty file.
1 change: 1 addition & 0 deletions classification/tests/AlignmentSummaryTest.cpp
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

using namespace ehunter;

using reads::Read;
using std::map;
using std::vector;

Expand Down
Empty file modified classification/tests/CMakeLists.txt
100644 → 100755
Empty file.
Empty file modified classification/tests/ClassifierOfAlignmentsToVariantTest.cpp
100644 → 100755
Empty file.
Empty file modified cmake/google_test.cmake
100644 → 100755
Empty file.
Empty file modified common/CMakeLists.txt
100644 → 100755
Empty file.
Empty file modified common/Common.cpp
100644 → 100755
Empty file.
Empty file modified common/Common.hh
100644 → 100755
Empty file.
Empty file modified common/CountTable.cpp
100644 → 100755
Empty file.
Empty file modified common/CountTable.hh
100644 → 100755
Empty file.
99 changes: 51 additions & 48 deletions common/GenomicRegion.cpp
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -22,34 +22,55 @@

#include "common/GenomicRegion.hh"

#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/lexical_cast.hpp>

#include <limits>
#include <sstream>
#include <stdexcept>

#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <string>
#include <vector>

using std::istream;
using std::ostream;
using std::string;
using std::unordered_map;
using std::vector;

using boost::lexical_cast;
using boost::algorithm::is_any_of;
using boost::algorithm::split;

namespace ehunter
{

GenomicRegion::GenomicRegion(int32_t contigIndex, int64_t start, int64_t end)
: contigIndex_(contigIndex)
Region::Region(const std::string chrom, int64_t start, int64_t end)
: chrom_(chrom)
, start_(start)
, end_(end)
{
}

bool GenomicRegion::operator<(const GenomicRegion& other) const
// Builds a region from its textual encoding "chrom:start-end".
//
// The encoding is split on every ':' and '-' character and must yield exactly three
// fields; otherwise std::logic_error is thrown. As a consequence, a contig name that
// itself contains ':' or '-' is rejected. The start and end fields are converted with
// lexical_cast<int64_t>.
Region::Region(const std::string encoding)
{
    vector<string> fields;
    boost::algorithm::split(fields, encoding, is_any_of(":-"));

    const bool hasExpectedFieldCount = fields.size() == 3;
    if (!hasExpectedFieldCount)
    {
        throw std::logic_error("Unexpected range format: " + encoding);
    }

    const string& chromField = fields[0];
    const string& startField = fields[1];
    const string& endField = fields[2];

    chrom_ = chromField;
    start_ = lexical_cast<int64_t>(startField);
    end_ = lexical_cast<int64_t>(endField);
}

bool Region::operator<(const Region& other) const
{
if (contigIndex_ != other.contigIndex_)
if (chrom_ != other.chrom_)
{
return contigIndex_ < other.contigIndex_;
return chrom_ < other.chrom_;
}

if (start_ != other.start_)
Expand All @@ -60,9 +81,9 @@ bool GenomicRegion::operator<(const GenomicRegion& other) const
return end_ < other.end_;
}

bool GenomicRegion::overlaps(const GenomicRegion& other) const
bool Region::Overlaps(const Region& other) const
{
if (contigIndex_ != other.contigIndex_)
if (chrom_ != other.chrom_)
{
return false;
}
Expand All @@ -73,9 +94,9 @@ bool GenomicRegion::overlaps(const GenomicRegion& other) const
return leftBound <= rightBound;
}

int64_t GenomicRegion::distance(const GenomicRegion& other) const
int64_t Region::Distance(const Region& other) const
{
if (contigIndex_ != other.contigIndex_)
if (chrom_ != other.chrom_)
{
return std::numeric_limits<int64_t>::max();
}
Expand All @@ -93,7 +114,7 @@ int64_t GenomicRegion::distance(const GenomicRegion& other) const
return 0;
}

vector<GenomicRegion> merge(vector<GenomicRegion> regions, int maxMergeDist)
vector<Region> merge(vector<Region> regions, int maxMergeDist)
{
if (regions.empty())
{
Expand All @@ -102,12 +123,12 @@ vector<GenomicRegion> merge(vector<GenomicRegion> regions, int maxMergeDist)

std::sort(regions.begin(), regions.end());

GenomicRegion mergedRegion = regions.front();
vector<GenomicRegion> mergedRegions;
Region mergedRegion = regions.front();
vector<Region> mergedRegions;

for (const auto& currentRegion : regions)
{
if (currentRegion.distance(mergedRegion) <= maxMergeDist)
if (currentRegion.Distance(mergedRegion) <= maxMergeDist)
{
const int64_t furthestEnd = std::max<int64_t>(mergedRegion.end(), currentRegion.end());
mergedRegion.setEnd(furthestEnd);
Expand All @@ -127,45 +148,27 @@ vector<GenomicRegion> merge(vector<GenomicRegion> regions, int maxMergeDist)
return mergedRegions;
}

// Returns the text produced by streaming this region through operator<<.
const string Region::ToString() const
{
    std::ostringstream encodingStream;
    encodingStream << *this;

    return encodingStream.str();
}

// Returns the region extended by `length` upstream and downstream; the start
// is clamped at 0.
// NOTE: The right boundary of the extended region may extend past the end of
// the chromosome.
GenomicRegion GenomicRegion::extend(int length) const
Region Region::extend(int length) const
{
const int64_t newStart = start_ >= length ? (start_ - length) : 0;
const int64_t newEnd = end_ + length;
return GenomicRegion(contigIndex_, newStart, newEnd);
const int64_t new_start = start_ >= length ? (start_ - length) : 0;
const int64_t new_end = end_ + length;
return Region(chrom_, new_start, new_end);
}

std::ostream& operator<<(std::ostream& out, const GenomicRegion& region)
std::ostream& operator<<(std::ostream& out, const Region& region)
{
out << "(" << region.contigIndex_ << "):" << region.start_ << "-" << region.end_;
out << region.chrom_ << ":" << region.start_ << "-" << region.end_;
return out;
}

// Formats a region as "<contig name>:<start>-<end>", looking the contig name up in
// contigInfo by the region's contig index.
string encode(const ReferenceContigInfo& contigInfo, const GenomicRegion& region)
{
    const auto& name = contigInfo.getContigName(region.contigIndex());

    const string startText = std::to_string(region.start());
    const string endText = std::to_string(region.end());

    return name + ":" + startText + "-" + endText;
}

// Parses a region encoded as "<contig name>:<start>-<end>".
//
// The encoding is split on every ':' and '-' character and must yield exactly three
// fields; otherwise std::logic_error is thrown. The contig name is translated to its
// index through contigInfo.
//
// Coordinates are parsed with std::stoll so that the full int64_t range used by
// GenomicRegion is accepted (std::stoi only covers int). As with std::stoi,
// std::invalid_argument / std::out_of_range propagate for malformed or oversized numbers.
GenomicRegion decode(const ReferenceContigInfo& contigInfo, const string& encoding)
{
    vector<string> components;
    boost::algorithm::split(components, encoding, boost::algorithm::is_any_of(":-"));

    if (components.size() != 3)
    {
        throw std::logic_error("Unexpected range format: " + encoding);
    }

    const auto& contigName = components[0];
    const int32_t contigIndex = contigInfo.getContigId(contigName);

    const int64_t start = std::stoll(components[1]);
    const int64_t end = std::stoll(components[2]);

    return GenomicRegion(contigIndex, start, end);
}

}
45 changes: 18 additions & 27 deletions common/GenomicRegion.hh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -24,57 +24,48 @@

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

#include "common/ReferenceContigInfo.hh"

namespace ehunter
{

// Represents a contiguous region of a genome using 0-based half-open coordinates
class GenomicRegion
class Region
{
public:
friend std::ostream& operator<<(std::ostream& out, const GenomicRegion& region);
friend std::ostream& operator<<(std::ostream& out, const Region& region);

GenomicRegion(const int32_t contigIndex, int64_t start, int64_t end);
Region(const std::string chrom, int64_t start, int64_t end);
explicit Region(const std::string encoding);

bool operator<(const GenomicRegion& other) const;
bool operator<(const Region& other) const;

bool overlaps(const GenomicRegion& other) const;
int64_t distance(const GenomicRegion& other) const;
bool Overlaps(const Region& other) const;
int64_t Distance(const Region& other) const;

int32_t contigIndex() const { return contigIndex_; }
const std::string& chrom() const { return chrom_; }
int64_t start() const { return start_; }
int64_t end() const { return end_; }
int64_t length() const { return end_ - start_; }
int64_t length() const { return end_ - start_ + 1; }

void setContigId(int32_t contigIndex) { contigIndex_ = contigIndex; }
void setChrom(const std::string& chrom) { chrom_ = chrom; }
void setStart(int64_t start) { start_ = start; }
void setEnd(int64_t end) { end_ = end; }

bool operator==(const GenomicRegion& other) const
bool operator==(const Region& other) const
{
return contigIndex_ == other.contigIndex_ && start_ == other.start_ && end_ == other.end_;
return chrom_ == other.chrom_ && start_ == other.start_ && end_ == other.end_;
}
bool operator!=(const Region& other) const { return !(*this == other); }

bool operator!=(const GenomicRegion& other) const { return !(*this == other); }

GenomicRegion extend(int length) const;
Region extend(int length) const;
const std::string ToString() const;

private:
int32_t contigIndex_;
std::string chrom_;
int64_t start_;
int64_t end_;
};

using GenomicRegionCatalog = std::unordered_map<std::string, GenomicRegion>;

std::ostream& operator<<(std::ostream& out, const GenomicRegion& region);
std::vector<GenomicRegion> merge(std::vector<GenomicRegion> regions, int maxMergeDist = 500);

std::string encode(const ReferenceContigInfo& contigInfo, const GenomicRegion& region);
GenomicRegion decode(const ReferenceContigInfo& contigInfo, const std::string& encoding);
std::vector<Region> merge(std::vector<Region> regions, int maxMergeDist = 500);
std::ostream& operator<<(std::ostream& out, const Region& region);

}

0 comments on commit 1f0694a

Please sign in to comment.