/
ClassifierOfAlignmentsToVariant.cpp
119 lines (103 loc) · 3.41 KB
/
ClassifierOfAlignmentsToVariant.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
//
// Expansion Hunter
// Copyright (c) 2018 Illumina, Inc.
//
// Author: Egor Dolzhenko <edolzhenko@illumina.com>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
#include "classification/ClassifierOfAlignmentsToVariant.hh"
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <boost/algorithm/string/join.hpp>
using graphtools::NodeId;
using std::string;
using std::vector;
namespace ehunter
{
static string encode(const vector<NodeId>& nodeIds)
{
vector<string> encoding;
for (auto nodeId : nodeIds)
{
encoding.push_back(std::to_string(nodeId));
}
return boost::algorithm::join(encoding, ", ");
}
const NodeId ClassifierOfAlignmentsToVariant::kInvalidNodeId = std::numeric_limits<NodeId>::max();
ClassifierOfAlignmentsToVariant::ClassifierOfAlignmentsToVariant(vector<NodeId> targetNodes)
: targetNodes_(std::move(targetNodes))
{
if (targetNodes_.empty())
{
throw std::logic_error("Cannot create a node bundle without nodes");
}
for (int index = 1; index != static_cast<int>(targetNodes_.size()); ++index)
{
if (targetNodes_[index] != targetNodes_[index - 1] + 1)
{
throw std::logic_error("Bundle " + encode(targetNodes_) + " must contain ordered and consecutive nodes");
}
}
firstBundleNode_ = targetNodes_.front();
lastBundleNode_ = targetNodes_.back();
}
void ClassifierOfAlignmentsToVariant::classify(const graphtools::GraphAlignment& graphAlignment)
{
bool pathStartsUpstream = false;
bool pathEndsDownstream = false;
bool pathOverlapsTargetNode = false;
NodeId targetNodeOverlapped = kInvalidNodeId;
for (auto pathNode : graphAlignment.path().nodeIds())
{
if (pathNode < firstBundleNode_)
{
pathStartsUpstream = true;
}
else if (lastBundleNode_ < pathNode)
{
pathEndsDownstream = true;
}
else if (firstBundleNode_ <= pathNode && pathNode <= lastBundleNode_)
{
pathOverlapsTargetNode = true;
targetNodeOverlapped = pathNode;
}
}
const bool spanningRead = pathStartsUpstream && pathEndsDownstream;
const bool upstreamFlankingRead = pathStartsUpstream && pathOverlapsTargetNode;
const bool downstreamFlankingRead = pathOverlapsTargetNode && pathEndsDownstream;
if (spanningRead)
{
if (targetNodeOverlapped == kInvalidNodeId)
{
numBypassingReads_ += 1;
}
else
{
countsOfSpanningReads_.incrementCountOf(targetNodeOverlapped);
}
}
else if (upstreamFlankingRead)
{
countsOfReadsFlankingUpstream_.incrementCountOf(targetNodeOverlapped);
}
else if (downstreamFlankingRead)
{
countsOfReadsFlankingDownstream_.incrementCountOf(targetNodeOverlapped);
}
}
}