Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fliphybrid root fix, nnidistance functions, PhyLiNC fixed network optimization #148

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
84 changes: 77 additions & 7 deletions src/compareNetworks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ networks at internal nodes.
function hardwiredClusterDistance(net1::HybridNetwork, net2::HybridNetwork, rooted::Bool)
bothtrees = (net1.numHybrids == 0 && net2.numHybrids == 0)
rooted || bothtrees ||
return hardwiredClusterDistance_unrooted(net1, net2) # tries all roots, but removes degree-2 nodes
return hardwiredClusterDistance_semidirected(net1, net2) # tries all roots, but removes degree-2 nodes
taxa = sort!(String[net1.leaf[i].name for i in 1:net1.numTaxa])
length(setdiff(taxa, String[net2.leaf[i].name for i in 1:net2.numTaxa])) == 0 ||
error("net1 and net2 do not share the same taxon set. Please prune networks first.")
Expand Down Expand Up @@ -730,10 +730,11 @@ end


"""
hardwiredClusterDistance_unrooted(net1::HybridNetwork, net2::HybridNetwork)
hardwiredClusterDistance_semidirected(net1::HybridNetwork, net2::HybridNetwork)

Miminum hardwired cluster dissimilarity between the two networks, considered as
unrooted (or semi-directed). This dissimilarity is defined as the minimum
Miminum hardwired cluster dissimilarity between the two networks when considered
semidirected (only hybrid edges are directed), that is, when the root is unknown.
This dissimilarity is defined as the minimum
rooted distance, over all root positions that are compatible with the direction
of hybrid edges.
Called by [`hardwiredClusterDistance`](@ref).
Expand All @@ -743,10 +744,10 @@ are deleted before starting the comparison.
Since rooting the network at a leaf creates a root node of degree 2 and
an extra cluster, leaves are excluded from possible rooting positions.
"""
function hardwiredClusterDistance_unrooted(net1::HybridNetwork, net2::HybridNetwork)
return hardwiredClusterDistance_unrooted!(deepcopy(net1), deepcopy(net2))
function hardwiredClusterDistance_semidirected(net1::HybridNetwork, net2::HybridNetwork)
return hardwiredClusterDistance_semidirected!(deepcopy(net1), deepcopy(net2))
end
function hardwiredClusterDistance_unrooted!(net1::HybridNetwork, net2::HybridNetwork)
function hardwiredClusterDistance_semidirected!(net1::HybridNetwork, net2::HybridNetwork)
#= fixit: inefficient function, because r1 * r2 "M" matrices of
hardwiredClusters() are calculated, where ri = # root positions in neti.
Rewrite to calculate only r1 + r2 M's.
Expand Down Expand Up @@ -794,3 +795,72 @@ function hardwiredClusterDistance_unrooted!(net1::HybridNetwork, net2::HybridNet
# warning: original roots (and edge directions) NOT restored
return bestdissimilarity
end

"""
nnidistance(net1::HybridNetwork, net2::HybridNetwork,
outgroup::String, nohybridladder::Bool, no3cycle::Bool,
maxmoves=2::Int,
constraints=TopologyConstraint[]::Vector{TopologyConstraint})

Return the minimum number of NNI moves to get from `net1` to `net2`.
If no moves are needed, return 0. If `net2` cannot be found in `maxmoves` NNI moves,
return Inf. Networks must have the same taxa. If not, will return an error.

**Warning**: assumes that N=N' if and only if networks N and N' have a
hard-wired cluster distance of 0. This is true for certain classes of network,
but not in general.
(give ref with examples of complicated N and N' for which this is false.)

To avoid a very long run, choose a small `maxmoves` to start. Because
calculating the NNI distance is a hard problem and the current implementation
is basic, this function is time-consuming even with a small `maxmoves`.
"""
function nnidistance(startingnet::HybridNetwork, truenet::HybridNetwork,
outgroup::String, nohybridladder::Bool, no3cycle::Bool,
maxmoves=2::Int,
constraints=TopologyConstraint[]::Vector{TopologyConstraint})
sort!(tipLabels(truenet)) == sort!(tipLabels(startingnet)) ||
error("Networks have different taxa. Cannot compute an nni distance between networks with different taxa.")
rootatnode!(startingnet, outgroup) # confirm startingnet is rooted at outgroup
removedegree2nodes!(startingnet) # rooting adds a node of degree two. This removes it.
nmoves = 0
equalnets(n1,n2) = hardwiredClusterDistance(n1, n2, true) == 0
if equalnets(startingnet,truenet)
@debug "After rerooting, startingnet and truenet match. No NNI moves were needed."
return nmoves
end
startingnets = [writeTopology(startingnet)]
# cecile: why strings??
while nmoves < maxmoves
nmoves += 1
allneighbors = String[] # reset to zero to create new set of neighbors
for s in 1:length(startingnets)
snet = readTopology(startingnets[s])
neighbors = uniqueneighbornets(snet, nohybridladder, no3cycle, constraints)
for n in 1:length(neighbors) # see if we found truenet
if equalnets(readTopology(neighbors[n]), truenet)
@debug "After $nmoves move(s), startingnet was transformed into truenet with NNIs."
return nmoves
end
end
allneighbors = vcat(allneighbors, neighbors)
#= why not: append!(allneighbors, neighbors) ?
why not: remove duplicate neighbors in this "all" list, and
also remove any startingnets?
example:
duplicate = findfirst(net -> equalnets(net,neighbors[n]), allneighbors))
if isnothing(duplicate) then push!(allneighbors, neighbors[n])
else: don't push it, and don't even check if it's equal to truenet.

or: only check if neighbors[n] is the same as truenet.
then later, go through the neighbors list and only push those
that are not found in allneighbors already, and not found in
startingnets either.
=#
end
# use these neighbors as the next startingnets
startingnets = allneighbors
end
@debug "the input networks are at NNI-distance greater than $nmoves."
return Inf
end
142 changes: 122 additions & 20 deletions src/moves_semidirected.jl
Original file line number Diff line number Diff line change
Expand Up @@ -370,23 +370,24 @@ end
nni!(net::HybridNetwork, uv::Edge, nummove::UInt8,
nohybridladder::Bool, no3cycle::Bool)

Modify `net` with a nearest neighbor interchange (NNI) around edge `uv`.
Return the information necessary to undo the NNI, or `nothing` if the move
was not successful (such as if the resulting graph was not acyclic (not a DAG) or if
the focus edge is adjacent to a polytomy). If the move fails, the network is not
modified.
`nummove` specifies which of the available NNIs is performed.

rooted-NNI options according to Gambette et al. (2017), fig. 8:
Modify `net` with a semi-directed nearest neighbor interchange (sNNI) around
edge `uv`. Return the information necessary to undo the sNNI, or `nothing` if
the move was not successful (such as if the resulting graph was not acyclic
(not a DAG) or if the focus edge is adjacent to a polytomy). If the move fails,
the network is not modified.

`nummove` specifies which of the available sNNIs is performed.

Expanded from the rooted-NNI (rNNI) options in Gambette et al. (2017), fig. 8:
* BB: 2 moves, both to BB, if directed edges. 8 moves if undirected.
* RR: 2 moves, both to RR.
* BR: 3 moves, 1 RB & 2 BRs, if directed. 6 moves if e is undirected.
* RB: 4 moves, all 4 BRs.
The extra options are due to assuming a semi-directed network, whereas
Gambette et al (2017) describe options for rooted networks.
On a semi-directed network, there might be a choice of how to direct
the edges that may contain the root, e.g. choice of e=uv versus vu, and
choice of labelling adjacent nodes as α/β (BB), or as α/γ (BR).
The extra sNNI options come from assuming a semi-directed network, in
constrast to Gambette et al (2017)'s rooted networks. On a semi-directed network,
there may be a choice in how to direct the edges that may contain the root:
e.g. choice of e=uv versus vu and choice of labelling adjacent nodes as
α/β (BB) or as α/γ (BR).

`nohybridladder` = true prevents moves that would create a hybrid ladder in
the network, that is, 2 consecutive hybrid nodes (one parent of the other).
Expand Down Expand Up @@ -1026,7 +1027,6 @@ function fliphybrid!(net::HybridNetwork, hybridnode::Node, minor=true::Bool,
return nothing # to calculate its isp2desc for total # of true's
end
end
# @debug "edgetoflip is $edgetoflip, edgetokeep is $edgetokeep, newhybridedge is $newhybridedge"
if nohybridladder
# case when edgetoflip would be bottom rung of ladder
if getOtherNode(newhybridedge, newhybridnode).hybrid
Expand All @@ -1040,6 +1040,23 @@ function fliphybrid!(net::HybridNetwork, hybridnode::Node, minor=true::Bool,
end
end
end
# check if newhybridedge's direction needs to be flipped. If so, reroot.
# includes case when newhybridnode was the original root
if getChild(newhybridedge) !== newhybridnode
# choose a new root: after the flip, all edges connected to the root
# must be able to containRoot
if edgetoflip.containRoot && edgetokeep.containRoot
newroot = hybridnode
else # set parent node of newhybridedge as new root
p3 = getOtherNode(newhybridedge, newhybridnode)
all(e.containRoot for e in p3.edge) || return nothing
newroot = p3
end
net.root = findfirst(n -> n === newroot, net.node)
# then flip newhybridedge, make it point towards newhybridnode
newhybridedge.isChild1 = !newhybridedge.isChild1
runDirectEdges = true # in most cases, many edges will need to be flipped
end
# change hybrid status and major status for nodes and edges
hybridnode.hybrid = false
edgetokeep.hybrid = false
Expand All @@ -1049,12 +1066,6 @@ function fliphybrid!(net::HybridNetwork, hybridnode::Node, minor=true::Bool,
edgetokeep.isMajor = true
# update node order to keep isChild1 attribute of hybrid edges true
edgetoflip.isChild1 = !edgetoflip.isChild1 # just switch
if getChild(newhybridedge) !== newhybridnode # includes the case when the newhybridnode was the root
# then flip newhybridedge too: make it point towards newhybridnode
newhybridedge.isChild1 = !newhybridedge.isChild1
net.root = findfirst(n -> n === hybridnode, net.node)
runDirectEdges = true # many edges are likely to need to have directions flipped here
end
# give new hybridnode a name
newhybridnode.name = hybridnode.name
hybridnode.name = "" # remove name from former hybrid node
Expand All @@ -1080,3 +1091,94 @@ function fliphybrid!(net::HybridNetwork, hybridnode::Node, minor=true::Bool,
end
return newhybridnode, edgetoflip, oldchildedge
end

"""
nnineighbors(net::HybridNetwork, nohybridladder::Bool, no3cycle::Bool,
constraints=TopologyConstraint[]::Vector{TopologyConstraint})

Return the immediate sNNI neighborhood of `net` as an array
of the neighbor networks' Newick strings.

Warning:
- Because different NNIs can lead to the same topology, the list of
neighbors may contain duplicates. Therefore, the length of the list should not
be taken as the number of neighbors. For the unique neighbors, see
`uniqueneighbornets` function below.
- creates one deep copy of net
- clade constraints are not fully implemented yet --species constraints only.

"""
function nnineighbors(net::HybridNetwork, nohybridladder::Bool, no3cycle::Bool,
constraints=TopologyConstraint[]::Vector{TopologyConstraint})
neighbors = String[]
neibr = deepcopy(net) # do NOT modify the input network
for e in neibr.edge
if any(con.edge === e for con in constraints) # if edge is a stem
continue # skip to next edge
end
for nummove in 0x01:nnimax(e) # empty if nnimax(e) <= 0x00
moveinfo = nni!(neibr, e, nummove, nohybridladder, no3cycle)
!isnothing(moveinfo) || continue # move didn't work, skip to next NNI
neibr.root == net.root || error("root changed by NNI")
#= cecile:
did you check that NNIs don't never move the root?

why have keep the multiple occurences of the same neighbor?
why not remove the duplicates here, and
delete the function uniqueneighbornets? example:
duplicate = findlast(n -> hardwiredClusterDistance(n,neibr,true)==0, neighbors)
if isnothing(duplicate) then push!(neighbors, deepcopy(neibr)).
else don't push the duplicate.
findlast: because the last neighbors pushed were from NNIs along
the same edge, so most likely to match the duplicate.
=#
push!(neighbors, writeTopology(neibr))
#= cecile: why push the string version? Every downstream use
transforms it back to a HybridNetwork object.
=#
nni!(moveinfo...) # undo this nni
# not done: confirm that neibr is unchanged after undoing nni
# hardwiredClusterDistance(neibr, net, true) == 0 || error("original net changed")
end
end
return neighbors
end

"""
uniqueneighbornets(net::HybridNetwork, nohybridladder::Bool, no3cycle::Bool,
constraints=TopologyConstraint[]::Vector{TopologyConstraint})

Return all networks obtained by applying to `net` one
nearest neighbor interchange (NNI). If the same topology can be obtained
by several NNIs (possibly with different sets of branch lengths),
it is listed once only.

Assumes that rooting does not change during an NNI.
"""
function uniqueneighbornets(net::HybridNetwork, nohybridladder::Bool,
no3cycle::Bool,
constraints=TopologyConstraint[]::Vector{TopologyConstraint})
neighbors = nnineighbors(net, nohybridladder, no3cycle, constraints)
# cecile: why not do this below?
# function isequal(n1, n2)
# hardwiredClusterDistance(n1,n2,true) == 0
# end
# return unique(neighbors)
ncount = length(neighbors)
hwcds = zeros(Int, ncount, ncount) # rooted distances between neighbors
for i in 1:ncount
for j in 1:ncount
hwcds[i,j] = hardwiredClusterDistance(readTopology(neighbors[i]),
readTopology(neighbors[j]), true)
end
end
# find rows with duplicate HWCD patterns
duplicates = nonunique(DataFrame(hwcds)) # true entries indicate duplicate rows
duplicateindices = findall(x->x==1, duplicates)
for di in duplicateindices # confirm duplicate net has hwcd == 0 with >= 1 other net
any(i-> i == 0, hwcds[di, 1:end .!= di]) ||
error("Duplicate net does not have a HWCD of zero with another net")
end
deleteat!(neighbors, duplicateindices)
return neighbors
end
Loading