Skip to content

Commit

Permalink
Move files
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben J. Ward committed Jul 9, 2019
1 parent 1237a4d commit c434dc8
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 90 deletions.
10 changes: 10 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,13 @@ version = "0.1.0"
[deps]
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"

[compat]
BioSequences = "1.1"
julia = "1.1"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
10 changes: 2 additions & 8 deletions src/BioSequenceGraphs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@ export SequenceDistanceGraph,

using BioSequences

#include("Nodes.jl")
#include("Links.jl")
#include("SequenceGraph.jl")
#include("IO.jl")
include("SequenceDistanceGraph.jl")
include("graph_building.jl")
#include("DeBruijnGraph.jl")
#include("GFA1/GFA1.jl")
include("graph/SequenceDistanceGraph.jl")
include("graph/graph_building.jl")
end # module BioSequenceGraphs
20 changes: 20 additions & 0 deletions src/graph/DistanceGraphLink.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
###
### Link/Edge type for SequenceDistanceGraph
###

"""
Represents a single distance between two sequences in a SequenceDistanceGraph.
"""
struct DistanceGraphLink
    # Node ID on the source side of the link; `is_forwards_from` and
    # `is_backwards_from` compare this field against `-n` and `n` respectively.
    source::NodeID
    # Node ID on the destination side of the link.
    destination::NodeID
    # Distance recorded between the two sequences.
    dist::Int64
end

"Return the `source` node ID stored in link `l`."
function source(l::DistanceGraphLink)
    return l.source
end

"Return the `destination` node ID stored in link `l`."
function destination(l::DistanceGraphLink)
    return l.destination
end

"Return the distance (`dist` field) stored in link `l`."
function distance(l::DistanceGraphLink)
    return l.dist
end

"Test if link `l` is a forward link leaving node `n`."
function is_forwards_from(l::DistanceGraphLink, n::NodeID)
    # A forward link out of `n` is stored with the negated ID as its source.
    return -n == source(l)
end

"Test if the source of link `l` is exactly `n` (the counterpart of `is_forwards_from`)."
function is_backwards_from(l::DistanceGraphLink, n::NodeID)
    return n == source(l)
end
55 changes: 55 additions & 0 deletions src/graph/SDGNode.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
###
### Node type for SequenceDistanceGraph
###

"""
The SDGNode type represents a node in a SequenceDistanceGraph.
At present it contains only two fields, first it holds an instance of a BioSequences
Sequence type.
Secondly, it tracks a flag which indicates if the node has been
deleted or not.
!!! note
The deleted flag allows us to mark nodes in the graph as deleted, which can be of
help in some algorithms where the graph structure is being edited (merging nodes for example).
Actually deleting the node would shift node IDs and require redoing all links in the graph
and so on.
So just marking a node as deleted and not using it anymore is a lazy but sometimes
helpful choice.
"""
struct SDGNode{S}
    # The sequence held by this node.
    seq::S
    # `true` once the node has been marked as deleted.
    deleted::Bool
end

"""
    empty_node(::Type{S}) where {S <: Sequence}

Build an `SDGNode{S}` that holds `empty_seq(S)` and has its deleted flag set to `true`.
"""
empty_node(::Type{S}) where {S <: Sequence} = SDGNode{S}(empty_seq(S), true)

# TODO: This is a hacked copy of the dna string literal macro from BioSequences,
# except it creates 2-bit based DNA sequences rather than 4 bit based ones.
# This ability to choose the bit encoding should make its way to BioSequences.jl
# in the future, but for now, it's here.
#
# I basically want this as it lets me create a single literal empty sequence, shared
# by all deleted SDG nodes, rather than having each deleted SDG node create a new
# empty sequence.
"""
    dna2"..."s
    dna2"..."d

String macro that builds a `BioSequence{DNAAlphabet{2}}` from a literal after passing
it through `BioSequences.remove_newlines`.

The flag is mandatory. With `s` the sequence is constructed once, while the macro is
expanded, and that one object is what the literal evaluates to. With `d` the macro
expands to a constructor call, so a sequence is built each time the code runs.
Any other flag raises an error.
"""
macro dna2_str(seq, flag)
    if flag == "d"
        # Only the cleaned string is computed now; construction is deferred to run time.
        cleaned = BioSequences.remove_newlines(seq)
        return quote
            BioSequence{DNAAlphabet{2}}($cleaned)
        end
    elseif flag == "s"
        # Construct at expansion time and splice the finished object into the code.
        return BioSequence{DNAAlphabet{2}}(BioSequences.remove_newlines(seq))
    end
    error("Invalid DNA flag: '$(flag)'")
end

"Return the empty 2-bit DNA sequence produced by the `s`-flagged `dna2` string literal."
@inline function empty_seq(::Type{BioSequence{DNAAlphabet{2}}})
    return dna2""s
end

"Return `true` if node `n` has its deleted flag set."
@inline is_deleted(n::SDGNode{S}) where {S<:Sequence} = n.deleted

"""
    length(n::SDGNode)

Return the length of the sequence held by node `n`.
"""
# Defined explicitly as a method of `Base.length`. An unqualified `length(...) = ...`
# does not extend Base's function unless it has been imported; it would either fail or
# introduce a separate module-local `length` that hides Base's, breaking calls such as
# `length(n.seq)` below.
@inline Base.length(n::SDGNode{S}) where {S<:Sequence} = length(n.seq)
91 changes: 10 additions & 81 deletions src/SequenceDistanceGraph.jl → src/graph/SequenceDistanceGraph.jl
Original file line number Diff line number Diff line change
@@ -1,74 +1,17 @@

# Node and Link types for SequenceDistanceGraph
# ---------------------------------------------
###
### Node and Link types for SequenceDistanceGraph
###

# Integer type used for node identifiers in the graph and its links.
const NodeID = Int64

"""
The SDGNode type represents a node in a SequenceDistanceGraph.
At present it contains only two fields, first it holds an instance of a BioSequences
Sequence type.
Secondly, it tracks a flag which indicates if the node has been
deleted or not.
!!! note
The deleted flag allows us to mark nodes in the graph as deleted, which can be of
help in some algorithms where the graph structure is being edited (merging nodes for example).
Actually deleting the node would shift node IDs and require redoing all links in the graph
and so on.
So just marking a node as deleted and not using it anymore is a lazy but sometimes
helpful choice.
"""
struct SDGNode{S}
    # The sequence held by this node.
    seq::S
    # `true` once the node has been marked as deleted.
    deleted::Bool
end

"""
    empty_node(::Type{S}) where {S <: Sequence}

Build an `SDGNode{S}` that holds `empty_seq(S)` and has its deleted flag set to `true`.
"""
empty_node(::Type{S}) where {S <: Sequence} = SDGNode{S}(empty_seq(S), true)

# TODO: This is a hacked copy of the dna string literal macro from BioSequences,
# except it creates 2-bit based DNA sequences rather than 4 bit based ones.
# This ability to choose the bit encoding should make its way to BioSequences.jl
# in the future, but for now, it's here.
#
# I basically want this as it lets me create a single literal empty sequence, shared
# by all deleted SDG nodes, rather than having each deleted SDG node create a new
# empty sequence.
"""
    dna2"..."s
    dna2"..."d

String macro that builds a `BioSequence{DNAAlphabet{2}}` from a literal after passing
it through `BioSequences.remove_newlines`.

The flag is mandatory. With `s` the sequence is constructed once, while the macro is
expanded, and that one object is what the literal evaluates to. With `d` the macro
expands to a constructor call, so a sequence is built each time the code runs.
Any other flag raises an error.
"""
macro dna2_str(seq, flag)
    if flag == "d"
        # Only the cleaned string is computed now; construction is deferred to run time.
        cleaned = BioSequences.remove_newlines(seq)
        return quote
            BioSequence{DNAAlphabet{2}}($cleaned)
        end
    elseif flag == "s"
        # Construct at expansion time and splice the finished object into the code.
        return BioSequence{DNAAlphabet{2}}(BioSequences.remove_newlines(seq))
    end
    error("Invalid DNA flag: '$(flag)'")
end

"Return the empty 2-bit DNA sequence produced by the `s`-flagged `dna2` string literal."
function empty_seq(::Type{BioSequence{DNAAlphabet{2}}})
    return dna2""s
end

"""
Represents a single distance between two sequences in a SequenceDistanceGraph.
"""
struct DistanceGraphLink
    # Node ID on the source side of the link; `is_forwards_from` and
    # `is_backwards_from` compare this field against `-n` and `n` respectively.
    source::NodeID
    # Node ID on the destination side of the link.
    destination::NodeID
    # Distance recorded between the two sequences.
    dist::Int64
end
include("SDGNode.jl")
include("DistanceGraphLink.jl")

"Return the `source` node ID stored in link `l`."
function source(l::DistanceGraphLink)
    return l.source
end

"Return the `destination` node ID stored in link `l`."
function destination(l::DistanceGraphLink)
    return l.destination
end

"Return the distance (`dist` field) stored in link `l`."
function distance(l::DistanceGraphLink)
    return l.dist
end
# Alias for the link storage type: a vector of vectors of DistanceGraphLink.
# NOTE(review): presumably one inner vector per node - confirm against the graph struct.
const LinksT = Vector{Vector{DistanceGraphLink}}

"Test if link `l` is a forward link leaving node `n`."
function is_forwards_from(l::DistanceGraphLink, n::NodeID)
    # A forward link out of `n` is stored with the negated ID as its source.
    return -n == source(l)
end

"Test if the source of link `l` is exactly `n` (the counterpart of `is_forwards_from`)."
function is_backwards_from(l::DistanceGraphLink, n::NodeID)
    return n == source(l)
end
###
### Graph types
###

"""
The SequenceDistanceGraph is a representation of a genome assembly.
Expand Down Expand Up @@ -105,6 +48,7 @@ function SequenceDistanceGraph{S}() where {S<:Sequence}
end

"Return the number of entries in the node collection of graph `sg`."
function n_nodes(sg::SequenceDistanceGraph)
    return length(nodes(sg))
end

"Return an iterator over the valid indices of the node collection of graph `sg`."
function each_node_id(sg::SequenceDistanceGraph)
    return eachindex(nodes(sg))
end

# Graph accessor functions
# ------------------------
Expand All @@ -113,8 +57,6 @@ nodes(sg::SequenceDistanceGraph) = sg.nodes
"Return the node stored at index `abs(i)`; an ID and its negation give the same node."
function node(sg::SequenceDistanceGraph, i::NodeID)
    return nodes(sg)[abs(i)]
end

"Return the `links` field of graph `sg`."
function links(sg::SequenceDistanceGraph)
    return sg.links
end



"""
links(sg::SequenceGraph, node::NodeID)
Expand Down Expand Up @@ -290,17 +232,6 @@ function get_previous_nodes(sg::SequenceDistanceGraph, n::NodeID)
end
end

#=
function find_tips(sg::SequenceDistanceGraph)
r = Vector{NodeID}()
for l in links(sg)
if length(l) == 1
d = destination(l[1])
for ol in links(sg, d)
end
=#

function dump_to_gfa1(sg, filename)
fasta_filename = "$filename.fasta"
gfa = open("$filename.gfa", "w")
Expand Down Expand Up @@ -337,5 +268,3 @@ function dump_to_gfa1(sg, filename)
close(gfa)
close(fasta)
end


2 changes: 2 additions & 0 deletions src/graph_building.jl → src/graph/graph_building.jl
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,5 @@ function new_graph_from_kmerlist(kmerlist::Vector{DNAKmer{K}}) where {K}
@info string("Done c", str)
return sg
end
"""
    SequenceDistanceGraph(kmerlist::Vector{DNAKmer{K}}) where {K}

Construct a graph from a vector of kmers by delegating to `new_graph_from_kmerlist`.
"""
function SequenceDistanceGraph(kmerlist::Vector{DNAKmer{K}}) where {K}
    return new_graph_from_kmerlist(kmerlist)
end

2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
using BioSequenceGraphs
# `Test` is a standard library on Julia >= 0.7; the old `Base.Test` module no longer
# exists there, so only the stdlib form is loaded.
using Test

# write your own tests here
# Placeholder suite so the test target runs and reports under a named test set.
@testset "BioSequenceGraphs" begin
    @test 1 == 1
end

0 comments on commit c434dc8

Please sign in to comment.