Skip to content

Commit

Permalink
Add MerTools submodule (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben J. Ward committed Jan 17, 2020
1 parent a0afae9 commit fff259b
Show file tree
Hide file tree
Showing 13 changed files with 475 additions and 1,879 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
julia-version: [1.2.0]
julia-version: [1.3.0]
julia-arch: [x86]
os: [ubuntu-latest]
steps:
Expand Down
10 changes: 6 additions & 4 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
using Documenter, GenomeGraphs
using Documenter, GenomeGraphs, Pkg

makedocs(
modules = [GenomeGraphs, GenomeGraphs.Graphs],
modules = [GenomeGraphs, GenomeGraphs.Graphs, GenomeGraphs.MerTools],
format = Documenter.HTML(),
sitename = "GenomeGraphs.jl",
authors = "Ben J. Ward & Arda Akdemir",
authors = replace(join(Pkg.TOML.parsefile("Project.toml")["authors"], ", "), r" <.*?>" => "" ),
pages = [
"Home" => "index.md",
"Manual" => [
"Guide" => "man/guide.md"
],
"API" => [
"Graphs submodule" => "api/Graphs.md"
"Graphs submodule" => "api/Graphs.md",
"MerTools submodule" => "api/MerTools.md"
]
],

)

deploydocs(
repo = "github.com/BioJulia/GenomeGraphs.jl.git",
push_preview = true,
deps = nothing,
make = nothing
)
39 changes: 39 additions & 0 deletions docs/src/api/MerTools.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
```@meta
CurrentModule = GenomeGraphs.MerTools
```

# API: The MerTools submodule



!!! note
This is a reference for the API of an internal submodule, intended for
developers and experienced users. First check whether what you need is
already covered by the higher-level WorkSpace API.

## Types

```@docs
MerCount
MerCountHist
DNAMerCount
RNAMerCount
```

## Public / Safe methods

```@docs
mer
freq
collapse_into_counts
collapse_into_counts!
merge_into!
build_freq_list
```

## Internal / Unsafe methods

```@docs
unsafe_collapse_into_counts!
unsafe_merge_into!
```
20 changes: 2 additions & 18 deletions src/GenomeGraphs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,6 @@ export
BigDNAMer,
BigDNAKmer,

###
### MerFreq
###
collapse_sorted!,
collapse!,
collapse_into_freqs!,
collapse_into_freqs,
collapse_into_freqs_sorted!,
collapse_into_freqs_sorted,
merge_into!,
merge_into_sorted!,
hist,
hist!,

###
### MerCounts
###
Expand All @@ -63,14 +49,12 @@ export
dbg!,
remove_tips!

include("Graphs.jl")
include("MerTools.jl") # MerTools submodule.
include("Graphs.jl") # Graphs submodule.

using BioSequences, FASTX, ReadDatastores
import BioSequences.EveryMerIterator

include("mertools/MerFreq.jl")
include("mertools/counting.jl")

include("indexes/unique-kmers.jl")

include("datastores/kmer-counts.jl")
Expand Down
35 changes: 18 additions & 17 deletions src/Graphs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export
SDGLink,
SequenceDistanceGraph,
SDG,
SequenceDistanceGraphPath,

# Nodes and sequences
name,
Expand Down Expand Up @@ -241,7 +242,7 @@ function SequenceDistanceGraph{S}() where {S<:BioSequence}
return SequenceDistanceGraph{S}(Vector{SDGNode{S}}(), LinksVecVec())
end

struct SequenceGraphPath{G<:SequenceDistanceGraph}
struct SequenceDistanceGraphPath{G<:SequenceDistanceGraph}
sg::G
nodes::Vector{NodeID}
end
Expand Down Expand Up @@ -506,7 +507,7 @@ function disconnect_node!(sg::SequenceDistanceGraph, n::NodeID)
end

"""
collapse_all_unitigs!(sg::SequenceDistanceGraph, min_nodes::Integer, consume::Bool)
collapse_all_unitigs!(unitigs::Vector{SequenceDistanceGraphPath{G}}, newnodes::Vector{NodeID}, sg::G, min_nodes::Integer, consume::Bool) where {G<:SequenceDistanceGraph}
Detects all of the trivial paths through the graph that define unitigs. Such
paths are defined as a chain of nodes with only one neighbour. Each such simple
Expand All @@ -523,7 +524,7 @@ what this function does.
!!! note
Modifies the SequenceDistanceGraph `sg`.
"""
function collapse_all_unitigs!(unitigs::Vector{SequenceGraphPath{G}},
function collapse_all_unitigs!(unitigs::Vector{SequenceDistanceGraphPath{G}},
newnodes::Vector{NodeID},
sg::G,
min_nodes::Integer,
Expand Down Expand Up @@ -814,7 +815,7 @@ function find_tip_nodes(sg::SequenceDistanceGraph, min_size::Integer)
end

"""
find_all_unitigs!(unitigs::Vector{SequenceGraphPath{G}}, sg::G, min_nodes::Integer) where {G<:SequenceDistanceGraph}
find_all_unitigs!(unitigs::Vector{SequenceDistanceGraphPath{G}}, sg::G, min_nodes::Integer) where {G<:SequenceDistanceGraph}
Find and return a vector of paths through the graph that represent all the
unitigs or transitive paths in the graph. Such paths are defined as a chain
Expand All @@ -826,7 +827,7 @@ collapsed into one larger node.
This is useful when you want to find unitigs in the graph repeatedly:
reusing the `unitigs` vector saves on additional allocations.
"""
function find_all_unitigs!(unitigs::Vector{SequenceGraphPath{G}},
function find_all_unitigs!(unitigs::Vector{SequenceDistanceGraphPath{G}},
sg::G, min_nodes::Integer) where {G<:SequenceDistanceGraph}
empty!(unitigs)
consumed = falses(n_nodes(sg))
Expand All @@ -835,7 +836,7 @@ function find_all_unitigs!(unitigs::Vector{SequenceGraphPath{G}},
continue
end
consumed[n] = true
path = SequenceGraphPath(sg, [n])
path = SequenceDistanceGraphPath(sg, [n])

# Two passes, fw and bw; the path is inverted twice, so n is still +
for pass in 1:2
Expand Down Expand Up @@ -868,7 +869,7 @@ of nodes with only one neighbour. Such simple regions of the graph can safely be
collapsed into one larger node.
"""
function find_all_unitigs(sg::G, min_nodes::Integer) where {G<:SequenceDistanceGraph}
return find_all_unitigs!(Vector{SequenceGraphPath{G}}(), sg, min_nodes)
return find_all_unitigs!(Vector{SequenceDistanceGraphPath{G}}(), sg, min_nodes)
end


Expand Down Expand Up @@ -960,16 +961,16 @@ end
### SequenceDistanceGraph path
###

SequenceGraphPath(sg::G) where {G<:SequenceDistanceGraph} = SequenceGraphPath{G}(sg, Vector{NodeID}())
SequenceDistanceGraphPath(sg::G) where {G<:SequenceDistanceGraph} = SequenceDistanceGraphPath{G}(sg, Vector{NodeID}())

@inline nodes(p::SequenceGraphPath) = p.nodes
@inline n_nodes(p::SequenceGraphPath) = length(nodes(p))
@inline Base.push!(p::SequenceGraphPath, n::NodeID) = push!(nodes(p), n)
@inline graph(p::SequenceGraphPath) = p.sg
@inline Base.first(p::SequenceGraphPath) = first(nodes(p))
@inline Base.last(p::SequenceGraphPath) = last(nodes(p))
@inline nodes(p::SequenceDistanceGraphPath) = p.nodes
@inline n_nodes(p::SequenceDistanceGraphPath) = length(nodes(p))
@inline Base.push!(p::SequenceDistanceGraphPath, n::NodeID) = push!(nodes(p), n)
@inline graph(p::SequenceDistanceGraphPath) = p.sg
@inline Base.first(p::SequenceDistanceGraphPath) = first(nodes(p))
@inline Base.last(p::SequenceDistanceGraphPath) = last(nodes(p))

function Base.reverse!(p::SequenceGraphPath)
function Base.reverse!(p::SequenceDistanceGraphPath)
nds = nodes(p)
i = firstindex(nds)
j = lastindex(nds)
Expand Down Expand Up @@ -1002,7 +1003,7 @@ end
# TODO: This function currently does not check for deleted nodes in a graph.
# It probably should throw an error if a node is deleted and you're trying to
# use it in a path.
function sequence(p::SequenceGraphPath{SequenceDistanceGraph{S}}) where {S<:BioSequence}
function sequence(p::SequenceDistanceGraphPath{SequenceDistanceGraph{S}}) where {S<:BioSequence}
s = S()
pnode = 0
for n in nodes(p)
Expand Down Expand Up @@ -1035,7 +1036,7 @@ function sequence(p::SequenceGraphPath{SequenceDistanceGraph{S}}) where {S<:BioS
return s
end

function join_path!(p::SequenceGraphPath, consume::Bool)
function join_path!(p::SequenceDistanceGraphPath, consume::Bool)
pnodes = Set{NodeID}()
for n in nodes(p)
push!(pnodes, n)
Expand Down
Loading

0 comments on commit fff259b

Please sign in to comment.