Skip to content

Commit

Permalink
add minimap2 and samtools to external (#46)
Browse files Browse the repository at this point in the history
* init minimap2

* add test-lib

* Samtools (#47)

* add samtools (without documentation) to minimap2 branch.

* add better docs to samtools
  • Loading branch information
Koeng101 committed Jan 2, 2024
1 parent 2203bad commit 0b78ac4
Show file tree
Hide file tree
Showing 15 changed files with 775 additions and 2 deletions.
35 changes: 34 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
pull_request:
name: Test
jobs:
test:
test-lib:
strategy:
matrix:
go-version: [1.21.x,]
Expand All @@ -22,6 +22,39 @@ jobs:
uses: actions/checkout@v2
- name: Test
run: go test -v ./lib/...
test-external:
strategy:
matrix:
go-version: [1.21.x,]
platform: [ubuntu-latest]
runs-on: ${{ matrix.platform }}
steps:
- name: Install Go
uses: actions/setup-go@v2
with:
go-version: ${{ matrix.go-version }}
- name: Checkout code
uses: actions/checkout@v2
- name: Download minimap2
run: |
curl -L https://github.com/lh3/minimap2/releases/download/v2.26/minimap2-2.26_x64-linux.tar.bz2 | tar -jxvf -
mkdir -p $HOME/bin
cp ./minimap2-2.26_x64-linux/minimap2 $HOME/bin
echo "$HOME/bin" >> $GITHUB_PATH
- name: Install dependencies for samtools
run: |
sudo apt-get update
sudo apt-get install -y libncurses5-dev libbz2-dev liblzma-dev zlib1g-dev
- name: Download and install samtools
run: |
curl -L https://github.com/samtools/samtools/releases/download/1.13/samtools-1.13.tar.bz2 | tar -jxvf -
cd samtools-1.13
./configure --prefix=$HOME/samtools
make
make install
echo "$HOME/samtools/bin" >> $GITHUB_PATH
- name: Test external
run: go test -v ./external/...
openbsd-test:
runs-on: ubuntu-latest
name: 1.21.5 openbsd
Expand Down
11 changes: 11 additions & 0 deletions external/external.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
Package external contains functions that interact with outside programs.
The primary way that external interacts with common bioinformatics tools is
through the command line, so the bioinformatics programs must be installed on
your local computer.
We would like to port these programs to be in `lib`, using WASM for reliable
builds, but until then, they live here.
*/
package external
5 changes: 5 additions & 0 deletions external/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
module github.com/koeng101/dnadesign/external

go 1.21.5

require golang.org/x/sync v0.5.0
2 changes: 2 additions & 0 deletions external/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
76 changes: 76 additions & 0 deletions external/minimap2/data/reads.fastq

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions external/minimap2/data/templates.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>oligo1
CCGTGCGACAAGATTTCAAGGGTCTCTGTCTCAATGACCAAACCAACGCAAGTCTTAGTTCGTTCAGTCTCTATTTTATTCTTCATCACACTGTTGCACTTGGTTGTTGCAATGAGATTTCCTAGTATTTTCACTGCTGTGCTGAGACCCGGATCGAACTTAGGTAGCC
>oligo2
CCGTGCGACAAGATTTCAAGGGTCTCTGTGCTATTTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCC
>oligo3
CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCC
31 changes: 31 additions & 0 deletions external/minimap2/example_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package minimap2_test

import (
"bytes"
"fmt"
"os"
"strings"

"github.com/koeng101/dnadesign/external/minimap2"
)

func ExampleMinimap2() {
// Get template io.Reader
templateFile, _ := os.Open("./data/templates.fasta")
defer templateFile.Close()

// Get fastq reads io.Reader
fastqFile, _ := os.Open("./data/reads.fastq")
defer fastqFile.Close()

// Create output buffer
var buf bytes.Buffer

// Execute the Minimap2Raw function
_ = minimap2.Minimap2(templateFile, fastqFile, &buf)
output := buf.String()
line2 := strings.Split(output, "\n")[2]

fmt.Println(line2)
// Output: @SQ SN:oligo2 LN:158
}
76 changes: 76 additions & 0 deletions external/minimap2/minimap2.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
Package minimap2 contains functions for working with minimap2.
minimap2 is a DNA alignment package written by Heng Li for aligning nanopore
reads as the spirtual successor to bwa-mem, which is a widely used alignment
algorithm for illumina sequencing reads.
minimap2 takes in fasta reference genomes and aligns them with fastq reads,
outputting a sam alignment file. In this package, all io is handled with
standard library io.Reader and io.Writer, both of which can be used with
dnadesign `bio` parsers. Data should be piped in using data `WriteTo`
functions, and can be read using a sam parser.
We use `os.Exec` instead of cgo in order to make the package simpler, and
also because the overhead of launching is minimal in comparison to how much
data is expected to run through minimap2.
For more information on minimap2, please visit Heng Li's git: https://github.com/lh3/minimap2
*/
package minimap2

import (
"io"
"os"
"os/exec"
)

// Minimap2 aligns sequences using minimap2 over the command line. Right
// now, only nanopore (map-ont) is supported. If you need others enabled,
// please put in an issue.
//
// Rarely Minimap2 will stall while reading in fastqInput. See examples of
// how to get around this problem.
func Minimap2(templateFastaInput io.Reader, fastqInput io.Reader, w io.Writer) error {
/*
Generally, this is how the function works:
1. Create a temporary file for templates. Templates are rather small,
and environments will probably have a filesystem, and minimap2
sometimes randomly fails if you don't it as a file on the system.
2. Start minimap2, capturing both stdout and stdin.
3. Write fastqInput to stdin of minimap2.
4. Copy stdout of minimap2 to w.
5. Complete.
*/

// Create temporary file for templates.
/*
This took me a while to figure out. For whatver reason, named pipes
don't work: they occasionally stall out minimap2 (like 1/10 the time).
Writing a temporary file always works, for whatever reason. Stdin
for sequencing files seems to work rather well.
*/

// Create a temporary file for the template fasta
tmpFile, err := os.CreateTemp("", "template_*.fasta")
if err != nil {
return err
}
defer os.Remove(tmpFile.Name()) // Clean up file afterwards

// Write template fasta data to the temporary file
if _, err := io.Copy(tmpFile, templateFastaInput); err != nil {
return err
}
tmpFile.Close() // Close the file as it's no longer needed

// Start minimap2 pointing to the temporary file and stdin for sequencing data
cmd := exec.Command("minimap2", "-ax", "map-ont", tmpFile.Name(), "-")
cmd.Stdout = w
cmd.Stdin = fastqInput
if err := cmd.Start(); err != nil {
return err
}

return cmd.Wait()
}
55 changes: 55 additions & 0 deletions external/minimap2/minimap2_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package minimap2_test

import (
"bytes"
"os"
"strings"
"testing"

"github.com/koeng101/dnadesign/external/minimap2"
)

// TestMinimap2 tests the Minimap2Raw function
func TestMinimap2(t *testing.T) {
// Open the template FASTA file
templateFile, err := os.Open("./data/templates.fasta")
if err != nil {
t.Fatalf("Failed to open template FASTA file: %v", err)
}
defer templateFile.Close()

// Open the FASTQ file
fastqFile, err := os.Open("./data/reads.fastq")
if err != nil {
t.Fatalf("Failed to open FASTQ file: %v", err)
}
defer fastqFile.Close()

// Prepare the writer to capture the output
var buf bytes.Buffer

// Execute the Minimap2 function
err = minimap2.Minimap2(templateFile, fastqFile, &buf)
if err != nil {
t.Errorf("Minimap2Raw returned an error: %v", err)
}

expectedHeader := `@HD VN:1.6 SO:unsorted GO:query
@SQ SN:oligo1 LN:169
@SQ SN:oligo2 LN:158
@SQ SN:oligo3 LN:102`

// Extract the relevant part of the output for comparison
output := buf.String()
headerLines := strings.SplitN(output, "\n", 5) // Assuming header is first 4 lines
if len(headerLines) < 4 {
t.Fatalf("Output header is too short, got: %v", headerLines)
}
outputHeader := strings.Join(headerLines[:4], "\n")

// Perform comparison
if outputHeader != expectedHeader {
t.Errorf("Output header does not match expected header. Got %s, want %s", outputHeader, expectedHeader)
}

}
Loading

0 comments on commit 0b78ac4

Please sign in to comment.