-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add minimap2 and samtools to external (#46)
* init minimap2 * add test-lib * Samtools (#47) * add samtools (without documentation) to minimap2 branch. * add better docs to samtools
- Loading branch information
Showing
15 changed files
with
775 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
/* | ||
Package external contains functions that interact with outside programs. | ||
The primary way that external interacts with common bioinformatics tools is | ||
through the command line, so the bioinformatics programs must be installed on | ||
your local computer. | ||
We would like to port these programs to be in `lib`, using WASM for reliable | ||
builds, but until then, they live here. | ||
*/ | ||
package external |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
module github.com/koeng101/dnadesign/external | ||
|
||
go 1.21.5 | ||
|
||
require golang.org/x/sync v0.5.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= | ||
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
>oligo1 | ||
CCGTGCGACAAGATTTCAAGGGTCTCTGTCTCAATGACCAAACCAACGCAAGTCTTAGTTCGTTCAGTCTCTATTTTATTCTTCATCACACTGTTGCACTTGGTTGTTGCAATGAGATTTCCTAGTATTTTCACTGCTGTGCTGAGACCCGGATCGAACTTAGGTAGCC | ||
>oligo2 | ||
CCGTGCGACAAGATTTCAAGGGTCTCTGTGCTATTTGCCGCTAGTTCCGCTCTAGCTGCTCCAGTTAATACTACTACTGAAGATGAATTGGAGGGTGACTTCGATGTTGCTGTTCTGCCTTTTTCCGCTTCTGAGACCCGGATCGAACTTAGGTAGCC | ||
>oligo3 | ||
CCGTGCGACAAGATTTCAAGGGTCTCTCTTCTATCGCAGCCAAGGAAGAAGGTGTATCTCTAGAGAAGCGTCGAGTGAGACCCGGATCGAACTTAGGTAGCC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package minimap2_test | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"os" | ||
"strings" | ||
|
||
"github.com/koeng101/dnadesign/external/minimap2" | ||
) | ||
|
||
func ExampleMinimap2() { | ||
// Get template io.Reader | ||
templateFile, _ := os.Open("./data/templates.fasta") | ||
defer templateFile.Close() | ||
|
||
// Get fastq reads io.Reader | ||
fastqFile, _ := os.Open("./data/reads.fastq") | ||
defer fastqFile.Close() | ||
|
||
// Create output buffer | ||
var buf bytes.Buffer | ||
|
||
// Execute the Minimap2Raw function | ||
_ = minimap2.Minimap2(templateFile, fastqFile, &buf) | ||
output := buf.String() | ||
line2 := strings.Split(output, "\n")[2] | ||
|
||
fmt.Println(line2) | ||
// Output: @SQ SN:oligo2 LN:158 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/* | ||
Package minimap2 contains functions for working with minimap2. | ||
minimap2 is a DNA alignment package written by Heng Li for aligning nanopore | ||
reads as the spirtual successor to bwa-mem, which is a widely used alignment | ||
algorithm for illumina sequencing reads. | ||
minimap2 takes in fasta reference genomes and aligns them with fastq reads, | ||
outputting a sam alignment file. In this package, all io is handled with | ||
standard library io.Reader and io.Writer, both of which can be used with | ||
dnadesign `bio` parsers. Data should be piped in using data `WriteTo` | ||
functions, and can be read using a sam parser. | ||
We use `os.Exec` instead of cgo in order to make the package simpler, and | ||
also because the overhead of launching is minimal in comparison to how much | ||
data is expected to run through minimap2. | ||
For more information on minimap2, please visit Heng Li's git: https://github.com/lh3/minimap2 | ||
*/ | ||
package minimap2 | ||
|
||
import ( | ||
"io" | ||
"os" | ||
"os/exec" | ||
) | ||
|
||
// Minimap2 aligns sequences using minimap2 over the command line. Right | ||
// now, only nanopore (map-ont) is supported. If you need others enabled, | ||
// please put in an issue. | ||
// | ||
// Rarely Minimap2 will stall while reading in fastqInput. See examples of | ||
// how to get around this problem. | ||
func Minimap2(templateFastaInput io.Reader, fastqInput io.Reader, w io.Writer) error { | ||
/* | ||
Generally, this is how the function works: | ||
1. Create a temporary file for templates. Templates are rather small, | ||
and environments will probably have a filesystem, and minimap2 | ||
sometimes randomly fails if you don't it as a file on the system. | ||
2. Start minimap2, capturing both stdout and stdin. | ||
3. Write fastqInput to stdin of minimap2. | ||
4. Copy stdout of minimap2 to w. | ||
5. Complete. | ||
*/ | ||
|
||
// Create temporary file for templates. | ||
/* | ||
This took me a while to figure out. For whatver reason, named pipes | ||
don't work: they occasionally stall out minimap2 (like 1/10 the time). | ||
Writing a temporary file always works, for whatever reason. Stdin | ||
for sequencing files seems to work rather well. | ||
*/ | ||
|
||
// Create a temporary file for the template fasta | ||
tmpFile, err := os.CreateTemp("", "template_*.fasta") | ||
if err != nil { | ||
return err | ||
} | ||
defer os.Remove(tmpFile.Name()) // Clean up file afterwards | ||
|
||
// Write template fasta data to the temporary file | ||
if _, err := io.Copy(tmpFile, templateFastaInput); err != nil { | ||
return err | ||
} | ||
tmpFile.Close() // Close the file as it's no longer needed | ||
|
||
// Start minimap2 pointing to the temporary file and stdin for sequencing data | ||
cmd := exec.Command("minimap2", "-ax", "map-ont", tmpFile.Name(), "-") | ||
cmd.Stdout = w | ||
cmd.Stdin = fastqInput | ||
if err := cmd.Start(); err != nil { | ||
return err | ||
} | ||
|
||
return cmd.Wait() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package minimap2_test | ||
|
||
import ( | ||
"bytes" | ||
"os" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/koeng101/dnadesign/external/minimap2" | ||
) | ||
|
||
// TestMinimap2 tests the Minimap2Raw function | ||
func TestMinimap2(t *testing.T) { | ||
// Open the template FASTA file | ||
templateFile, err := os.Open("./data/templates.fasta") | ||
if err != nil { | ||
t.Fatalf("Failed to open template FASTA file: %v", err) | ||
} | ||
defer templateFile.Close() | ||
|
||
// Open the FASTQ file | ||
fastqFile, err := os.Open("./data/reads.fastq") | ||
if err != nil { | ||
t.Fatalf("Failed to open FASTQ file: %v", err) | ||
} | ||
defer fastqFile.Close() | ||
|
||
// Prepare the writer to capture the output | ||
var buf bytes.Buffer | ||
|
||
// Execute the Minimap2 function | ||
err = minimap2.Minimap2(templateFile, fastqFile, &buf) | ||
if err != nil { | ||
t.Errorf("Minimap2Raw returned an error: %v", err) | ||
} | ||
|
||
expectedHeader := `@HD VN:1.6 SO:unsorted GO:query | ||
@SQ SN:oligo1 LN:169 | ||
@SQ SN:oligo2 LN:158 | ||
@SQ SN:oligo3 LN:102` | ||
|
||
// Extract the relevant part of the output for comparison | ||
output := buf.String() | ||
headerLines := strings.SplitN(output, "\n", 5) // Assuming header is first 4 lines | ||
if len(headerLines) < 4 { | ||
t.Fatalf("Output header is too short, got: %v", headerLines) | ||
} | ||
outputHeader := strings.Join(headerLines[:4], "\n") | ||
|
||
// Perform comparison | ||
if outputHeader != expectedHeader { | ||
t.Errorf("Output header does not match expected header. Got %s, want %s", outputHeader, expectedHeader) | ||
} | ||
|
||
} |
Oops, something went wrong.